这是我的第一个GAE项目。我的串行版本代码已经可以在dev_app上正常运行(我在Mac上使用的是GoogleAppEngineLauncher)。由于我的代码需要很长时间才能跑完,因此我尝试使用mapreduce来加快处理速度。我尝试了以下代码,但始终收到下面的错误。我不确定这是由于我的代码中有错误,还是因为我在*.yaml文件中遗漏了某些配置语句。请帮忙!
class ShuffleDictPipeline(base_handler.PipelineBase):
    """Pipeline that launches the ``calc_shuff_core_microb`` MapReduce job.

    The job reads ``coremic.RandomDict`` entities through the datastore input
    reader, runs the ``coremic`` map and reduce functions over them, and then
    passes the MapReduce output on to ``StoreOutput``.
    """

    def run(self, *args, **kwargs):
        """Yield the MapReduce child pipeline, then the storing pipeline.

        Args:
            *args: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            The ``MapreducePipeline`` child pipeline, followed by a
            ``StoreOutput`` pipeline that receives the MapReduce output.
        """
        # NOTE(review): ``ndb_custom_key`` is not defined inside this method;
        # presumably it is a module-level name -- confirm it is in scope.
        mapper_params = {
            "entity_kind": "coremic.RandomDict",
            "batch_size": 500,
            "filters": [("idx", "=", ndb_custom_key)]
        }
        reducer_params = {
            "mime_type": "text/plain"
        }
        # The value sent back into the generator is the child pipeline's
        # output, which is forwarded to StoreOutput below.
        output = yield mapreduce_pipeline.MapreducePipeline(
            "calc_shuff_core_microb",
            mapper_spec="coremic.shuffle_dict_coremic_map",
            mapper_params=mapper_params,
            reducer_spec="coremic.shuffle_dict_coremic_reduce",
            reducer_params=reducer_params,
            input_reader_spec="mapreduce.input_readers.DatastoreInputReader",
            output_writer_spec="mapreduce.output_writers.BlobstoreOutputWriter",
            shards=16)
        yield StoreOutput(output)
错误:
ERROR 2016-03-05 20:03:21,706 pipeline.py:2432]
Generator mapreduce.mapper_pipeline.MapperPipeline(*(u'calc_shuff_core_microb-map', u'coremic.shuffle_dict_coremic_map', u'mapreduce.input_readers.DatastoreInputReader'), **{'output_writer_spec': u'mapreduce.output_writers._GoogleCloudStorageKeyValueOutputWriter', 'params': {u'batch_size': 500, u'bucket_name': u'app_default_bucket', u'entity_kind': u'coremic.RandomDict',... (324 bytes))#b96dd511c0454fd99413d267b7388857 raised exception. AttributeError: 'NoneType' object has no attribute 'validate_bucket_name'
Traceback (most recent call last):
File "/Users/rr/GAE/coremic/pipeline/pipeline.py", line 2156, in evaluate
self, pipeline_key, root_pipeline_key, caller_output)
File "/Users/rr/GAE/coremic/pipeline/pipeline.py", line 1110, in _run_internal
return self.run(*self.args, **self.kwargs)
File "/Users/rr/GAE/coremic/mapreduce/mapper_pipeline.py", line 102, in run
queue_name=self.queue_name,
File "/Users/rr/GAE/coremic/mapreduce/control.py", line 125, in start_map
in_xg_transaction=in_xg_transaction)
File "/Users/rr/GAE/coremic/mapreduce/handlers.py", line 1730, in _start_map
mapper_output_writer_class.validate(mapper_spec)
File "/Users/rr/GAE/coremic/mapreduce/output_writers.py", line 1075, in validate
return cls.WRITER_CLS.validate(mapper_spec)
File "/Users/rr/GAE/coremic/mapreduce/output_writers.py", line 723, in validate
super(_GoogleCloudStorageOutputWriter, cls).validate(mapper_spec)
File "/Users/rr/GAE/coremic/mapreduce/output_writers.py", line 604, in validate
cloudstorage.validate_bucket_name(
AttributeError: 'NoneType' object has no attribute 'validate_bucket_name'
最佳答案
我仍在努力使所有功能正常工作,但有几件事有所帮助。
1.1 在SDK上安装Google Cloud Storage客户端库(client lib)以访问存储桶。参见 https://cloud.google.com/appengine/docs/python/googlecloudstorageclient
1.2设置(创建)存储桶。
然后按照https://plus.google.com/+EmlynORegan/posts/6NPaRKxMkf3中的步骤进行操作
注意映射器参数如何更改。
2-在mapreduce管道中,将
"mapreduce.output_writers.BlobstoreOutputWriter"
替换为
"mapreduce.output_writers.GoogleCloudStorageConsistentOutputWriter"
3-将reducer参数更新为:
{
    "mime_type": "text/plain",
    "output_writer": {
        "bucket_name": <你的存储桶名称>,
        "tmp_bucket_name": <你的临时存储桶名称>
    }
}
其他非常有用的链接:
https://gist.github.com/nlathia/ab670053ed460c4ca02f/89178e132b894fe5467c09164d3827f70e4ae2f8