为什么在 Apache Beam 管道的主文件中定义的类不能在管道中使用,而在单独的文件中定义并导入后却可以使用?
我的代码是:
import apache_beam as beam
from apache_beam.io import ReadFromText
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class MyStructure(object):
    """Minimal value holder with a single string attribute ``a``."""

    def __init__(self):
        # Starts empty; Process_Class.process overwrites it per element.
        self.a = ''
class Process_Class(beam.DoFn):
    """DoFn that stores each input element, as a string, in a MyStructure."""

    def __init__(self):
        # One MyStructure is created when the DoFn itself is constructed and
        # is kept as instance state for every later call to process().
        # NOTE(review): the author reports that the PicklingError goes away
        # when this instantiation is moved into process() -- presumably
        # because the instance is then no longer part of the DoFn's state
        # when the DoFn is serialized; confirm against the Beam docs.
        self.structure = MyStructure()

    def process(self, elem):
        """Write ``str(elem)`` into the shared structure and emit it.

        Args:
            elem: the input element; only its ``str()`` form is used.

        Yields:
            ``self.structure`` -- the same object on every call, with its
            ``a`` attribute overwritten by the current element.
        """
        self.structure.a = str(elem)
        yield self.structure
class Other_Process_Class(beam.DoFn):
    """DoFn that unwraps an element by emitting its ``a`` attribute."""

    def process(self, elem):
        """Yield ``elem.a``.

        Args:
            elem: any object exposing an ``a`` attribute; in this pipeline it
                is the MyStructure emitted by Process_Class.
        """
        yield elem.a
# Build the pipeline options, selecting the DirectRunner.
pipeline_options = PipelineOptions(runner='DirectRunner')
# Turn on the save_main_session setup option.
# NOTE(review): per the Beam docs this pickles the state of __main__ so that
# module-level names are available to the pipeline's functions -- confirm the
# exact semantics for the Beam version in use.
pipeline_options.view_as(SetupOptions).save_main_session = True
p = beam.Pipeline(options=pipeline_options)
# Read input.txt, pass each element through Process_Class and then
# Other_Process_Class, and write the results to output.txt.
p | ReadFromText('input.txt') | beam.ParDo(Process_Class()) | beam.ParDo(Other_Process_Class()) | WriteToText('output.txt')
# Run the pipeline and block until it has finished.
p.run().wait_until_finish()
这给了我错误:
cPickle.PicklingError: Can't pickle <class '__main__.MyStructure'>: it's not the same object as __main__.MyStructure [while running 'ParDo(Process_Class)']
但是,如果在单独的文件中定义 MyStructure 并将其导入,就不会发生此错误。
如果 MyStructure 的实例化是在 Process_Class 类的 process 方法中完成,而不是在 __init__ 方法中完成,也不会出现此错误。
完整的堆栈跟踪:
Traceback (most recent call last):
File "pickle_test.py", line 28, in <module>
p.run().wait_until_finish()
File "/usr/local/lib/python2.7/site-packages/apache_beam/pipeline.py", line 405, in run
self._options).run(False)
File "/usr/local/lib/python2.7/site-packages/apache_beam/pipeline.py", line 418, in run
return self.runner.run_pipeline(self)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/direct/direct_runner.py", line 139, in run_pipeline
return runner.run_pipeline(pipeline)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 249, in run_pipeline
return self.run_via_runner_api(pipeline.to_runner_api())
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 252, in run_via_runner_api
return self.run_stages(*self.create_stages(pipeline_proto))
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1061, in run_stages
pcoll_buffers, safe_coders)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1190, in run_stage
self._progress_frequency).process_bundle(data_input, data_output)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1500, in process_bundle
result_future = self._controller.control_handler.push(process_bundle)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1371, in push
response = self.worker.do_instruction(request)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 221, in do_instruction
request.instruction_id)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 237, in process_bundle
bundle_processor.process_bundle(instruction_id)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/worker/bundle_processor.py", line 436, in process_bundle
].process_encoded(data.data)
File "/usr/local/lib/python2.7/site-packages/apache_beam/runners/worker/bundle_processor.py", line 125, in process_encoded
self.output(decoded_value)
File "apache_beam/runners/worker/operations.py", line 182, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 183, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 89, in apache_beam.runners.worker.operations.ConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 333, in apache_beam.runners.worker.operations.ImpulseReadOperation.process
File "apache_beam/runners/worker/operations.py", line 340, in apache_beam.runners.worker.operations.ImpulseReadOperation.process
File "apache_beam/runners/worker/operations.py", line 183, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 89, in apache_beam.runners.worker.operations.ConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 497, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 498, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 680, in apache_beam.runners.common.DoFnRunner.receive
File "apache_beam/runners/common.py", line 686, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 724, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 684, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 420, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 794, in apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 87, in apache_beam.runners.worker.operations.ConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 93, in apache_beam.runners.worker.operations.ConsumerSet.update_counters_start
File "apache_beam/runners/worker/opcounters.py", line 195, in apache_beam.runners.worker.opcounters.OperationCounters.update_from
File "apache_beam/runners/worker/opcounters.py", line 213, in apache_beam.runners.worker.opcounters.OperationCounters.do_sample
File "apache_beam/coders/coder_impl.py", line 874, in apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables
File "apache_beam/coders/coder_impl.py", line 883, in apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables
File "apache_beam/coders/coder_impl.py", line 294, in apache_beam.coders.coder_impl.FastPrimitivesCoderImpl.get_estimated_size_and_observables
File "apache_beam/coders/coder_impl.py", line 346, in apache_beam.coders.coder_impl.FastPrimitivesCoderImpl.encode_to_stream
File "apache_beam/coders/coder_impl.py", line 184, in apache_beam.coders.coder_impl.CallbackCoderImpl.encode_to_stream
File "/usr/local/lib/python2.7/site-packages/apache_beam/coders/coders.py", line 560, in <lambda>
lambda x: dumps(x, HIGHEST_PROTOCOL), pickle.loads)
cPickle.PicklingError: Can't pickle <class '__main__.MyStructure'>: it's not the same object as __main__.MyStructure [while running 'ParDo(Process_Class)']