Python:beam.io.Read 没有调用 estimate_size() 和 split() 方法。如何让 Apache Beam 管道调用这些方法?
我是按照文档中的 CountingSource 示例编写的,除此之外 Apache Beam 网站上没有其他示例。
class CountingSource(iobase.BoundedSource):
    """Bounded source that emits the consecutive integers ``0 .. count - 1``.

    The ``print`` calls are debugging traces that show which of the source
    methods actually get invoked when the pipeline runs.
    """

    def __init__(self, count):
        """Create a source producing ``count`` consecutive integers.

        Args:
            count: Number of records to emit; also used as the exclusive
                upper offset of the whole source.
        """
        self.records_read = Metrics.counter(self.__class__, 'recordsRead')
        self._count = count

    def estimate_size(self):
        """Return the size estimate of this source, i.e. the record count."""
        print('estimate_size')
        return self._count

    def get_range_tracker(self, start_position, stop_position):
        """Return an ``OffsetRangeTracker`` for the requested offset range.

        Args:
            start_position: Inclusive start offset, or ``None`` for 0.
            stop_position: Exclusive stop offset, or ``None`` for ``count``.
        """
        print('get_range_tracker')
        if start_position is None:
            start_position = 0
        if stop_position is None:
            stop_position = self._count
        # Trace both ends of the range (previously the start was printed twice).
        print(start_position, stop_position)
        return OffsetRangeTracker(start_position, stop_position)

    def read(self, range_tracker):
        """Yield every offset of ``range_tracker``'s range that can be claimed.

        Args:
            range_tracker: Tracker returned by ``get_range_tracker`` that
                delimits the offsets this call is responsible for.

        Yields:
            Each successfully claimed integer offset, in increasing order.
        """
        print('read')
        # Walk only the tracker's own range: a bundle produced by split()
        # must emit its own offsets, not restart from 0 for every bundle.
        for i in range(range_tracker.start_position(),
                       range_tracker.stop_position()):
            if not range_tracker.try_claim(i):
                # The remainder of the range no longer belongs to this reader.
                return
            self.records_read.inc()
            yield i

    def split(self, desired_bundle_size, start_position=None,
              stop_position=None):
        """Split ``[start_position, stop_position)`` into consecutive bundles.

        Args:
            desired_bundle_size: Target number of offsets per bundle.
            start_position: Inclusive start offset, or ``None`` for 0.
            stop_position: Exclusive stop offset, or ``None`` for ``count``.

        Yields:
            ``iobase.SourceBundle`` objects that together cover the range;
            each holds at most ``desired_bundle_size`` offsets.
        """
        print('split')
        if start_position is None:
            start_position = 0
        if stop_position is None:
            stop_position = self._count

        # A non-positive size would never advance ``bundle_start`` below.
        bundle_size = max(1, desired_bundle_size)

        bundle_start = start_position
        while bundle_start < stop_position:
            print(bundle_start)
            # ``min`` caps the last bundle at the end of the range; using
            # ``max`` here collapsed everything into a single bundle.
            bundle_stop = min(stop_position, bundle_start + bundle_size)
            yield iobase.SourceBundle(weight=(bundle_stop - bundle_start),
                                      source=self,
                                      start_position=bundle_start,
                                      stop_position=bundle_stop)
            bundle_start = bundle_stop
# Build a pipeline whose only step reads 100 records from CountingSource.
# NOTE(review): the pipeline is presumably executed when the ``with`` block
# exits (Beam's Pipeline context manager) - confirm against the Beam docs.
options = PipelineOptions()
with beam.Pipeline(options=options) as pipeline:
    read_rows = 'Read Rows' >> beam.io.Read(CountingSource(100))
    pipeline | read_rows
`