我正在尝试分配自定义时间戳并检查允许的延迟(allowed lateness)是如何工作的。当我使用 InteractiveRunner 运行时,下面的代码工作正常,但是当我切换到 DataflowRunner 时它开始抛出错误。我注意到错误来自
Map(lambda x: window.TimestampedValue(x, x["timestamp"]))
两种情况下的输入数据相同,即 {'name': 'rou', 'score': 50,'timestamp':1618295060} 。在 Dataflow UI 中我只能看到发生了错误,但在任何地方都找不到错误详细信息。我已包含日志记录和异常处理,但不确定为什么错误没有被记录下来。
class BuildRecordFn(beam.DoFn):
    """DoFn that turns a (name, score) pair into a dict record.

    The record's ``timestamp`` field is the UTC end of the window the
    element belongs to (obtained via ``beam.DoFn.WindowParam``).
    """

    def __init__(self):
        super().__init__()

    def process(self, s, window=beam.DoFn.WindowParam):
        # Stamp the record with the closing edge of its window.
        end_of_window = window.end.to_utc_datetime()
        name, score = s[0], s[1]
        return [dict(name=name, score=score, timestamp=str(end_of_window))]
# Streaming pipeline: read JSON score events from Pub/Sub, attach each
# element's event-time from its payload, sum scores per name over fixed
# 60-second windows, and append the windowed totals to BigQuery.
scored_pairs = (
    words_source
    | "read" >> beam.io.ReadFromPubSub(
        topic="projects/{}/topics/beambasics".format(project))
    | "To Dict" >> beam.Map(json.loads)
    # NOTE(review): assumes every payload carries an integer epoch
    # "timestamp" key — confirm against the publisher.
    | "with timestamp" >> Map(lambda x: window.TimestampedValue(x, x["timestamp"]))
    | "Map" >> Map(lambda x: (x['name'], x['score']))
)
windowed_words = (
    scored_pairs
    | "window" >> beam.WindowInto(
        window.FixedWindows(60),
        allowed_lateness=Duration(seconds=1 * 50))
    | "Group" >> CombinePerKey(sum)
    | "convert to dict" >> ParDo(BuildRecordFn())
    | "Write To BigQuery" >> WriteToBigQuery(
        table=table,
        schema=schema,
        create_disposition=BigQueryDisposition.CREATE_IF_NEEDED,
        write_disposition=BigQueryDisposition.WRITE_APPEND)
)