嗨,我想使用Kafka Streams合并3个事件流。
我找不到能解决并发问题的简单方法:
// NOTE(review): original snippet had syntax errors (missing ')' after each
// getKey(v), missing ';' after each 'return new Merge(...)') and inconsistent
// generic types; both are fixed below. The [...] placeholders stand for the
// Consumed/Joined configuration elided by the author.

// KTable holding the latest merge result, keyed by the shared id.
// Assumes the merged topic carries Merge values (it is written with aggSerdes
// below) — TODO confirm the table's serdes match.
KTable<String, Merge> mergedTable = builder.table(
        getMergedValuesTopic(),
        [...]);

// records A: source stream, then rekeyed by the shared id taken from the value
KStream<String, RecordA> recordsAStream = builder.stream(
        getRecordATopic(),
        [...]);
KStream<String, RecordA> recordsABySomeId = recordsAStream
        .selectKey((k, v) -> getKey(v));

// records B: source stream, then rekeyed by the shared id taken from the value
KStream<String, RecordB> recordsBStream = builder.stream(
        getRecordBTopic(),
        [...]);
KStream<String, RecordB> recordsBBySomeId = recordsBStream
        .selectKey((k, v) -> getKey(v));

// records C: source stream, then rekeyed by the shared id taken from the value
KStream<String, RecordC> recordsCStream = builder.stream(
        getRecordCTopic(),
        [...]);
KStream<String, RecordC> recordsCBySomeId = recordsCStream
        .selectKey((k, v) -> getKey(v));

// when a RecordA arrives: fold it into the previous merge result (if any)
KStream<String, Merge> aggFromA = recordsABySomeId
        .filter((k, v) -> v != null)
        .leftJoin(mergedTable, (recA, oldMerge) -> {
            if (oldMerge != null) {
                return new Merge(recA, oldMerge.B, oldMerge.C);
            }
            return new Merge(recA, null, null);
        },
        [...]);

// when a RecordB arrives: fold it into the previous merge result (if any)
KStream<String, Merge> aggFromB = recordsBBySomeId
        .filter((k, v) -> v != null)
        .leftJoin(mergedTable, (recB, oldMerge) -> {
            if (oldMerge != null) {
                return new Merge(oldMerge.A, recB, oldMerge.C);
            }
            return new Merge(null, recB, null);
        },
        [...]);

// when a RecordC arrives: fold it into the previous merge result (if any)
KStream<String, Merge> aggFromC = recordsCBySomeId
        .filter((k, v) -> v != null)
        .leftJoin(mergedTable, (recC, oldMerge) -> {
            if (oldMerge != null) {
                return new Merge(oldMerge.A, oldMerge.B, recC);
            }
            return new Merge(null, null, recC);
        },
        [...]);

// save aggregation by writing the merged result back to the lookup topic.
// NOTE(review): this read-modify-write cycle through a topic is inherently
// racy — the KTable lookup can be stale when two records for the same key
// arrive close together, which is exactly the problem described below.
aggFromA.merge(aggFromB).merge(aggFromC)
        .to(getMergedValuesTopic(), Produced.with(Serdes.String(), aggSerdes));
return builder.build();
实际上,此片段无法正常工作:由于查找发生在连接时刻,基于 getMergedValuesTopic 的 KTable 并不总能反映合并的最新状态:当两个不同的记录几乎同时到达时,一个更新可能会覆盖另一个(因为查找到的结果已经过时)。
有人使用Kafka流有解决此问题的简单方法吗?
答案 0(得分:2)
我认为简单的聚合应该可以解决问题。聚合执行您描述的操作:“将KTable作为查找和目标”。
对于每条到达的记录,聚合会在其状态存储中查找对应键的聚合值;如果找不到,则使用聚合中定义的 Initializer 生成新的初始聚合记录(参见 Kafka Streams 的 aggregate 文档)。
下面是带说明的示例代码:
public class KTableMerge {

    /**
     * Builds a topology that merges three record streams into a single
     * {@code Merge} aggregate keyed by the records' shared foreign key.
     *
     * <p>Unlike a stream/KTable join against the output topic, the
     * {@code aggregate()} state store performs the read-modify-write for each
     * key atomically within a single task, so two records arriving close
     * together cannot overwrite each other via a stale lookup.
     *
     * @return the built {@link Topology}
     */
    protected Topology buildTopology() {
        final StreamsBuilder builder = new StreamsBuilder();

        // Source streams (serdes come from the application's default config)
        KStream<String, RecordA> recordAKStream = builder.stream("test-record-a");
        KStream<String, RecordB> recordBKStream = builder.stream("test-record-b");
        KStream<String, RecordC> recordCKStream = builder.stream("test-record-c");

        // Re-key each stream by foreign key and widen to the common Record
        // interface so the three streams can be merged into one.
        KStream<String, Record> mergedStream =
                rekeyByForeignKey(recordAKStream)
                        .merge(rekeyByForeignKey(recordBKStream))
                        .merge(rekeyByForeignKey(recordCKStream));

        // Aggregate: place each incoming record into its slot of the Merge
        // container; records of an unknown subtype are ignored.
        mergedStream
                .groupByKey()
                .aggregate(
                        Merge::new,
                        (key, value, aggregate) -> {
                            if (value instanceof RecordA) {
                                aggregate.recordA = (RecordA) value;
                            } else if (value instanceof RecordB) {
                                aggregate.recordB = (RecordB) value;
                            } else if (value instanceof RecordC) {
                                aggregate.recordC = (RecordC) value;
                            }
                            return aggregate;
                        })
                .toStream()
                .to("merge-topic");
        return builder.build();
    }

    /**
     * Re-keys a stream by the value's foreign key and up-casts the value to
     * {@link Record}. Extracted to avoid repeating the same
     * selectKey/mapValues chain for each of the three source streams.
     */
    private static <V extends Record> KStream<String, Record> rekeyByForeignKey(
            KStream<String, V> stream) {
        return stream
                .selectKey((key, value) -> value.getForeignKey())
                .mapValues(value -> (Record) value);
    }

    /** Mutable aggregation container holding the latest record of each type. */
    private static class Merge {
        RecordA recordA;
        RecordB recordB;
        RecordC recordC;
    }

    /** Common view of the three record types: each exposes the join key. */
    private interface Record {
        String getForeignKey();
    }

    private static class RecordA implements Record {
        String id;
        String foreignKey;

        public String getForeignKey() {
            return foreignKey;
        }
    }

    private static class RecordB implements Record {
        String id;
        String foreignKey;

        public String getForeignKey() {
            return foreignKey;
        }
    }

    private static class RecordC implements Record {
        String id;
        String foreignKey;

        public String getForeignKey() {
            return foreignKey;
        }
    }
}
希望这会有所帮助