一对多的 KStream-KStream 连接(join)

时间:2017-10-25 14:22:35

标签: java kafka-consumer-api apache-kafka-streams

如何在两个 Kafka KStream 之间执行一对多的连接(join)?下面给出的代码以一对一的方式连接两个 Kafka KStream。有人可以指导如何在 KStream 之间执行一对多的连接吗?从该主题接收的数据类型为 <String, JsonNode>。写入输出主题的数据形式如下: {"from order":"test1","from orderitem":"test2"} {"from order":"test1","from orderitem":"test3"}

是否有可能以如下格式获取数据: {"from order":"test1",{"from orderitem":"test2"},{"from orderitem":"test3"}}

public class ConsumerThreadPool {

private static final String TOPIC = "jre1";
private static final String NEXTTOPIC ="Kafka";
private static final String FINALTOPIC="jvm1";
private static final Integer NUM_THREADS = 1;
final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();

final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);
final Serde<String> stringSerde = Serdes.String();


int threadNumber = 0;
@Autowired
private ConsumerConfigFactory consumerConfigFactory;

@SuppressWarnings("unused")
private ConsumerConnector consumer;
private ExecutorService threadPool;

public ConsumerThreadPool() {
    threadPool = Executors.newFixedThreadPool(NUM_THREADS);
}

@PostConstruct
public void startConsuming() {
    ConsumerConfig consumerConfig = consumerConfigFactory.getConsumerConfig();
    consumer = createJavaConsumerConnector(consumerConfig);
    KStreamBuilder builder = new KStreamBuilder();
    /* KTable<String,JsonNode> message = builder.table(stringSerde,jsonSerde,TOPIC);


    KTable<String,JsonNode> orderstream = message

            .filter((k,v)-> v.path("table").asText().equals("TEST.S_ORDER")
                    );              
    KTable<String,JsonNode> orderlist=message.filter((k,v)-> v.path("table").asText().equals("TEST.S_ORDER_ITEM"));
    orderstream.to(stringSerde,jsonSerde,FINALTOPIC);      
    orderlist.to(stringSerde,jsonSerde,FINALTOPIC);    */ 
    KStream<String,JsonNode>streams=builder.stream(TOPIC);

    KStream<String,JsonNode> orderstream=streams.filter((k,v)-> v.path("table").asText().equals("TEST.S_ORDER"))
            .map((k,v)->KeyValue.pair(v.path("after").path("ROW_ID").asText(),v));




    KStream<String, JsonNode> orderlist=streams.filter((k,v)-> v.path("table").asText().equals("TEST.S_ORDER_ITEM"))
            .map((k,v)->KeyValue.pair(v.path("after").path("ORDER_ID").asText(),v));





    KStream<String,JsonNode> nextstream =orderstream.join(orderlist,(new ValueJoiner<JsonNode,JsonNode,JsonNode>(){
        @Override
        public JsonNode apply(JsonNode first,JsonNode second){
            ObjectNode jNode = JsonNodeFactory.instance.objectNode();
            return jNode.put("from order",first.get("op_type").textValue())
                    .put("from orderitem",second.get("op_type").textValue() );
        }
    }),JoinWindows.of(TimeUnit.SECONDS.toMillis(30)),stringSerde,jsonSerde,jsonSerde);

    nextstream.to(stringSerde,jsonSerde,FINALTOPIC);  
    KafkaStreams stream=new KafkaStreams(builder, consumerConfigFactory.getConsumeConfig());
    stream.start();
    consume();
    stream.close();
}

public void consume() {



    @SuppressWarnings("resource")
    KafkaConsumer<String,String> consumer = new KafkaConsumer<>(consumerConfigFactory.createConsume());
    consumer.subscribe(Arrays.asList(FINALTOPIC));

    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        if(!records.isEmpty()){
            System.out.println("ConsumerRecords object created: "+records);
            threadPool.submit(new MessageConsumer(records, threadNumber));
            threadNumber++;
        }

    }

}    

}

1 个答案:

答案 0 :(得分:2)

正如您已经注意到的,KStream-KStream 连接本身已经是一对多的连接。您似乎是希望把同一个键的所有连接结果聚合成一条记录。

您可以应用.groupByKey().aggregate()来执行此操作。聚合函数使用空JSON初始化,每次新的连接结果到达时,都会将新记录添加到JSON中。