我尝试用以下代码进行测试,希望从Kafka记录中取出frequency字段值最小的那一条记录,但最终得到了两条记录,而不是只有frequency最小的那一条。
/**
 * Builds and runs a local Flink streaming job that reads records from the Kafka
 * topic named by the required "input-topic" parameter, keys them by the "word"
 * field, groups them into 10-second tumbling processing-time windows, and
 * reduces each keyed window with {@code MyReduceFunction} /
 * {@code MyWindowFunction}. The resulting stream is printed.
 *
 * @param args command-line arguments (not read directly in the visible code)
 * @throws Exception propagated from the Flink calls made in this method
 */
public static void main(String[] args) throws Exception {
// Local environment with parallelism 1.
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment().setParallelism(1);
env.getConfig().disableSysoutLogging();
// NOTE(review): arguments are presumably (restart attempts, delay in ms), i.e. 4 attempts 10 s apart — confirm against the Flink API.
env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
// NOTE(review): parameterTool is not declared in this method; presumably it is created from args elsewhere — confirm.
env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStream<Tuple2<String, Tuple1<Integer>>> input = env
.addSource(
new FlinkKafkaConsumer010<>(
parameterTool.getRequired("input-topic"),
new KafkaEventSchema(),
parameterTool.getProperties())
.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
// One window result is produced per distinct "word" key.
.keyBy("word")
// NOTE(review): the job enables event time and assigns watermarks above, but this
// assigner is processing-time based, so windows are presumably cut by arrival
// wall-clock time rather than by the extracted timestamps. Records of the same word
// that arrive in different 10-second windows would then yield separate outputs —
// possibly why more than one record is printed; confirm, and consider an
// event-time window assigner if event-time semantics are intended.
.window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
// MyReduceFunction keeps the lower-frequency event; MyWindowFunction converts it to the output tuple.
.reduce(new MyReduceFunction(), new MyWindowFunction());
input.print()
;
env.execute("Kafka 0.10 Example");
}
/**
 * Reduce step that keeps, out of two events, the one with the lower frequency.
 */
private static class MyReduceFunction implements ReduceFunction<KafkaEvent> {
    /**
     * Returns the event with the smaller frequency.
     *
     * @param r1 the first event
     * @param r2 the second event
     * @return {@code r2} when its frequency is strictly lower than that of
     *         {@code r1}; otherwise {@code r1} (ties keep {@code r1})
     */
    public KafkaEvent reduce(KafkaEvent r1, KafkaEvent r2) {
        if (r1.getFrequency() > r2.getFrequency()) {
            return r2;
        }
        return r1;
    }
}
/**
 * Window function that turns the first event of a window's input into a
 * {@code (word, (frequency))} tuple.
 */
private static class MyWindowFunction
        implements WindowFunction<KafkaEvent, Tuple2<String, Tuple1<Integer>>, Tuple, TimeWindow> {
    /**
     * Emits one tuple built from the first element of {@code minReadings}.
     *
     * @param key         the window key (unused)
     * @param window      the window being evaluated (unused)
     * @param minReadings the window's input; only its first element is read.
     *                    NOTE(review): when combined with a reduce function this is
     *                    presumably the single pre-aggregated event — confirm.
     * @param out         collector receiving the {@code (word, (frequency))} tuple
     */
    public void apply(Tuple key,
            TimeWindow window,
            Iterable<KafkaEvent> minReadings,
            Collector<Tuple2<String, Tuple1<Integer>>> out) {
        // Named "min" because the upstream reduce keeps the lower-frequency event.
        KafkaEvent min = minReadings.iterator().next();
        // Explicit type arguments instead of the raw Tuple1 to avoid an unchecked conversion.
        out.collect(new Tuple2<String, Tuple1<Integer>>(
                min.getWord(), new Tuple1<Integer>(min.getFrequency())));
    }
}
/**
 * Mutable data holder for one record: a word, its frequency, and a timestamp.
 *
 * <p>Provides a public no-argument constructor and a getter/setter pair for every
 * field, so that the accessors used by the reduce and window functions exist and
 * the fields can be addressed by name (e.g. {@code keyBy("word")}).
 */
public class KafkaEvent {
    private String word;
    private int frequency;
    private long timestamp;

    /** Creates an empty event; fields keep their default values. */
    public KafkaEvent() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param word      the word carried by the record
     * @param frequency the frequency associated with the word
     * @param timestamp the record timestamp (unit not fixed by this class)
     */
    public KafkaEvent(String word, int frequency, long timestamp) {
        this.word = word;
        this.frequency = frequency;
        this.timestamp = timestamp;
    }

    /** @return the word, or {@code null} if not set */
    public String getWord() {
        return word;
    }

    /** @param word the word to store */
    public void setWord(String word) {
        this.word = word;
    }

    /** @return the frequency */
    public int getFrequency() {
        return frequency;
    }

    /** @param frequency the frequency to store */
    public void setFrequency(int frequency) {
        this.frequency = frequency;
    }

    /** @return the timestamp */
    public long getTimestamp() {
        return timestamp;
    }

    /** @param timestamp the timestamp to store */
    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }
}
我得到了什么:
(?? t word1111,(1))
(?? t word1111,(11))
我的期望:
(?? t word1111,(1))
发送到Kafka的测试数据:
// Test input: five messages are sent, all carrying the same word.
// NOTE(review): the second argument is presumably the frequency (the values 1 and 11
// both appear in the printed output) — confirm against sendStringMessageWithByteArray.
String word = "word1111";
result = kafkaProducerClientUsage1.sendStringMessageWithByteArray(word, 9);
result = kafkaProducerClientUsage1.sendStringMessageWithByteArray(word, 5);
result = kafkaProducerClientUsage1.sendStringMessageWithByteArray(word, 11);
result = kafkaProducerClientUsage1.sendStringMessageWithByteArray(word, 8);
// Smallest value in the set; the expected single result is for this message.
result = kafkaProducerClientUsage1.sendStringMessageWithByteArray(word, 1);