我的 Spark 应用程序从 Kafka 主题接收 InputMessage。
该消息包含会话 ID(我在第一个 mapWithState 函数中对其进行过滤)和拓扑名称(我根据 Zookeeper 配置对其进行过滤,并将其保存在第二个 mapWithState 中)。
在此之后,我尝试将拓扑名称保存到数据库,但运行几分钟后应用程序抛出异常。
// Kafka direct stream delivering raw key/value byte arrays for the configured topics.
JavaPairInputDStream<byte[], byte[]> directKafkaStream = KafkaUtils.createDirectStream(
        ssc,
        byte[].class, byte[].class,
        DefaultDecoder.class, DefaultDecoder.class,
        kafkaParams, topics);

// Decode the Avro payload (the record value) and keep only messages that decoded to a
// non-null object carrying both a session id and a data name.
JavaDStream<InputMessage> inputMessageStream = directKafkaStream
        .map(record -> InputMessageTranslator.decodeAvro(record._2))
        .filter(msg -> {
            if (msg == null) {
                return false;
            }
            return msg.getSessionID() != null && msg.getDataName() != null;
        });
inputMessageStream.checkpoint(Durations.seconds(2));
// LOGGER.info("Processing {} messages", inputMessageStream.count());

// Key every message by (session id, data name) so that per-key state can be tracked downstream.
JavaPairDStream<InputMessageKey, InputMessage> pairInputMessageStream = inputMessageStream.mapToPair(msg -> {
    InputMessageKey messageKey = new InputMessageKey(msg.getSessionID(), msg.getDataName());
    return new Tuple2<>(messageKey, msg);
});
pairInputMessageStream.checkpoint(Durations.seconds(2));
// State function used to de-duplicate Kafka messages per InputMessageKey.
// The Integer state is only a "seen" marker: the first message for a key stores the marker and is
// passed through as a (key, message) tuple; while the marker exists, the function yields null.
Function3<InputMessageKey, Optional<InputMessage>, State<Integer>, Tuple2<InputMessageKey, InputMessage>> mappingFunction =
        (key, value, state) -> {
            if (state.exists()) {
                // Key already recorded: emit the null placeholder for this invocation.
                return null;
            }
            state.update(1);
            return new Tuple2<>(key, value.get());
        };
// State function used to emit each topology name only once.
// The Integer state is a "seen" marker: the first occurrence of a name stores the marker and returns
// the name; while the marker exists, the function yields null.
Function3<String, Optional<Integer>, State<Integer>, String> mappingFunctionTopology = (key, value, state) -> {
    if (state.exists()) {
        // Name already recorded: emit the null placeholder for this invocation.
        return null;
    }
    state.update(1);
    return key;
};
// Per-key state is kept by mapWithState and expires after the configured timeout (in minutes,
// read from the TIMEOUT start-up property).
// Integer.parseInt replaces the deprecated Integer(String) boxing constructor; both throw
// NumberFormatException when the property is missing or not a number.
int stateTimeoutMinutes = Integer.parseInt(startProps.getProperty(InputPropertyKey.TIMEOUT.toString()));
JavaMapWithStateDStream<InputMessageKey, InputMessage, Integer, Tuple2<InputMessageKey, InputMessage>> stateDStream = pairInputMessageStream
        .mapWithState(StateSpec.function(mappingFunction).timeout(Durations.minutes(stateTimeoutMinutes)));
stateDStream.checkpoint(Durations.seconds(2));

// mappingFunction yields null for keys that were already seen. Drop those placeholders BEFORE
// distinct() so they are not serialized and shuffled only to be thrown away afterwards; the
// resulting set of elements is the same as filtering after distinct().
// NOTE(review): mapWithState appears to emit at most one non-null tuple per key while its state is
// alive, so distinct() may be redundant here — confirm, and consider removing it to avoid the shuffle.
JavaDStream<Tuple2<InputMessageKey, InputMessage>> mapWithStateDistinctAndFiltered = stateDStream
        .transform(rdd -> rdd.filter(r -> r != null).distinct());
mapWithStateDistinctAndFiltered.checkpoint(Durations.seconds(2));
// Project every de-duplicated message onto its data name (used as the topology name), keep only the
// names that ZookeeperConfigurationReader reports as existing under /Topologies, and pair each
// surviving name with the constant 1 so it can be fed into the keyed state stream.
JavaPairDStream<String, Integer> topologyNameStream = mapWithStateDistinctAndFiltered
        .map(entry -> entry._2.getDataName())
        .filter(name -> ZookeeperConfigurationReader.topologyNameExists(name, "localhost:2181", "/Topologies"))
        .mapToPair(name -> new Tuple2<>(name, 1));
topologyNameStream.checkpoint(Durations.seconds(2));
// Track which topology names have already been seen; the mapped stream carries the name on first
// sight and a null placeholder for every later occurrence.
JavaMapWithStateDStream<String, Integer, Integer, String> mappedTopologyNameStream = topologyNameStream
        .mapWithState(StateSpec.function(mappingFunctionTopology));

// Send every newly seen topology name to the topology REST endpoint, one client per partition.
mappedTopologyNameStream.foreachRDD(rdd -> {
    // mappingFunctionTopology returns null for names that were already recorded; drop those
    // placeholders so that createObject is never invoked with null.
    rdd.filter(name -> name != null).foreachPartition(names -> {
        if (!names.hasNext()) {
            // Nothing to send from this partition: do not build a REST client for it.
            return;
        }
        TopologyRestClient rest = new TopologyRestClient("http://localhost:8080/topology");
        names.forEachRemaining(name -> {
            rest.createObject(name);
        });
    });
});
抛出的异常如下:
java.lang.StackOverflowError at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1512)
根据 YARN UI,它是在 distinct() 这一步失败的。
谢谢!