I am trying to insert sensor data into InfluxDB. I got that pipeline working: the data coming from my Kafka producer console is stored in InfluxDB.
The next step is to analyze some of the data in windows. To do this, I simply added a timeWindow operation and converted the raw data into tuples, to check whether the data is grouped correctly into time-based windows.
Printing the values of each window works fine. However, compared with the first case, some of the raw data is missing when I look in InfluxDB.
The input strings come from the Kafka producer console. When I type values quickly, some values go missing; when I type very slowly (for example, one value per second), nothing is lost.
Is this a characteristic of Flink rather than a mistake on my part at the code level? Everything was tested on my local machine. My job code is below, after a short sketch of how the input is produced.
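To make the "typing quickly" part concrete: a minimal Java producer that simulates the console input might look like the following. This is only a sketch; the class name FastValueProducer and the loop are illustrative, and it assumes the same local broker and topic flinkStreaming3 as the job below.

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;

// Hypothetical stand-in for typing values into the Kafka console producer.
public class FastValueProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // Send numeric strings in a tight loop, mimicking fast console input.
            for (int i = 0; i < 100; i++) {
                producer.send(new ProducerRecord<>("flinkStreaming3", String.valueOf((double) i)));
            }
            producer.flush();
        }
    }
}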
package org.apache.flink.streaming.connectors.influxdb;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.functions.windowing.*;
import org.apache.flink.util.Collector;
import java.util.*;
import java.util.concurrent.TimeUnit;
public class ReadFromKafka {
    public static void main(String[] args) throws Exception {
        // create execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "flink");

        // Consume raw sensor values (numeric strings) from Kafka.
        DataStream<String> stream = env
                .addSource(new FlinkKafkaConsumer09<>("flinkStreaming3", new SimpleStringSchema(), properties));
        env.enableCheckpointing(1000);
        // Group the raw values into 5-second windows and number the elements in
        // each window, to verify that the time-based grouping is correct.
        DataStream<Tuple2<String, Integer>> process = stream
                .timeWindowAll(Time.seconds(5))
                .process(new ProcessAllWindowFunction<String, Tuple2<String, Integer>, TimeWindow>() {
                    @Override
                    public void process(Context context, Iterable<String> iterable,
                                        Collector<Tuple2<String, Integer>> out) throws Exception {
                        int cnt = 0;
                        for (String in : iterable) {
                            out.collect(new Tuple2<>(in, cnt++));
                        }
                    }
                });
        process.print();
        // Map each raw value to an InfluxDB point, timestamped at processing time.
        DataStream<InfluxDBPoint> dataStream = stream.map(new MapFunction<String, InfluxDBPoint>() {
            @Override
            public InfluxDBPoint map(String s) throws Exception {
                String measurement = "data";
                long timestamp = System.currentTimeMillis();

                HashMap<String, String> tags = new HashMap<>();
                tags.put("host", String.valueOf(measurement.hashCode() % 20));

                HashMap<String, Object> fields = new HashMap<>();
                fields.put("value", Double.parseDouble(s));

                return new InfluxDBPoint(measurement, timestamp, tags, fields);
            }
        });
        // Buffer up to 1000 points, flushing at least every 10 ms.
        InfluxDBConfig influxDBConfig = InfluxDBConfig.builder("http://localhost:8086", "root", "root", "db_flink_test")
                .batchActions(1000)
                .flushDuration(10, TimeUnit.MILLISECONDS)
                .enableGzip(true)
                .build();
        // processStream.addSink(new InfluxDBSink(influxDBConfig));
        dataStream.addSink(new InfluxDBSink(influxDBConfig));

        env.execute("InfluxDB Sink Example");
    }
}
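Something like the following count query can be used to compare what arrived in InfluxDB with what was typed. This is a minimal sketch using the influxdb-java client; the class name CheckInflux is made up, and the URL, credentials, and database name are taken from the InfluxDBConfig above.

import org.influxdb.InfluxDB;
import org.influxdb.InfluxDBFactory;
import org.influxdb.dto.Query;
import org.influxdb.dto.QueryResult;

// Hypothetical checker for comparing stored points against typed input.
public class CheckInflux {
    public static void main(String[] args) {
        // Connect with the same credentials as the sink config above.
        InfluxDB influxDB = InfluxDBFactory.connect("http://localhost:8086", "root", "root");
        // Count the points written to the "data" measurement.
        QueryResult result = influxDB.query(
                new Query("SELECT COUNT(\"value\") FROM \"data\"", "db_flink_test"));
        System.out.println(result);
        influxDB.close();
    }
}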
Thanks.