I'm using flink-1.4.2 with Scala and I want to consume multiple Kafka data stream sources. I've used the union function to merge them, but I can only get one Kafka source to work.
import java.util.Properties

import scala.collection.JavaConverters._

import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.IngestionTimeExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.{BucketingSink, DateTimeBucketer}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

// SimpleKafkaOutputMsg, IndexedSectorMessage, IndexedSectorMessagDes,
// InvalidFlightsFilterFunction, KeyedWindowTimeMedianFunction, attachUID,
// flattenFlights and getFlightKey are application classes defined elsewhere.
def main(args: Array[String]) {
  // Consumer properties for the first Kafka cluster
  val kProps = new Properties()
  kProps.setProperty("bootstrap.servers", "kafka01.prod.com:9092")
  kProps.setProperty("group.id", "test_cg")
  kProps.setProperty("enable.auto.commit", "true")
  kProps.setProperty("auto.offset.reset", "latest")

  // Consumer properties for the second Kafka cluster
  val kProps2 = new Properties()
  kProps2.setProperty("bootstrap.servers", "kafka04.prod.com:9092")
  kProps2.setProperty("group.id", "test_cg_2")
  kProps2.setProperty("enable.auto.commit", "true")
  kProps2.setProperty("auto.offset.reset", "latest")

  // Bucketing sink that rolls files to S3, partitioned by hour
  val sink = new BucketingSink[SimpleKafkaOutputMsg]("s3://some-bucket/")
  sink.setBucketer(new DateTimeBucketer[SimpleKafkaOutputMsg]("yyyy-MM-dd-HH"))
  sink.setWriter(new StringWriter[SimpleKafkaOutputMsg])
  sink.setBatchSize(350 * 1024 * 1024) // 350 MB
  sink.setPendingPrefix("file-")
  sink.setPendingSuffix(".csv")

  val env = StreamExecutionEnvironment.getExecutionEnvironment
  env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)
  env.setParallelism(9)
  env.setStateBackend(new RocksDBStateBackend("file:///tmp/flink/checkpoints", false))

  // One Kafka source per cluster/topic
  val topics = List("ROUNDTRIP1")
  val inpStream1 = env.addSource(new FlinkKafkaConsumer011(topics.asJava, new IndexedSectorMessagDes(), kProps))
  val topics2 = List("ROUNDTRIP2")
  val inpStream2 = env.addSource(new FlinkKafkaConsumer011(topics2.asJava, new IndexedSectorMessagDes(), kProps2))

  // Merge both sources, then filter, tag and timestamp the combined stream
  val inpStream = inpStream1.union(inpStream2)
    .filter(new InvalidFlightsFilterFunction())
    .map(attachUID(_))
    .assignTimestampsAndWatermarks(new IngestionTimeExtractor[IndexedSectorMessage]())

  val intStream = inpStream.flatMap { s => flattenFlights(s) }
  intStream.keyBy(getFlightKey _).process(new KeyedWindowTimeMedianFunction()).addSink(sink)
  env.execute("Scala WindowExample Example")
}
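
To isolate whether both consumers actually emit, a minimal smoke test may help. This is only a sketch, not the production job: the broker addresses, group ids, and topic names are placeholders copied from the snippet above, and SimpleStringSchema stands in for the custom deserializer so the example is self-contained. Each record is tagged with its source before the union, so the printed output shows whether one or both sources deliver data.

// Hypothetical smoke test (not from the original job): union two
// FlinkKafkaConsumer011 sources and print, to verify both emit records.
import java.util.Properties

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011
import org.apache.flink.streaming.util.serialization.SimpleStringSchema

object UnionSmokeTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Build a consumer for one cluster/topic; all values are placeholders.
    def consumer(brokers: String, groupId: String, topic: String) = {
      val props = new Properties()
      props.setProperty("bootstrap.servers", brokers)
      props.setProperty("group.id", groupId)
      new FlinkKafkaConsumer011[String](topic, new SimpleStringSchema(), props)
    }

    val s1 = env.addSource(consumer("kafka01.prod.com:9092", "smoke_cg_1", "ROUNDTRIP1"))
    val s2 = env.addSource(consumer("kafka04.prod.com:9092", "smoke_cg_2", "ROUNDTRIP2"))

    // Tag each element with its origin so the printed output shows
    // whether one or both sources are delivering records.
    s1.map("src1: " + _).union(s2.map("src2: " + _)).print()

    env.execute("union-smoke-test")
  }
}

If only one tag ever appears, the problem is with that consumer's connectivity or offsets rather than with union itself; with auto.offset.reset set to latest, a consumer group that commits offsets but sees no new messages will also appear silent.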