kafka:kafka_2.11-0.10.2.1
scala:2.12
// Kafka topic the event stream is read from.
val TOPIC_EVENT_XXX = "EVENT.xxx.ALL"
import org.apache.spark.sql.Encoders
// Spark SQL schema derived from the `Event` bean class; used below to parse the JSON payload.
val schema = Encoders.bean(classOf[Event]).schema
// Streaming DataFrame of events: each Kafka record's `value` is cast to a string, parsed as JSON
// against `schema`, and the parsed struct is flattened into top-level columns.
val allEventsDF = {
  // Brokers used to bootstrap the source connection.
  val sourceBrokers =
    "Streaming01.simon.com:9090,Streaming02.simon.com:9090,Streaming03.simon.com:9090,Streaming04.simon.com:9090"

  // Raw Kafka stream, starting from the latest offsets.
  val rawKafkaStream = spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", sourceBrokers)
    .option("subscribe", TOPIC_EVENT_XXX)
    .option("startingOffsets", "latest")
    .option("maxOffsetsPerTrigger", 5000) // previously 1000000
    .load()

  // Parse the payload into a single struct column named `parsed_value`.
  val parsedEvents = rawKafkaStream.select(
    from_json(col("value").cast("string"), schema).alias("parsed_value")
  )

  // Expand the struct so every parsed field becomes its own column.
  parsedEvents.selectExpr("parsed_value.*")
}
// Comma-separated `host:port` list of Kafka brokers passed as `kafka.bootstrap.servers`.
// Built from a Seq so the list can never end with a dangling separator (the previous
// literal ended with "," and therefore contained an empty final entry).
val KAFKA_BOOTSTRAP_SERVERS = Seq(
  "Streaming01.simon.com:9090",
  "Streaming02.simon.com:9090",
  "Streaming03.simon.com:9090",
  "Streaming04.simon.com:9090",
  "Ingest01.simon.com:9090",
  "Ingest02.simon.com:9090",
  "Notify01.simon.com:9090",
  "Notify02.simon.com:9090",
  "Serving01.simon.com:9090",
  "Serving02.simon.com:9090"
).mkString(",")
// Streams `waybillStates` to Kafka: every row is serialized to one JSON document in the
// `value` column and appended to the topic named by TOPIC_TIMECHAIN_WAYBILL.
//
// Only producer-side settings are passed with the `kafka.` prefix. The consumer-only settings
// `session.timeout.ms` / `heartbeat.interval.ms` and the source-only `failOnDataLoss` option
// were removed from this sink because a Kafka producer does not use them.
// NOTE(review): if tolerating data loss is required, `failOnDataLoss` belongs on the Kafka
// `readStream` definition — confirm the intent.
//
// Kept as `var` because it may be reassigned outside this snippet.
var waybillStatesKafkaSinkQuery = waybillStates
  .selectExpr("to_json(struct(*)) AS value")
  .writeStream
  .outputMode("append")
  .format("kafka") // other sinks include "orc", "json", "csv", memory, console, etc.
  .option("kafka.bootstrap.servers", KAFKA_BOOTSTRAP_SERVERS)
  .option("topic", TOPIC_TIMECHAIN_WAYBILL) // alternatives tried: TIMECHAIN.WAYBILL.ALL, TIMECHAIN.WAYBILL.TL
  .option("checkpointLocation", CHECKPOINT_PATH_TL_EVENT_WAYBILL_STATES)
  // NOTE(review): 164217728 bytes is larger than the broker default `socket.request.max.bytes`
  // (104857600). A broker closes the connection when a request exceeds that limit, which could
  // surface as NetworkException on the producer — confirm against the broker configuration.
  // Values tried previously: 134217728, 209715200.
  .option("kafka.max.request.size", "164217728")
  .option("kafka.buffer.memory", "164217728")
  // NOTE(review): `timeout.ms` looks like the legacy counterpart of `request.timeout.ms`;
  // both are kept at the same value — verify whether the legacy key is still needed.
  .option("kafka.timeout.ms", 180000)
  .option("kafka.request.timeout.ms", 180000)
  .option("kafka.retries", 100)
  .start()
运行上述程序时发生以下错误:
org.apache.kafka.common.errors.NetworkException: The server disconnected before a response was received.(服务器在收到响应之前已断开连接。)