我想在 Spark Streaming 的 foreachRDD 中打印日志并把结果写入 Hive,但什么都没有输出,也没有写入。希望得到帮助。
// Spark Streaming job: consume JSON records from Kafka via the direct (receiver-less)
// API, count them per 2-second batch, and overwrite the result into a Hive table.
val sparkConf = new SparkConf().setAppName("DirectKafka")
sparkConf.setMaster("yarn-client") // also tried local[*]
// sparkConf.set("spark.streaming.kafka.maxRatePerPartition", "5")
// sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
val ssc = new StreamingContext(sparkConf, Seconds(2))
val topicsSet = Set("topic")
val kafkaParams = Map[String, String](
  "metadata.broker.list" -> "ip1:9092,ip2:9092",
  "group.id" -> "test0107",
  "auto.offset.reset" -> "smallest"
)

// FIX: createDirectStream() with no arguments does not compile. The direct-stream
// API needs key/value types, their decoders, and (ssc, kafkaParams, topics).
val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
  ssc, kafkaParams, topicsSet)

messages.foreachRDD { rdd =>
  // getOrCreate reuses one SparkSession across batches instead of building a new one.
  val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
  import spark.implicits._

  // Guard against empty batches: spark.read.json on an empty RDD cannot infer a
  // schema and fails, which would kill the streaming job on the first idle interval.
  if (!rdd.isEmpty()) {
    val df = spark.read.json(rdd.map(_._2)) // _._2 is the Kafka message value
    println(df.count()) // println (not print): emits a full, flushed line on the driver
    df.createOrReplaceTempView("words")
    val wc = spark.sql("select count(1) as id from words")
    wc.write.mode(SaveMode.Overwrite).insertInto("test.rt_test")
    wc.show()
  }
}

// FIX: foreachRDD only *registers* the per-batch action; nothing executes until the
// context is started. "Added jobs for time ... ms" with no further output is the
// classic symptom of a context that was never started (or has no executor resources).
// NOTE(review): if start()/awaitTermination() already exist later in the real file,
// keep only one pair.
ssc.start()
ssc.awaitTermination()
仅输出:
18/01/07 13:28:30 INFO scheduler.JobScheduler: Added jobs for time 1515302910000 ms
18/01/07 13:28:32 INFO scheduler.JobScheduler: Added jobs for time 1515302912000 ms