Processing multiple Kafka topics with a single Spark Streaming context hangs at batchSubmitted

Date: 2017-05-09 05:39:41

Tags: apache-spark apache-kafka spark-streaming

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.scheduler._
import com.datastax.spark.connector._

object SparkMain extends App {
  System.setProperty("spark.cassandra.connection.host", "127.0.0.1")
  val conf = new SparkConf().setMaster("local[2]").setAppName("kafkaspark").set("spark.streaming.concurrentJobs", "4")
  val sc = new SparkContext(conf)
  val ssc = new StreamingContext(sc, Seconds(5))
  val sqlContext = new SQLContext(sc)
  val host = "localhost:2181"
  val topicList = List("test", "fb")

  // Create one receiver-based Kafka stream per topic and write every record to Cassandra.
  topicList.foreach { topic =>
    val lines = KafkaUtils.createStream(ssc, host, topic, Map(topic -> 1)).map(_._2)
    //configureStream(topic, lines)
    lines.foreachRDD(rdd => rdd.map(test(_)).saveToCassandra("test", "rawdata", SomeColumns("key")))
  }

  // Log the streaming lifecycle events so we can see where processing stalls.
  ssc.addStreamingListener(new StreamingListener {
    override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = {
      System.out.println("Batch completed, Total delay :" + batchCompleted.batchInfo.totalDelay.get.toString + " ms")
    }
    override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted): Unit = {
      println("inside onReceiverStarted")
    }
    override def onReceiverError(receiverError: StreamingListenerReceiverError): Unit = {
      println("inside onReceiverError")
    }
    override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped): Unit = {
      println("inside onReceiverStopped")
    }
    override def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted): Unit = {
      println("inside onBatchSubmitted")
    }
    override def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit = {
      println("inside onBatchStarted")
    }
  })

  ssc.start()
  println("===========================")
  ssc.awaitTermination()
}

case class test(key: String)

Each topic works fine if I run it on its own. But when the topic list contains more than one topic, then after the Kafka DStreams are created it just keeps printing "inside onBatchSubmitted" forever.

2 Answers:

Answer 0 (score: 0):

My bad, I had misconfigured it: it should be setMaster("local[*]") instead of setMaster("local[2]").

Answer 1 (score: 0):

Change local[2] to local[*] and it works fine.

val conf = new SparkConf().setMaster("local[*]").setAppName("kafkaspark").set("spark.streaming.concurrentJobs","4")