I am trying to read data in JSON format from a Kafka producer. On the development server, where I have 1 ZooKeeper and 3 brokers, the code works and I get results. But when I run it against the production servers, with 3 ZooKeepers and 3 bootstrap servers, I get a NullPointerException. Here is my code:
import _root_.kafka.serializer.DefaultDecoder
import _root_.kafka.serializer.StringDecoder
import org.apache.kudu.spark.kudu._
import org.apache.kudu.client._
import collection.JavaConverters._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.sql.types._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
sc.setLogLevel("ERROR")
val ssc = new StreamingContext(sc, Seconds(5))
val kafkaTopic = "test2"
val topicsSet = kafkaTopic.split(",").toSet
val kafkaParams = Map[String, String] ("metadata.broker.list" -> "xxx:9092,xxx:9092,xxx:9092","zookeeper.connection.timeout.ms" -> "1000")
val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)
val dStream_final=stream.map(_._2)
dStream_final.foreachRDD( rdd => {
  if (!rdd.isEmpty) {
    // parse the batch of JSON strings into a DataFrame and print it
    val dataFrame = sqlContext.read.json(rdd)
    dataFrame.show()
  }
})
ssc.start()
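One thing I am unsure about is whether the spark-shell sqlContext (a HiveContext, which is where the stack trace below points) is usable inside foreachRDD on the production cluster. A minimal sketch of the variant I could try instead, obtaining a plain SQLContext per batch via SQLContext.getOrCreate (an assumption on my side, not something I have verified in production):

import org.apache.spark.sql.SQLContext

dStream_final.foreachRDD( rdd => {
  if (!rdd.isEmpty) {
    // assumption: a plain SQLContext is enough here, since no Hive tables are involved
    val sqlContext = SQLContext.getOrCreate(rdd.sparkContext)
    val dataFrame = sqlContext.read.json(rdd)
    dataFrame.show()
  }
})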
Here is the JSON object I am producing:
{ "name":"John", "age":30, "car":null }
And here is the exception I get when I run the code:
18/09/13 15:26:55 ERROR scheduler.JobScheduler: Error running job streaming job 1536834415000 ms.0
java.lang.NullPointerException
at org.apache.spark.sql.hive.client.ClientWrapper.conf(ClientWrapper.scala:205)
at org.apache.spark.sql.hive.HiveContext.hiveconf$lzycompute(HiveContext.scala:554)
at org.apache.spark.sql.hive.HiveContext.hiveconf(HiveContext.scala:553)
at org.apache.spark.sql.hive.HiveContext$$anonfun$configure$1.apply(HiveContext.scala:540)
at org.apache.spark.sql.hive.HiveContext$$anonfun$configure$1.apply(HiveContext.scala:539)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.immutable.List.foreach(List.scala:318)