Exception when integrating Spark with Kafka

Time: 2018-03-27 08:06:24

Tags: apache-spark apache-kafka spark-streaming

I'm running a Spark-Kafka streaming word count. I built a jar with sbt, and when I run spark-submit, the following exception is thrown.

Exception in thread "streaming-start" java.lang.NoSuchMethodError: org.apache.hadoop.fs.FileStatus.isDirectory()Z
    at org.apache.spark.streaming.util.FileBasedWriteAheadLog.initializeOrRecover(FileBasedWriteAheadLog.scala:245)
    at org.apache.spark.streaming.util.FileBasedWriteAheadLog.<init>(FileBasedWriteAheadLog.scala:80)
    at org.apache.spark.streaming.util.WriteAheadLogUtils$$anonfun$2.apply(WriteAheadLogUtils.scala:142)
    at org.apache.spark.streaming.util.WriteAheadLogUtils$$anonfun$2.apply(WriteAheadLogUtils.scala:142)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.streaming.util.WriteAheadLogUtils$.createLog(WriteAheadLogUtils.scala:141)
    at org.apache.spark.streaming.util.WriteAheadLogUtils$.createLogForDriver(WriteAheadLogUtils.scala:99)
    at org.apache.spark.streaming.scheduler.ReceivedBlockTracker$$anonfun$createWriteAheadLog$1.apply(ReceivedBlockTracker.scala:256)
    at org.apache.spark.streaming.scheduler.ReceivedBlockTracker$$anonfun$createWriteAheadLog$1.apply(ReceivedBlockTracker.scala:254)
    at scala.Option.map(Option.scala:146)
    at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.createWriteAheadLog(ReceivedBlockTracker.scala:254)
    at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.<init>(ReceivedBlockTracker.scala:77)
    at org.apache.spark.streaming.scheduler.ReceiverTracker.<init>(ReceiverTracker.scala:109)
    at org.apache.spark.streaming.scheduler.JobScheduler.start(JobScheduler.scala:87)
    at org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply$mcV$sp(StreamingContext.scala:583)
    at org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply(StreamingContext.scala:578)
    at org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply(StreamingContext.scala:578)
    at org.apache.spark.util.ThreadUtils$$anon$2.run(ThreadUtils.scala:126)
18/03/27 12:43:55 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@12010fd1{/streaming,null,AVAILABLE,@Spark}
18/03/27 12:43:55 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@552ed807{/streaming/json,null,AVAILABLE,@Spark}
18/03/27 12:43:55 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7318daf8{/streaming/batch,null,AVAILABLE,@Spark}
18/03/27 12:43:55 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3f1ddac2{/streaming/batch/json,null,AVAILABLE,@Spark}
18/03/27 12:43:55 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@37864b77{/static/streaming,null,AVAILABLE,@Spark}
18/03/27 12:43:55 INFO streaming.StreamingContext: StreamingContext started

My spark-submit:

spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.0 --class "KafkaWordCount" --master local[4] scala_project_2.11-1.0.jar  localhost:2181 test-consumer-group word-count 1

scala_version: 2.11.8, spark_version: 2.2.0, sbt_version: 1.0.3
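
For reference, a build.sbt consistent with these versions and with the jar name in the spark-submit command would look roughly like the sketch below; my actual build file may differ (the "provided" scopes and exact module list are assumptions):

name := "scala_project"

version := "1.0"

scalaVersion := "2.11.8"

libraryDependencies ++= Seq(
  // Spark itself is supplied by spark-submit at runtime, hence "provided".
  "org.apache.spark" %% "spark-core" % "2.2.0" % "provided",
  "org.apache.spark" %% "spark-streaming" % "2.2.0" % "provided",
  // Kafka 0.8 receiver-based connector; also pulled in via --packages at submit time.
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % "2.2.0"
)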

My code:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Minutes, Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils

object KafkaWordCount {

  def main(args: Array[String]): Unit = {

    // Note: the command-line arguments are ignored; the values are hardcoded here.
    val (zkQuorum, group, topics, numThreads) = ("localhost:2181", "test-consumer-group", "word-count", 1)

    val sparkConf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("KafkaWordCount")

    val ssc = new StreamingContext(sparkConf, Seconds(2))
    ssc.checkpoint("checkpoint")

    // One map entry per topic, each consumed by numThreads receiver threads.
    val topicMap = topics.split(",").map((_, numThreads)).toMap
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    val words = lines.flatMap(_.split(" "))

    // Debug output; note that rdd.first throws if a batch is empty.
    words.foreachRDD(rdd => println("#####################rdd###################### " + rdd.first))

    val wordCounts = words.map(x => (x, 1L))
      .reduceByKeyAndWindow(_ + _, _ - _, Minutes(10), Seconds(2), 2)

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
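
While testing, the topic can be fed with Kafka's standard console producer; the broker address localhost:9092 below is an assumption, since only ZooKeeper's localhost:2181 appears above:

kafka-console-producer.sh --broker-list localhost:9092 --topic word-count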

0 Answers:

No answers