Spark gives a NullPointerException during InputSplit for HBase

Time: 2015-02-13 06:48:27

Tags: scala hadoop mapreduce hbase apache-spark

I am using Spark 1.2.1, HBase 0.98.10, and Hadoop 2.6.0. When retrieving data from HBase I get a NullPointerException. A sketch of the read path is shown below, followed by the stack trace.
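This is a minimal sketch of how such a read is typically set up, assuming the table is scanned with TableInputFormat through SparkContext.newAPIHadoopRDD; the application name and the table name "myTable" are placeholders, not values from the original question:

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Result
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat
    import org.apache.spark.{SparkConf, SparkContext}

    object HBaseRead {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("HBaseRead"))

        // Point TableInputFormat at the table to scan ("myTable" is a placeholder).
        val hbaseConf = HBaseConfiguration.create()
        hbaseConf.set(TableInputFormat.INPUT_TABLE, "myTable")

        // newAPIHadoopRDD asks each InputSplit for its preferred locations when
        // scheduling tasks; that is the code path where the exception below is logged.
        val rdd = sc.newAPIHadoopRDD(
          hbaseConf,
          classOf[TableInputFormat],
          classOf[ImmutableBytesWritable],
          classOf[Result])

        println(rdd.count())
        sc.stop()
      }
    }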


    [sparkDriver-akka.actor.default-dispatcher-2] DEBUG NewHadoopRDD - Failed to use InputSplit#getLocationInfo.
    java.lang.NullPointerException: null
        at scala.collection.mutable.ArrayOps$ofRef$.length$extension(ArrayOps.scala:114) ~[scala-library-2.10.4.jar:na]
        at scala.collection.mutable.ArrayOps$ofRef.length(ArrayOps.scala:114) ~[scala-library-2.10.4.jar:na]
        at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:32) ~[scala-library-2.10.4.jar:na]
        at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108) ~[scala-library-2.10.4.jar:na]
        at org.apache.spark.rdd.HadoopRDD$.convertSplitLocationInfo(HadoopRDD.scala:401) ~[spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.rdd.NewHadoopRDD.getPreferredLocations(NewHadoopRDD.scala:215) ~[spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:234) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:234) [spark-core_2.10-1.2.1.jar:1.2.1]
        at scala.Option.getOrElse(Option.scala:120) [scala-library-2.10.4.jar:na]
        at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:233) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1326) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply$mcVI$sp(DAGScheduler.scala:1336) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
        at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
        at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply$mcVI$sp(DAGScheduler.scala:1336) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
        at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
        at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:1304) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:862) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:859) [spark-core_2.10-1.2.1.jar:1.2.1]
        at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) [scala-library-2.10.4.jar:na]
        at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) [scala-library-2.10.4.jar:na]
        at scala.collection.Iterator$class.foreach(Iterator.scala:727) [scala-library-2.10.4.jar:na]
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) [scala-library-2.10.4.jar:na]
        at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) [scala-library-2.10.4.jar:na]
        at scala.collection.AbstractIterable.foreach(Iterable.scala:54) [scala-library-2.10.4.jar:na]
        at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) [scala-library-2.10.4.jar:na]
        at scala.collection.AbstractTraversable.map(Traversable.scala:105) [scala-library-2.10.4.jar:na]
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:859) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762) [spark-core_2.10-1.2.1.jar:1.2.1]
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1389) [spark-core_2.10-1.2.1.jar:1.2.1]
        at akka.actor.Actor$class.aroundReceive(Actor.scala:465) [akka-actor_2.10-2.3.4-spark.jar:na]
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) [spark-core_2.10-1.2.1.jar:1.2.1]
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) [akka-actor_2.10-2.3.4-spark.jar:na]
        at akka.actor.ActorCell.invoke(ActorCell.scala:487) [akka-actor_2.10-2.3.4-spark.jar:na]
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) [akka-actor_2.10-2.3.4-spark.jar:na]
        at akka.dispatch.Mailbox.run(Mailbox.scala:220) [akka-actor_2.10-2.3.4-spark.jar:na]
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) [akka-actor_2.10-2.3.4-spark.jar:na]
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) [scala-library-2.10.4.jar:na]
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) [scala-library-2.10.4.jar:na]
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) [scala-library-2.10.4.jar:na]
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [scala-library-2.10.4.jar:na]

Please suggest a solution to this issue.

1 Answer:

Answer 0 (score: 1)

The exception is thrown in the getPreferredLocations stage, so without more information about your HBase setup I would suggest checking that hbase.table.name and hbase.master (I am not sure the latter is needed when the HMaster is correctly defined) are configured as required, as in the sketch below.
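A minimal sketch of the configuration the answer points at; the table name and host names are placeholders. Note that hbase.table.name is the key the answer mentions, while TableInputFormat itself reads the table name from TableInputFormat.INPUT_TABLE ("hbase.mapreduce.inputtable"):

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat

    // Placeholder values throughout; substitute your own table and hosts.
    val hbaseConf = HBaseConfiguration.create()
    // The table to scan; this is the property newAPIHadoopRDD's TableInputFormat uses.
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "myTable")
    // Region locations are resolved through ZooKeeper, so the quorum must be reachable.
    hbaseConf.set("hbase.zookeeper.quorum", "zk-host")
    // Explicit HMaster address, in case your setup requires it.
    hbaseConf.set("hbase.master", "hmaster-host:60000")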