使用 newAPIHadoopRDD 从 HBase 表中提取数据时出现 NullPointerException

时间:2017-08-29 08:35:39

标签: apache-spark hbase

Exception in thread "main" java.lang.RuntimeException: java.lang.NullPointerException
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:208)
    at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:320)
    at org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:295)
    at org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:160)
    at org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:155)
    at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:821)
    at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:193)
    at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:89)
    at org.apache.hadoop.hbase.client.MetaScanner.allTableRegions(MetaScanner.java:324)
    at org.apache.hadoop.hbase.client.HRegionLocator.getAllRegionLocations(HRegionLocator.java:88)
    at org.apache.hadoop.hbase.util.RegionSizeCalculator.init(RegionSizeCalculator.java:94)
    at org.apache.hadoop.hbase.util.RegionSizeCalculator.<init>(RegionSizeCalculator.java:81)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:256)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormat.getSplits(TableInputFormat.java:237)
    at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:120)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:242)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:240)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:240)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:242)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:240)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:240)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1953)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:934)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:323)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:933)
    at com.dnvgl.eta.HbaseToCsv$.main(HbaseToCsv.scala:69)
    at com.dnvgl.eta.HbaseToCsv.main(HbaseToCsv.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NullPointerException
    at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.getMetaReplicaNodes(ZooKeeperWatcher.java:395)
    at org.apache.hadoop.hbase.zookeeper.MetaTableLocator.blockUntilAvailable(MetaTableLocator.java:553)
    at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getMetaRegionLocation(ZooKeeperRegistry.java:61)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateMeta(ConnectionManager.java:1186)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1153)
    at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.getRegionLocations(RpcRetryingCallerWithReadReplicas.java:300)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:155)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:59)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
    ... 40 more

有人能给一些建议吗?非常感谢。

1 个答案:

答案 0 :(得分:1)

只贴出堆栈跟踪的话很难回答。请编辑问题,补充您所使用的源代码。

我怀疑您传递给 newAPIHadoopRDD 方法的 HBase 配置对象配置不正确。请参考此 URL 以便更好地理解。请确保 config 对象设置了 hbase.zookeeper.property.clientPort、hbase.zookeeper.quorum 和 zookeeper.znode.parent 这三个参数。