I am trying to count the rows in a JavaSchemaRDD, but the code below only throws a NullPointerException, even though the Spark documentation indicates that .collect() should return a List of all the rows and .size() should then give me that count. I would like to know the correct way to determine the size of a JavaSchemaRDD.
JavaRDD<modelTopology> MODEL_TOPOLOGYRDD = TopologyRDD.map(
        new Function<Object[], modelTopology>() {
            @Override
            public modelTopology call(Object[] line) throws Exception {
                modelTopology toporow = new modelTopology();
                toporow.setA_TYPE(line[0].toString());
                toporow.setZ_TYPE(line[1].toString());
                toporow.setA_CLLI(line[2].toString());
                toporow.setZ_CLLI(line[3].toString());
                toporow.setA_HOSTNAME(line[4].toString());
                toporow.setZ_HOSTNAME(line[5].toString());
                toporow.setA_LOCATION(line[6].toString());
                toporow.setA_LOC_TYPE(line[7].toString());
                toporow.setZ_LOCATION(line[8].toString());
                toporow.setZ_LOC_TYPE(line[9].toString());
                toporow.setA_SHELF(line[10].toString());
                toporow.setA_SLOT(line[11].toString());
                toporow.setA_CARD(line[12].toString());
                toporow.setA_PORT(line[13].toString());
                toporow.setA_INTERFACE(line[14].toString());
                toporow.setA_IF_DESC(line[15].toString());
                toporow.setZ_SHELF(line[16].toString());
                toporow.setZ_SLOT(line[17].toString());
                toporow.setZ_CARD(line[18].toString());
                toporow.setZ_PORT(line[19].toString());
                toporow.setZ_INTERFACE(line[20].toString());
                toporow.setZ_IF_DESC(line[21].toString());
                toporow.setA_CARD_NAME(line[22].toString());
                toporow.setZ_CARD_NAME(line[23].toString());
                toporow.setPHY_CIRCUIT_ID(line[24].toString());
                toporow.setLAG_CIRCUIT_ID(line[25].toString());
                toporow.setPHY_CIRCUIT_ALIAS(line[26].toString());
                toporow.setA_VENDOR(line[27].toString());
                toporow.setA_MODEL(line[28].toString());
                toporow.setA_TECHNOLOGY(line[29].toString());
                toporow.setZ_VENDOR(line[30].toString());
                toporow.setZ_MODEL(line[31].toString());
                toporow.setZ_TECHNOLOGY(line[32].toString());
                toporow.setA_EH_ELEMENT_ID(line[33].toString());
                toporow.setA_EH_MACHINE_ID(line[34].toString());
                toporow.setZ_EH_ELEMENT_ID(line[35].toString());
                toporow.setZ_EH_MACHINE_ID(line[36].toString());
                toporow.setA_EH_SPEED(line[37].toString());
                toporow.setZ_EH_SPEED(line[38].toString());
                toporow.setA_EH_SPEED1(line[39].toString());
                toporow.setZ_EH_SPEED1(line[40].toString());
                toporow.setA_EH_EHEALTH_DOMAIN(line[41].toString());
                toporow.setZ_EH_EHEALTH_DOMAIN(line[42].toString());
                toporow.setA_MRTG_HOSTID(line[43].toString());
                toporow.setA_MRTG_GRPID(line[44].toString());
                toporow.setA_MRTG_IFID(line[45].toString());
                toporow.setZ_MRTG_HOSTID(line[46].toString());
                toporow.setZ_MRTG_GRPID(line[47].toString());
                toporow.setZ_MRTG_IFID(line[48].toString());
                toporow.setA_MGMT_IP(line[49].toString());
                toporow.setZ_MGMT_IP(line[50].toString());
                toporow.setA_IF_INDEX(line[51].toString());
                toporow.setZ_IF_INDEX(line[52].toString());
                toporow.setIS_PROD(line[53].toString());
                toporow.setTOPOLOGY_KEY(line[54].toString());
                toporow.setCOMMIT_TS(line[55].toString());
                return toporow;
            }
        });
JavaSchemaRDD schemaTopology = sqlContext.applySchema(MODEL_TOPOLOGYRDD, modelTopology.class);
Integer Size = schemaTopology.collect().size();
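For what it is worth, my understanding is that count() (an action on JavaRDDLike, which JavaSchemaRDD extends) is the usual way to get the number of rows without shipping every row back to the driver, roughly:

    // Counts rows on the executors; nothing is collected to the driver.
    long size = schemaTopology.count();
    System.out.println("rows: " + size);

That said, since the NullPointerException below is thrown inside the map function itself, I would expect count() to hit the same failure, because it still evaluates the mapped RDD.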
Stack trace:
java.lang.NullPointerException
at com.verizon.npi.MainApp$1.call(MainApp.java:105)
at com.verizon.npi.MainApp$1.call(MainApp.java:96)
at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction$1.apply(JavaPairRDD.scala:999)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
at scala.collection.AbstractIterator.to(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$17.apply(RDD.scala:795)
at org.apache.spark.rdd.RDD$$anonfun$17.apply(RDD.scala:795)
at org.apache.spark.SparkContext$$anonfun$runJob$2.apply(SparkContext.scala:1321)
at org.apache.spark.SparkContext$$anonfun$runJob$2.apply(SparkContext.scala:1321)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
at org.apache.spark.scheduler.Task.run(Task.scala:56)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
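The trace points into the call(Object[] line) method (MainApp.java:105 above), so presumably one of the line[i] values is null and the corresponding line[i].toString() is what throws. A null-safe helper along these lines (safeString is a name I made up for illustration, not something already in my code) would be one way to verify that:

    // Illustrative null-safe accessor: returns "" instead of throwing a
    // NullPointerException when a column is missing or null.
    private static String safeString(Object[] line, int i) {
        return (line != null && i < line.length && line[i] != null)
                ? line[i].toString()
                : "";
    }

    // Each setter in call() would then become, e.g.:
    // toporow.setA_TYPE(safeString(line, 0));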