访问RDD [org.apache.spark.sql.DataFrame]时获取空指针异常

时间:2019-03-20 04:18:18

标签: apache-spark-sql

我的代码:

{
val data = sc.wholeTextFiles("file:///home/cloudera/Desktop/sampledata")
data: org.apache.spark.rdd.RDD[(String, String)] = file:///home/cloudera/Desktop/sampledata MapPartitionsRDD[1] at wholeTextFiles at <console>:24

scala> val paths = data.map(_._1)
paths: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[2] at map at <console>:25

scala> val paths1 = paths.filter(_.contains("csv"))
paths1: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[3] at filter at <console>:25

scala> val tables = paths.map{path =>
     | spark.read.format("CSV").option("header","true").load(path)}
tables: org.apache.spark.rdd.RDD[org.apache.spark.sql.DataFrame] = MapPartitionsRDD[4] at map at <console>:25

scala> tables(1)
<console>:26: error: org.apache.spark.rdd.RDD[org.apache.spark.sql.DataFrame]  
 does not take parameters
       tables(1)
             ^

scala> tables.take(1)  
[Stage 0:>                                                          (0 + 1) /  
 1]2019-03-19 20:19:54 ERROR Executor:91 - Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.NullPointerException  
        at 
 $line17.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1.apply(<console>:26)      
        at  
 $line17.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1.apply(<console>:25)      
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)      
        at scala.collection.Iterator$$anon$10.next(Iterator.scala:394)
        at scala.collection.Iterator$class.foreach(Iterator.scala:891)       
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
}

0 个答案:

没有答案