以下是在Spark shell中复制的简单步骤:
scala> case class Foo(d: Option[Double])
defined class Foo
scala> val df = spark.createDataFrame(Seq(Foo(None), Foo(Some(1.0))))
df: org.apache.spark.sql.DataFrame = [d: double]
scala> df.as[Double].printSchema
root
|-- d: double (nullable = true)
scala> df.as[Double].collect
java.lang.NullPointerException: Null value appeared in non-nullable field:
- root class: "scala.Double"
If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (e.g. java.lang.Integer instead of int/scala.Int).
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collectFromPlan$1.apply(Dataset.scala:2864)
at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collectFromPlan$1.apply(Dataset.scala:2861)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2861)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2387)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2387)
at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2842)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2841)
at org.apache.spark.sql.Dataset.collect(Dataset.scala:2387)
... 48 elided
scala> df.as[Tuple1[Double]].printSchema
root
|-- d: double (nullable = true)
scala> df.as[Tuple1[Double]].collect
res26: Array[(Double,)] = Array((-1.0,), (1.0,))