I'm trying to deserialize a simple JSON string into a case class in Spark, but I get an exception. The same code works outside of Spark.
I'm using the following version of json4s:
"org.json4s" % "json4s-jackson_2.11" % "3.3.0"
Code:
import org.apache.spark.{SparkContext, SparkConf}
import scala.util.Try
import org.json4s.DefaultFormats
import org.json4s.native.JsonMethods._
import org.json4s.jackson.Serialization.{read, write}

object TestParse {
  def run() = {
    val sconf = new SparkConf().setAppName("Test").setMaster("local[*]")
    val sc = new SparkContext(sconf)

    case class TestObj(name: String, value: String)

    val testData = sc.parallelize(List.range(1, 10))
    val dataObjsRDD = testData.map { i =>
      implicit val formats = DefaultFormats // Workaround as DefaultFormats is not serializable
      Try { // Always results in Failure
        val jsonObj = parse("""{"name": "TheName", "value":"TheValue"}""")
        jsonObj.extract[TestObj]
      }
    }

    val d = dataObjsRDD.take(1)
    println(d)
  }
}
The exception I get is:
result = {Failure@7770} "Failure(org.json4s.package$MappingException: unknown error)"
exception = {package$MappingException@7773} "org.json4s.package$MappingException: unknown error"
msg = "unknown error"
value = {char[13]@7846}
hash = 0
cause = {NullPointerException@7779} "java.lang.NullPointerException"
detailMessage = null
cause = {NullPointerException@7779} "java.lang.NullPointerException"
stackTrace = {StackTraceElement[40]@7845}
suppressedExceptions = {Collections$UnmodifiableRandomAccessList@7781} size = 0
detailMessage = "unknown error"
value = {char[13]@7846}
hash = 0
Throwable.cause = {NullPointerException@7779} "java.lang.NullPointerException"
detailMessage = null
cause = {NullPointerException@7779} "java.lang.NullPointerException"
stackTrace = {StackTraceElement[40]@7845}
suppressedExceptions = {Collections$UnmodifiableRandomAccessList@7781} size = 0
stackTrace = {StackTraceElement[29]@7780}
0 = {StackTraceElement@7783} "org.json4s.Extraction$.extract(Extraction.scala:47)"
1 = {StackTraceElement@7784} "org.json4s.ExtractableJsonAstNode.extract(ExtractableJsonAstNode.scala:21)"
2 = {StackTraceElement@7785} "TestParse$$anonfun$1$$anonfun$apply$1.apply(TestParse.scala:22)"
3 = {StackTraceElement@7786} "TestParse$$anonfun$1$$anonfun$apply$1.apply(TestParse.scala:20)"
4 = {StackTraceElement@7787} "scala.util.Try$.apply(Try.scala:161)"
5 = {StackTraceElement@7788} "TestParse$$anonfun$1.apply(TestParse.scala:20)"
6 = {StackTraceElement@7789} "TestParse$$anonfun$1.apply(TestParse.scala:18)"
7 = {StackTraceElement@7790} "scala.collection.Iterator$$anon$11.next(Iterator.scala:328)"
8 = {StackTraceElement@7791} "scala.collection.Iterator$class.foreach(Iterator.scala:727)"
9 = {StackTraceElement@7792} "scala.collection.AbstractIterator.foreach(Iterator.scala:1157)"
10 = {StackTraceElement@7793} "scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)"
11 = {StackTraceElement@7794} "scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)"
12 = {StackTraceElement@7795} "scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)"
13 = {StackTraceElement@7796} "scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)"
14 = {StackTraceElement@7797} "scala.collection.AbstractIterator.to(Iterator.scala:1157)"
15 = {StackTraceElement@7798} "scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)"
16 = {StackTraceElement@7799} "scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)"
17 = {StackTraceElement@7800} "scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)"
18 = {StackTraceElement@7801} "scala.collection.AbstractIterator.toArray(Iterator.scala:1157)"
19 = {StackTraceElement@7802} "org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:909)"
20 = {StackTraceElement@7803} "org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$12.apply(RDD.scala:909)"
21 = {StackTraceElement@7804} "org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1850)"
22 = {StackTraceElement@7805} "org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1850)"
23 = {StackTraceElement@7806} "org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)"
24 = {StackTraceElement@7807} "org.apache.spark.scheduler.Task.run(Task.scala:88)"
25 = {StackTraceElement@7808} "org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)"
26 = {StackTraceElement@7809} "java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)"
27 = {StackTraceElement@7810} "java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)"
28 = {StackTraceElement@7811} "java.lang.Thread.run(Thread.java:745)"
suppressedExceptions = {Collections$UnmodifiableRandomAccessList@7781} size = 0
Answer 0 (score: 2)
This is not a Spark problem; the issue is that you have defined your case class inside the method itself. If you define the case class at the top level (outside the TestParse object), it should work.
See https://github.com/json4s/json4s/issues/125 for more details.
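For illustration, here is a minimal sketch of the suggested fix with the case class moved to the top level. It stays close to the question's code; the switch to the jackson JsonMethods is my own choice to match the declared json4s-jackson dependency, not part of the original answer:

import org.apache.spark.{SparkConf, SparkContext}
import scala.util.Try
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods._

// Defined at the top level, so json4s can instantiate it by reflection
// without needing a reference to an enclosing scope.
case class TestObj(name: String, value: String)

object TestParse {
  def run() = {
    val sconf = new SparkConf().setAppName("Test").setMaster("local[*]")
    val sc = new SparkContext(sconf)

    val testData = sc.parallelize(List.range(1, 10))
    val dataObjsRDD = testData.map { i =>
      // DefaultFormats is not serializable, so create it inside the task
      implicit val formats = DefaultFormats
      Try {
        val jsonObj = parse("""{"name": "TheName", "value":"TheValue"}""")
        jsonObj.extract[TestObj]
      }
    }

    dataObjsRDD.take(1).foreach(println)
  }
}

With TestObj at the top level, extract no longer depends on an outer instance, so the Try should come back as a Success instead of the MappingException shown above.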