I am trying to read a JSON file with the following code, but it returns several errors:
val df = sqlcontext.read.json("E:/Dataset/Apps_for_Android_5.json")
Thanks in advance for any help with this error.
Error:
scala> val df = sqlcontext.read.json("E:/Dataset/Apps_for_Android_5.json")
[Stage 2:> (0 + 4) / 10]
17/01/22 08:15:09 ERROR Executor: Exception in task 2.0 in stage 2.0 (TID 14)
java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 WARN TaskSetManager: Lost task 2.0 in stage 2.0 (TID 14, localhost): java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR TaskSetManager: Task 2 in stage 2.0 failed 1 times; aborting job
17/01/22 08:15:09 ERROR Executor: Exception in task 1.0 in stage 2.0 (TID 13)
java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 4.0 in stage 2.0 (TID 16)
org.apache.spark.TaskKilledException
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 12)
java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 3.0 in stage 2.0 (TID 15)
java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 WARN TaskSetManager: Lost task 4.0 in stage 2.0 (TID 16, localhost): org.apache.spark.TaskKilledException
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 2.0 failed 1 times, most recent failure: Lost task 2.0 in stage 2.0 (TID 14, localhost): java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1936)
    at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1065)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
    at org.apache.spark.rdd.RDD.fold(RDD.scala:1059)
    at org.apache.spark.sql.execution.datasources.json.InferSchema$.infer(InferSchema.scala:68)
    at org.apache.spark.sql.execution.datasources.json.JsonFileFormat.inferSchema(JsonFileFormat.scala:62)
    at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:421)
    at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:421)
    at scala.Option.orElse(Option.scala:289)
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:420)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:149)
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:294)
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:249)
    ... 52 elided
Caused by: java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
Answer 0 (score: 2)
This looks like a reported Spark issue for which there is not yet a clear isolation or fix: https://issues.apache.org/jira/browse/SPARK-16599
The only suggested workaround is to downgrade to Spark 1.6.2.
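For reference, a minimal sketch of what the same read would look like after downgrading, assuming a Spark 1.6.2 spark-shell session where `sc` is already defined and the file is line-delimited JSON (one complete JSON object per line, which is what `read.json` expects); the path is the one from the question:

// Hedged sketch only: assumes a Spark 1.6.2 spark-shell where `sc` (SparkContext) is predefined.
// In Spark 1.6.x, SQLContext and DataFrameReader.json are the standard entry points for JSON.
import org.apache.spark.sql.SQLContext

val sqlContext = new SQLContext(sc)

// read.json infers the schema by scanning the file; it requires one JSON record per line.
val df = sqlContext.read.json("E:/Dataset/Apps_for_Android_5.json")

df.printSchema()   // inspect the inferred schema
df.show(5)         // preview a few rows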