I am trying to load XML files through Cloud Data Fusion into a BigQuery data sink. I am unable to load this particular XML file; the pipeline fails with the error below.
```
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, cdap-promopocp-c439e4ad-06e4-11ea-9714-ded3d232b4e4-w-1.c.p-asna-analytics-002.internal, executor 2): org.apache.spark.SparkException: Task failed while writing rows
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:109)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
	at io.cdap.plugin.gcp.bigquery.sink.AvroRecordWriter.close(AvroRecordWriter.java:104)
	at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.closeWriter(SparkHadoopWriter.scala:361)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:137)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1415)
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139)
	... 8 more
	Suppressed: java.lang.NullPointerException
		at io.cdap.plugin.gcp.bigquery.sink.AvroRecordWriter.close(AvroRecordWriter.java:104)
		at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.closeWriter(SparkHadoopWriter.scala:361)
		at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$1.apply$mcV$sp(SparkHadoopWriter.scala:142)
		at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1424)
		... 9 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1661) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1649) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1648) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) ~[scala-library-2.11.8.jar:na]
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) ~[scala-library-2.11.8.jar:na]
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1648) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at scala.Option.foreach(Option.scala:257) ~[scala-library-2.11.8.jar:na]
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1882) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1831) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1820) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) ~[spark-core_2.11-2.3.3.jar:na]
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034) ~[na:2.3.3]
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055) ~[na:2.3.3]
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087) ~[na:2.3.3]
	at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083) [spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081) [spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081) [spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) [spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) [spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) [spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1081) [spark-core_2.11-2.3.3.jar:2.3.3]
	at io.cdap.cdap.app.runtime.spark.DefaultSparkExecutionContext.saveAsNewAPIHadoopDataset(DefaultSparkExecutionContext.scala:44) [na:na]
	at io.cdap.cdap.app.runtime.spark.AbstractSparkExecutionContext$$anonfun$saveAsDataset$1.apply(AbstractSparkExecutionContext.scala:313) [na:na]
	at io.cdap.cdap.app.runtime.spark.AbstractSparkExecutionContext$$anonfun$saveAsDataset$1.apply(AbstractSparkExecutionContext.scala:304) [na:na]
	at io.cdap.cdap.app.runtime.spark.AbstractSparkExecutionContext$$anon$7.run(AbstractSparkExecutionContext.scala:451) [na:na]
	at io.cdap.cdap.app.runtime.spark.SparkTransactional.execute(SparkTransactional.java:208) [na:na]
	at io.cdap.cdap.app.runtime.spark.AbstractSparkExecutionContext.saveAsDataset(AbstractSparkExecutionContext.scala:442) [na:na]
	at io.cdap.cdap.app.runtime.spark.AbstractSparkExecutionContext.saveAsDataset(AbstractSparkExecutionContext.scala:304) [na:na]
	at io.cdap.cdap.app.runtime.spark.SerializableSparkExecutionContext.saveAsDataset(SerializableSparkExecutionContext.scala:68) [na:na]
	at io.cdap.cdap.app.runtime.spark.DefaultJavaSparkExecutionContext.saveAsDataset(DefaultJavaSparkExecutionContext.scala:125) [na:na]
	at io.cdap.cdap.app.runtime.spark.DefaultJavaSparkExecutionContext.saveAsDataset(DefaultJavaSparkExecutionContext.scala:117) [na:na]
	at io.cdap.cdap.etl.spark.batch.SparkBatchSinkFactory.writeFromRDD(SparkBatchSinkFactory.java:103) [hydrator-spark-core2_2.11-6.1.0.jar:na]
	at io.cdap.cdap.etl.spark.batch.RDDCollection$1.run(RDDCollection.java:179) [hydrator-spark-core2_2.11-6.1.0.jar:na]
	at io.cdap.cdap.etl.spark.SparkPipelineRunner.runPipeline(SparkPipelineRunner.java:350) [hydrator-spark-core2_2.11-6.1.0.jar:na]
	at io.cdap.cdap.etl.spark.batch.BatchSparkPipelineDriver.run(BatchSparkPipelineDriver.java:151) [hydrator-spark-core2_2.11-6.1.0.jar:na]
	at io.cdap.cdap.app.runtime.spark.SparkTransactional$2.run(SparkTransactional.java:236) [na:na]
	at io.cdap.cdap.app.runtime.spark.SparkTransactional.execute(SparkTransactional.java:208) [na:na]
	at io.cdap.cdap.app.runtime.spark.SparkTransactional.execute(SparkTransactional.java:138) [na:na]
	at io.cdap.cdap.app.runtime.spark.AbstractSparkExecutionContext.execute(AbstractSparkExecutionContext.scala:229) [na:na]
	at io.cdap.cdap.app.runtime.spark.SerializableSparkExecutionContext.execute(SerializableSparkExecutionContext.scala:60) [na:na]
	at io.cdap.cdap.app.runtime.spark.DefaultJavaSparkExecutionContext.execute(DefaultJavaSparkExecutionContext.scala:88) [na:na]
	at io.cdap.cdap.api.Transactionals.execute(Transactionals.java:63) [na:na]
	at io.cdap.cdap.etl.spark.batch.BatchSparkPipelineDriver.run(BatchSparkPipelineDriver.java:118) [hydrator-spark-core2_2.11-6.1.0.jar:na]
	at io.cdap.cdap.app.runtime.spark.SparkMainWrapper$.main(SparkMainWrapper.scala:86) [na:na]
	at io.cdap.cdap.app.runtime.spark.SparkMainWrapper.main(SparkMainWrapper.scala) [na:na]
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_222]
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_222]
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_222]
	at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_222]
	at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$4.run(ApplicationMaster.scala:721) [spark-yarn_2.11-2.3.3.jar:2.3.3]
Caused by: org.apache.spark.SparkException: Task failed while writing rows
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:155) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.scheduler.Task.run(Task.scala:109) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[na:1.8.0_222]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[na:1.8.0_222]
	at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_222]
Caused by: java.lang.NullPointerException: null
	at io.cdap.plugin.gcp.bigquery.sink.AvroRecordWriter.close(AvroRecordWriter.java:104) ~[1573739273851-0/:na]
	at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.closeWriter(SparkHadoopWriter.scala:361) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:137) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:127) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1415) ~[spark-core_2.11-2.3.3.jar:na]
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:139) ~[spark-core_2.11-2.3.3.jar:2.3.3]
	... 8 common frames omitted
	Suppressed: java.lang.NullPointerException: null
		at io.cdap.plugin.gcp.bigquery.sink.AvroRecordWriter.close(AvroRecordWriter.java:104) ~[1573739273851-0/:na]
		at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.closeWriter(SparkHadoopWriter.scala:361) ~[spark-core_2.11-2.3.3.jar:2.3.3]
		at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$1.apply$mcV$sp(SparkHadoopWriter.scala:142) ~[spark-core_2.11-2.3.3.jar:2.3.3]
		at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1424) ~[spark-core_2.11-2.3.3.jar:na]
		... 9 common frames omitted
```