从Spark流媒体作业编写JSON时,我会间歇性地获得以下异常。这项工作是写入Azure blob存储。 可能导致此错误的原因是什么?
写作代码是:
df.write.mode(SaveMode.Append).json(rawOutputFile)
调用堆栈:
org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:261)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException
at com.microsoft.azure.storage.core.Utility.initIOException(Utility.java:733)
at com.microsoft.azure.storage.blob.BlobOutputStreamInternal.writeBlock(BlobOutputStreamInternal.java:443)
at com.microsoft.azure.storage.blob.BlobOutputStreamInternal.access$000(BlobOutputStreamInternal.java:52)
at com.microsoft.azure.storage.blob.BlobOutputStreamInternal$1.call(BlobOutputStreamInternal.java:387)
at com.microsoft.azure.storage.blob.BlobOutputStreamInternal$1.call(BlobOutputStreamInternal.java:384)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
... 3 more
Suppressed: java.lang.IllegalArgumentException: Self-suppression not permitted
at java.lang.Throwable.addSuppressed(Throwable.java:1043)
at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
at org.apache.hadoop.fs.azure.NativeAzureFileSystem$NativeAzureFsOutputStream.close(NativeAzureFileSystem.java:949)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat$LineRecordWriter.close(TextOutputFormat.java:111)
at org.apache.spark.sql.execution.datasources.json.JsonOutputWriter.close(JsonFileFormat.scala:199)
at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.org$apache$spark$sql$execution$datasources$DefaultWriterContainer$$abortTask$1(WriterContainer.scala:282)
at org.apache.spark.sql.execution.datasources.DefaultWriterContainer$$anonfun$writeRows$2.apply$mcV$sp(WriterContainer.scala:258)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1354)
at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:258)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
... 3 more
[CIRCULAR REFERENCE:java.io.IOException]
Caused by: com.microsoft.azure.storage.StorageException: The server encountered an unknown failure:
at com.microsoft.azure.storage.StorageException.translateException(StorageException.java:101)
at com.microsoft.azure.storage.core.ExecutionEngine.executeWithRetry(ExecutionEngine.java:199)
at com.microsoft.azure.storage.blob.CloudBlockBlob.uploadBlockInternal(CloudBlockBlob.java:951)
at com.microsoft.azure.storage.blob.CloudBlockBlob.uploadBlock(CloudBlockBlob.java:923)
at com.microsoft.azure.storage.blob.BlobOutputStreamInternal.writeBlock(BlobOutputStreamInternal.java:437)
... 9 more
Caused by: java.io.IOException: Error writing to server
at sun.net.www.protocol.http.HttpURLConnection.writeRequests(HttpURLConnection.java:699)
at sun.net.www.protocol.http.HttpURLConnection.writeRequests(HttpURLConnection.java:711)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1567)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1474)
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
at com.microsoft.azure.storage.core.ExecutionEngine.executeWithRetry(ExecutionEngine.java:119)
... 12 more