PySpark copies data to S3 using the code below and throws an exception.
self.dataframe.coalesce(1).write.format(format).mode('overwrite').save(location)
Exception:
java.lang.OutOfMemoryError: unable to create new native thread
at java.lang.Thread.start(Thread.java:717)
at java.util.concurrent.ThreadPoolExecutor.addWorker(ThreadPoolExecutor.java:957)
at java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1367)
at java.util.concurrent.AbstractExecutorService.submit(AbstractExecutorService.java:134)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.create(UploadMonitor.java:117)
at com.amazonaws.services.s3.transfer.TransferManager.doUpload(TransferManager.java:680)
at com.amazonaws.services.s3.transfer.TransferManager.upload(TransferManager.java:529)
at org.apache.hadoop.fs.s3a.S3AFileSystem.putObject(S3AFileSystem.java:1075)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createEmptyObject(S3AFileSystem.java:1868)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createFakeDirectory(S3AFileSystem.java:1848)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createFakeDirectoryIfNecessary(S3AFileSystem.java:1353)
at org.apache.hadoop.fs.s3a.S3AFileSystem.innerDelete(S3AFileSystem.java:1317)
at org.apache.hadoop.fs.s3a.S3AFileSystem.delete(S3AFileSystem.java:1234)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.abortTask(FileOutputCommitter.java:611)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.abortTask(FileOutputCommitter.java:600)
at org.apache.hadoop.mapreduce.lib.output.DirectFileOutputCommitter.abortTask(DirectFileOutputCommitter.java:124)
at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.abortTask(HadoopMapReduceCommitProtocol.scala:231)
at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.abort(FileFormatDataWriter.scala:85)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$1.apply$mcV$sp(FileFormatWriter.scala:247)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1403)
... 11 more
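
For context, here is a minimal, self-contained sketch of how this write is typically invoked. The DataFrame, bucket, and variable names are illustrative placeholders, not the original code, and it assumes Hadoop's s3a connector is available on the classpath.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("s3-write-example").getOrCreate()

# Hypothetical small DataFrame standing in for self.dataframe
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "value"])

output_format = "parquet"                 # corresponds to `format` in the snippet above
location = "s3a://my-bucket/output/path"  # corresponds to `location` in the snippet above

# coalesce(1) forces all data through a single task/output file, as in the original snippet
df.coalesce(1).write.format(output_format).mode("overwrite").save(location)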