我收到了这个错误,我想这是关于内存限制的。您知道如何更改设置以避免此错误吗?
我正在运行一个包含10个节点的集群,每个节点61GB。这是我用来运行作业的命令:
spark-submit --deploy-mode client --executor-cores 5 --executor-memory 25g --driver-memory 40g adsquare-grid-join-postcode_lookup4-allfolder.py
日志中的错误:
ERROR TransportClient: Failed to send RPC 5779186781068965319 to /172.31.29.229:47384: java.nio.channels.ClosedChannelException
java.nio.channels.ClosedChannelException
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(...)(Unknown Source)
17/04/30 11:52:33 WARN BlockManagerMaster: Failed to remove broadcast 15 with removeFromMaster = true - Failed to send RPC 5779186781068965319 to /172.31.29.229:47384: java.nio.channels.ClosedChannelException
java.io.IOException: Failed to send RPC 5779186781068965319 to /172.31.29.229:47384: java.nio.channels.ClosedChannelException
at org.apache.spark.network.client.TransportClient$3.operationComplete(TransportClient.java:249)
at org.apache.spark.network.client.TransportClient$3.operationComplete(TransportClient.java:233)
at io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:514)
at io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:488)
at io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:427)
at io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:129)
at io.netty.channel.AbstractChannel$AbstractUnsafe.safeSetFailure(AbstractChannel.java:852)
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:738)
at io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1251)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:743)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite(AbstractChannelHandlerContext.java:735)
at io.netty.channel.AbstractChannelHandlerContext.access$1900(AbstractChannelHandlerContext.java:36)
at io.netty.channel.AbstractChannelHandlerContext$AbstractWriteTask.write(AbstractChannelHandlerContext.java:1072)
at io.netty.channel.AbstractChannelHandlerContext$WriteAndFlushTask.write(AbstractChannelHandlerContext.java:1126)
at io.netty.channel.AbstractChannelHandlerContext$AbstractWriteTask.run(AbstractChannelHandlerContext.java:1061)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:408)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:455)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.nio.channels.ClosedChannelException
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(...)(Unknown Source)
17/04/30 11:52:33 ERROR ContextCleaner: Error cleaning broadcast 15
org.apache.spark.SparkException: Exception thrown in awaitResult
at org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:77)
at org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:75)
at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59)
at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59)
at scala.PartialFunction$OrElse.apply(PartialFunction.scala:167)
at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:83)
at org.apache.spark.storage.BlockManagerMaster.removeBroadcast(BlockManagerMaster.scala:151)
at org.apache.spark.broadcast.TorrentBroadcast$.unpersist(TorrentBroadcast.scala:299)
at org.apache.spark.broadcast.TorrentBroadcastFactory.unbroadcast(TorrentBroadcastFactory.scala:45)
at org.apache.spark.broadcast.BroadcastManager.unbroadcast(BroadcastManager.scala:60)
at org.apache.spark.ContextCleaner.doCleanupBroadcast(ContextCleaner.scala:232)
at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$2.apply(ContextCleaner.scala:188)
at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1$$anonfun$apply$mcV$sp$2.apply(ContextCleaner.scala:179)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:179)
at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1245)
at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:172)
at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:67)
Caused by: java.io.IOException: Failed to send RPC 5779186781068965319 to /172.31.29.229:47384: java.nio.channels.ClosedChannelException
at org.apache.spark.network.client.TransportClient$3.operationComplete(TransportClient.java:249)
at org.apache.spark.network.client.TransportClient$3.operationComplete(TransportClient.java:233)
at io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:514)
at io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:488)
at io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:427)
at io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:129)
at io.netty.channel.AbstractChannel$AbstractUnsafe.safeSetFailure(AbstractChannel.java:852)
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:738)
at io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1251)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:743)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite(AbstractChannelHandlerContext.java:735)
at io.netty.channel.AbstractChannelHandlerContext.access$1900(AbstractChannelHandlerContext.java:36)
at io.netty.channel.AbstractChannelHandlerContext$AbstractWriteTask.write(AbstractChannelHandlerContext.java:1072)
at io.netty.channel.AbstractChannelHandlerContext$WriteAndFlushTask.write(AbstractChannelHandlerContext.java:1126)
at io.netty.channel.AbstractChannelHandlerContext$AbstractWriteTask.run(AbstractChannelHandlerContext.java:1061)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:408)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:455)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.nio.channels.ClosedChannelException
at io.netty.channel.AbstractChannel$AbstractUnsafe.write(...)(Unknown Source)