I have a 24x7 streaming job that ran fine for the first 44 hours, after which all the executors were suddenly removed from the cluster with the following error:
java.net.BindException: Cannot assign requested address
at sun.nio.ch.Net.connect0(Native Method)
at sun.nio.ch.Net.connect(Net.java:465)
at sun.nio.ch.Net.connect(Net.java:457)
at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:670)
at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3519)
at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:840)
at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:755)
at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:376)
at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:662)
at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:889)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:942)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:742)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:66)
at parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:419)
at parquet.hadoop.ParquetFileReader$2.call(ParquetFileReader.java:238)
at parquet.hadoop.ParquetFileReader$2.call(ParquetFileReader.java:234)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
[2017-01-18 03:11:15,943] WARN Failed to connect to /hostname:50010 for block, add to deadNodes and continue. java.net.BindException: Cannot assign requested address (org.apache.hadoop.hdfs.DFSClient)
java.net.BindException: Cannot assign requested address
        ... (same stack trace as above)
I do a lot of join operations in the streaming job, and I can also see that this is not getting cleared with every batch. I am not using any cache() calls in it either. I tried setting spark.cleaner.ttl, but saw no impact. I have attached the Executors section of the Spark UI.
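For context, this is roughly how the TTL was set (a minimal sketch; the app name, the 3600-second TTL, and the 60-second batch interval are placeholders, not my actual values):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingJobSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("my-streaming-job")    // hypothetical app name
      .set("spark.cleaner.ttl", "3600")  // illustrative TTL in seconds; cleans old RDD/shuffle metadata, not sockets
    val ssc = new StreamingContext(conf, Seconds(60)) // hypothetical 60s batch interval

    // ... DStream creation and the per-batch join operations go here ...

    ssc.start()
    ssc.awaitTermination()
  }
}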
What is the cause of this behavior, and what can I do to overcome this issue?