java.lang.ArrayIndexOutOfBoundsException
com.ning.compress.lzf.impl.UnsafeChunkDecoder.copyOverlappingLong
处的任务失败
并且在重复任务失败后终止了整个工作。
几天前没有任何问题。
目前我们怀疑集群出现故障,因为从昨天起Ambari(Hortonworks Hadoop集群管理器)报告了损坏的块,节点问题等。
即使在完成集群重新安装后,问题仍然存在。 (我们的集群很小 - 只有3个数据节点)所以,这可能是硬件问题。
这可能是网络问题吗?还是磁盘?
详细日志如下:
14/07/10 16:33:58 INFO Main: Initializing context
--args is deprecated. Use --arg instead.
14/07/10 16:34:04 INFO Main: Processing
14/07/10 16:37:32 ERROR YarnClientClusterScheduler: Lost executor 4 on cluster04: remote Akka client disassociated
14/07/10 16:37:32 WARN TaskSetManager: Lost TID 304 (task 1.0:304)
14/07/10 16:37:32 WARN TaskSetManager: Lost TID 303 (task 1.0:303)
14/07/10 16:52:44 WARN TaskSetManager: Lost TID 1614 (task 0.0:359)
14/07/10 16:52:44 WARN TaskSetManager: Loss was due to java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException: 65535
at com.ning.compress.lzf.impl.UnsafeChunkDecoder.copyOverlappingLong(UnsafeChunkDecoder.java:221)
at com.ning.compress.lzf.impl.UnsafeChunkDecoder.decodeChunk(UnsafeChunkDecoder.java:117)
at com.ning.compress.lzf.impl.UnsafeChunkDecoder.decodeChunk(UnsafeChunkDecoder.java:66)
at com.ning.compress.lzf.LZFInputStream.readyBuffer(LZFInputStream.java:339)
at com.ning.compress.lzf.LZFInputStream.read(LZFInputStream.java:169)
at java.io.ObjectInputStream$PeekInputStream.read(ObjectInputStream.java:2310)
at java.io.ObjectInputStream$PeekInputStream.readFully(ObjectInputStream.java:2323)
at java.io.ObjectInputStream$BlockDataInputStream.readUTFBody(ObjectInputStream.java:3063)
at java.io.ObjectInputStream$BlockDataInputStream.readUTF(ObjectInputStream.java:2864)
at java.io.ObjectInputStream.readString(ObjectInputStream.java:1638)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1341)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:63)
at org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:125)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
at org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:58)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$4.apply(PairRDDFunctions.scala:107)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$4.apply(PairRDDFunctions.scala:106)
at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:582)
at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:582)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
at org.apache.spark.scheduler.Task.run(Task.scala:51)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
14/07/10 16:52:45 WARN TaskSetManager: Lost TID 1617 (task 0.0:359)
14/07/10 16:52:54 WARN TaskSetManager: Lost TID 1620 (task 0.0:359)
14/07/10 16:52:54 WARN TaskSetManager: Loss was due to java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException
14/07/10 16:52:56 WARN TaskSetManager: Lost TID 1622 (task 0.0:359)
14/07/10 16:52:56 WARN TaskSetManager: Loss was due to java.lang.ArrayIndexOutOfBoundsException
java.lang.ArrayIndexOutOfBoundsException: 65535
at com.ning.compress.lzf.impl.UnsafeChunkDecoder.copyOverlappingLong(UnsafeChunkDecoder.java:221)
at com.ning.compress.lzf.impl.UnsafeChunkDecoder.decodeChunk(UnsafeChunkDecoder.java:117)
at com.ning.compress.lzf.impl.UnsafeChunkDecoder.decodeChunk(UnsafeChunkDecoder.java:66)
at com.ning.compress.lzf.LZFInputStream.readyBuffer(LZFInputStream.java:339)
at com.ning.compress.lzf.LZFInputStream.read(LZFInputStream.java:169)
at java.io.ObjectInputStream$PeekInputStream.read(ObjectInputStream.java:2310)
at java.io.ObjectInputStream$PeekInputStream.readFully(ObjectInputStream.java:2323)
at java.io.ObjectInputStream$BlockDataInputStream.readUTFBody(ObjectInputStream.java:3063)
at java.io.ObjectInputStream$BlockDataInputStream.readUTF(ObjectInputStream.java:2864)
at java.io.ObjectInputStream.readString(ObjectInputStream.java:1638)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1341)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:63)
at org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:125)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
at org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:58)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$4.apply(PairRDDFunctions.scala:107)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$4.apply(PairRDDFunctions.scala:106)
at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:582)
at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:582)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
at org.apache.spark.scheduler.Task.run(Task.scala:51)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
14/07/10 16:52:56 ERROR TaskSetManager: Task 0.0:359 failed 4 times; aborting job
14/07/10 16:52:56 WARN TaskSetManager: Task 1599 was killed.
14/07/10 16:52:58 INFO Main: Clearing intermediate directory
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:359 failed 4 times, most recent failure: Exception failure in TID 1622 on host cluster02: java.lang.ArrayIndexOutOfBoundsException: 65535
com.ning.compress.lzf.impl.UnsafeChunkDecoder.copyOverlappingLong(UnsafeChunkDecoder.java:221)
com.ning.compress.lzf.impl.UnsafeChunkDecoder.decodeChunk(UnsafeChunkDecoder.java:117)
com.ning.compress.lzf.impl.UnsafeChunkDecoder.decodeChunk(UnsafeChunkDecoder.java:66)
com.ning.compress.lzf.LZFInputStream.readyBuffer(LZFInputStream.java:339)
com.ning.compress.lzf.LZFInputStream.read(LZFInputStream.java:169)
java.io.ObjectInputStream$PeekInputStream.read(ObjectInputStream.java:2310)
java.io.ObjectInputStream$PeekInputStream.readFully(ObjectInputStream.java:2323)
java.io.ObjectInputStream$BlockDataInputStream.readUTFBody(ObjectInputStream.java:3063)
java.io.ObjectInputStream$BlockDataInputStream.readUTF(ObjectInputStream.java:2864)
java.io.ObjectInputStream.readString(ObjectInputStream.java:1638)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1341)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:63)
org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:125)
org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30)
org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:58)
org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$4.apply(PairRDDFunctions.scala:107)
org.apache.spark.rdd.PairRDDFunctions$$anonfun$combineByKey$4.apply(PairRDDFunctions.scala:106)
org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:582)
org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:582)
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
org.apache.spark.scheduler.Task.run(Task.scala:51)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
java.lang.Thread.run(Thread.java:744)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1033)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1017)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1015)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1015)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:633)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:633)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:633)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1207)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Process finished with exit code 1
答案 0 :(得分:0)
当我更换可疑数据节点并重新安装群集时,问题就消失了。
我怀疑该数据节点的磁盘出现故障,因此从该磁盘读取的数据已损坏,解码器无法处理无效的输入数据。