我试图使用spark streaming和kafka,但是当它收到消息时会触发错误。当它没有收到消息时没有错误。我无法找出错误原因。请在下面找到详细信息:
java
package org.cosmob.ccc.test;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.cosmob.ccc.spark.SparkConnect;
import org.apache.spark.streaming.Duration;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.regex.Pattern;
import com.google.common.collect.Lists;
/**
 * Minimal Spark Streaming + Kafka consumer: subscribes to one Kafka topic and
 * prints each received message payload once per 1-second batch.
 *
 * NOTE(review): the {@code ClassNotFoundException: org.cosmob.ccc.test.Test$1}
 * seen in the log means the executors cannot load the (formerly anonymous)
 * Function class — the application jar with its transitive dependencies must
 * be shipped to the cluster (e.g. an uber-jar passed to spark-submit).
 * Renaming the class does not remove that packaging requirement, but it makes
 * the serialized task reference a stable, explicit class name.
 */
public class Test implements Serializable {
    private static final long serialVersionUID = -6483717103605642290L;

    // Project-local wrapper that supplies the SparkContext; its behavior is
    // not visible here — assumed to produce a fully configured context.
    private SparkConnect sparkConnect;
    private JavaStreamingContext jssc;

    public Test() {
        sparkConnect = new SparkConnect();
        // Batch interval of 1000 ms.
        jssc = new JavaStreamingContext(sparkConnect.getCtx(), new Duration(1000));
    }

    /** @return the streaming context created in the constructor. */
    public JavaStreamingContext getStreamContext() {
        return jssc;
    }

    /**
     * Extracts the message value (second element) from the Kafka
     * (key, value) tuple. Declared as a named static nested class instead of
     * an anonymous class so the class shipped to executors has an explicit
     * name ({@code Test$MessageExtractor}) — it still must be present in the
     * deployed application jar.
     */
    private static final class MessageExtractor
            implements Function<Tuple2<String, String>, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    }

    public static void main(String[] args) {
        Test tt = new Test();
        String zkQuorum = "172.16.201.173";
        String topic = "test";
        String group = "me";
        int numThreads = 1;

        // Map of topic name -> number of consumer threads for the receiver.
        Map<String, Integer> topicMap = new HashMap<String, Integer>();
        topicMap.put(topic, numThreads);

        JavaPairReceiverInputDStream<String, String> messages =
                KafkaUtils.createStream(tt.getStreamContext(), zkQuorum, group, topicMap);

        // Keep only the message payloads; keys are discarded.
        JavaDStream<String> lines = messages.map(new MessageExtractor());
        lines.print();

        tt.getStreamContext().start();
        // Blocks the driver until the streaming job is stopped or fails.
        tt.getStreamContext().awaitTermination();
    }
}
错误
15/11/05 15:04:02 INFO scheduler.ReceiverTracker: Stream 0 received 0 blocks
-------------------------------------------
Time: 1446725042000 ms
-------------------------------------------
15/11/05 15:04:02 INFO scheduler.JobScheduler: Added jobs for time 1446725042000 ms
15/11/05 15:04:02 INFO scheduler.JobScheduler: Starting job streaming job 1446725042000 ms.0 from job set of time 1446725042000 ms
15/11/05 15:04:02 INFO scheduler.JobScheduler: Finished job streaming job 1446725042000 ms.0 from job set of time 1446725042000 ms
15/11/05 15:04:02 INFO scheduler.JobScheduler: Total delay: 0.006 s for time 1446725042000 ms (execution: 0.000 s)
15/11/05 15:04:02 INFO rdd.MappedRDD: Removing RDD 23 from persistence list
15/11/05 15:04:02 INFO storage.BlockManager: Removing RDD 23
15/11/05 15:04:02 INFO rdd.BlockRDD: Removing RDD 22 from persistence list
15/11/05 15:04:02 INFO storage.BlockManager: Removing RDD 22
15/11/05 15:04:02 INFO kafka.KafkaInputDStream: Removing blocks of RDD BlockRDD[22] at BlockRDD at ReceiverInputDStream.scala:69 of time 1446725042000 ms
15/11/05 15:04:03 INFO scheduler.ReceiverTracker: Stream 0 received 0 blocks
-------------------------------------------
Time: 1446725043000 ms
-------------------------------------------
15/11/05 15:04:03 INFO scheduler.JobScheduler: Added jobs for time 1446725043000 ms
15/11/05 15:04:03 INFO scheduler.JobScheduler: Starting job streaming job 1446725043000 ms.0 from job set of time 1446725043000 ms
15/11/05 15:04:03 INFO scheduler.JobScheduler: Finished job streaming job 1446725043000 ms.0 from job set of time 1446725043000 ms
15/11/05 15:04:03 INFO scheduler.JobScheduler: Total delay: 0.004 s for time 1446725043000 ms (execution: 0.000 s)
15/11/05 15:04:03 INFO rdd.MappedRDD: Removing RDD 25 from persistence list
15/11/05 15:04:03 INFO storage.BlockManager: Removing RDD 25
15/11/05 15:04:03 INFO rdd.BlockRDD: Removing RDD 24 from persistence list
15/11/05 15:04:03 INFO storage.BlockManager: Removing RDD 24
15/11/05 15:04:03 INFO kafka.KafkaInputDStream: Removing blocks of RDD BlockRDD[24] at BlockRDD at ReceiverInputDStream.scala:69 of time 1446725043000 ms
15/11/05 15:04:03 INFO storage.BlockManagerInfo: Added input-0-1446725042800 in memory on qmic-tag-s05.quwic.org:53916 (size: 77.0 B, free: 258.8 MB)
15/11/05 15:04:03 INFO storage.MemoryStore: ensureFreeSpace(77) called with curMem=13135, maxMem=996566630
15/11/05 15:04:03 INFO storage.MemoryStore: Block input-0-1446725042800 stored as bytes in memory (estimated size 77.0 B, free 950.4 MB)
15/11/05 15:04:03 INFO storage.BlockManagerInfo: Added input-0-1446725042800 in memory on wassimd.QUWIC.LOCAL:62993 (size: 77.0 B, free: 950.4 MB)
15/11/05 15:04:03 INFO storage.BlockManagerMaster: Updated info of block input-0-1446725042800
15/11/05 15:04:04 INFO scheduler.ReceiverTracker: Stream 0 received 1 blocks
15/11/05 15:04:04 INFO scheduler.JobScheduler: Added jobs for time 1446725044000 ms
15/11/05 15:04:04 INFO scheduler.JobScheduler: Starting job streaming job 1446725044000 ms.0 from job set of time 1446725044000 ms
15/11/05 15:04:04 INFO spark.SparkContext: Starting job: take at DStream.scala:608
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Got job 3 (take at DStream.scala:608) with 1 output partitions (allowLocal=true)
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Final stage: Stage 4(take at DStream.scala:608)
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Parents of final stage: List()
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Missing parents: List()
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Submitting Stage 4 (MappedRDD[29] at map at MappedDStream.scala:35), which has no missing parents
15/11/05 15:04:04 INFO storage.MemoryStore: ensureFreeSpace(1872) called with curMem=13212, maxMem=996566630
15/11/05 15:04:04 INFO storage.MemoryStore: Block broadcast_4 stored as values in memory (estimated size 1872.0 B, free 950.4 MB)
15/11/05 15:04:04 INFO storage.MemoryStore: ensureFreeSpace(1260) called with curMem=15084, maxMem=996566630
15/11/05 15:04:04 INFO storage.MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 1260.0 B, free 950.4 MB)
15/11/05 15:04:04 INFO storage.BlockManagerInfo: Added broadcast_4_piece0 in memory on wassimd.QUWIC.LOCAL:62993 (size: 1260.0 B, free: 950.4 MB)
15/11/05 15:04:04 INFO storage.BlockManagerMaster: Updated info of block broadcast_4_piece0
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from Stage 4 (MappedRDD[29] at map at MappedDStream.scala:35)
15/11/05 15:04:04 INFO scheduler.TaskSchedulerImpl: Adding task set 4.0 with 1 tasks
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 4.0 (TID 75, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:04 INFO storage.BlockManagerInfo: Added input-0-1446725043800 in memory on qmic-tag-s05.quwic.org:53916 (size: 78.0 B, free: 258.8 MB)
15/11/05 15:04:04 INFO storage.MemoryStore: ensureFreeSpace(78) called with curMem=16344, maxMem=996566630
15/11/05 15:04:04 INFO storage.MemoryStore: Block input-0-1446725043800 stored as bytes in memory (estimated size 78.0 B, free 950.4 MB)
15/11/05 15:04:04 INFO storage.BlockManagerInfo: Added input-0-1446725043800 in memory on wassimd.QUWIC.LOCAL:62993 (size: 78.0 B, free: 950.4 MB)
15/11/05 15:04:04 INFO storage.BlockManagerMaster: Updated info of block input-0-1446725043800
15/11/05 15:04:04 INFO storage.BlockManagerInfo: Added broadcast_4_piece0 in memory on qmic-tag-s05.quwic.org:53916 (size: 1260.0 B, free: 258.8 MB)
15/11/05 15:04:04 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 4.0 (TID 75, qmic-tag-s05.quwic.org): java.lang.ClassNotFoundException: org.cosmob.ccc.test.Test$1
java.net.URLClassLoader$1.run(URLClassLoader.java:372)
java.net.URLClassLoader$1.run(URLClassLoader.java:361)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader.findClass(URLClassLoader.java:360)
java.lang.ClassLoader.loadClass(ClassLoader.java:424)
java.lang.ClassLoader.loadClass(ClassLoader.java:357)
java.lang.Class.forName0(Native Method)
java.lang.Class.forName(Class.java:344)
org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:59)
java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62)
org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:87)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:57)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Starting task 0.1 in stage 4.0 (TID 76, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Lost task 0.1 in stage 4.0 (TID 76) on executor qmic-tag-s05.quwic.org: java.lang.ClassNotFoundException (org.cosmob.ccc.test.Test$1) [duplicate 1]
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Starting task 0.2 in stage 4.0 (TID 77, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Lost task 0.2 in stage 4.0 (TID 77) on executor qmic-tag-s05.quwic.org: java.lang.ClassNotFoundException (org.cosmob.ccc.test.Test$1) [duplicate 2]
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Starting task 0.3 in stage 4.0 (TID 78, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:04 INFO scheduler.TaskSetManager: Lost task 0.3 in stage 4.0 (TID 78) on executor qmic-tag-s05.quwic.org: java.lang.ClassNotFoundException (org.cosmob.ccc.test.Test$1) [duplicate 3]
15/11/05 15:04:04 ERROR scheduler.TaskSetManager: Task 0 in stage 4.0 failed 4 times; aborting job
15/11/05 15:04:04 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool
15/11/05 15:04:04 INFO scheduler.TaskSchedulerImpl: Cancelling stage 4
15/11/05 15:04:04 INFO scheduler.DAGScheduler: Failed to run take at DStream.scala:608
15/11/05 15:04:04 ERROR scheduler.JobScheduler: Error running job streaming job 1446725044000 ms.0
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 4.0 failed 4 times, most recent failure: Lost task 0.3 in stage 4.0 (TID 78, qmic-tag-s05.quwic.org): java.lang.ClassNotFoundException: org.cosmob.ccc.test.Test$1
java.net.URLClassLoader$1.run(URLClassLoader.java:372)
java.net.URLClassLoader$1.run(URLClassLoader.java:361)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader.findClass(URLClassLoader.java:360)
java.lang.ClassLoader.loadClass(ClassLoader.java:424)
java.lang.ClassLoader.loadClass(ClassLoader.java:357)
java.lang.Class.forName0(Native Method)
java.lang.Class.forName(Class.java:344)
org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:59)
java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62)
org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:87)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:57)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
15/11/05 15:04:05 INFO scheduler.ReceiverTracker: Stream 0 received 1 blocks
15/11/05 15:04:05 INFO scheduler.JobScheduler: Added jobs for time 1446725045000 ms
15/11/05 15:04:05 INFO scheduler.JobScheduler: Starting job streaming job 1446725045000 ms.0 from job set of time 1446725045000 ms
15/11/05 15:04:05 INFO spark.SparkContext: Starting job: take at DStream.scala:608
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Got job 4 (take at DStream.scala:608) with 1 output partitions (allowLocal=true)
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Final stage: Stage 5(take at DStream.scala:608)
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Parents of final stage: List()
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Missing parents: List()
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Submitting Stage 5 (MappedRDD[31] at map at MappedDStream.scala:35), which has no missing parents
15/11/05 15:04:05 INFO storage.MemoryStore: ensureFreeSpace(1872) called with curMem=16422, maxMem=996566630
15/11/05 15:04:05 INFO storage.MemoryStore: Block broadcast_5 stored as values in memory (estimated size 1872.0 B, free 950.4 MB)
15/11/05 15:04:05 INFO storage.MemoryStore: ensureFreeSpace(1260) called with curMem=18294, maxMem=996566630
15/11/05 15:04:05 INFO storage.MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 1260.0 B, free 950.4 MB)
15/11/05 15:04:05 INFO storage.BlockManagerInfo: Added broadcast_5_piece0 in memory on wassimd.QUWIC.LOCAL:62993 (size: 1260.0 B, free: 950.4 MB)
15/11/05 15:04:05 INFO storage.BlockManagerMaster: Updated info of block broadcast_5_piece0
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from Stage 5 (MappedRDD[31] at map at MappedDStream.scala:35)
15/11/05 15:04:05 INFO scheduler.TaskSchedulerImpl: Adding task set 5.0 with 1 tasks
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 5.0 (TID 79, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:05 INFO storage.BlockManagerInfo: Added broadcast_5_piece0 in memory on qmic-tag-s05.quwic.org:53916 (size: 1260.0 B, free: 258.8 MB)
15/11/05 15:04:05 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 5.0 (TID 79, qmic-tag-s05.quwic.org): java.lang.ClassNotFoundException: org.cosmob.ccc.test.Test$1
java.net.URLClassLoader$1.run(URLClassLoader.java:372)
java.net.URLClassLoader$1.run(URLClassLoader.java:361)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader.findClass(URLClassLoader.java:360)
java.lang.ClassLoader.loadClass(ClassLoader.java:424)
java.lang.ClassLoader.loadClass(ClassLoader.java:357)
java.lang.Class.forName0(Native Method)
java.lang.Class.forName(Class.java:344)
org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:59)
java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62)
org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:87)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:57)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Starting task 0.1 in stage 5.0 (TID 80, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Lost task 0.1 in stage 5.0 (TID 80) on executor qmic-tag-s05.quwic.org: java.lang.ClassNotFoundException (org.cosmob.ccc.test.Test$1) [duplicate 1]
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Starting task 0.2 in stage 5.0 (TID 81, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Lost task 0.2 in stage 5.0 (TID 81) on executor qmic-tag-s05.quwic.org: java.lang.ClassNotFoundException (org.cosmob.ccc.test.Test$1) [duplicate 2]
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Starting task 0.3 in stage 5.0 (TID 82, qmic-tag-s05.quwic.org, NODE_LOCAL, 1071 bytes)
15/11/05 15:04:05 INFO scheduler.TaskSetManager: Lost task 0.3 in stage 5.0 (TID 82) on executor qmic-tag-s05.quwic.org: java.lang.ClassNotFoundException (org.cosmob.ccc.test.Test$1) [duplicate 3]
15/11/05 15:04:05 ERROR scheduler.TaskSetManager: Task 0 in stage 5.0 failed 4 times; aborting job
15/11/05 15:04:05 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool
15/11/05 15:04:05 INFO scheduler.TaskSchedulerImpl: Cancelling stage 5
15/11/05 15:04:05 INFO scheduler.DAGScheduler: Failed to run take at DStream.scala:608
15/11/05 15:04:05 ERROR scheduler.JobScheduler: Error running job streaming job 1446725045000 ms.0
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 5.0 failed 4 times, most recent failure: Lost task 0.3 in stage 5.0 (TID 82, qmic-tag-s05.quwic.org): java.lang.ClassNotFoundException: org.cosmob.ccc.test.Test$1
java.net.URLClassLoader$1.run(URLClassLoader.java:372)
java.net.URLClassLoader$1.run(URLClassLoader.java:361)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader.findClass(URLClassLoader.java:360)
java.lang.ClassLoader.loadClass(ClassLoader.java:424)
java.lang.ClassLoader.loadClass(ClassLoader.java:357)
java.lang.Class.forName0(Native Method)
java.lang.Class.forName(Class.java:344)
org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:59)
java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62)
org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:87)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:57)
org.apache.spark.scheduler.Task.run(Task.scala:54)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
答案 0（得分：2）
您需要在 uber-jar 中包含您的传递依赖项（此处为 org.cosmob.ccc.spark），然后再将其传递给 spark-submit。
您可以找到有关此错误的更多信息here。如果您使用sbt,则可以使用sbt-assembly解决此问题,如果您使用maven,请考虑maven-shade。
如果你正在使用Eclipse,它的答案是一样的:Eclipse必须以同样的方式构建一个超级jar。似乎主题是addressed on SO。