I'm trying to pass an Akka actor to Spark as an actorStream, like this:
package com.blah.spark

import akka.actor.{ Actor, Props, ActorLogging, Cancellable, ActorRef }
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.receiver._
import com.typesafe.config.ConfigFactory
import scala.collection.JavaConversions._
import java.util.concurrent._
import scala.concurrent.duration._

object BlahSpark {
  def main(args: Array[String]) {
    val conf = new SparkConf(false) // skip loading external settings
      .setAppName("blah")
      .set("spark.logConf", "true")
      // .set("spark.driver.port", sparkPort.toString)
      // .set("spark.driver.host", sparkHost)
      .set("spark.akka.logLifecycleEvents", "true")
    val ssc = new StreamingContext(conf, Seconds(1))
    val actorName = "Producer"
    val actorStream = ssc.actorStream[List[(String, Any)]](Props[Producer1], actorName)
    actorStream.foreachRDD { r => println(r) }
    ssc.start()
  }
}

class Producer1 extends Actor with ActorHelper {
  override def preStart() = {
    println("=== Producer is starting up ===")
  }

  def receive = {
    case s =>
      store(s)
  }
}
I get a ClassNotFoundException:
16/05/27 11:37:04 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 70, ip-172-31-7-196.ap-southeast-1.compute.internal): java.io.IOException: java.lang.ClassNotFoundException: com.blah.spark.Producer1
at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1207)
at org.apache.spark.rdd.ParallelCollectionPartition.readObject(ParallelCollectionRDD.scala:70)
at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1909)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:194)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: com.blah.spark.Producer1
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:68)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1620)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
at java.io.ObjectInputStream.readClass(ObjectInputStream.java:1486)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1336)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readArray(ObjectInputStream.java:1714)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1347)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.defaultReadObject(ObjectInputStream.java:503)
at org.apache.spark.rdd.ParallelCollectionPartition$$anonfun$readObject$1.apply$mcV$sp(ParallelCollectionRDD.scala:74)
at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1204)
... 19 more
I build the app into a fat jar with sbt assembly, store it in spark/lib, and then run spark-submit ./bah-assembly.jar on Spark 1.6.1.
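For reference, a minimal build.sbt along the lines of that setup might look as follows; the question does not show the actual build file, so the name, versions, and explicit main class here are assumptions:

// build.sbt -- hypothetical reconstruction, not the asker's actual build file
name := "blah"

scalaVersion := "2.10.6" // the default Scala version for Spark 1.6.1

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"      % "1.6.1" % "provided",
  "org.apache.spark" %% "spark-streaming" % "1.6.1" % "provided"
)

// Lets spark-submit find the entry point without an explicit --class flag
mainClass in assembly := Some("com.blah.spark.BlahSpark")

// project/plugins.sbt would additionally need:
// addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")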
I can see the class inside the fat jar.
Where am I going wrong?
Answer 0 (score: 0)
Adding this to the Spark conf solved it:
.setJars(SparkContext.jarOfClass(this.getClass).toList)
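For context, here is a minimal sketch of the driver from the question with that line applied (only the conf changes; everything else is taken from the question):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object BlahSpark {
  def main(args: Array[String]) {
    val conf = new SparkConf(false)
      .setAppName("blah")
      .set("spark.logConf", "true")
      .set("spark.akka.logLifecycleEvents", "true")
      // Ship the jar containing this class to the executors so they can
      // deserialize com.blah.spark.Producer1 when tasks arrive.
      .setJars(SparkContext.jarOfClass(this.getClass).toList)

    val ssc = new StreamingContext(conf, Seconds(1))
    // ... actorStream setup exactly as in the question ...
    ssc.start()
  }
}

SparkContext.jarOfClass looks up the jar on the driver's classpath that contains the given class, so it only resolves correctly when the application is actually launched from the assembly jar.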