Spark actorStream ClassNotFoundException

Date: 2016-05-27 11:55:31

Tags: scala apache-spark spark-streaming

I'm trying to pass an Akka actor to Spark as an actorStream, like this:

package com.blah.spark

import akka.actor.{ Actor, Props, ActorLogging, Cancellable, ActorRef }
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.receiver._

import com.typesafe.config.ConfigFactory
import scala.collection.JavaConversions._

import java.util.concurrent._
import scala.concurrent.duration._

object BlahSpark {

  def main(args: Array[String]) {
    val conf = new SparkConf(false) // skip loading external settings
      .setAppName("blah")
      .set("spark.logConf", "true")
      // .set("spark.driver.port", sparkPort.toString)
      // .set("spark.driver.host", sparkHost)
      .set("spark.akka.logLifecycleEvents", "true")
    val ssc = new StreamingContext(conf, Seconds(1))
    val actorName = "Producer"

    val actorStream = ssc.actorStream[List[(String, Any)]](Props[Producer1], actorName)

    actorStream.foreachRDD{ r => println(r) }

    ssc.start()
    ssc.awaitTermination() // keep the driver alive while the stream runs
  }
}

class Producer1 extends Actor with ActorHelper {
  override def preStart() = {
    println("=== Producer is starting up ===")
  }

  def receive = {
    case s =>
      store(s)
  }
}

I get a ClassNotFoundException:

16/05/27 11:37:04 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 70, ip-172-31-7-196.ap-southeast-1.compute.internal): java.io.IOException: java.lang.ClassNotFoundException: com.blah.spark.Producer1
  at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1207)
  at org.apache.spark.rdd.ParallelCollectionPartition.readObject(ParallelCollectionRDD.scala:70)
  at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1909)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
  at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
  at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
  at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:194)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: com.blah.spark.Producer1
  at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
  at java.lang.Class.forName0(Native Method)
  at java.lang.Class.forName(Class.java:348)
  at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:68)
  at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1620)
  at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
  at java.io.ObjectInputStream.readClass(ObjectInputStream.java:1486)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1336)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
  at java.io.ObjectInputStream.readArray(ObjectInputStream.java:1714)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1347)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
  at java.io.ObjectInputStream.defaultReadObject(ObjectInputStream.java:503)
  at org.apache.spark.rdd.ParallelCollectionPartition$$anonfun$readObject$1.apply$mcV$sp(ParallelCollectionRDD.scala:74)
  at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1204)
  ... 19 more

I build the application into a fat jar with sbt assembly, place it in spark/lib, and then submit it with

spark-submit ./bah-assembly.jar

on Spark 1.6.1.

I can see the class inside the fat jar:

[screenshot: contents of the assembly jar, showing the Producer1 class]

Where am I going wrong?

1 answer:

Answer 0 (score: 0):

Adding this to the Spark conf solved it:

.setJars(SparkContext.jarOfClass(this.getClass).toList)
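For context, a minimal sketch of what the driver setup from the question looks like with that line added (it assumes the same imports as the question, org.apache.spark._ and org.apache.spark.streaming._, and is not the answerer's exact code):

// Sketch: the question's SparkConf with setJars added.
// SparkContext.jarOfClass(this.getClass) returns the path of the jar that
// contains the enclosing class (here, the assembly jar), and setJars tells
// the SparkContext to ship that jar to the executors so their classloaders
// can resolve com.blah.spark.Producer1 when deserializing tasks.
val conf = new SparkConf(false)
  .setAppName("blah")
  .set("spark.logConf", "true")
  .set("spark.akka.logLifecycleEvents", "true")
  .setJars(SparkContext.jarOfClass(this.getClass).toList)

val ssc = new StreamingContext(conf, Seconds(1))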