如何在群集独立Spark中修复ClassNotFoundException anonfun?

时间:2019-07-16 18:41:39

标签: scala apache-spark

我正在尝试在virtualbox中连接两个工作程序。环境spark 2.0.2 scala 2.11.7 sbt 0.13.9 IDE scala在eclipse .dataset文件中上传到HDFS.in webui母版中,应用程序已完成。 Spark在我的本地计算机上运行独立模式

  • virtualbox IDE中的ubuntu:Eclipse中的scala

build.sbt

name := "movielens"
 version := "1.0"
 scalaVersion := "2.11.7"
libraryDependencies += "org.apache.spark" %% "spark-core" % 2.0.2" % 
libraryDependencies += "org.apache.spark" %% "spark-mllib" % 2.0.2" % 
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.0.2"%
 libraryDependencies += "com.github.scopt" %% "scopt" % "3.3.0"%

我的代码

object trainModel {
  def main(args:Array[String]): Unit =
  {

   val conf = new SparkConf
    conf.setMaster("spark://192.168.1.8:7077")//local[*]
   .set("spark.executor.memory","1g")
   .setAppName("trainModel")

val sc = new SparkContext(conf)
    val rawData = sc.textFile("hdfs://localhost:9001/data/rating.csv")

val sqlContext = new SQLContext(sc)

    val ratings = rawData.map(line => line.split(",").take(3) match

    {
      case Array(userId, movieId, rating) => Rating(userId.toInt,
        movieId.toInt, rating.toDouble)
    })
    println(s"Number of Ratings in Movie file ${ratings.count()} \n")

    val ratingsRDD = sc.textFile("hdfs://localhost:9001/dataset/rating.csv")
import sqlContext.implicits._
val splits = ratingsRDD.randomSplit(Array(0.8, 0.2), seed = 12345)
val trainingRatingsRDD= splits(0).cache()
val testRatingsRDD = splits(1).cache()
val numTraining = trainingRatingsRDD.count()
 val numTest = testRatingsRDD.count()
 println(s"Training: $numTraining, test: $numTest.")

 val rank = 10
        val lambdas = 10
        val numIterations = 10
        val model = ALS.train(ratings, rank, numIterations)
  val userProducts = ratings.map{case Rating(userId, movieId, rating) =>
      (userId,movieId)}
    val predictions = model.predict(userProducts).map{case
      Rating(userId, movieId, rating) =>
    ((userId, movieId), rating)}
    val ratesAndPreds = ratings.map{case Rating(userId, movieId, rating) =>
      ((userId,movieId),
        rating)}.join(predictions)
    val meanSquaredError = ratesAndPreds.map{case ((userId,movieId),
    (r1,r2)) =>
      val err = r1 -r2
      err*err}.mean
      println("Mean Squared Error= "+meanSquaredError)
      sqrt(meanSquaredError)
        val rmse = math.sqrt(meanSquaredError)
       println(s"Test RMSE = $rmse.")
  } 

}

错误

  19/07/19 22:37:11 WARN TaskSetManager: Lost task 2.0 in stage 0.0 (TID 2, 127.0.0.1): java.lang.ClassNotFoundException: com.sparkRDD.trainModel$$anonfun$1
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1868)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1751)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2042)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:431)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:86)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

19/07/19 22:37:11 INFO TaskSetManager: Starting task 2.1 in stage 0.0 (TID 6, 127.0.0.1, partition 2, ANY, 5320 bytes)
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 6 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 1]
19/07/19 22:37:11 INFO TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 2]
19/07/19 22:37:11 INFO TaskSetManager: Lost task 3.0 in stage 0.0 (TID 3) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 3]
19/07/19 22:37:11 INFO TaskSetManager: Lost task 4.0 in stage 0.0 (TID 4) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 4]
19/07/19 22:37:11 INFO TaskSetManager: Starting task 4.1 in stage 0.0 (TID 7, 127.0.0.1, partition 4, ANY, 5320 bytes)
19/07/19 22:37:11 INFO TaskSetManager: Starting task 3.1 in stage 0.0 (TID 8, 127.0.0.1, partition 3, ANY, 5320 bytes)
19/07/19 22:37:11 INFO TaskSetManager: Lost task 2.1 in stage 0.0 (TID 6) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 5]
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 7 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 8 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 5.0 in stage 0.0 (TID 5) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 6]
19/07/19 22:37:11 INFO TaskSetManager: Starting task 5.1 in stage 0.0 (TID 9, 127.0.0.1, partition 5, ANY, 5320 bytes)
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 9 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 4.1 in stage 0.0 (TID 7) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 7]
19/07/19 22:37:11 INFO TaskSetManager: Starting task 4.2 in stage 0.0 (TID 10, 127.0.0.1, partition 4, ANY, 5320 bytes)
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 10 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 3.1 in stage 0.0 (TID 8) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 8]
19/07/19 22:37:11 INFO TaskSetManager: Starting task 3.2 in stage 0.0 (TID 11, 127.0.0.1, partition 3, ANY, 5320 bytes)
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 11 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 4.2 in stage 0.0 (TID 10) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 9]
19/07/19 22:37:11 INFO TaskSetManager: Starting task 4.3 in stage 0.0 (TID 12, 127.0.0.1, partition 4, ANY, 5320 bytes)
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 12 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 5.1 in stage 0.0 (TID 9) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 10]
19/07/19 22:37:11 INFO TaskSetManager: Starting task 5.2 in stage 0.0 (TID 13, 127.0.0.1, partition 5, ANY, 5320 bytes)
19/07/19 22:37:11 INFO TaskSetManager: Lost task 3.2 in stage 0.0 (TID 11) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 11]
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 13 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Starting task 3.3 in stage 0.0 (TID 14, 127.0.0.1, partition 3, ANY, 5320 bytes)
19/07/19 22:37:11 INFO TaskSetManager: Lost task 4.3 in stage 0.0 (TID 12) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 12]
19/07/19 22:37:11 ERROR TaskSetManager: Task 4 in stage 0.0 failed 4 times; aborting job
19/07/19 22:37:11 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Launching task 14 on executor id: 0 hostname: 127.0.0.1.
19/07/19 22:37:11 INFO TaskSetManager: Lost task 5.2 in stage 0.0 (TID 13) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 13]
19/07/19 22:37:11 INFO TaskSetManager: Lost task 3.3 in stage 0.0 (TID 14) on executor 127.0.0.1: java.lang.ClassNotFoundException (com.sparkRDD.trainModel$$anonfun$1) [duplicate 14]
19/07/19 22:37:11 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 
19/07/19 22:37:11 INFO TaskSchedulerImpl: Cancelling stage 0
19/07/19 22:37:11 INFO DAGScheduler: ResultStage 0 (count at trainModel.scala:45) failed in 10.613 s
19/07/19 22:37:11 INFO DAGScheduler: Job 0 failed: count at trainModel.scala:45, took 10.945581 s
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 0.0 failed 4 times, most recent failure: Lost task 4.3 in stage 0.0 (TID 12, 127.0.0.1): java.lang.ClassNotFoundException: com.sparkRDD.trainModel$$anonfun$1
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1868)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1751)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2042)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:431)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:86)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1886)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1899)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1913)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1134)
    at com.sparkRDD.trainModel$.main(trainModel.scala:45)
    at com.sparkRDD.trainModel.main(trainModel.scala)
Caused by: java.lang.ClassNotFoundException: com.sparkRDD.trainModel$$anonfun$1
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1868)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1751)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2042)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2287)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2211)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2069)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1573)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:431)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)

请,如何解决此错误? 预先感谢

0 个答案:

没有答案