我有一个使用Play 2.6,Scala 2.11和Spark 2.2.0的网络应用程序。
当我对某个变量执行org.apache.spark.SparkException: Task not serializable
转换时,我收到异常:flatmap
。我知道我必须在某些类中实现Serializable,但我不知道这样做的最佳实践。
异常发生在var namesRdd = names.flatMap(parseNames)
行。如果我使用MyController with Serializable
,我会遇到另一个错误:class invalid for deserialization
。所以我认为这不是解决方案。
有谁知道如何序列化Controller以使用Spark Context和flatmap?
class SparkMarvelController @Inject()(cc: ControllerComponents) extends AbstractController(cc) with I18nSupport {
def mostPopularSuperHero() = Action { implicit request: Request[AnyContent] =>
val sparkContext = SparkCommons.sparkSession.sparkContext // got sparkContext
var names = sparkContext
.textFile("resource/marvel/Marvel-names.txt") // build up a hero ID - name RDD
var namesRdd = names.flatMap(parseNames)
val mostPopularHero = sparkContext
.textFile("resource/marvel/Marvel-graph.txt") // build up superhero co-apperance data
.map(countCoOccurrences) // convert to (hero ID, number of connections) RDD
.reduceByKey((x, y) => x + y) // combine entries that span more than one line
.map(x => (x._2, x._1)) // flip it to (number of connections, hero ID)
.max // find the max connections
// Look up the name (lookup returns an array of results, so we need to access the first result with (0))
val mostPopularHeroName = namesRdd.lookup(mostPopularHero._2)(0)
Ok(s"The most popular superhero is [$mostPopularHeroName] with [${mostPopularHero._1}] co-appearances.")
}
// Function to extract the hero ID and number of connections from each line
def countCoOccurrences(line: String) = {
// regex expression to split using any type of space occurrency in the line
val elements = line.split("\\s+")
(elements(0).toInt, elements.length - 1)
}
// function to extract hero ID -> hero name tuples (or None in case of Failure)
def parseNames(line: String): Option[(Int, String)] = {
var fields = line.split('\"')
if (fields.length > 1) return Some(fields(0).trim.toInt, fields(1))
else return None
}
}
错误:
play.api.http.HttpErrorHandlerExceptions$$anon$1: Execution exception[[ClassNotFoundException: controllers.SparkMarvelController$$anonfun$mostPopularSuperHero$1$$anonfun$2]]
at play.api.http.HttpErrorHandlerExceptions$.throwableToUsefulException(HttpErrorHandler.scala:255)
at play.api.http.DefaultHttpErrorHandler.onServerError(HttpErrorHandler.scala:180)
at play.core.server.AkkaHttpServer$$anonfun$3.applyOrElse(AkkaHttpServer.scala:311)
at play.core.server.AkkaHttpServer$$anonfun$3.applyOrElse(AkkaHttpServer.scala:309)
at scala.concurrent.Future$$anonfun$recoverWith$1.apply(Future.scala:346)
at scala.concurrent.Future$$anonfun$recoverWith$1.apply(Future.scala:345)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:32)
at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:55)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply$mcV$sp(BatchingExecutor.scala:91)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
Caused by: java.lang.ClassNotFoundException: controllers.SparkMarvelController$$anonfun$mostPopularSuperHero$1$$anonfun$2
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.InnerClosureFinder$$anon$4.visitMethodInsn(ClosureCleaner.scala:429)
at org.apache.xbean.asm5.ClassReader.a(Unknown Source)
at org.apache.xbean.asm5.ClassReader.b(Unknown Source)
at org.apache.xbean.asm5.ClassReader.accept(Unknown Source)
at org.apache.xbean.asm5.ClassReader.accept(Unknown Source)