I am new to Scala and Spark, and I am trying to build a machine learning model. When I try to fit the model on my training data with val model = pipeline.fit(trainingData), it throws an error saying the Task is not serializable. Please help me resolve this issue. The complete code is below.
package com.skillassure.spark
import org.apache.spark._
import org.apache.spark.sql._
import org.apache.log4j._
import org.apache.spark.sql.functions._
import org.apache.spark.ml._
import org.apache.spark.ml.feature.Bucketizer
import org.apache.spark.ml.feature.{RegexTokenizer, IDF}
import org.apache.spark.ml.feature.StopWordsRemover
import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.classification.LogisticRegressionModel
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.ml.feature._
import org.apache.spark.ml.classification._
import org.apache.spark.ml.evaluation._
import org.apache.spark.ml.tuning._
import scala.reflect.api.materializeTypeTag
object ReviewAnalysis extends java.io.Serializable {
  def main(args: Array[String]): Unit = {
    // Set the log level to only print errors
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession.builder
      .appName("SparkSessionExample")
      .master("local[*]")
      .getOrCreate;

    var file = "src/Resource/review-sample.json";
    val df0 = spark.read.format("json")
      .option("inferSchema", "true")
      .load(file);

    val df = df0.withColumn("reviewTS", df0.col("summary") + " " + df0.col("reviewText"))
      .drop("helpful")
      .drop("reviewerID")
      .drop("reviewerName")
      .drop("reviewTime");

    //df.printSchema;
    //df.show(5);
    df.describe("overall").show;

    val df1 = df.filter("overall !=3");

    val bucketizer = new Bucketizer()
      .setInputCol("overall")
      .setOutputCol("label")
      .setSplits(Array(Double.NegativeInfinity, 4.0, Double.PositiveInfinity))

    val df2 = bucketizer.transform(df1);
    df2.show(5);
    //val df3 = df2.groupBy("overall","label").count.show;
    //df2.groupBy("label").count.show(5);

    val dff = df2.selectExpr("asin", "cast(overall as string) overall", "reviewText", "summary", "unixReviewTime", "cast(reviewTS as string) reviewTS", "label");
    dff.show(10);

    val fractions = Map(1.0 -> .1, 0.0 -> 1.0);
    val df3 = dff.stat.sampleBy("label", fractions, 36L);
    df3.show(3);
    //df2.stat.sampleBy("label", fractions, 36L);
    //df3.groupBy("label").count().show;
    df3.printSchema();

    val splitSeed = 5043
    val Array(trainingData, testData) = dff.randomSplit(Array(0.8, 0.2), splitSeed)

    val tokenizer = new RegexTokenizer()
      .setInputCol("reviewTS")
      .setOutputCol("reviewTokensUf")
      .setPattern("\\s+|[,.()\"]");

    val remover = new StopWordsRemover()
      .setStopWords(StopWordsRemover.loadDefaultStopWords("english"))
      .setInputCol("reviewTokensUf")
      .setOutputCol("reviewTokens");

    val cv = new CountVectorizer()
      .setInputCol("reviewTokens")
      .setOutputCol("cv")
      .setVocabSize(200000);

    // list of feature columns
    val idf = new IDF()
      .setInputCol("cv")
      .setOutputCol("features");

    // create Logistic Regression estimator
    // regularizer parameters avoid overfitting
    val lr = new LogisticRegression()
      .setMaxIter(100)
      .setRegParam(0.02)
      .setElasticNetParam(0.3);

    //val steps = Array(tokenizer, remover, cv, idf, lr);
    val pipeline = new Pipeline().setStages(Array(tokenizer, remover, cv, idf, lr))
    //val pipeline = new Pipeline().setStages(steps);

    val model = pipeline.fit(trainingData)
  }
}
Please find the complete error below:
20/06/14 06:08:51 ERROR Instrumentation: org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:396)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:386)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:159)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2379)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$1(RDD.scala:886)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)
at org.apache.spark.rdd.RDD.mapPartitionsWithIndex(RDD.scala:885)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:720)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:173)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:211)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:208)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:96)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:173)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:211)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:208)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:110)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:109)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3073)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3071)
at org.apache.spark.ml.feature.CountVectorizer.fit(CountVectorizer.scala:191)
at org.apache.spark.ml.feature.CountVectorizer.fit(CountVectorizer.scala:149)
at org.apache.spark.ml.Pipeline.$anonfun$fit$5(Pipeline.scala:155)
at org.apache.spark.ml.MLEvents.withFitEvent(events.scala:132)
at org.apache.spark.ml.MLEvents.withFitEvent$(events.scala:125)
at org.apache.spark.ml.util.Instrumentation.withFitEvent(Instrumentation.scala:42)
at org.apache.spark.ml.Pipeline.$anonfun$fit$4(Pipeline.scala:155)
at scala.collection.Iterator.foreach(Iterator.scala:929)
at scala.collection.Iterator.foreach$(Iterator.scala:929)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1417)
at scala.collection.IterableViewLike$Transformed.foreach(IterableViewLike.scala:44)
at scala.collection.IterableViewLike$Transformed.foreach$(IterableViewLike.scala:44)
at scala.collection.SeqViewLike$AbstractTransformed.foreach(SeqViewLike.scala:37)
at org.apache.spark.ml.Pipeline.$anonfun$fit$2(Pipeline.scala:151)
at org.apache.spark.ml.MLEvents.withFitEvent(events.scala:132)
at org.apache.spark.ml.MLEvents.withFitEvent$(events.scala:125)
at org.apache.spark.ml.util.Instrumentation.withFitEvent(Instrumentation.scala:42)
at org.apache.spark.ml.Pipeline.$anonfun$fit$1(Pipeline.scala:137)
at org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:191)
at scala.util.Try$.apply(Try.scala:209)
at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:191)
at org.apache.spark.ml.Pipeline.fit(Pipeline.scala:137)
at com.skillassure.spark.ReviewAnalysis$.main(ReviewAnalysis.scala:111)
at com.skillassure.spark.ReviewAnalysis.main(ReviewAnalysis.scala)
Caused by: java.io.NotSerializableException: scala.runtime.LazyRef
Serialization stack:
- object not serializable (class: scala.runtime.LazyRef, value: LazyRef thunk)
- element of array (index: 2)
- array (class [Ljava.lang.Object;, size 3)
- field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
- object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.catalyst.expressions.ScalaUDF, functionalInterfaceMethod=scala/Function1.apply:(Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/catalyst/expressions/ScalaUDF.$anonfun$f$2:(Lscala/Function1;Lorg/apache/spark/sql/catalyst/expressions/Expression;Lscala/runtime/LazyRef;Lorg/apache/spark/sql/catalyst/InternalRow;)Ljava/lang/Object;, instantiatedMethodType=(Lorg/apache/spark/sql/catalyst/InternalRow;)Ljava/lang/Object;, numCaptured=3])
- writeReplace data (class: java.lang.invoke.SerializedLambda)
- object (class org.apache.spark.sql.catalyst.expressions.ScalaUDF$$Lambda$2506/239902985, org.apache.spark.sql.catalyst.expressions.ScalaUDF$$Lambda$2506/239902985@3f64d943)
- field (class: org.apache.spark.sql.catalyst.expressions.ScalaUDF, name: f, type: interface scala.Function1)
- object (class org.apache.spark.sql.catalyst.expressions.ScalaUDF, bucketizer_0(knownnotnull(overall#9)))
- field (class: org.apache.spark.sql.catalyst.expressions.If, name: falseValue, type: class org.apache.spark.sql.catalyst.expressions.Expression)
- object (class org.apache.spark.sql.catalyst.expressions.If, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)))
- field (class: org.apache.spark.sql.catalyst.expressions.Alias, name: child, type: class org.apache.spark.sql.catalyst.expressions.Expression)
- object (class org.apache.spark.sql.catalyst.expressions.Alias, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133)
- element of array (index: 5)
- array (class [Ljava.lang.Object;, size 6)
- field (class: scala.collection.mutable.ArrayBuffer, name: array, type: class [Ljava.lang.Object;)
- object (class scala.collection.mutable.ArrayBuffer, ArrayBuffer(asin#7, cast(overall#9 as string) AS overall#187, reviewText#10, summary#14, unixReviewTime#15L, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133))
- field (class: org.apache.spark.sql.execution.ProjectExec, name: projectList, type: interface scala.collection.Seq)
- object (class org.apache.spark.sql.execution.ProjectExec, Project [asin#7, cast(overall#9 as string) AS overall#187, reviewText#10, summary#14, unixReviewTime#15L, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133]
+- Filter (isnotnull(overall#9) AND NOT (overall#9 = 3.0))
+- BatchScan[asin#7, overall#9, reviewText#10, summary#14, unixReviewTime#15L] JsonScan Location: InMemoryFileIndex[file:/C:/SparkScala/SparkLearning/src/Resource/review-sample.json], ReadSchema: struct<asin:string,overall:double,reviewText:string,summary:string,unixReviewTime:bigint>
)
- field (class: org.apache.spark.sql.execution.SortExec, name: child, type: class org.apache.spark.sql.execution.SparkPlan)
- object (class org.apache.spark.sql.execution.SortExec, Sort [asin#7 ASC NULLS FIRST, overall#187 ASC NULLS FIRST, reviewText#10 ASC NULLS FIRST, summary#14 ASC NULLS FIRST, unixReviewTime#15L ASC NULLS FIRST, label#133 ASC NULLS FIRST], false, 0
+- Project [asin#7, cast(overall#9 as string) AS overall#187, reviewText#10, summary#14, unixReviewTime#15L, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133]
+- Filter (isnotnull(overall#9) AND NOT (overall#9 = 3.0))
+- BatchScan[asin#7, overall#9, reviewText#10, summary#14, unixReviewTime#15L] JsonScan Location: InMemoryFileIndex[file:/C:/SparkScala/SparkLearning/src/Resource/review-sample.json], ReadSchema: struct<asin:string,overall:double,reviewText:string,summary:string,unixReviewTime:bigint>
)
- element of array (index: 0)
- array (class [Ljava.lang.Object;, size 15)
- element of array (index: 1)
- array (class [Ljava.lang.Object;, size 3)
- field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
- object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.execution.WholeStageCodegenExec, functionalInterfaceMethod=scala/Function2.apply:(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/execution/WholeStageCodegenExec.$anonfun$doExecute$4$adapted:(Lorg/apache/spark/sql/catalyst/expressions/codegen/CodeAndComment;[Ljava/lang/Object;Lorg/apache/spark/sql/execution/metric/SQLMetric;Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, instantiatedMethodType=(Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, numCaptured=3])
- writeReplace data (class: java.lang.invoke.SerializedLambda)
- object (class org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2181/2096690266, org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2181/2096690266@79d14037)
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:41)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:393)
... 48 more
Exception in thread "main" org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:396)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:386)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:159)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2379)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$1(RDD.scala:886)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)
at org.apache.spark.rdd.RDD.mapPartitionsWithIndex(RDD.scala:885)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:720)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:173)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:211)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:208)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:96)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:173)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:211)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:208)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:110)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:109)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3073)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3071)
at org.apache.spark.ml.feature.CountVectorizer.fit(CountVectorizer.scala:191)
at org.apache.spark.ml.feature.CountVectorizer.fit(CountVectorizer.scala:149)
at org.apache.spark.ml.Pipeline.$anonfun$fit$5(Pipeline.scala:155)
at org.apache.spark.ml.MLEvents.withFitEvent(events.scala:132)
at org.apache.spark.ml.MLEvents.withFitEvent$(events.scala:125)
at org.apache.spark.ml.util.Instrumentation.withFitEvent(Instrumentation.scala:42)
at org.apache.spark.ml.Pipeline.$anonfun$fit$4(Pipeline.scala:155)
at scala.collection.Iterator.foreach(Iterator.scala:929)
at scala.collection.Iterator.foreach$(Iterator.scala:929)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1417)
at scala.collection.IterableViewLike$Transformed.foreach(IterableViewLike.scala:44)
at scala.collection.IterableViewLike$Transformed.foreach$(IterableViewLike.scala:44)
at scala.collection.SeqViewLike$AbstractTransformed.foreach(SeqViewLike.scala:37)
at org.apache.spark.ml.Pipeline.$anonfun$fit$2(Pipeline.scala:151)
at org.apache.spark.ml.MLEvents.withFitEvent(events.scala:132)
at org.apache.spark.ml.MLEvents.withFitEvent$(events.scala:125)
at org.apache.spark.ml.util.Instrumentation.withFitEvent(Instrumentation.scala:42)
at org.apache.spark.ml.Pipeline.$anonfun$fit$1(Pipeline.scala:137)
at org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:191)
at scala.util.Try$.apply(Try.scala:209)
at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:191)
at org.apache.spark.ml.Pipeline.fit(Pipeline.scala:137)
at com.skillassure.spark.ReviewAnalysis$.main(ReviewAnalysis.scala:111)
at com.skillassure.spark.ReviewAnalysis.main(ReviewAnalysis.scala)
Caused by: java.io.NotSerializableException: scala.runtime.LazyRef
Serialization stack:
- object not serializable (class: scala.runtime.LazyRef, value: LazyRef thunk)
- element of array (index: 2)
- array (class [Ljava.lang.Object;, size 3)
- field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
- object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.catalyst.expressions.ScalaUDF, functionalInterfaceMethod=scala/Function1.apply:(Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/catalyst/expressions/ScalaUDF.$anonfun$f$2:(Lscala/Function1;Lorg/apache/spark/sql/catalyst/expressions/Expression;Lscala/runtime/LazyRef;Lorg/apache/spark/sql/catalyst/InternalRow;)Ljava/lang/Object;, instantiatedMethodType=(Lorg/apache/spark/sql/catalyst/InternalRow;)Ljava/lang/Object;, numCaptured=3])
- writeReplace data (class: java.lang.invoke.SerializedLambda)
- object (class org.apache.spark.sql.catalyst.expressions.ScalaUDF$$Lambda$2506/239902985, org.apache.spark.sql.catalyst.expressions.ScalaUDF$$Lambda$2506/239902985@3f64d943)
- field (class: org.apache.spark.sql.catalyst.expressions.ScalaUDF, name: f, type: interface scala.Function1)
- object (class org.apache.spark.sql.catalyst.expressions.ScalaUDF, bucketizer_0(knownnotnull(overall#9)))
- field (class: org.apache.spark.sql.catalyst.expressions.If, name: falseValue, type: class org.apache.spark.sql.catalyst.expressions.Expression)
- object (class org.apache.spark.sql.catalyst.expressions.If, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)))
- field (class: org.apache.spark.sql.catalyst.expressions.Alias, name: child, type: class org.apache.spark.sql.catalyst.expressions.Expression)
- object (class org.apache.spark.sql.catalyst.expressions.Alias, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133)
- element of array (index: 5)
- array (class [Ljava.lang.Object;, size 6)
- field (class: scala.collection.mutable.ArrayBuffer, name: array, type: class [Ljava.lang.Object;)
- object (class scala.collection.mutable.ArrayBuffer, ArrayBuffer(asin#7, cast(overall#9 as string) AS overall#187, reviewText#10, summary#14, unixReviewTime#15L, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133))
- field (class: org.apache.spark.sql.execution.ProjectExec, name: projectList, type: interface scala.collection.Seq)
- object (class org.apache.spark.sql.execution.ProjectExec, Project [asin#7, cast(overall#9 as string) AS overall#187, reviewText#10, summary#14, unixReviewTime#15L, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133]
+- Filter (isnotnull(overall#9) AND NOT (overall#9 = 3.0))
+- BatchScan[asin#7, overall#9, reviewText#10, summary#14, unixReviewTime#15L] JsonScan Location: InMemoryFileIndex[file:/C:/SparkScala/SparkLearning/src/Resource/review-sample.json], ReadSchema: struct<asin:string,overall:double,reviewText:string,summary:string,unixReviewTime:bigint>
)
- field (class: org.apache.spark.sql.execution.SortExec, name: child, type: class org.apache.spark.sql.execution.SparkPlan)
- object (class org.apache.spark.sql.execution.SortExec, Sort [asin#7 ASC NULLS FIRST, overall#187 ASC NULLS FIRST, reviewText#10 ASC NULLS FIRST, summary#14 ASC NULLS FIRST, unixReviewTime#15L ASC NULLS FIRST, label#133 ASC NULLS FIRST], false, 0
+- Project [asin#7, cast(overall#9 as string) AS overall#187, reviewText#10, summary#14, unixReviewTime#15L, if (isnull(overall#9)) null else bucketizer_0(knownnotnull(overall#9)) AS label#133]
+- Filter (isnotnull(overall#9) AND NOT (overall#9 = 3.0))
+- BatchScan[asin#7, overall#9, reviewText#10, summary#14, unixReviewTime#15L] JsonScan Location: InMemoryFileIndex[file:/C:/SparkScala/SparkLearning/src/Resource/review-sample.json], ReadSchema: struct<asin:string,overall:double,reviewText:string,summary:string,unixReviewTime:bigint>
)
- element of array (index: 0)
- array (class [Ljava.lang.Object;, size 15)
- element of array (index: 1)
- array (class [Ljava.lang.Object;, size 3)
- field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
- object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.execution.WholeStageCodegenExec, functionalInterfaceMethod=scala/Function2.apply:(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/execution/WholeStageCodegenExec.$anonfun$doExecute$4$adapted:(Lorg/apache/spark/sql/catalyst/expressions/codegen/CodeAndComment;[Ljava/lang/Object;Lorg/apache/spark/sql/execution/metric/SQLMetric;Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, instantiatedMethodType=(Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, numCaptured=3])
- writeReplace data (class: java.lang.invoke.SerializedLambda)
- object (class org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2181/2096690266, org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2181/2096690266@79d14037)
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:41)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:393)
... 48 more
Answer (score: 0)
The problematic code is not in the ReviewAnalysis object shown in the description. Please try commenting out or removing the code in the following class of your project and compiling again -
com.spark.programming.foodreview$.main(foodreview.scala:154)
at com.spark.programming.foodreview.main(foodreview.scala)
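
To narrow this down further, below is a minimal, self-contained sketch that runs only the step the serialization stack actually points at: the Bucketizer UDF (bucketizer_0(knownnotnull(overall#9))) being serialized when the plan is executed during CountVectorizer.fit. It assumes the same review-sample.json input and column names as in the question; the BucketizerCheck object name is made up for illustration. If this small program fails with the same java.io.NotSerializableException: scala.runtime.LazyRef, the problem is most likely in the build setup (for example, a project Scala version that does not match the Scala version the installed Spark distribution was built against) rather than in the pipeline code itself.

import org.apache.spark.sql.SparkSession
import org.apache.spark.ml.feature.Bucketizer

// Hypothetical standalone check; file path and column names are taken from the question.
object BucketizerCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("BucketizerCheck")
      .master("local[*]")
      .getOrCreate()

    // Read the same JSON and keep only the column the Bucketizer needs.
    val df = spark.read.json("src/Resource/review-sample.json")
      .select("overall")
      .filter("overall !=3")

    // Same Bucketizer configuration as in the question.
    val bucketizer = new Bucketizer()
      .setInputCol("overall")
      .setOutputCol("label")
      .setSplits(Array(Double.NegativeInfinity, 4.0, Double.PositiveInfinity))

    // count() forces the physical plan (including the bucketizer UDF) to be
    // serialized and shipped to executors, which is where the posted trace fails.
    val bucketed = bucketizer.transform(df)
    println(s"Bucketized ${bucketed.count()} rows without a serialization error")

    spark.stop()
  }
}

Either outcome helps: if the sketch runs cleanly, the issue is specific to the larger program; if it fails the same way, checking that the project's Scala version matches the one your Spark build targets is a reasonable next step.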