I have configured a Spark job with the settings below. It runs fine on my local machine, but throws the following exception on an EMR cluster:
scalaVersion in Global := "2.11.8"
crossScalaVersions in Global := Seq("2.11.8")
The dependencies are as follows:
val ScallopVersion = "3.1.2"
val SparkVersion = "2.4.0"
val OpenCsvVersion = "3.7"
val MockitoVersion = "1.10.19"
val ConfigVersion = "1.2.1"
val ScalazVersion = "7.3.0-M31"
val ConfigDependencies = Seq("com.typesafe" % "config" % ConfigVersion)
val ParserDependencies = Seq("com.opencsv" % "opencsv" % OpenCsvVersion)
val JodaTimeDependencies = Seq(
  "joda-time" % "joda-time" % JodaTimeVersion % Provided,
  "org.joda" % "joda-convert" % JodaConvertVersion % Provided)
val KryoDependencies = Seq("com.esotericsoftware.kryo" % "kryo" % KryoVersion % Provided)
val PredictiveCommonDependencies = Seq("com.here.traffic.predictive" % "predictive-common_2.10" % PredictiveCommonVersion)
val SparkDependencies = Seq(
  "org.apache.spark" %% "spark-core" % SparkVersion % Provided excludeAll ExclusionRule("org.apache.hadoop"),
  "org.apache.spark" %% "spark-core" % SparkVersion % Test excludeAll ExclusionRule("org.apache.hadoop"),
  "org.apache.hadoop" % "hadoop-client" % HadoopVersion % Provided excludeAll(ExclusionRule(organization = "javax.servlet"), ExclusionRule(organization = "org.mortbay.jetty")),
  "org.apache.hadoop" % "hadoop-client" % HadoopVersion % Test excludeAll(ExclusionRule(organization = "javax.servlet"), ExclusionRule(organization = "org.mortbay.jetty")),
  "com.google.guava" % "guava" % GuavaVersion
)
val FunctionalProgrammingDependencies = Seq("org.scalaz" % "scalaz-core_2.11" % ScalazVersion)
val ScallopDependencies = Seq("org.rogach" %% "scallop" % ScallopVersion)
ERROR ApplicationMaster: User class threw exception: org.apache.spark.SparkException: Task not serializable
org.apache.spark.SparkException: Task not serializable
at com.here.traffic.learning.steps.MapMatcherStep$.apply(MapMatcherStep.scala:44)
at com.here.traffic.learning.steps.TASteps$.runJob(TASteps.scala:22)
at com.here.traffic.learning.steps.TASteps$.main(TASteps.scala:13)
at com.here.traffic.learning.steps.TASteps.main(TASteps.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:678)
Caused by: java.io.UTFDataFormatException: encoded string too long: 89216 bytes
at java.io.DataOutputStream.writeUTF(DataOutputStream.java:364)
at java.io.DataOutputStream.writeUTF(DataOutputStream.java:323)
at com.typesafe.config.impl.SerializedConfigValue.writeValueData(SerializedConfigValue.java:301)
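From what I can tell (my reading of the JDK, not anything EMR-specific): java.io.DataOutputStream.writeUTF writes a 2-byte length prefix, so it rejects any string whose modified-UTF-8 encoding exceeds 65535 bytes, and Typesafe Config's SerializedConfigValue serializes values through writeUTF. A minimal sketch reproducing just the JDK limit:

import java.io.{ByteArrayOutputStream, DataOutputStream}

val out = new DataOutputStream(new ByteArrayOutputStream())
// 89216 ASCII characters encode to 89216 bytes, well over writeUTF's
// 65535-byte cap, so this throws UTFDataFormatException just like above.
out.writeUTF("x" * 89216)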
The exception is thrown while parsing the string, at the following line (com.here.traffic.learning.steps.MapMatcherStep.scala, line 44):
val pdrProbes: RDD[MapMatchInputPoint] = rawProbes.flatMap(ss => pdrParser.parse(ss))
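My working theory is that pdrParser (or the object enclosing it) holds a reference to a Typesafe Config containing a value larger than 64 KB, so Spark pulls the whole Config into the task closure and fails to serialize it. Below is a minimal sketch of the workaround I am considering, where PdrParser and the parser.delimiter key are hypothetical stand-ins for my real code: read the needed settings into plain values on the driver and rebuild the parser on the executors, so the Config itself is never captured.

import org.apache.spark.rdd.RDD
import com.typesafe.config.Config

// Hypothetical stand-ins for the real parser and record type.
case class MapMatchInputPoint(raw: String)
class PdrParser(delimiter: String) extends Serializable {
  def parse(line: String): Seq[MapMatchInputPoint] =
    line.split(delimiter).map(MapMatchInputPoint(_)).toSeq
}

def matchProbes(rawProbes: RDD[String], config: Config): RDD[MapMatchInputPoint] = {
  // Extract only the primitives the parser needs while still on the driver;
  // plain Strings serialize fine, the Config object does not.
  val delimiter = config.getString("parser.delimiter") // hypothetical key

  rawProbes.mapPartitions { iter =>
    val parser = new PdrParser(delimiter) // constructed on the executor
    iter.flatMap(ss => parser.parse(ss))
  }
}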