I am trying to run a simple Spark SQL application on Amazon EMR. The Spark version is 2.4.3, on EMR 5.26.0. As soon as I try to create a SparkSession, I get a NoSuchMethodError from the Netty library.
I have tried explicitly overriding the Netty dependencies to the versions used by Spark's own dependencies, but with no luck. I can build and run the jar locally without problems.
build.sbt:
resolvers += DefaultMavenRepository
name := "test-spark-sql"
version := "0.0.1"
scalaVersion := "2.12.8"
val sparkVersion = "2.4.3"
libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion exclude ("org.apache", "zookeeper") exclude ("org.apache.spark", "spark-core")
libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion % "provided"
dependencyOverrides += "io.netty" %% "netty-all" % "4.1.17-Final"
dependencyOverrides += "io.netty" %% "netty" % "3.9.9-Final"
// test suite settings
fork in Test := true
javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:MaxPermSize=2048M", "-XX:+CMSClassUnloadingEnabled")
// Show runtime of tests
testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest, "-oD")
// JAR file settings
run in Compile := Defaults.runTask(fullClasspath in Compile, mainClass in (Compile, run), runner in (Compile, run)).evaluated
runMain in Compile := Defaults.runMainTask(fullClasspath in Compile, runner in(Compile, run)).evaluated
assemblyJarName in assembly := "test-spark-sql.jar"
test in assembly := {}
assemblyMergeStrategy in assembly := {
  case PathList("org", "aopalliance", xs @ _*) => MergeStrategy.last
  case PathList("javax", "inject", xs @ _*) => MergeStrategy.last
  case PathList("javax", "servlet", xs @ _*) => MergeStrategy.last
  case PathList("javax", "activation", xs @ _*) => MergeStrategy.last
  case PathList("org", "apache", xs @ _*) => MergeStrategy.last
  case PathList("com", "google", xs @ _*) => MergeStrategy.last
  case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.last
  case PathList("com", "codahale", xs @ _*) => MergeStrategy.last
  case PathList("com", "yammer", xs @ _*) => MergeStrategy.last
  case "about.html" => MergeStrategy.rename
  case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last
  case "META-INF/mailcap" => MergeStrategy.last
  case "META-INF/mimetypes.default" => MergeStrategy.last
  case "plugin.properties" => MergeStrategy.last
  case "log4j.properties" => MergeStrategy.last
  case "git.properties" => MergeStrategy.last
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}
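One option I am considering, but have not yet verified on EMR, is shading Netty inside the fat jar so that my classes use a relocated copy and it no longer matters which Netty version EMR/Hadoop puts on the classpath. A minimal sketch using sbt-assembly's shade rules, assuming my sbt-assembly version supports them (the shadeio.netty prefix is just an arbitrary name I picked):

// Relocate all Netty classes bundled into the assembly to a private package
assemblyShadeRules in assembly := Seq(
  ShadeRule.rename("io.netty.**" -> "shadeio.netty.@1").inAll
)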
Main.scala:
package com.test.spark.sql
import org.apache.spark.sql.SparkSession
object Main {
  def main(args: Array[String]) = {
    val spark: SparkSession = {
      SparkSession
        .builder()
        .appName("spark session")
        .master("local")
        .getOrCreate()
    }

    import spark.implicits._

    val someDF = Seq(
      (8, "bat"),
      (64, "mouse"),
      (-27, "horse")
    ).toDF("number", "word")

    someDF
      .show()
  }
}
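To narrow this down, I am thinking of adding a quick diagnostic at the top of main to print which jar io.netty.buffer.PooledByteBufAllocator is actually loaded from on the cluster (a sketch only; nettySource is just a local name I made up, and getCodeSource can be null for classes on the bootstrap classpath):

// Diagnostic sketch: print the location of the Netty class that Spark's NettyUtils calls into
val nettySource = classOf[io.netty.buffer.PooledByteBufAllocator]
  .getProtectionDomain.getCodeSource
println(if (nettySource == null) "loaded from bootstrap classpath" else nettySource.getLocation.toString)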
Stack trace of the error:
Exception in thread "main" java.lang.NoSuchMethodError: io.netty.buffer.PooledByteBufAllocator.defaultNumHeapArena()I
at org.apache.spark.network.util.NettyUtils.createPooledByteBufAllocator(NettyUtils.java:113)
at org.apache.spark.network.client.TransportClientFactory.<init>(TransportClientFactory.java:106)
at org.apache.spark.network.TransportContext.createClientFactory(TransportContext.java:99)
at org.apache.spark.rpc.netty.NettyRpcEnv.<init>(NettyRpcEnv.scala:71)
at org.apache.spark.rpc.netty.NettyRpcEnvFactory.create(NettyRpcEnv.scala:462)
at org.apache.spark.rpc.RpcEnv$.create(RpcEnv.scala:57)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:250)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:185)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:257)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:424)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$5(SparkSession.scala:935)
at scala.Option.getOrElse(Option.scala:138)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at com.test.spark.sql.Main$.main(Main.scala:11)
at com.test.spark.sql.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:239)
at org.apache.hadoop.util.RunJar.main(RunJar.java:153)