I have a streaming job that I run with SBT.
Whenever I execute "sbt run", I see the error below. As far as I can tell, this happens because the executors cannot obtain the Kafka dependency they need.
Is there a way to pass the dependency jars along with the "sbt run" command?
Error:
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 13.0 failed 4 times, most recent failure: Lost task 1.3 in stage 13.0 (TID 227, 10.148.9.12, executor 1): java.lang.ClassNotFoundException: org.apache.spark.sql.kafka010.KafkaContinuousDataReaderFactory
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
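The only workaround I can think of is to list the Kafka jar explicitly in the Spark configuration so that it gets shipped to the executors. A minimal sketch, assuming "retrieveManaged := true" (see my build.sbt below) has copied the jar into lib_managed; the exact path is a guess and would need adjusting:

import org.apache.spark.sql.SparkSession

// spark.jars ships the listed jars to the driver and every executor.
// The lib_managed path is an assumption based on retrieveManaged := true;
// point it at wherever the Kafka source jar actually lives.
val spark = SparkSession.builder()
  .appName("MyAPP")
  .config("spark.jars",
    "lib_managed/jars/org.apache.spark/spark-sql-kafka-0-10_2.11/" +
      "spark-sql-kafka-0-10_2.11-2.3.1.jar")
  .getOrCreate()

Is something like this the intended approach, or is there a cleaner way to do it from sbt directly?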
build.sbt:
name := "MyAPP"
version := "0.5"
scalaVersion := "2.11.8"
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "2.3.1",
"org.apache.spark" %% "spark-sql" % "2.3.1",
"org.apache.spark" %% "spark-streaming" % "2.3.1",
"org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.3.1",
"org.apache.spark" %% "spark-sql-kafka-0-10" % "2.3.1",
"com.typesafe" % "config" % "1.3.2",
"org.apache.logging.log4j" % "log4j-api" % "2.11.0",
"org.apache.logging.log4j" % "log4j-core" % "2.11.0",
"org.apache.logging.log4j" %% "log4j-api-scala" % "11.0",
"org.scalatest" %% "scalatest" % "3.0.5" % "test",
"org.apache.kafka" % "kafka_2.11" % "0.10.2.2",
"org.apache.kafka" % "kafka-clients" % "0.10.2.2",
"ml.combust.mleap" %% "mleap-runtime" % "0.11.0",
"com.typesafe.play" % "play-json_2.11" % "2.6.10",
"com.fasterxml.jackson.module" % "jackson-module-scala_2.11" % "2.8.11",
"net.liftweb" %% "lift-json" % "3.3.0"
)
lazy val excludeJpountz = ExclusionRule(organization = "net.jpountz.lz4", name = "lz4")
lazy val kafkaClients = "org.apache.kafka" % "kafka-clients" % "0.10.2.2" excludeAll(excludeJpountz)
logBuffered in Test := false
fork in Test := true
// Don't run tests before assembling
test in assembly := {}
retrieveManaged := true
assemblyMergeStrategy in assembly := {
case "META-INF/services/org.apache.spark.sql.sources.DataSourceRegister" => MergeStrategy.concat
case PathList("META-INF", xs@_*) => MergeStrategy.discard
case "log4j.properties" => MergeStrategy.discard
  case x => MergeStrategy.first
}
unmanagedBase := baseDirectory.value / "lib"
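For reference, the assembly keys above come from the sbt-assembly plugin; my project/plugins.sbt is roughly the following (the plugin version is just the one I happen to use). I am aware that I could sidestep "sbt run" entirely by building the fat jar and handing it to spark-submit, but I would prefer to keep using sbt if possible:

// project/plugins.sbt — provides the assembly task and assemblyMergeStrategy
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")

// After "sbt assembly", the fat jar (which bundles spark-sql-kafka-0-10)
// could be submitted instead of using sbt run, e.g.:
//   spark-submit --class <main class> target/scala-2.11/MyAPP-assembly-0.5.jar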