Buld.sbt
name := "BigData"
version := "0.1"
scalaVersion := "2.12.7"
libraryDependencies += "com.github.tototoshi" %% "scala-csv" % "1.3.5"
// https://mvnrepository.com/artifact/org.apache.spark/spark-core
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.0"
// https://mvnrepository.com/artifact/org.apache.spark/spark-sql
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.0"
// https://mvnrepository.com/artifact/com.microsoft.sqlserver/mssql-jdbc
libraryDependencies += "com.microsoft.sqlserver" % "mssql-jdbc" % "6.1.0.jre8"
libraryDependencies += "com.databricks" % "spark-xml_2.11" % "0.4.1"
// https://mvnrepository.com/artifact/com.typesafe.akka/akka-actor
libraryDependencies += "com.typesafe.akka" %% "akka-actor" % "2.5.19"
// https://mvnrepository.com/artifact/com.typesafe.akka/akka-http
libraryDependencies += "com.typesafe.akka" %% "akka-http" % "10.1.5"
// https://mvnrepository.com/artifact/com.typesafe.akka/akka-stream
libraryDependencies += "com.typesafe.akka" %% "akka-stream" % "2.5.19"
// https://mvnrepository.com/artifact/org.apache.livy/livy-core
libraryDependencies += "org.apache.livy" %% "livy-core" % "0.5.0-incubating"
使用scala和spark的代码
import org.apache.spark.sql.SparkSession
object sparkXml {
def main(args: Array[String]): Unit = {
val spark = SparkSession.
builder.master("local[*]")
//.config("spark.debug.maxToStringFields", "100")
.appName("Insight Application Big Data")
.getOrCreate()
val df = spark.read
.format("com.databricks.spark.xml")
.option("rowTag", "book")
.load("src/main/resources/in/books.xml")
df.printSchema()
}
}
错误消息
Exception in thread "main" java.lang.NoClassDefFoundError: scala/Product$class
at com.databricks.spark.xml.XmlRelation.<init>(XmlRelation.scala:35)
at com.databricks.spark.xml.DefaultSource.createRelation(DefaultSource.scala:65)
at com.databricks.spark.xml.DefaultSource.createRelation(DefaultSource.scala:43)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
at com.amkcambodia.insight.app.components.sparkXml$.main(sparkXml.scala:16)
at com.amkcambodia.insight.app.components.sparkXml.main(sparkXml.scala)
Caused by: java.lang.ClassNotFoundException: scala.Product$class
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 9 more
18/12/16 07:15:17 INFO SparkContext: Invoking stop() from shutdown hook
答案 0 :(得分:2)
当前,Scala 2.12支持的com.databricks-spark-xml“软件包在Maven回购中不可用 https://mvnrepository.com/artifact/com.databricks/spark-xml
降级到Scala 2.11应该可以解决此问题。请尝试以下版本更改
scalaVersion := "2.11.12"
libraryDependencies += "com.databricks" % "spark-xml_2.11" % "0.4.1"