尝试运行此代码时:
// Spark setup: one local worker thread. (spark.executor.memory has no
// effect in local mode, but is harmless.)
val conf = new SparkConf()
.setMaster("local[1]")
.setAppName("Small")
.set("spark.executor.memory", "2g")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
// `count` is defined in org.apache.spark.sql.functions — without this
// import the .agg(...) call below does not compile.
import org.apache.spark.sql.functions._
// FIX: the original wrote `(1,10)(2,30)` — the missing comma made the
// tuple (1,10) a function application, which cannot compile.
val df = sc.parallelize(Array((1, 30), (2, 10), (3, 20), (1, 10), (2, 30))).toDF("books", "readers")
// Self-join on readers to pair distinct books read by the same reader;
// books < r_books keeps each unordered pair once and drops self-pairs.
val results = df.join(
df.select($"books" as "r_books", $"readers" as "r_readers"),
$"readers" === $"r_readers" and $"books" < $"r_books"
)
.groupBy($"books", $"r_books")
.agg($"books", $"r_books", count($"readers"))
在SBT控制台下启动以下build.sbt:
// sbt build definition for the example project.
name := "Small"
version := "1.0"
// Spark 1.3.1 artifacts are published for Scala 2.10; `%%` appends the
// Scala binary version suffix (_2.10) to the artifact name, so the
// scalaVersion here must stay on the 2.10 line.
scalaVersion := "2.10.4"
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.3.1"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "1.3.1"
返回错误:
scala.reflect.internal.MissingRequirementError: class org.apache.spark.sql.catalyst.ScalaReflection in JavaMirror with java.net.URLClassLoader@13a9a4f9 ...
有什么想法吗?
答案 0（得分：0）
你的代码里有一些拼写错误；另外，要在最后使用 count 函数，你需要导入 org.apache.spark.sql.functions._
检查一下,我修正了拼写错误并添加了导入
import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.sql.functions._
/**
 * Corrected version of the question's snippet: tuple-list typo fixed and
 * `count` brought into scope via org.apache.spark.sql.functions._.
 *
 * Created by anquegi on 01/06/15.
 */
object QSpark162015 {

  // Explicit main instead of `extends App`: the App trait's delayed
  // initialization turns vals into fields initialized lazily at runtime,
  // which interacts badly with Spark closure serialization and is a known
  // Scala pitfall. `QSpark162015.main(args)` still works as before.
  def main(args: Array[String]): Unit = {
    // Local mode with 2 worker threads; spark.executor.memory is ignored
    // in local mode but kept from the original configuration.
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("QSpark162015")
      .set("spark.executor.memory", "2g")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._

    // (book, reader) pairs — note the comma that was missing in the question.
    val df = sc.parallelize(Array((1, 30), (2, 10), (3, 20), (1, 10), (2, 30))).toDF("books", "readers")

    // Self-join on reader to pair distinct books sharing a reader
    // (books < r_books avoids duplicate and reflexive pairs), then count
    // how many readers each (books, r_books) pair has in common.
    val results = df.join(
      df.select($"books" as "r_books", $"readers" as "r_readers"),
      $"readers" === $"r_readers" and $"books" < $"r_books"
    )
      .groupBy($"books", $"r_books")
      .agg($"books", $"r_books", count($"readers"))

    // NOTE(review): foreach runs on the workers; in local mode the println
    // output appears on the driver console.
    results.foreach(println _)
    sc.stop()
  }
}