I am trying to use Spark MLlib algorithms with Scala in Eclipse. The project compiles without any problem, but at runtime it fails with a "NoSuchMethodError".
Here is my code:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib._
object LinearRegression {

  // Drop the element at index n (here the label column) so it does not leak into the features
  def truncate(k: Array[String], n: Int): List[String] = {
    val truncated = k.take(n) ++ k.drop(n + 1)
    // println(truncated.length)
    truncated.toList
  }
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("linear regression").setMaster("local"))

    // Loading the data
    val data = sc.textFile("D://Innominds//DataSets//Regression//Regression Dataset.csv")
    println("Total no of instances :" + data.count())

    // Split the data into training and testing sets
    val split = data.randomSplit(Array(0.8, 0.2))
    val train = split(0).cache()
    println("Training instances :" + train.count())
    val test = split(1).cache()
    println("Testing instances :" + test.count())

    // Map each CSV line to a LabeledPoint: column 5 is the label, the remaining columns are features
    val trainingRDD = train.map { line =>
      val parts = line.split(',')
      LabeledPoint(parts(5).toDouble, Vectors.dense(truncate(parts, 5).map(_.toDouble).toArray))
    }
    val testingRDD = test.map { line =>
      val parts = line.split(',')
      LabeledPoint(parts(5).toDouble, Vectors.dense(truncate(parts, 5).map(_.toDouble).toArray))
    }

    // Train a linear regression model with 20 iterations of SGD
    val model = LinearRegressionWithSGD.train(trainingRDD, 20)

    // Pair each prediction with the actual label
    val predict = testingRDD.map { x =>
      val score = model.predict(x.features)
      (score, x.label)
    }

    // Root mean squared error over the test set
    val loss = predict.map {
      case (p, l) =>
        val err = p - l
        err * err
    }.reduce(_ + _)
    val rmse = math.sqrt(loss / test.count())
    println("Test RMSE = " + rmse)

    sc.stop()
  }
}
The error occurs while training the model, i.e. at
val model = LinearRegressionWithSGD.train(trainingRDD, 20)
The print statements before this line write their values to the console perfectly.
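One quick way to see which spark-core build is actually on the runtime classpath (assuming the release exposes SparkContext.version, which Spark 1.x does) is to print it right after creating the context:

// Diagnostic only: reports the version of the spark-core jar the driver is running against
println("spark-core version on classpath: " + sc.version)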
The dependencies in my pom.xml are:
<dependencies>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>${scala.version}</version>
  </dependency>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.4</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.specs</groupId>
    <artifactId>specs</artifactId>
    <version>1.2.5</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>1.2.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>1.3.0</version>
  </dependency>
  <dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>14.0.1</version>
  </dependency>
</dependencies>
The error in Eclipse:
15/03/19 15:11:32 INFO SparkContext: Created broadcast 6 from broadcast at GradientDescent.scala:185
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.spark.rdd.RDD.treeAggregate$default$4(Ljava/lang/Object;)I
    at org.apache.spark.mllib.optimization.GradientDescent$$anonfun$runMiniBatchSGD$1.apply$mcVI$sp(GradientDescent.scala:189)
    at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:166)
    at org.apache.spark.mllib.optimization.GradientDescent$.runMiniBatchSGD(GradientDescent.scala:184)
    at org.apache.spark.mllib.optimization.GradientDescent.optimize(GradientDescent.scala:107)
    at org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm.run(GeneralizedLinearAlgorithm.scala:263)
    at org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm.run(GeneralizedLinearAlgorithm.scala:190)
    at org.apache.spark.mllib.regression.LinearRegressionWithSGD$.train(LinearRegression.scala:150)
    at org.apache.spark.mllib.regression.LinearRegressionWithSGD$.train(LinearRegression.scala:184)
    at Algorithms.LinearRegression$.main(LinearRegression.scala:46)
    at Algorithms.LinearRegression.main(LinearRegression.scala)
Answer (score: 1):
You are using spark-core 1.2.1 together with spark-mllib 1.3.0. Make sure both dependencies use the same version: MLlib 1.3.0 calls RDD.treeAggregate, which does not exist in spark-core 1.2.1, and that is exactly the NoSuchMethodError in your stack trace.
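For example, a minimal way to keep the two aligned is to factor the Spark version into a Maven property and reference it from both artifacts. This is a sketch: 1.3.0 here is simply the version your mllib dependency already uses, so pick whichever single Spark version you actually target.

<properties>
  <!-- One Spark version shared by every Spark artifact; 1.3.0 matches the mllib version above -->
  <spark.version>1.3.0</spark.version>
</properties>

<dependencies>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>${spark.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>${spark.version}</version>
  </dependency>
</dependencies>

After changing the versions, running mvn dependency:tree is a quick way to confirm that only one Spark version ends up on the classpath.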