我想使用线性SVM进行分类。以下是我在使用MLlib时遇到的问题。我使用的是CDH 5.4.4和Spark 1.3,与MLlib相关的依赖在我的pom文件中指定如下:
<properties>
  <uber.jar.name>linearsvm.jar</uber.jar.name>
  <cdh.version>2.6.0-cdh5.4.4</cdh.version>
  <cdh.spark.version>1.3.0-cdh5.4.4</cdh.spark.version>
</properties>
<!--
  MLlib is resolved from the same CDH build as spark-core. Pinning it to the
  upstream "1.3.0" artifact mixes upstream and CDH Spark/Hadoop classes on the
  classpath; the IncompatibleClassChangeError raised from
  SparkHadoopMapRedUtil.newJobContext is the typical symptom of such a Hadoop
  API mismatch (NOTE(review): likely root cause, confirm with
  `mvn dependency:tree`).
-->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-mllib_2.10</artifactId>
  <version>${cdh.spark.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.10</artifactId>
  <version>${cdh.spark.version}</version>
  <exclusions>
    <!-- snappy-java is excluded from spark-core's transitive dependencies. -->
    <exclusion>
      <groupId>org.xerial.snappy</groupId>
      <artifactId>snappy-java</artifactId>
    </exclusion>
  </exclusions>
</dependency>
这是我训练模型的代码
/**
 * Trains a linear SVM with stochastic gradient descent and saves the model.
 *
 * Reads `training` and `spark` from the enclosing scope, which is not part of
 * this snippet (presumably an RDD[LabeledPoint] and the SparkContext -- confirm).
 * The model is written to the path "mymodelpath".
 */
def main(): Unit = {
  val numIterations = 100
  // Run the training algorithm to build the model.
  val model = SVMWithSGD.train(training, numIterations)
  // Save the trained model so that it can be loaded again for scoring.
  model.save(spark, "mymodelpath")
}
这是我加载该模型的另一个类
/**
 * Loads the model saved under "mymodelpath", scores `test` with it and prints
 * the area under the ROC curve.
 *
 * Reads `spark` from the enclosing scope, which is not part of this snippet
 * (presumably the SparkContext -- confirm).
 *
 * @param test labeled points to evaluate the saved model on
 */
def performScoring(test: RDD[LabeledPoint]): Unit = {
  // Load the saved model.
  val savedModel = SVMModel.load(spark, "mymodelpath")
  // Clear the threshold so that predict yields raw scores, which the ROC
  // computation below is fed with.
  savedModel.clearThreshold()
  // Pair each raw score with the true label of its point.
  val scoreAndLabels = test.map { point =>
    (savedModel.predict(point.features), point.label)
  }
  // Get evaluation metrics.
  val metrics = new BinaryClassificationMetrics(scoreAndLabels)
  val auROC = metrics.areaUnderROC()
  println(s"Area under ROC = $auROC")
}
以下是我得到的异常:
Exception in thread "main" java.lang.IncompatibleClassChangeError: Implementing class
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:800)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:190)
at org.apache.spark.mapred.SparkHadoopMapRedUtil$class.firstAvailableClass(SparkHadoopMapRedUtil.scala:61)
at org.apache.spark.mapred.SparkHadoopMapRedUtil$class.newJobContext(SparkHadoopMapRedUtil.scala:27)
at org.apache.spark.SparkHadoopWriter.newJobContext(SparkHadoopWriter.scala:40)
at org.apache.spark.SparkHadoopWriter.getJobContext(SparkHadoopWriter.scala:182)
at org.apache.spark.SparkHadoopWriter.preSetup(SparkHadoopWriter.scala:64)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1057)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:954)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:863)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1290)
at org.apache.spark.mllib.classification.impl.GLMClassificationModel$SaveLoadV1_0$.save(GLMClassificationModel.scala:61)
at org.apache.spark.mllib.classification.SVMModel.save(SVM.scala:84)
at LinearSVM$.main(LinearSVM.scala:32)
at LinearSVM.main(LinearSVM.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)