在Eclipse / Spark IDE中编译时出现“错误的符号引用”(bad symbolic reference)并伴随ExecuteException

时间:2017-03-03 11:15:23

标签: java eclipse apache-spark

我想在Eclipse中试用Spark ML,但在此之前必须先做一些数据处理。下面的代码就是这部分数据处理。


package org.test.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.feature.StringIndexer
import org.apache.spark.ml.feature.VectorAssembler

import org.apache.spark.ml.tuning.{ ParamGridBuilder, CrossValidator }
import org.apache.spark.ml.{ Pipeline, PipelineStage }

import org.apache.spark.rdd.RDD

import org.apache.spark.sql._

object DataTest{

  import scala.reflect.runtime.universe.TypeTag

  /**
   * One parsed input row, with every attribute stored as a Double.
   *
   * `creditability` comes first; the remaining twenty fields carry the same
   * names, in the same order, as the entries of `featureCols`.
   */
  case class Credit(
    creditability: Double,
    balance: Double, duration: Double, history: Double, purpose: Double, amount: Double,
    savings: Double, employment: Double, instPercent: Double, sexMarried: Double, guarantors: Double,
    residenceDuration: Double, assets: Double, age: Double, concCredit: Double, apartment: Double,
    credits: Double, occupation: Double, dependents: Double, hasPhone: Double, foreign: Double
  ) // the parameter list was left unclosed in the original, which cannot compile

  // Program entry point. `args` is not read anywhere in the visible code.
  def main(args: Array[String]) = {

    //Start the Spark context
    // The SparkConf is created without any explicit settings.
    // NOTE(review): presumably master / app name are supplied externally
    // (e.g. by the launcher) -- confirm.
    val conf = new SparkConf()
    val sc = new SparkContext(conf)

    // SQLContext built on top of the SparkContext; used below for the SQL query.
    val sqlContext= new org.apache.spark.sql.SQLContext(sc)

    // Implicit conversions of this SQLContext; the later `.toDF()` call on an RDD relies on them.
    import sqlContext.implicits._

  // function to create a  Credit class from an Array of Double
    /**
     * Builds a [[Credit]] from one row of already-parsed numeric values.
     *
     * Most columns are shifted down by one; columns 2, 3, 4, 5, 8 and 13 are
     * passed through unchanged.
     *
     * @param line the numeric columns of one input row; must hold at least 21 values
     * @return the corresponding Credit instance
     * @throws IllegalArgumentException if fewer than 21 values are supplied
     */
    def parseCredit(line: Array[Double]): Credit = {
      require(line.length >= 21, s"expected at least 21 columns but got ${line.length}")
      Credit(
        // NOTE(review): the original listed only 20 values (indices 1..20) for a
        // 21-field case class and had no constructor call; index 0 is assumed to
        // be `creditability` -- confirm against the data file.
        line(0),
        line(1) - 1, line(2), line(3), line(4), line(5),
        line(6) - 1, line(7) - 1, line(8), line(9) - 1, line(10) - 1,
        line(11) - 1, line(12) - 1, line(13), line(14) - 1, line(15) - 1,
        line(16) - 1, line(17) - 1, line(18) - 1, line(19) - 1, line(20) - 1
      )
    }

// function to transform an RDD of Strings into an RDD of Double
   /**
    * Converts an RDD of text lines into an RDD of numeric rows.
    *
    * @param rdd one text line per record
    * @return one Array[Double] per input line, in the same column order
    */
   def parseRDD(rdd: RDD[String]): RDD[Array[Double]] = {
     // NOTE(review): the original body was missing entirely. A comma delimiter is
     // assumed because the input file is named germancredit.csv -- confirm.
     rdd.map(_.split(",").map(_.trim.toDouble))
   }

  // Read the input file, turn every line into a Credit row, convert to a DataFrame and cache it.
  val creditDF = parseRDD(sc.textFile("germancredit.csv")).map(parseCredit).toDF().cache()

  // The SQL query below reads from a table called "credit"; it has to be registered
  // first, otherwise the query fails because the table cannot be found.
  creditDF.registerTempTable("credit")

  // Average balance, amount and duration per creditability value.
  sqlContext.sql("SELECT creditability, avg(balance) as avgbalance, avg(amount) as avgamt, avg(duration) as avgdur  FROM credit GROUP BY creditability ").show()

  // define the feature columns to put in the feature vector
  val featureCols = Array("balance", "duration", "history", "purpose", "amount",
    "savings", "employment", "instPercent", "sexMarried",  "guarantors",
    "residenceDuration", "assets",  "age", "concCredit", "apartment",
    "credits",  "occupation", "dependents",  "hasPhone", "foreign" )

  // set the input and output column names
  val assembler = new VectorAssembler().setInputCols(featureCols).setOutputCol("features")

  // return a dataframe with all of the feature columns in a vector column
  val df2 = assembler.transform(creditDF)

// the transform method produced a new column: features.




当我运行mvn clean install时,我得到以下内容:


错误:错误的符号引用(bad symbolic reference)。PipelineStage.class 中的某个签名引用了包 org.apache.spark 中的项 internal,而该项不可用。它可能在当前类路径中完全缺失,或者类路径上的版本与编译 PipelineStage.class 时所用的版本不兼容。   val assembler = new VectorAssembler().setInputCols(featureCols).setOutputCol("features")

问题似乎出在这一行调用上:val assembler = new VectorAssembler().setInputCols(featureCols).setOutputCol("features")

当我运行mvn clean package时,我得到:


无法在项目 spark 上执行目标 org.scala-tools:maven-scala-plugin:2.15.2:compile (default):wrap: org.apache.commons.exec.ExecuteException: 进程退出并报错:1(退出值:1)-> [Help 1]


<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

            <name>Scala-tools Maven2 Repository</name>







            <!-- mixed scala/java compile -->





            <!-- for fatjar -->
                <!--This plugin's configuration is used to store Eclipse m2e settings 
                    only. It has no influence on the Maven build itself. -->



1 个答案:

答案 0 :(得分:0)



在这里,有些依赖使用的是1.6.0版本,另一些使用的是2.0.1版本——我认为所有Spark构件(artifact)的版本都必须保持一致。