在 Spark 1.6 中使用 LinearRegressionWithSGD 得到负数预测值

时间:2018-10-31 15:04:57

标签: apache-spark linear-regression apache-spark-mllib

package ml_prj_01_01

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LinearRegressionWithSGD

/**
 * Fits a linear regression model with `LinearRegressionWithSGD` on a
 * comma-separated data file and prints a sample of (label, prediction) pairs.
 *
 * Each input line is expected to hold at least 11 numeric columns: the first is
 * used as the label and the following ten as the feature vector.
 */
object linear_02 {

  /** Number of leading columns read from every input line (1 label + 10 features). */
  private val ColumnCount = 11

  /**
   * Location of the input file. In the original listing this literal was broken
   * across two lines (which does not compile); it is rejoined here with a single
   * space. NOTE(review): confirm the exact directory name on disk.
   */
  private val InputPath =
    "/home/user/Downloads/Prj_files/house price linear_reg/" +
      "housedata(linear regression)/pricing_finalized"

  /**
   * Parses one comma-separated line into its first 11 numeric values.
   *
   * @param x a line of text with at least 11 comma-separated numbers
   * @return the first 11 values, in file order
   * @throws IllegalArgumentException if the line has fewer than 11 fields
   * @throws NumberFormatException    if one of the first 11 fields is not a number
   */
  def par1(x: String): Array[Double] = {
    val fields = x.split(",")
    require(
      fields.length >= ColumnCount,
      s"expected at least $ColumnCount comma-separated values but found ${fields.length}: $x"
    )
    // Columns beyond the first 11 are ignored, as before.
    fields.take(ColumnCount).map(_.toDouble)
  }

  /**
   * Converts a parsed row into a `LabeledPoint`.
   *
   * @param x a row of at least 11 values; `x(0)` is the label and
   *          `x(1)` to `x(10)` are the features
   * @return the labeled point with a dense 10-element feature vector
   * @throws IllegalArgumentException if the row has fewer than 11 values
   */
  def par2(x: Array[Double]): LabeledPoint = {
    require(
      x.length >= ColumnCount,
      s"expected at least $ColumnCount values but found ${x.length}"
    )
    new LabeledPoint(x(0), Vectors.dense(x.slice(1, ColumnCount)))
  }

  /**
   * Entry point: loads the data, splits it on the third feature, trains the
   * model on one part and prints ten (label, prediction) pairs from the other.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("linear_02").setMaster("local[*]")
    val sc = new SparkContext(conf)

    try {
      val points = sc.textFile(InputPath).map(par1).map(par2)

      // Rows are split on feature index 2 (the fourth file column).
      // NOTE(review): this is a deterministic split by value, not a random one,
      // so the model is evaluated only on rows outside the range it was trained
      // on - consider `randomSplit` instead.
      // The training set is cached because SGD makes one pass over it per iteration.
      val training = points.filter(_.features(2) > 900).cache()
      val testing = points.filter(_.features(2) <= 900)

      // Arguments: input, numIterations = 50, stepSize = 0.0000006, miniBatchFraction = 1.0.
      // NOTE(review): the features are fed in unscaled and this overload fits no
      // intercept; together with the very small step size this is a likely cause
      // of the negative predictions. Consider standardising the features (e.g.
      // mllib's StandardScaler), enabling the intercept and re-tuning the step
      // size - to be confirmed against the data.
      val model = LinearRegressionWithSGD.train(training, 50, 0.0000006, 1.0)

      val predictions = testing.map(point => (point.label, model.predict(point.features)))
      predictions.take(10).foreach(println)
    } finally {
      // Always release the local Spark resources, even when the job fails.
      sc.stop()
    }
  }

}

运行上面的代码时,我得到的预测值全部是负数。这里是用特征和标签来预测房价,而房价不可能为负,所以这样的输出显然不合理。因此,就连均方误差和均方根误差也无法计算。

数据文件中各列的名称依次为:定价、卧室、浴室、起居、平方英尺、楼层、海滨、景观、状况、面积、地下室

以下链接提供了数据集文件:

dataset_file

0 个答案:

没有答案