package ml_prj_01_01
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
object linear_02 {
def par1(x: String): Array[Double]={
val a= x.split(",")
val b=a(0).toDouble
val c=a(1).toDouble
val d=a(2).toDouble
val e=a(3).toDouble
val f=a(4).toDouble
val g=a(5).toDouble
val h=a(6).toDouble
val i=a(7).toDouble
val j=a(8).toDouble
val k=a(9).toDouble
val l=a(10).toDouble
return Array(b,c,d,e,f,g,h,i,j,k,l)
}
def par2(x: Array[Double]): LabeledPoint={
return new
LabeledPoint(x(0),Vectors.dense(x(1),x(2),x(3)
,x(4),x(5),x(6),x(7),x(8),x(
9),x(10)) )
}
def main(args: Array[String]) {
val conf = new
SparkConf().setAppName("linear_02").setMaster("local[*]")
val sc = new SparkContext(conf)
val file = sc.textFile("/home/user/Downloads/Prj_files/house price
linear_reg/housedata(linear regression)/pricing_finalized")
val todouble= file.map(x=> par1(x))
val tolp= todouble.map(x=> par2(x))
val training= tolp.filter(x=> x.features(2) >900)
val testing= tolp.filter(x=> x.features(2) <=900)
val model=
LinearRegressionWithSGD.train(training,50,0.0000006,1.0)
val predictions= testing.map(x=>
(x.label,model.predict(x.features)) )
predictions.take(10).foreach(println)
}
}
在上面的代码中,我只获得了负面的预测,采用特征和标签来预测价格会产生负面的输出,这是不可能的。因此,甚至均方误差和均方根误差也无法计算。
与文件相对应的列名称为:- 定价,卧室,浴室,起居,平方英尺,地板,海滨,景观,状况,面积,地下室
以下链接提供了数据集文件: