在REPL中使用Spark Scala将数据map到case类

时间:2016-02-23 07:08:38

标签: scala apache-spark

在scala REPL(使用Spark)中,我设置并获得以下内容:

scala> import Math.{PI,cos,sin}    
import Math.{PI, cos, sin}

scala> import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.Vectors

scala> import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.linalg.Vector

scala> import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD

scala> import com.github.fommil.netlib.BLAS;
import com.github.fommil.netlib.BLAS

scala> case class CoordCC(x:Double, y:Double, z:Double)
defined class CoordCC

scala> case class IndivMDzCC(info:String, coord:CoordCC)
defined class IndivMDzCC

scala> val radius = 3959.0
radius: Double = 3959.0

scala> val radianPerDegree = PI / 180.0
radianPerDegree: Double = 0.017453292519943295

scala> val pathMD = "/Users/stephan/Galvanize/QuickLabs/Dec16-Lab1-Data-Science-2/mcdonalds.csv"
pathMD: String = /Users/stephan/Galvanize/QuickLabs/Dec16-Lab1-Data-Science-2/mcdonalds.csv

scala> val dataMD = sc.textFile(pathMD)
dataMD: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[3] at textFile at <console>:34

scala> dataMD.take(2)
res2: Array[String] = Array(-149.868307,61.140133,"McDonalds [WM]-Anchorage,AK","8900 Old Seward Hwy [WM], Anchorage,AK, (907) 344-5831", -149.88113,61.192426,"McDonalds [WM]-Anchorage,AK","3101 A St [WM], Anchorage,AK, (907) 561-5137")

scala> val pMD = dataMD.
     |     map(s => 
     |         // double
     |         s.split(",").
     |         take(2).
     |         map(_.toDouble)
     |     ).
     |     map(ar => {
     |         val R = 3959.0
     |         val tmp = PI / 180.0
     |         val lat = tmp * ar(0)
     |         val lon = tmp * ar(1)
     |         val tem = R * cos(lat) 
     |         val x = tem * cos(lon)
     |         val y = tem * sin(lon)
     |         val z = R * sin(lat)
     |         IndivMDzCC("something", CoordCC(x,y,z))
     |      })
pMD: org.apache.spark.rdd.RDD[IndivMDzCC] = MapPartitionsRDD[5] at map at <console>:47

scala> val pMD = dataMD.
     |   map(x => {
     |     val s = x.split(",")
     |     val lat = radianPerDegree * s(0).toDouble
     |     val lon = radianPerDegree * s(1).toDouble
     |     val tem = radius * cos(lat) 
     |     val x = tem * cos(lon)
     |     val y = tem * sin(lon)
     |     val z = radius * sin(lat)
     |     IndivMDzCC(s(2) +","+ s(3), CoordCC(x,y,z))
     |   })
<console>:47: error: recursive value s needs type
           val lat = radianPerDegree * s(0).toDouble
                                       ^
<console>:46: error: value split is not a member of Double
           val s = x.split(",")
                     ^

scala> 

注意:上面的take(2)返回了两个字符串,每个字符串依次包含纬度,经度,名称和地址。

我尝试过用多种方式修改map内部的代码,有时会得到递归定义之类的错误。我不确定为什么会发生这种情况......一定存在某个更基本的问题。我做错了什么(请用新手更容易理解的方式解释)?

0 个答案:

没有答案