I'm running into a problem creating a DataFrame and getting an "overloaded method value ..." compile error.
import org.apache.spark.sql.types.DataTypes._
import org.apache.spark.sql.types.{StructType, StructField, StringType, DoubleType}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
val schemaString = "prediction"
// Schema: a single nullable DoubleType column named "prediction"
val schema = StructType(schemaString.split(" ").map(fieldName => StructField(fieldName, DoubleType, nullable = true)))
// Take the probability Vector of the first prediction, strip the brackets from its string
// form, parse the entries back to Double, and pair each probability with its class index
val preds = predictions.select("probability").rdd
  .map(row => row.getAs[Vector](0).toString).first
  .split(",").map(_.trim.replaceAll("[\\[\\]\"]", "")).map(_.toDouble).zipWithIndex
// Sort descending by probability, then keep just the class indices as Doubles
val sortedPreds = preds.sortBy(_._1).reverse
val doubles = sortedPreds.map { case (score, toolIndex) => toolIndex.toDouble }
// case class myRow(i: Double, j: Double, k: Double, m: Double, z: Double)
// doubles is an Array[Double], so queryRDDRow is an RDD[Array[Double]], not an RDD[Row]
val queryRDDRow = sc.parallelize(Seq(doubles))
// The RDD-plus-schema overload of createDataFrame expects an RDD[Row], which seems to be
// where the "overloaded method value" error comes from
val predDF = sqlContext.createDataFrame(queryRDDRow, schema)
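
I suspect the fix is to turn the values into an RDD[Row] before calling createDataFrame. Here is a minimal sketch of what I mean, assuming the goal is a one-column "prediction" DataFrame with one row per sorted class index (that shape is my assumption, and rowRDD is just a name I'm introducing for illustration), replacing the last two lines:

import org.apache.spark.sql.Row

// Wrap each Double in a Row so the element type matches the one-field DoubleType schema
val rowRDD = sc.parallelize(doubles).map(value => Row(value))
val predDF = sqlContext.createDataFrame(rowRDD, schema)
predDF.show()

If the intent is instead a single row with one column per class index, I think the schema would need one DoubleType field per index and the row could be built with Row.fromSeq(doubles), but I'm not sure which shape is right here.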