我之前使用样例类(case class)将 RDD 转换为 DataFrame,但我当前的数据有 700 列。有人建议我改用 StructType,但我找不到相关示例。希望有人能在这里分享一个例子。谢谢。凯文
答案 0 :(得分:-2)
以下是使用 StructType 的示例。示例输入:
a,1,2.0
b,2,3.0
import org.apache.spark.sql.Row
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.DoubleType
/** Builds the schema for the three-column input: a string, an integer and a double column.
  *
  * Every column is declared nullable. The columns are listed as (name, type) pairs and
  * turned into fields in one pass, so a wider schema only needs more entries in the list.
  *
  * @return the `StructType` describing `col_a`, `col_b` and `col_c`, in that order
  */
def getSchema(): StructType = {
  val columns = Seq(
    "col_a" -> StringType,
    "col_b" -> IntegerType,
    "col_c" -> DoubleType
  )
  StructType(columns.map { case (name, dataType) =>
    StructField(name, dataType, nullable = true)
  })
}
// NOTE(review): `sc` is not defined in this snippet; presumably it is the SparkContext
// provided by the spark-shell — confirm when running elsewhere.
val sqlContext = new SQLContext(sc)

// Parse each comma-separated line into a Row whose values line up with getSchema():
// field 0 stays a String, field 1 becomes an Int, field 2 becomes a Double.
val rdd = sc.textFile("/tmp/test").map { line =>
  // A negative split limit keeps trailing empty fields instead of dropping them.
  val fields = line.split(",", -1)
  Row(fields(0), fields(1).toInt, fields(2).toDouble)
}

// Attach the explicit schema to the parsed rows and print the resulting DataFrame.
val df = sqlContext.createDataFrame(rdd, getSchema())
df.show()
+-----+-----+-----+
|col_a|col_b|col_c|
+-----+-----+-----+
|    a|    1|  2.0|
|    b|    2|  3.0|
+-----+-----+-----+