我很新火花
我可以使用 loadrisk.show()方法查看数据,但是当我创建对象 JavaRDD balRDD = loadrisk.javaRDD(); 时,我得到的是空指针
public class LoadBalRDD implements Serializable {
public JavaPairRDD getBalRDD(SQLContext sqlContext) {
Dataset<Row> loadrisk = sqlContext.read().format("com.databricks.spark.csv").option("header", "true")
.option("mode", "DROPMALFORMED").load("/home/data/test.csv");
loadrisk.show(); // able to see the result
JavaRDD<Row> balRDD = loadrisk.javaRDD(); // here not loading
JavaPairRDD<String, Balrdd> balRDDMap = balRDD.mapToPair(x -> {
String aml_acc_id = "";
if (!x.isNullAt(x.fieldIndex("aml_acc_id")))
aml_acc_id = x.getAs("aml_acc_id").toString();
Tuple2<String, Balrdd> tp = new Tuple2(x.getAs(x.fieldIndex("aml_acc_id")).toString(),
new Balrdd(aml_acc_id));
return tp;
}).repartitionAndSortWithinPartitions(new CustomAcctIdPartitioner());
return balRDDMap;
}
}