I am using Spark Streaming with Spark MLlib to evaluate a Naive Bayes model. I am stuck because I cannot get from the JavaPairDStream to an RDD in order to compute the accuracy. The predictions and the true labels are stored in this JavaPairDStream, and I want to walk through each pair and compare them to compute the accuracy.
I am posting my code to make the question clearer. It fails to compile in the part that computes the accuracy ("The operator / is undefined for the argument type(s) JavaDStream&lt;Long&gt;, double"), because that way of dividing only works on a JavaPairRDD, not on a stream. I need help computing the accuracy from a JavaPairDStream.
EDIT: I have updated the code. My question now is how to read the accuracy value out of the JavaDStream&lt;Long&gt;, and then how to accumulate that value across batches so I get the accuracy over all the data. (One possible approach is sketched after the code.)
public static JSONArray testSparkStreaming() {
    SparkConf sparkConf = new SparkConf()
            .setAppName("My app")
            .setMaster("local[2]")
            .set("spark.driver.allowMultipleContexts", "true");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.milliseconds(500));

    // Load the previously trained and saved Naive Bayes model.
    String savePath = "path to saved model";
    final NaiveBayesModel savedModel = NaiveBayesModel.load(jssc.sparkContext().sc(), savePath);

    // Each CSV line is "feature1,...,featureN,label"; parse it into a LabeledPoint.
    JavaDStream<String> data = jssc.textFileStream("path to CSV file");
    JavaDStream<LabeledPoint> testData = data.map(new Function<String, LabeledPoint>() {
        public LabeledPoint call(String line) throws Exception {
            List<String> featureList = Arrays.asList(line.trim().split(","));
            double[] points = new double[featureList.size() - 1];
            double classLabel = Double.parseDouble(featureList.get(featureList.size() - 1));
            for (int i = 0; i < featureList.size() - 1; i++) {
                points[i] = Double.parseDouble(featureList.get(i));
            }
            return new LabeledPoint(classLabel, Vectors.dense(points));
        }
    });

    // Pair each model prediction with the true label.
    JavaPairDStream<Double, Double> predictionAndLabel = testData.mapToPair(
            new PairFunction<LabeledPoint, Double, Double>() {
                public Tuple2<Double, Double> call(LabeledPoint p) {
                    return new Tuple2<Double, Double>(savedModel.predict(p.features()), p.label());
                }
            });

    // count() does not return a number: it returns a JavaDStream<Long>
    // holding one element per batch (the number of correct predictions).
    JavaDStream<Long> accuracy = predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() {
        public Boolean call(Tuple2<Double, Double> pl) throws JSONException {
            return pl._1().equals(pl._2());
        }
    }).count();
    jssc.start();
    jssc.awaitTermination(); // blocks here, so the lines below are never reached
    JSONArray jsonArray = new JSONArray();
    JSONObject obj = new JSONObject();
    // Does not compile: accuracy is a JavaDStream<Long>, not a numeric value,
    // so accuracy * 100 is undefined. This is the part I need help with.
    obj.put("Accuracy", accuracy * 100 + "%");
    jsonArray.put(obj);
    return jsonArray;
}
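One approach that might work is sketched below. It is a minimal sketch, assuming Spark 1.5+ (where foreachRDD accepts a VoidFunction) and the predictionAndLabel stream defined above; the AtomicLong counters and the variable names are illustrative, not part of my original code. The idea is that the body of foreachRDD runs on the driver once per batch, and rdd.count() is an action that returns a plain long to the driver, so ordinary driver-side counters can hold the running totals. The call has to be registered before jssc.start(), and it also serves as the output operation Spark Streaming requires before a context can start.
// Extra imports this sketch needs:
import java.util.concurrent.atomic.AtomicLong;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

// Running totals kept on the driver across all batches (illustrative names).
final AtomicLong totalCount = new AtomicLong(0);
final AtomicLong correctCount = new AtomicLong(0);

// Register before jssc.start(); the body runs on the driver once per batch.
predictionAndLabel.foreachRDD(new VoidFunction<JavaPairRDD<Double, Double>>() {
    public void call(JavaPairRDD<Double, Double> rdd) {
        // Actions like count() bring plain long values back to the driver.
        long total = rdd.count();
        long correct = rdd.filter(new Function<Tuple2<Double, Double>, Boolean>() {
            public Boolean call(Tuple2<Double, Double> pl) {
                return pl._1().equals(pl._2());
            }
        }).count();

        totalCount.addAndGet(total);
        correctCount.addAndGet(correct);

        if (totalCount.get() > 0) {
            double accuracy = 100.0 * correctCount.get() / totalCount.get();
            System.out.println("Accuracy over all batches so far: " + accuracy + "%");
        }
    }
});
With something like this in place, the separate filter(...).count() stream would no longer be needed, and the cumulative ratio correctCount / totalCount could either be printed as each batch arrives (as here) or read on the driver after the streaming context stops.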