在 Apache Spark 中,能否为多分类问题计算评估指标(精确率和召回率)?我正在使用 Spark MLlib 的逻辑回归来构建模型,并希望用这些指标来评估模型。
答案 0 :(得分:0)
假设您的测试数据保存在 `test` 中:
生成(预测值, 标签)对并构建 MulticlassMetrics
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils

// Pair the model's prediction for each example with that example's label.
// `test` and `model` are defined outside this snippet; presumably `test` is a
// collection of LabeledPoint and `model` is the trained classifier -- confirm.
val predictionAndLabels = test.map {
  case LabeledPoint(label, features) => (model.predict(features), label)
}

// Evaluator built from the (prediction, label) pairs.
val metrics = new MulticlassMetrics(predictionAndLabels)
混淆矩阵
// Print a header line, then the confusion matrix reported by `metrics`.
println("Confusion matrix:")
println(metrics.confusionMatrix)
总体统计
// Read the accuracy value from `metrics`, then print it under a summary header.
val accuracy = metrics.accuracy
println("Summary Statistics")
println(s"Accuracy = ${accuracy}")
按标签的精确率
// The label values exposed by `metrics`.
val labels = metrics.labels

// Print one line per label with the precision `metrics` reports for it.
for (label <- labels) {
  println(s"Precision($label) = ${metrics.precision(label)}")
}
按标签的召回率
// Print one line per label with the recall `metrics` reports for it.
for (label <- labels) {
  println(s"Recall($label) = ${metrics.recall(label)}")
}
按标签的误报率
// Print one line per label with the false positive rate `metrics` reports for it.
for (label <- labels) {
  println(s"FPR($label) = ${metrics.falsePositiveRate(label)}")
}
按标签的 F1 值(F-measure)
// Print one line per label with the F-measure `metrics` reports for it.
for (label <- labels) {
  println(s"F1-Score($label) = ${metrics.fMeasure(label)}")
}