我正在使用 R 中的鸢尾花(iris)数据集,尝试用训练数据拟合朴素贝叶斯(Naïve Bayes)分类器,以便为该分类器在训练数据集上生成混淆矩阵(预测值与实际值的对比)。另外,该如何计算朴素贝叶斯分类器的错误分类率?
到目前为止,这是我的代码:
# Flag rows for an 80/20 split and store the flag as an extra column on iris.
# NOTE(review): sample.split() is handed the whole data frame here rather than
# a label vector such as iris$Species -- confirm against the caTools docs that
# this yields the intended per-row split.
iris$spl=sample.split(iris,SplitRatio=0.8)
# Rows flagged TRUE form the training set, rows flagged FALSE the test set.
# Both subsets still carry the spl column.
train=subset(iris, iris$spl==TRUE)
test=subset(iris, iris$spl==FALSE)
# Fit naive Bayes with Species as the response and every other column of
# train (spl included) as a predictor.
iris.nb <- naiveBayes(Species~.,data = train)
# Print the fitted model.
iris.nb
# Predict classes for the training rows (the result is named nb_test_predict
# even though the input is train).
nb_test_predict <- predict(iris.nb, train)
有关如何解决此问题的任何建议吗?
答案 0 :(得分:6)
caret 包提供了 confusionMatrix 函数,该函数会返回非常完整的输出。
library(e1071)    # naiveBayes()
library(caTools)  # sample.split()
library(caret)    # confusionMatrix()

# Stratified 80/20 split of the rows. sample.split() expects the vector of
# class labels; given the whole data frame it splits over the *columns*, and
# the short logical vector is then recycled down the rows, producing a
# periodic split at the wrong ratio. The flag is also kept in its own variable
# instead of iris$spl so that it does not become a predictor in Species ~ .
spl <- sample.split(iris$Species, SplitRatio = 0.8)
train <- iris[spl, ]
test <- iris[!spl, ]

# Fit naive Bayes with Species as the response and the four measurements as
# predictors.
iris.nb <- naiveBayes(Species ~ ., data = train)

# Predict the held-out rows (label column removed) and compare the predictions
# with the true labels.
nb_test_predict <- predict(iris.nb, test[, names(test) != "Species"])
cfm <- confusionMatrix(nb_test_predict, test$Species)
cfm
# The misclassification rate is 1 - cfm$overall[["Accuracy"]].
# NOTE: the sample output that follows was recorded from an earlier run with
# 50 test rows; this split leaves 30 test rows (10 per species) and is random,
# so the counts you see will differ. Call set.seed() first for reproducibility.
# Confusion Matrix and Statistics
#
# Reference
# Prediction setosa versicolor virginica
# setosa 17 0 0
# versicolor 0 14 1
# virginica 0 2 16
#
# Overall Statistics
#
# Accuracy : 0.94
# 95% CI : (0.8345, 0.9875)
# No Information Rate : 0.34
# P-Value [Acc > NIR] : < 2.2e-16
#
# Kappa : 0.9099
# Mcnemar's Test P-Value : NA
#
# Statistics by Class:
#
# Class: setosa Class: versicolor Class: virginica
# Sensitivity 1.00 0.8750 0.9412
# Specificity 1.00 0.9706 0.9394
# Pos Pred Value 1.00 0.9333 0.8889
# Neg Pred Value 1.00 0.9429 0.9688
# Prevalence 0.34 0.3200 0.3400
# Detection Rate 0.34 0.2800 0.3200
# Detection Prevalence 0.34 0.3000 0.3600
# Balanced Accuracy 1.00 0.9228 0.9403
将混淆矩阵绘制为 ggplot 图形:
library(ggplot2)
library(scales)

# Draw a confusion matrix as a heat map with the count printed in each cell.
#
# m: an object with a `table` element that as.data.frame() turns into the
#    columns Reference, Prediction and Freq, and a numeric `overall` vector
#    whose first two entries are shown in the title as Accuracy and Kappa
#    (for example the result of caret::confusionMatrix()).
#
# Returns the ggplot object; printing it draws the plot.
ggplotConfusionMatrix <- function(m) {
  mytitle <- paste(
    "Accuracy", percent_format()(m$overall[1]),
    "Kappa", percent_format()(m$overall[2])
  )

  ggplot(
    data = as.data.frame(m$table),
    aes(x = Reference, y = Prediction)
  ) +
    # log1p() instead of log(): empty cells have Freq == 0 and log(0) is -Inf,
    # which falls outside the fill scale and is drawn in the NA colour.
    geom_tile(aes(fill = log1p(Freq)), colour = "white") +
    scale_fill_gradient(low = "white", high = "steelblue") +
    # x and y are inherited from the ggplot() call; only the label is new.
    geom_text(aes(label = Freq)) +
    theme(legend.position = "none") +
    ggtitle(mytitle)
}

ggplotConfusionMatrix(cfm)
答案 1 :(得分:1)
您似乎是在寻找 table 函数。
library(e1071)
library(caTools)

# 80/20 partition of the rows, drawn on the class label.
spl <- sample.split(iris$Species, SplitRatio = 0.8)
train <- subset(iris, subset = spl)
test <- subset(iris, subset = !spl)

# Fit the classifier on the training partition and show the fitted model.
iris.nb <- naiveBayes(Species ~ ., data = train)
iris.nb

# Score the training rows themselves, with the label column left out.
nb_train_predict <- predict(iris.nb, train[, setdiff(names(train), "Species")])

# Cross-tabulate predicted classes (rows) against actual classes (columns).
table(nb_train_predict, train$Species)
输出
nb_train_predict setosa versicolor virginica
setosa 40 0 0
versicolor 0 38 4
virginica 0 2 36
误分类率可以计算为
# Misclassification rate: the share of training rows whose predicted class
# differs from the actual Species (the mean of a logical vector is the
# proportion of TRUE values).
mean(nb_train_predict != train$Species)