Question

我是R和机器学习的新手，我正在使用2个类的数据。我正在尝试进行交叉验证，但是当我尝试制作模型的混淆矩阵时，我得到一个错误，即所有参数必须具有相同的长度。我无法理解为什么我输入的内容长度不一样。任何正确方向的帮助都将受到赞赏。

library(MASS)
# Shuffle the rows of `x` (the data set; defined outside this snippet).
xCV = x[sample(nrow(x)),]

# Label each shuffled row with a fold number 1..10 (contiguous bins).
folds <- cut(seq(1,nrow(xCV)),breaks=10,labels=FALSE)

for(i in 1:10){

  # Rows in fold i become the test set; all remaining rows the training set.
  testIndexes = which(folds==i,arr.ind=TRUE)
  testData = xCV[testIndexes, ]
  trainData = xCV[-testIndexes, ]

}
# NOTE(review): the loop body only reassigns testData/trainData, so after the
# loop they hold the split for i == 10 only; no model is fitted per fold.
ldamodel = lda(class ~ ., trainData)
# NOTE(review): `model` is not defined in this snippet (the fit above is
# stored in `ldamodel`), and no newdata such as testData is passed here.
lda.predCV = predict(model)

# Cross-tabulate the training labels against the predicted classes.
conf.LDA.CV=table(trainData$class, lda.predCV$class)
print(conf.LDA.CV)

Answer 1

您的代码的问题在于没有在循环内部进行建模和预测：由于每次迭代都会覆盖上一次的结果，循环结束后只保留了 `i == 10` 时生成的那一组 testIndexes。

以下代码在 iris（鸢尾花）数据集上演示正确的做法：

i == 10

生成折叠：

library(MASS)
data(iris)

或者如果你想要相同大小的折叠：

set.seed(1)
# Randomly assign every row of `iris` to one of 10 folds. Sampling with
# replacement means the fold sizes are generally unequal, as table(folds) shows.
folds <- sample(1:10, size = nrow(iris), replace = TRUE)
table(folds)
#output
folds
 1  2  3  4  5  6  7  8  9 10 
10 12 17 16 21 13 17 20 12 12

在其中 9 个折上训练模型，并对留出的那一折进行预测：

set.seed(1)
# Balanced alternative: build a vector holding each fold label 1..10 equally
# often (recycled to the number of rows), then shuffle it so that fold
# membership is random but every fold has the same size.
folds <- sample(rep(1:10, length.out = nrow(iris)))
table(folds)
#output
folds
 1  2  3  4  5  6  7  8  9 10 
15 15 15 15 15 15 15 15 15 15

这会生成一个由各折留出（hold-out）预测组成的列表；要将其合并为一个数据框：

# For each fold: fit LDA on all other folds and predict the held-out fold.
# The result is a list with one data frame per fold, pairing the held-out
# predictions (`preds`) with the true labels (`real`).
# Iterating over the fold labels actually present avoids hard-coding the
# number of folds and skips any fold that received no rows.
CV_lda <- lapply(sort(unique(folds)), function(k) {
  held_out <- folds == k
  model <- lda(Species ~ ., data = iris[!held_out, ])
  # predict.lda() has no `type` argument; the predicted labels are in `$class`.
  preds <- predict(model, newdata = iris[held_out, ])$class
  data.frame(preds, real = iris$Species[held_out])
})

产生混淆矩阵：

# Stack the per-fold data frames into one data frame of held-out predictions
# (`preds`) and true labels (`real`).
CV_lda <- do.call(rbind, CV_lda)
# Cross-tabulate predicted against actual classes: the confusion matrix.
table(pred = CV_lda$preds, actual = CV_lda$real)

Answer 2

使用 hglm.data 包中的 seeds（种子）数据集：

library(MASS)
data(seeds, package = "hglm.data")

# Shuffle the rows so that fold membership is random.
seedsCV <- seeds[sample(nrow(seeds)), ]

# Label each shuffled row with a fold number 1..10 (contiguous bins).
folds <- cut(seq_len(nrow(seedsCV)), breaks = 10, labels = FALSE)

# Held-out predictions are collected as class labels (character), not as
# integer factor codes: codes would have to be mapped back to labels by hand,
# which breaks when a class is never predicted or the level order changes.
lda.predCV <- rep(NA_character_, nrow(seedsCV))

for (i in sort(unique(folds))) {
  # Rows in fold i form the test set; every other row is used for training.
  testIndexes <- which(folds == i)
  testData <- seedsCV[testIndexes, ]
  trainData <- seedsCV[-testIndexes, ]

  ldamodel <- lda(extract ~ ., trainData)

  lda.predCV[testIndexes] <- as.character(predict(ldamodel, testData)$class)
}

# Rebuild a factor carrying every class level of the response, so that the
# confusion matrix is square even if some class was never predicted.
lda.predCV <- factor(lda.predCV, levels = levels(factor(seedsCV$extract)))

打印混淆矩阵和准确度：

# Confusion matrix: rows are cross-validated predictions, columns true labels.
conf <- table(pred=lda.predCV, actual=seedsCV$extract)
# Accuracy: share of observations on the diagonal (predicted == actual).
accuracy <- sum(diag(conf))/sum(conf)

> conf
          actual
pred       Bean Cucumber
  Bean       10        0
  Cucumber    0       11


> accuracy
[1] 1

如何从交叉验证中产生混淆矩阵？

2 个答案: