Question

请考虑以下事项：

library(dplyr)
library(caret)

# Fix the RNG state so the index sampling and the resampling are repeatable.
set.seed(42)

# Collapse the three iris species into a two-level outcome.
levels(iris$Species) <- list(
  "setosa" = c("setosa"),
  "not_setosa" = c("versicolor", "virginica")
)

n_rows <- nrow(iris)

# NOTE(review): sample.int() with a single argument returns a permutation of
# 1..n, so this appears to pick the first ~35% of rows (in random order)
# rather than a random 35% of all rows -- confirm this is intended.
train <- sample.int(0.35 * n_rows)
# Test indices are drawn only from rows that are not in `train`.
test <- sample(setdiff(seq_len(n_rows), train), 0.4 * n_rows)

# Candidate cutoff values: 0.10, 0.15, ..., 0.80.
v <- seq(0.1, 0.8, by = 0.05)

# Everything below is identical for every cutoff, so build it once.
train_set <- iris %>% slice(train)
test_set <- iris %>% slice(test)
resampling <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
tuning_grid <- expand.grid(
  .cost = 1,
  .loss = c("L1", "L2_dual", "L2_primal"),
  .epsilon = seq(0.001, 0.01, length.out = 5)
)

# Refit the model once per cutoff and report hold-out accuracy.
for (threshold in v) {
  # NOTE(review): `cutoff` is not a documented train() argument; it is only
  # forwarded through `...` to the underlying fitting routine. Whether that
  # routine honours it cannot be determined from this script -- verify.
  m <- train(
    factor(Species) ~ .,
    data = train_set,
    method = "regLogistic",
    trControl = resampling,
    tuneGrid = tuning_grid,
    metric = "Accuracy",
    preProcess = c("center", "scale"),
    cutoff = c(threshold, 1 - threshold)
  )

  # Share of test rows whose predicted class matches the true label.
  holdout_accuracy <- mean(predict(m, test_set) == test_set$Species)
  print(paste0("CUTOFF: ", threshold, " WITH ACCURACY ", holdout_accuracy))
}

[1] "CUTOFF: 0.1 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.15 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.2 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.25 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.3 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.35 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.4 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.45 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.5 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.55 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.6 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.65 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.7 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.75 WITH ACCURACY 0.966666666666667"
[1] "CUTOFF: 0.8 WITH ACCURACY 0.966666666666667"

我对用于选择逻辑回归模型的Accuracy指标的底层细节感到好奇。我读过的各种网站（caret的官方文档中没有提到这一点）都说分类阈值为0.5。有没有办法在运行train()时更改此阈值？train()中的cutoff参数似乎没有正确地起作用——首先，当截止值发生变化时，准确率保持不变是不合理的。

regLogistic：改变截止值以获得准确性

0 个答案: