我目前已经训练了一些预测模型,并使用 caret 包获得了重采样估计值。在运行模型之前,我还创建了一个独立的测试集,用于预测和评估 ROC AUC 分数。
我现在希望把这两类结果(重采样估计值和测试集估计值)合并到同一张点图中进行可视化,类似于 Max Kuhn 的《Applied Predictive Modeling》一书中的图。
我该如何实现?以下是我目前基于模拟数据写出的代码。
library(caret)
library(tidyverse)
# Simulate data ----
# Seed the RNG before the first random draw so that the simulated predictors,
# the extra columns and the train/test split are all reproducible (the seed
# used to be set only after twoClassSim() had already drawn its data).
set.seed(337)
data <- twoClassSim(n = 500, linearVars = 8)
# Keep the outcome (last column of the simulation) aside so it can be
# re-attached below as the final column, named `y`.
y <- data[, ncol(data)]
# Add binary data: the first three columns produced by LPH07_1()
data <- bind_cols(data[, -ncol(data)], LPH07_1(n = 500, factors = FALSE)[1:3])
data <- bind_cols(data, as.data.frame(y))
# Train/test: rows selected by createDataPartition(p = .8) form the training
# set, all remaining rows form the held-out test set.
idx <- createDataPartition(data$y, p = .8, list = FALSE)
train <- data[idx, ]
test <- data[-idx, ]
# Resampling indices built on the training outcome: 10 folds, repeated 3 times
dummy_index <- createMultiFolds(y = train$y, k = 10, times = 3)
# Summary function: concatenates the output of twoClassSummary() with the
# output of defaultSummary(); every argument is forwarded unchanged to both.
fiveStats <- function(...) {
  two_class_stats <- twoClassSummary(...)
  default_stats <- defaultSummary(...)
  c(two_class_stats, default_stats)
}
# Train control ----
# Repeated 10-fold cross-validation (3 repeats) with class probabilities, so
# that the ROC-based statistics in fiveStats() can be computed.
# `index = dummy_index` hands the pre-computed folds to caret: every model
# trained with this control object is resampled on exactly the same partitions,
# which is what resamples() needs for a paired comparison. Previously
# `dummy_index` was created but never used.
trCtrl <- trainControl(method = "repeatedcv",
                       number = 10,
                       repeats = 3,
                       index = dummy_index,
                       classProbs = TRUE,
                       verboseIter = TRUE,
                       summaryFunction = fiveStats)
# Model 1: random forest ----
# Candidate values of mtry are 1 to 7; the tuning metric is ROC.
rf_grid <- expand.grid(mtry = 1:7)
set.seed(337)
rf_model <- train(
  x = train[, -ncol(train)],  # every column except the outcome (last column)
  y = train$y,
  method = "rf",
  metric = "ROC",
  tuneGrid = rf_grid,
  preProcess = c("center", "scale"),
  trControl = trCtrl
)
# Model 2: logistic regression ----
# `family` is not a train() argument of its own; it is passed on to the
# underlying glm fit.
set.seed(337)
glm_model <- train(
  x = train[, -ncol(train)],  # every column except the outcome (last column)
  y = train$y,
  method = "glm",
  family = "binomial",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = trCtrl
)
# Model 3: weighted k-nearest neighbours ("kknn") ----
# No tuneGrid is supplied, so caret's default tuning grid is used.
set.seed(337)
knn_model <- train(
  x = train[, -ncol(train)],  # every column except the outcome (last column)
  y = train$y,
  method = "kknn",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = trCtrl
)
# Resampling list ----
# Named list of the fitted models; the names become the model labels in the
# resamples object.
model_list <- setNames(
  list(rf_model, glm_model, knn_model),
  c("Random Forest", "Logistic Regression", "kNN")
)
# Collect the resampling results of all models into one object
model_resmaples <- resamples(model_list)
# Test set predictions ----

# Test-set ROC AUC with its confidence interval for one fitted model.
#
# model:   a fitted caret model that supports predict(type = "prob").
# newdata: data frame holding the predictors and the factor outcome `y`;
#          defaults to the held-out `test` set.
# Returns the numeric vector stored in the `ci` element of the ROC object
# (lower bound, AUC, upper bound).
auc_scores <- function(model, newdata = test) {
  # Column 1 of the probability matrix is the probability of the first
  # outcome level.
  first_level_prob <- predict(model, newdata, type = "prob")[, 1]
  # roc() lives in pROC, which this script never attaches, so it is called
  # through its namespace. Levels and direction are fixed explicitly rather
  # than auto-detected: controls are the first level and, because the
  # predictor is the probability of that level, controls score higher than
  # cases (direction ">").
  roc_fit <- pROC::roc(
    response = newdata$y,
    predictor = first_level_prob,
    levels = levels(newdata$y),
    direction = ">",
    ci = TRUE
  )
  as.numeric(roc_fit$ci)
}

# One column per model, one row per element of the confidence-interval vector
auc_scores_testset <- dplyr::bind_cols(rf_AUC = auc_scores(rf_model),
                                       glm_AUC = auc_scores(glm_model),
                                       knn_AUC = auc_scores(knn_model))
# Dotplot for resamples ----
# dotplot() returns a lattice object that is only drawn when printed. The
# explicit print() makes the plot appear even when the script is run through
# source() or from inside a function, where auto-printing does not happen.
print(dotplot(model_resmaples, metric = "ROC"))