将重采样结果与独立测试集的结果绘制在同一张图中

时间:2018-07-12 22:36:17

标签: r data-visualization r-caret

我目前已经训练了一些预测模型,并使用 caret 包获得了重采样估计值。在运行模型之前,我还划分出了一个独立的测试集,用于预测并评估 ROC AUC 分数。

我目前希望在一张点图(dotplot)中把两者结合起来展示,类似于 Max Kuhn 的《Applied Predictive Modeling》一书中的图:[图片:enter image description here]

我该如何实现?以下是我到目前为止在模拟数据上所做的尝试。

library(caret)
library(tidyverse)

# Simulate Data
# Seed BEFORE the first random draw: the original script only seeded after
# twoClassSim(), so the simulated data changed on every run.
set.seed(337)
data <- twoClassSim(n = 500, linearVars = 8)
# The last column is set aside so it can be re-attached as the final column,
# under the name `y` (it is used as the outcome further down).
y <- data[, ncol(data)]

# Add binary data
# Re-seeding here keeps the draws below (extra columns and the train/test
# split) identical to what the original script produced.
set.seed(337)
data <- bind_cols(data[, -ncol(data)], LPH07_1(n = 500, factors = FALSE)[1:3])
data <- bind_cols(data, as.data.frame(y))

# Train/test
# 80% of the rows go to `train`; the remaining rows form the held-out `test`.
idx <- createDataPartition(data$y, p = .8, list = FALSE)
train <- data[idx, ]
test <- data[-idx, ]

# Resampling indices built from the training outcome
# (k = 10 is the createMultiFolds() default, spelled out here).
dummy_index <- createMultiFolds(y = train[["y"]], k = 10, times = 3)

# Five stats summary
# Combined summary function: forwards all arguments unchanged to both
# twoClassSummary() and defaultSummary() and concatenates the two results,
# two-class statistics first.
fiveStats <- function(...) {
  two_class_stats <- twoClassSummary(...)
  default_stats <- defaultSummary(...)
  c(two_class_stats, default_stats)
}



# Train control
# Repeated 10-fold CV (3 repeats) with class probabilities, summarised by
# fiveStats. `index = dummy_index` hands every model the exact same
# pre-computed resampling rows, so the per-resample results compared via
# resamples() are matched by construction rather than relying on identical
# set.seed() calls before each train().
trCtrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  index = dummy_index,
  classProbs = TRUE,
  verboseIter = TRUE,
  summaryFunction = fiveStats
)



# Model 1: random forest, tuned over mtry = 1..7 and selected on ROC
rf_grid <- expand.grid(mtry = 1:7)

set.seed(337)
rf_model <- train(
  x = train[, -ncol(train)], # every column except the last (the outcome)
  y = train$y,
  method = "rf",
  metric = "ROC",
  preProcess = c("center", "scale"),
  tuneGrid = rf_grid,
  trControl = trCtrl
)


# Model 2: logistic regression (glm with a binomial family), scored on ROC
set.seed(337)
glm_model <- train(
  x = train[, -ncol(train)], # every column except the last (the outcome)
  y = train$y,
  method = "glm",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = trCtrl,
  family = "binomial"
)



# Model 3: nearest neighbours via method "kknn", scored on ROC
set.seed(337)
knn_model <- train(
  x = train[, -ncol(train)], # every column except the last (the outcome)
  y = train$y,
  method = "kknn",
  metric = "ROC",
  preProcess = c("center", "scale"),
  trControl = trCtrl
)



# Collect the fitted models under display names and gather their
# resampling results into a single object.
model_list <- setNames(
  list(rf_model, glm_model, knn_model),
  c("Random Forest", "Logistic Regression", "kNN")
)
model_resmaples <- resamples(model_list)


# Test set predictions
# auc_scores(): ROC analysis of `model` on the held-out `test` set.
#   model    a fitted caret model that can return class probabilities
#   returns  the `ci` element of the roc object as a plain numeric vector
#            (NOTE(review): expected to be lower bound, AUC, upper bound --
#            confirm against the pROC documentation)
auc_scores <- function(model) {
  # Predict from the predictor columns only (outcome is the last column),
  # mirroring how the models were trained.
  class_probs <- predict(model, test[, -ncol(test)], type = "prob")
  # roc() lives in pROC, which this script never attaches; qualifying the
  # call avoids a "could not find function" error.
  roc_fit <- pROC::roc(test$y, class_probs[, 1], ci = TRUE)
  as.numeric(roc_fit$ci)
}

# One column per model; rows follow the order of the values in `ci`.
auc_scores_testset <- dplyr::bind_cols(
  rf_AUC = auc_scores(rf_model),
  glm_AUC = auc_scores(glm_model),
  knn_AUC = auc_scores(knn_model)
)



# Dotplot for resamples
# dotplot() returns a lattice object that is only drawn when printed. The
# explicit print() makes the plot appear under source()/Rscript as well as
# at the interactive prompt.
print(dotplot(model_resmaples, metric = "ROC"))

0 个答案:

没有答案