我正在运行r脚本来交叉验证ROC曲线的AUC。我有70个人的数据集; 24有病,46没有病。我列出了在Logistic回归模型中用于开发预测模型的八个变量。
问题是,当我使用cvAUC包中的iid_example时,它返回错误:“ Y0 [[v]]中的错误:下标超出范围”。
我寻求帮助堆栈溢出,并了解这可能是由于行数不正确而导致的,但我找不到。 有人可以帮我吗!。
R代码如下所示。 “黄金”是结果,其余变量是协变量。没有具体的暴露变量。
# Create CV folds (stratify by outcome)
.cvFolds <- function(Y, V){
Y0 <- split(sample(which(Y == 0)),
rep(1:V, length = length(which(Y == 0))))
Y1 <- split(sample(which(Y == 1)),
rep(1:V, length = length(which(Y == 1))))
folds <- vector("list", length = V)
for (v in seq(V)) {folds[[v]] <- c(Y0[[v]], Y1[[v]])}
return(folds)
}
# Train/test glm for each fold
.doFit <- function(v, folds, data){
fit <- glm(Y ~ ., data = data[-folds[[v]],], family = "binomial")
pred <- predict(fit, newdata = data[folds[[v]],], type = "response")
return(pred)
}
iid_example <- function(data, y = "V1", V = 10){
# Create folds
folds <- .cvFolds(Y = data$Y, V = V)
# CV train/predict
predictions <- unlist(sapply(seq(V), .doFit,
folds = folds, data = data))
# Re-order pred values
predictions[unlist(folds)] <- predictions
# Get CV AUC and confidence interval
out <- ci.cvAUC(predictions = predictions, labels = data$Y,
folds = folds, confidence = 0.95)
return(out)
}
# Load a training set with a binary outcome
train <- tb[ tb$zn == "Negative", c("inf", "tnf", "il2", "il6", "il10", "cd4_200", "cd8_1000","gold")]
library(cvAUC)
# Get cross-validated performance
set.seed(1)
out <- iid_example( data = train, y = "gold", V = 10 )