我想通过重复交叉验证来拟合带有调优参数的惩罚回归模型,并报告最优调优参数和系数估计值。
我附上 R 代码以供说明。
此代码对前列腺(prostate)数据拟合弹性网(elastic net)模型。
alpha 从 (0, 0.25, 0.5, 0.75, 1) 这 5 个值中选择。
在每次重复中,对每个 alpha 通过交叉验证选出 lambda,并保留 test MSE 最小的一组(lambda 和 alpha)。
取 50 次重复所得 lambda 和 alpha 值的平均值作为最佳值,并据此计算 test MSE。
这是重复交叉验证的正确方法吗?
library(ElemStatLearn)
library(glmnet)
# Predictor matrix built from every column except the response (lpsa) and the
# train/test indicator; [, -1] drops the intercept column that model.matrix()
# adds.
x <- model.matrix(lpsa ~ . - train, data = prostate)[, -1]
y <- prostate$lpsa

# Row indices of the training and test partitions given by prostate$train.
trainlab <- which(prostate$train == "TRUE")
testlab <- which(prostate$train == "FALSE")

trainx <- x[trainlab, ]
trainy <- y[trainlab]
testx <- x[testlab, ]
testy <- y[testlab]

# Standardize the training predictors and centre the training response.
trainsx <- scale(trainx)
trainsy <- trainy - mean(trainy)

# The test set must be transformed with the TRAINING centre/scale (and the
# training response mean): the coefficients are estimated on that scale, so
# standardizing the test set with its own statistics would put it on a
# different scale and leak test-set information into the evaluation.
testsx <- scale(
  testx,
  center = attr(trainsx, "scaled:center"),
  scale = attr(trainsx, "scaled:scale")
)
testsy <- testy - mean(trainy)
# Grid of elastic-net mixing values passed to glmnet's `alpha` argument.
alpha <- seq(0, 1, by = 0.25)
size.alpha <- length(alpha)

# Number of times the whole cross-validation procedure is repeated.
repetition <- 50

# Scratch vectors holding one test MSE per alpha value; they are overwritten
# on every repetition.
test.mse.lmin <- rep(NA, times = size.alpha)
test.mse.l1se <- rep(NA, times = size.alpha)

# One row per repetition, three columns (named lambda / alpha / test.mse once
# the repetitions are finished).
results.lmin <- matrix(NA, repetition, 3)
results.l1se <- matrix(NA, repetition, 3)
# Repeated cross-validation.
# For every repetition and every alpha in the grid, cv.glmnet() picks
# lambda.min and lambda.1se on the training data; the test MSE of each choice
# is computed, and the (lambda, alpha) pair with the smallest test MSE is
# stored in row `t` of results.lmin / results.l1se.
#
# NOTE(review): alpha is selected by test-set MSE, so the test MSE recorded
# here is optimistically biased as an estimate of generalization error.
# Consider selecting alpha by the cross-validated error instead.
set.seed(1)

# lambda chosen for each alpha within one repetition, so that the lambda
# reported alongside the winning alpha is the one fitted with that alpha
# (previously the lambda of the last alpha in the grid was always reported).
lambda.lmin <- rep(NA_real_, size.alpha)
lambda.l1se <- rep(NA_real_, size.alpha)

for (t in seq_len(repetition)) {
  for (a in seq_len(size.alpha)) {
    cv.model <- cv.glmnet(trainsx, trainsy, alpha = alpha[a])

    lam.min <- cv.model$lambda.min
    lam.1se <- cv.model$lambda.1se
    lambda.lmin[a] <- lam.min
    lambda.l1se[a] <- lam.1se

    # cv.glmnet() already holds the full-data glmnet fit for this alpha, so
    # the coefficients are read from it rather than refitting glmnet twice.
    coefs.lmin <- coef(cv.model, s = "lambda.min")
    coefs.l1se <- coef(cv.model, s = "lambda.1se")

    # Test MSE; the leading column of ones multiplies the intercept.
    test.mse.lmin[a] <- mean((testsy - cbind(1, testsx) %*% coefs.lmin)^2)
    test.mse.l1se[a] <- mean((testsy - cbind(1, testsx) %*% coefs.l1se)^2)
  }

  # Best alpha under the lambda.min rule.
  which.mse.min <- which.min(test.mse.lmin)
  test.mse.min <- test.mse.lmin[which.mse.min]
  alpha.min <- alpha[which.mse.min]

  # Best alpha under the lambda.1se rule; the MSE is read from the 1se vector
  # (it was previously read from test.mse.lmin by mistake).
  which.mse.1se <- which.min(test.mse.l1se)
  test.mse.1se <- test.mse.l1se[which.mse.1se]
  alpha.1se <- alpha[which.mse.1se]

  results.lmin[t, ] <- c(lambda.lmin[which.mse.min], alpha.min, test.mse.min)
  results.l1se[t, ] <- c(lambda.l1se[which.mse.1se], alpha.1se, test.mse.1se)
}
# Give both result matrices the same column labels.
result.cols <- c("lambda", "alpha", "test.mse")
colnames(results.lmin) <- result.cols
colnames(results.l1se) <- result.cols

# Column-wise averages over all repetitions.
means.min <- colMeans(results.lmin)
means.1se <- colMeans(results.l1se)

# NOTE(review): the averaged alpha need not lie on the original grid, and
# lambda values chosen under different alphas are averaged together; confirm
# this is the intended way to summarize the repetitions.

# lambda.min rule: refit at the averaged (alpha, lambda), then compute the
# test MSE from the extracted coefficients.
lambda.opt.min <- means.min["lambda"]
alpha.opt.min <- means.min["alpha"]
fit.opt.min <- glmnet(trainsx, trainsy, alpha = alpha.opt.min)
coefs.opt.min <- coef(fit.opt.min, s = lambda.opt.min)
resid.opt.min <- testsy - cbind(1, testsx) %*% coefs.opt.min
test.mse.opt.min <- mean(resid.opt.min^2)

# lambda.1se rule: same steps with the 1se averages.
lambda.opt.1se <- means.1se["lambda"]
alpha.opt.1se <- means.1se["alpha"]
fit.opt.1se <- glmnet(trainsx, trainsy, alpha = alpha.opt.1se)
coefs.opt.1se <- coef(fit.opt.1se, s = lambda.opt.1se)
resid.opt.1se <- testsy - cbind(1, testsx) %*% coefs.opt.1se
test.mse.opt.1se <- mean(resid.opt.1se^2)