正确执行参数调整和test.mse的重复交叉验证

时间:2019-03-11 12:17:06

标签: r performance cross-validation repeat

我想通过重复交叉验证来拟合带有调整参数的惩罚回归模型,并报告最优的调整参数和系数估计值。

我附上R代码以供说明。

此代码对前列腺(prostate)数据拟合弹性网(elastic net)模型。

  • alpha 的取值从 (0, 0.25, 0.5, 0.75, 1) 这几个值中选择。
  • 我重复交叉验证程序50次。
  • 我保存了最佳(以及 1se)调整参数估计值(lambda 和 alpha)。
  • 在此过程之后,我取 50 次重复中 lambda 和 alpha 值的平均值作为最优值。
  • 然后,我使用这些值拟合模型,并报告系数估计值和test MSE

这是重复交叉验证的正确方法吗?

library(ElemStatLearn)
library(glmnet)

# Build the predictor matrix (all columns except the response `lpsa` and the
# `train` indicator) and drop the intercept column that model.matrix() adds.
x <- model.matrix(lpsa ~ . - train, data = prostate)[, -1]
y <- prostate$lpsa

# Row indices of the training and test observations, taken from the `train`
# indicator column of the data set.
trainlab <- which(prostate$train == "TRUE")
testlab <- which(prostate$train == "FALSE")
trainx <- x[trainlab, ]
trainy <- y[trainlab]
testx <- x[testlab, ]
testy <- y[testlab]

# Standardise the training predictors and centre the training response.
trainsx <- scale(trainx)
train.center.y <- mean(trainy)
trainsy <- trainy - train.center.y

# Apply the *training* centring/scaling to the test data. The test set must
# not be standardised with its own mean/sd: the fitted coefficients are only
# meaningful on the scale the model was trained on, and using test-set
# statistics would also leak test information into the evaluation.
testsx <- scale(
  testx,
  center = attr(trainsx, "scaled:center"),
  scale = attr(trainsx, "scaled:scale")
)
testsy <- testy - train.center.y


# Grid of elastic-net mixing parameters to try (0 = ridge, 1 = lasso).
alpha <- seq(0, 1, by = 0.25)
size.alpha <- length(alpha)

# Number of times the whole cross-validation procedure is repeated.
repetition <- 50

# Scratch vectors: one test MSE per alpha value, overwritten in every
# repetition (one vector for lambda.min, one for lambda.1se).
test.mse.lmin <- rep(NA, times = size.alpha)
test.mse.l1se <- rep(NA, times = size.alpha)

# One row per repetition; three values are stored per row.
results.lmin <- matrix(NA, nrow = repetition, ncol = 3)
results.l1se <- matrix(NA, nrow = repetition, ncol = 3)

# Fix the RNG state so the random fold assignments made by cv.glmnet() are
# reproducible.
set.seed(1)

# Repeat the cross-validation `repetition` times. In each repetition every
# alpha in the grid is cross-validated, and the alpha with the smallest test
# MSE is recorded together with ITS lambda and test MSE.
#
# NOTE(review): alpha is chosen by test-set MSE, so the reported test MSE is
# optimistically biased. Selecting alpha by cross-validated error on the
# training data (e.g. min(cv.model$cvm) with a shared `foldid`) would keep the
# test set untouched -- confirm which is intended.
for (rep.idx in seq_len(repetition)) {
  # Lambda chosen for each alpha within this repetition, so that the lambda
  # belonging to the selected alpha can be stored afterwards.
  lambda.lmin <- rep(NA_real_, size.alpha)
  lambda.l1se <- rep(NA_real_, size.alpha)

  for (a in seq_len(size.alpha)) {
    cv.model <- cv.glmnet(trainsx, trainsy, alpha = alpha[a])

    lambda.lmin[a] <- cv.model$lambda.min
    lambda.l1se[a] <- cv.model$lambda.1se

    # cv.glmnet() already stores the fit on the full training data, so the
    # predictions can be taken from it directly instead of refitting glmnet
    # twice per iteration.
    pred.lmin <- predict(cv.model, newx = testsx, s = "lambda.min")
    pred.l1se <- predict(cv.model, newx = testsx, s = "lambda.1se")

    test.mse.lmin[a] <- mean((testsy - as.numeric(pred.lmin))^2)
    test.mse.l1se[a] <- mean((testsy - as.numeric(pred.l1se))^2)
  }

  which.mse.min <- which.min(test.mse.lmin)
  test.mse.min <- test.mse.lmin[which.mse.min]
  alpha.min <- alpha[which.mse.min]

  which.mse.1se <- which.min(test.mse.l1se)
  # Read from the 1se vector (the original indexed test.mse.lmin here).
  test.mse.1se <- test.mse.l1se[which.mse.1se]
  alpha.1se <- alpha[which.mse.1se]

  # Store the lambda that belongs to the selected alpha, not the lambda of
  # whichever alpha happened to be processed last.
  results.lmin[rep.idx, ] <- c(
    lambda.lmin[which.mse.min], alpha.min, test.mse.min
  )
  results.l1se[rep.idx, ] <- c(
    lambda.l1se[which.mse.1se], alpha.1se, test.mse.1se
  )
}

# Label the three values stored per repetition.
result.names <- c("lambda", "alpha", "test.mse")
colnames(results.lmin) <- result.names
colnames(results.l1se) <- result.names

# Average each column over all repetitions.
means.min <- colMeans(results.lmin)
means.1se <- colMeans(results.l1se)

# Test-set design matrix with a leading column of ones for the intercept.
test.design <- cbind(1, testsx)

# Refit with the averaged lambda.min-based tuning parameters and evaluate the
# resulting coefficients on the test set.
lambda.opt.min <- means.min["lambda"]
alpha.opt.min <- means.min["alpha"]
fit.opt.min <- glmnet(trainsx, trainsy, alpha = alpha.opt.min)
coefs.opt.min <- coef(fit.opt.min, s = lambda.opt.min)
resid.opt.min <- testsy - test.design %*% coefs.opt.min
test.mse.opt.min <- mean(resid.opt.min^2)

# Same again for the averaged lambda.1se-based tuning parameters.
lambda.opt.1se <- means.1se["lambda"]
alpha.opt.1se <- means.1se["alpha"]
fit.opt.1se <- glmnet(trainsx, trainsy, alpha = alpha.opt.1se)
coefs.opt.1se <- coef(fit.opt.1se, s = lambda.opt.1se)
resid.opt.1se <- testsy - test.design %*% coefs.opt.1se
test.mse.opt.1se <- mean(resid.opt.1se^2)

0 个答案:

没有答案