我的代码某处有错误,因为计算出的 MSE 是 NaN,但我怎么也找不出问题出在哪里。第一段代码中的所有内容均为题目给定的信息(n、H、j 和 dtrain 除外),其中 m = 50 是测试数据集的大小。随后题目要求训练数据的大小为 n = 50,拟合线性模型,并分别求出 OLS、Ridge 和 Lasso 的 MSE。
library(glmnet)
library(ISLR)
# Simulate a test set (y, X) and a training set (j, H) from the same linear
# model with equicorrelated Gaussian predictors.
set.seed(2018)

m <- 50  # number of test observations
n <- 50  # number of training observations
p <- 45  # number of predictors

# Predictor covariance: unit variances, pairwise correlation rho.
rho <- 0.7
Sigma <- matrix(rho, p, p)
diag(Sigma) <- 1

# Random draws happen in this order: X, H, noise for y, noise for j.
X <- MASS::mvrnorm(m, rep(0, p), Sigma)  # test predictors
H <- MASS::mvrnorm(n, rep(0, p), Sigma)  # training predictors

# True coefficients: intercept beta0, the first num_nonzero slopes equal to 1,
# the remaining p - num_nonzero slopes equal to 0.
beta0 <- 10
num_nonzero <- 45
beta <- c(rep(1, num_nonzero), rep(0, p - num_nonzero))

y <- beta0 + X %*% beta + rnorm(m)  # test response
j <- beta0 + H %*% beta + rnorm(n)  # training response

# Neither matrix has column names, so data.frame() names the predictor columns
# X1..X45 in BOTH frames. The response column is `y` in dtest and `j` in dtrain.
dtest <- data.frame(y, X)
dtrain <- data.frame(j, H)
# Fit OLS, ridge and lasso on the training frame `dtrain` and report each
# model's mean squared error on the test frame `dtest`.
#
# The response column is named `j` in dtrain but `y` in dtest. dtest has no
# column `j`, so `dtest$j` is NULL and mean((pred - NULL)^2) is the mean of an
# empty vector, i.e. NaN. Every test MSE below therefore compares against
# `dtest$y`.

# --- Ordinary least squares ---
fit.lm <- lm(j ~ ., data = dtrain)
pred.lm <- predict(fit.lm, newdata = dtest)
mse.lm <- mean((pred.lm - dtest$y)^2)
mse.lm

# --- Design matrices for glmnet ---
# glmnet fits its own intercept, so the "(Intercept)" column that
# model.matrix() adds is dropped. Predictor columns are X1..X45 in both frames,
# so the two matrices line up column for column.
train.mat <- model.matrix(j ~ ., data = dtrain)[, -1]
test.mat <- model.matrix(y ~ ., data = dtest)[, -1]

# Candidate penalties, from 1e10 down to 1e-2 on a log scale.
grid <- 10^seq(10, -2, length.out = 100)

# --- Ridge (alpha = 0) ---
fit.ridge <- glmnet(train.mat, dtrain$j, alpha = 0, lambda = grid,
                    thresh = 1e-12)
cv.ridge <- cv.glmnet(train.mat, dtrain$j, alpha = 0, lambda = grid,
                      thresh = 1e-12)
bestlam.ridge <- cv.ridge$lambda.min  # lambda with the lowest CV error
bestlam.ridge
pred.ridge <- predict(fit.ridge, s = bestlam.ridge, newx = test.mat)
mse.ridge <- mean((pred.ridge - dtest$y)^2)
mse.ridge

# --- Lasso (alpha = 1) ---
fit.lasso <- glmnet(train.mat, dtrain$j, alpha = 1, lambda = grid,
                    thresh = 1e-12)
cv.lasso <- cv.glmnet(train.mat, dtrain$j, alpha = 1, lambda = grid,
                      thresh = 1e-12)
bestlam.lasso <- cv.lasso$lambda.min  # lambda with the lowest CV error
bestlam.lasso
pred.lasso <- predict(fit.lasso, s = bestlam.lasso, newx = test.mat)
mse.lasso <- mean((pred.lasso - dtest$y)^2)
mse.lasso