我遇到以下问题:将数据分成训练/测试后,我无法用glmnet拟合预测测试...请参阅下面的玩具示例和BostonHousing数据集。 我认为这是因为glmnet(lasso,elnet)能够缩小数据,因此,我不知道该模型所选择的数据是哪一个。 (我认为如果有一种方法可以提取所选的功能,我可以使用所选功能自动提供我的x.test数据集。)
#' load the package
require(glmnet)
require(caret)
require(mlbench)
set.seed(123)
#' load data
data("BostonHousing")
#' Create a test/train index, to keep back a validation set for final checking
validationIndex <- createDataPartition(BostonHousing$medv, p= .7, list = FALSE)
dataset <- BostonHousing [validationIndex, ]
validation <- BostonHousing [ -validationIndex, ]
x.train <- data.matrix (dataset [ ,1:length(dataset)])
y.train <- data.matrix (dataset$medv)
colnames (y.train) <- ("MedianValue")
rownames (y.train) <- rownames (dataset)
x.test <- data.matrix (validation [,1:(length(validation)-1)])
y.test <- data.matrix(validation$medv)
rm (validationIndex, dataset, validation )
#' fit model
fit.lasso <- glmnet(x.train, y.train, family = "gaussian", alpha = 1)
fit.ridge <- glmnet(x.train, y.train, family = "gaussian", alpha = 0)
fit.elnet <- glmnet(x.train, y.train, family = "gaussian", alpha = .5)
#' cv validation with 10-folds, for each alpha possibility
fit.lasso.cv <- cv.glmnet(x.train, y.train, type.measure = "mse", alpha = 1, family = "gaussian")
fit.ridge.cv <- cv.glmnet(x.train, y.train, type.measure = "mse", alpha = 0, family="gaussian")
fit.elnet.cv <- cv.glmnet(x.train, y.train, type.measure = "mse", alpha=.5, family = "gaussian")
#' CV plot based on GLMNET's Vignette
par(mfrow=c(1,2))
plot (fit.lasso.cv); plot (fit.ridge.cv); plot (fit.elnet.cv)
plot (log (fit.lasso.cv$lambda), fit.lasso.cv$cvm, pch= 10, col="red", xlab = "log(Lambda)", ylab= fit.lasso.cv$name)
points(log(fit.elnet.cv$lambda), fit.elnet.cv$cvn, pch= 10, col = "grey")
points(log(fit.ridge.cv$lambda), fit.ridge.cv$cvm, pch= 10, col = "blue")
legend ("topleft", legend = c("alpha=1", "alpha= 0.5", "alpha= 0"), pch= 19, col = c("red", "grey", "blue"))
问题
#' Make predictions on validation dataset
yhat0 <- predict(fit.lasso.cv, s=fit.lasso.cv$lambda.1se, newx=x.test)
yhat1 <- predict(fit.ridge.cv, s=fit.ridge.cv$lambda.1se, newx=x.test)
yhat2 <- predict(fit.elnet.cv, s=fit.elnet.cv$lambda.1se, newx=x.test)
mse0 <- mean((y.test - yhat0)^2)
mse1 <- mean((y.test - yhat1)^2)
mse2 <- mean((y.test - yhat2)^2)
#' summarize the accuracy
print (mse0, mse1, mse2)
错误是: cbind2(1,newx)%*%nbeta出错: 文件中的Cholmod错误'X和/或Y有错误的尺寸'../MatrixOps/cholmod_sdmult.c,第90行