我正在按照下面这个教程,在自己的数据上尝试岭回归(ridge)、套索回归(lasso)和弹性网络回归(elastic net)。但是,我得到的所有行的预测值都相同,因此各个模型的拟合值和 MSE 值也都相同。
如果有人比我更熟悉 R,能看一下我的代码并指出我哪里做错了,我将非常感激。代码如下:
library (glmnet)
require(caTools)
# ---- Data preparation ----
# Load the data, make a reproducible 80/20 train/test split, and build the
# predictor matrices (x, x1) and response vectors (y, y1) used by glmnet.
set.seed(111)

# Namespace-qualified so the script does not rely on data.table being attached.
new_flat <- data.table::fread(
  "RED_SAMPLED_DATA_WITH_HEADERS.csv",
  header = TRUE,
  sep = ","
)

# Logical mask: TRUE marks a training row. Named `in_train` rather than
# `sample` so that base::sample() is not shadowed.
in_train <- sample.split(new_flat$SUBSCRIPTION_ID, SplitRatio = 0.80)
train <- new_flat[in_train, ]
test <- new_flat[!in_train, ]

# Encode the predictors ONCE on the full table and then slice by row.
# Calling model.matrix() separately on each subset can give the two matrices
# different columns when a categorical level is absent from one subset, which
# makes predict(..., newx = x1) fail or line up against the wrong coefficients.
model_formula <- c201512_TOTAL_MARGIN ~ . - SUBSCRIPTION_ID
design <- model.matrix(model_formula, data = new_flat)

# model.matrix() drops rows containing NA by default; if that happened the
# split mask would no longer line up with the matrix rows, so stop early.
stopifnot(nrow(design) == nrow(new_flat))

x <- design[in_train, , drop = FALSE]
y <- train$c201512_TOTAL_MARGIN
x1 <- design[!in_train, , drop = FALSE]
y1 <- test$c201512_TOTAL_MARGIN
# Fit models:
# Full regularization paths over glmnet's default lambda sequence; these are
# the objects drawn by plot(..., xvar = "lambda").
fit.lasso <- glmnet(x, y, family = "gaussian", alpha = 1)
fit.ridge <- glmnet(x, y, family = "gaussian", alpha = 0)
fit.elnet <- glmnet(x, y, family = "gaussian", alpha = 0.5)

# 10-fold Cross validation for each alpha = 0, 0.1, ... , 0.9, 1.0
# Every cv.glmnet() call below reuses the SAME fold assignment. Without
# `foldid` each call draws its own random folds, so differences in CV error
# between alpha values would partly reflect fold noise instead of alpha.
n_folds <- 10L
fold_id <- base::sample(rep(seq_len(n_folds), length.out = length(y)))

fit.lasso.cv <- cv.glmnet(
  x, y,
  type.measure = "mse", alpha = 1, family = "gaussian", foldid = fold_id
)
fit.ridge.cv <- cv.glmnet(
  x, y,
  type.measure = "mse", alpha = 0, family = "gaussian", foldid = fold_id
)
fit.elnet.cv <- cv.glmnet(
  x, y,
  type.measure = "mse", alpha = 0.5, family = "gaussian", foldid = fold_id
)

# Creates fit0, fit1, ..., fit10, where fit<i> is the cross-validated model
# for alpha = i / 10.
for (i in 0:10) {
  assign(
    paste0("fit", i),
    cv.glmnet(
      x, y,
      type.measure = "mse", alpha = i / 10, family = "gaussian",
      foldid = fold_id
    )
  )
}
# Plot solution paths:
# 3 x 2 grid, one row per model: the coefficient path against lambda on the
# left, the matching cross-validation curve (with a title) on the right.
# See '?plot.glmnet' in the R console for further plotting options.
par(mfrow = c(3, 2))
local({
  panels <- list(
    list(path_fit = fit.lasso, cv_fit = fit10, title = "LASSO"),
    list(path_fit = fit.ridge, cv_fit = fit0, title = "Ridge"),
    list(path_fit = fit.elnet, cv_fit = fit5, title = "Elastic Net")
  )
  for (panel in panels) {
    plot(panel[["path_fit"]], xvar = "lambda")
    plot(panel[["cv_fit"]], main = panel[["title"]])
  }
})
# ---- Test-set predictions and MSE for fit0 ... fit10 ----
# For each cross-validated model fit<i>, predict on the held-out matrix x1 and
# store the predictions in yhat<i> and the test mean squared error in mse<i>.
#
# Predictions use lambda.min. With lambda.1se the penalty on this data is heavy
# enough that every coefficient is shrunk to zero, so each prediction collapses
# to the intercept and yhat / mse come out identical across rows and models.
lambda_rule <- "lambda.min"

for (i in 0:10) {
  cv_fit <- get(paste0("fit", i))
  y_hat <- predict(cv_fit, s = cv_fit[[lambda_rule]], newx = x1)
  assign(paste0("yhat", i), y_hat)
  assign(paste0("mse", i), mean((y1 - y_hat)^2))
}
答案 0 :(得分:0)
尝试在 predict 函数中使用 s=fit0$lambda.min,而不是 s=fit0$lambda.1se。
你的系数在套索回归中很快就降到了 0,因此 s=fit0$lambda.1se 对应的惩罚强度可能过高。
lambda 决定了对系数施加惩罚的权重:如果它太高,所有系数都会变为零,预测值就只等于截距,也就是因变量的平均值,例如:Y = 0.48 + 0 * X