我对standardize
中glmnet
软件包的R
选项感到困惑。当我对协变量矩阵进行标准化并设置standardize=FALSE
时,与不对协变量矩阵进行标准化并设置standardize=TRUE
时,得到不同的系数。我以为他们会一样!通过创建以下ridge.mod1
和ridge.mod2
模型来举例说明这两个。我还创建了一个模型(ridge.mod3
),该模型对结果(和协变量矩阵)进行了标准化,并使用了选项standardize=FALSE
。我只是在检查是否也需要标准化结果以获取与ridge.mod1
中相同的系数。
set.seed(1)
y <- rnorm(30, 20, 10)
x1 <- rnorm(30, 5, 2)
x2 <- x1 + rnorm(30, 0, 5)
cor(x1,x2)
x <- as.matrix(cbind(x1,x2))
z1 <- scale(x1)
z2 <- scale(x2)
z <- as.matrix(cbind(z1,z2))
y.scale <- scale(y)
n <- 30
# Fixing foldid for proper comparison
foldid=sample(rep(seq(5),length=n))
table(foldid)
library(glmnet)
cv.ridge.mod1 <- cv.glmnet(x, y, alpha = 0, nfolds = 5, foldid=foldid, standardize = TRUE)
ridge.mod1 <- glmnet(x, y, alpha = 0, standardize = TRUE)
coef(ridge.mod1, s=cv.ridge.mod1$lambda.min)
> coef(ridge.mod1, s=cv.ridge.mod1$lambda.min)
3 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 2.082458e+01
x1 2.856136e-37
x2 4.334910e-38
cv.ridge.mod2 <- cv.glmnet(z, y, alpha = 0, nfolds = 5, foldid=foldid, standardize = FALSE)
ridge.mod2 <- glmnet(z, y, alpha = 0, standardize = FALSE)
coef(ridge.mod2, s=cv.ridge.mod2$lambda.min)
> coef(ridge.mod2, s=cv.ridge.mod2$lambda.min)
3 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 2.082458e+01
V1 4.391657e-37
V2 2.389751e-37
cv.ridge.mod3 <- cv.glmnet(z, y.scale, alpha = 0, nfolds = 5, foldid=foldid, standardize = FALSE)
ridge.mod3 <- glmnet(z, y.scale, alpha = 0, standardize = FALSE)
coef(ridge.mod3, s=cv.ridge.mod3$lambda.min)
> coef(ridge.mod3, s=cv.ridge.mod3$lambda.min)
3 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 1.023487e-16
V1 4.752255e-38
V2 2.585973e-38
谁能告诉我发生了什么事,以及是否(或如何)通过事先标准化(在数据处理步骤中)然后使用ridge.mod1
得到与standardize=FALSE
中相同的系数?
更新 :(我根据以下评论尝试了此操作)
因此,我尝试通过SS / n而不是SS /(n-1)进行标准化。我通过标准化y和x进行了尝试。都没有给我系数等于模型1的非标准化系数。
## Standadizing by sqrt(SS(X)/n) like glmnet instead of sqrt(SS(X)/(n-1)) which is done by the scale command
Xs <- apply(x, 2, function(m) (m - mean(m)) / sqrt(sum(m^2) / n))
Ys <- (y-mean(y)) / sqrt(sum(y^2) / n)
# Standadizing only X by sqrt(SS(X)/n)
cv.ridge.mod4 <- cv.glmnet(Xs, y, alpha = 0, nfolds = 5, foldid=foldid, standardize = FALSE)
ridge.mod4 <- glmnet(Xs, y, alpha = 0, standardize = FALSE)
coef(ridge.mod4, s=cv.ridge.mod4$lambda.min)
> coef(ridge.mod4, s=cv.ridge.mod4$lambda.min)[2]/sd(x1)
[1] 7.995171e-38
> coef(ridge.mod4, s=cv.ridge.mod4$lambda.min)[3]/sd(x2)
[1] 2.957854e-38
# Standadizing both Y and X by sqrt(SS(X)/n) but neither is centered
cv.ridge.mod6 <- cv.glmnet(Xs.noncentered, Ys.noncentered, alpha = 0, nfolds = 5, foldid=foldid, standardize = FALSE)
ridge.mod6 <- glmnet(Xs.noncentered, Ys.noncentered, alpha = 0, standardize = FALSE)
coef(ridge.mod6, s=cv.ridge.mod6$lambda.min)
> coef(ridge.mod6, s=cv.ridge.mod6$lambda.min)[2] / (sqrt(sum(x1^2) / n))
[1] 1.019023e-39
> coef(ridge.mod6, s=cv.ridge.mod6$lambda.min)[3] / (sqrt(sum(x2^2) / n))
[1] 9.189263e-40
那还是哪里错了?
答案 0 :(得分:1)
我调整了您的代码,以便可以处理更明智的问题。为了重现更改standardize=TRUE
和standardize=FALSE
选项的系数,您需要首先使用(1 / N)方差估计器公式对变量进行标准化。对于此示例,我还将变量居中以摆脱常量。我只关注变量的系数。之后,您必须注意,因此必须将公式求反以获取非标准化系数。我在以下代码中做到了这一点。
set.seed(1)
x1 <- rnorm(300, 5, 2)
x2 <- x1 + rnorm(300, 0, 5)
x3 <- rnorm(300, 6, 5)
e= rnorm(300, 0, 1)
y <- 0.3*x1+3.5*x2+x3+e
x <- as.matrix(cbind(x1,x2,x3))
sdN=function(x){
sigma=sqrt( (1/length(x)) * sum((x-mean(x))^2))
return(sigma)
}
n=300
foldid=sample(rep(seq(5),length=n))
g1=(x1-mean(x1))/sdN(x1)
g2=(x2-mean(x2))/sdN(x2)
g3=(x3-mean(x3))/sdN(x3)
gy=(y-mean(y))/sdN(y)
equis <- as.matrix(cbind(g1,g2,g3))
library(glmnet)
cv.ridge.mod1 <- cv.glmnet(x, y, alpha = 0, nfolds = 5, foldid=foldid,standardize = TRUE)
coef(cv.ridge.mod1, s=cv.ridge.mod1$lambda.min)
cv.ridge.mod2 <- cv.glmnet(equis, gy, alpha = 0, nfolds = 5, foldid=foldid, standardize = FALSE, intercept=FALSE)
beta=coef(cv.ridge.mod2, s=cv.ridge.mod2$lambda.min)
beta[2]*sdN(y)/sdN(x1)
beta[3]*sdN(y)/sdN(x2)
beta[4]*sdN(y)/sdN(x3)
coef(cv.ridge.mod1, s=cv.ridge.mod1$lambda.min)
这将产生结果:
> beta[2]*sdN(y)/sdN(x1)
[1] 0.5984356
> beta[3]*sdN(y)/sdN(x2)
[1] 3.166033
> beta[4]*sdN(y)/sdN(x3)
[1] 0.9145646
>
> coef(cv.ridge.mod1, s=cv.ridge.mod1$lambda.min)
4 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 0.5951423
x1 0.5984356
x2 3.1660328
x3 0.9145646
如您所见,系数在小数点后4位相同。所以我希望这能回答您的问题。