我有一个数据集,其中包含1-3个版本的因变量和10-15个自变量。我想对模型运行glm命令,但希望遍历所有可能的自变量组合。我从来没有写过循环代码,想确认自己的写法是否正确。
以下是我的数据框的一小部分。实际数据框中每个变量都有明确的名称,而不只是“DepVar1”或“IndVar1”。
# Sample data frame with 24 rows: two 0/1 dependent variables (DepVar1,
# DepVar2) and three independent variables (IndVar1 is 0/1, IndVar2 holds
# whole numbers, IndVar3 holds decimal values). The structure(list(...)) form
# looks like dput() output, so it can be pasted straight into an R session.
dfPRAC <- structure(list(DepVar1 = c(0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1), DepVar2 = c(0, 1, 0, 0,
1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1),
IndVar1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
0, 0, 0, 1, 0, 0, 0, 1, 0), IndVar2 = c(1, 3, 9, 1, 5, 1,
1, 8, 4, 6, 3, 15, 4, 1, 1, 3, 2, 1, 10, 1, 9, 9, 11, 5),
IndVar3 = c(0.500100322564443, 1.64241601558441, 0.622735778490702,
2.42429812749226, 5.10055213237027, 1.38479786027561, 7.24663629203007,
0.5102348706939, 2.91566510995229, 3.73356170379198, 5.42003495939846,
1.29312896116503, 3.33753833987496, 0.91783513806083, 4.7735736131668,
1.17609362602233, 5.58010703426296, 5.6668754863739, 1.4377813063642,
5.07724130837643, 2.4791994535923, 2.55100067348583, 2.41043629522981,
2.14411703944206)), .Names = c("DepVar1", "DepVar2", "IndVar1",
"IndVar2", "IndVar3"), row.names = c(NA, 24L), class = "data.frame")
我目前运行单个glm模型的代码是:
# Fit a single logistic regression (binomial family, logit link) of DepVar1 on
# IndVar1 using the dfPRAC data, then print the model summary.
RegPRAC <- glm(DepVar1 ~ IndVar1, data=dfPRAC, family=binomial("logit"))
summary(RegPRAC)
我想为所有可能的自变量组合运行模型,包括因变量的所有组合,但我不知道从哪里开始。我想的是:
# Asker's attempted loop, which does not do what is intended: `a:b` builds a
# numeric sequence from the operands' values rather than a range of column
# names, and `i` inside the formula is looked up as a variable literally named
# `i` instead of being replaced by a column name. The fitted model is also
# discarded on every iteration because it is never assigned.
for (i in dfPRAC$IndVar1:dfPRAC$IndVar3) {glm(DepVar1 ~ i, data=dfPRAC, family=binomial("logit")) }
我试过运行它,但有几个错误。任何建议将不胜感激。
答案 0 :(得分:5)
也许是这样的:
# Names of the response and predictor columns to combine.
dep_vars <- c("DepVar1", "DepVar2")
ind_vars <- c("IndVar1", "IndVar2", "IndVar3")
# Build every non-empty subset of ind_vars as a "+"-joined right-hand side,
# ordered by subset size (all single terms, then all pairs, ...).
# combn() applies FUN to each combination directly, and lapply() + unlist()
# gives a plain character vector without relying on sapply()'s
# input-dependent simplification.
ind_vars_comb <- unlist(
  lapply(seq_along(ind_vars), function(i) {
    combn(ind_vars, i, FUN = paste, collapse = "+")
  })
)
# Pair every dependent variable with every predictor combination; the first
# column (the dependent variable) varies fastest. Keep the columns as
# character vectors instead of factors.
var_comb <- expand.grid(dep_vars, ind_vars_comb, stringsAsFactors = FALSE)
# Formula strings such as "DepVar1 ~ IndVar1+IndVar2", one per pairing.
formula_vec <- sprintf("%s ~ %s", var_comb$Var1, var_comb$Var2)
# Fit one binomial (logit-link) GLM per formula string and collect the fits in
# a list keyed by that string.
# Each fit's `coefficients` element is overwritten with the coefficient matrix
# taken from summary(), so coef() on a list element returns the whole table
# (including the p-value column) rather than only the estimates.
glm_res <- setNames(
  lapply(formula_vec, function(f) {
    model <- glm(f, data = dfPRAC, family = binomial("logit"))
    model[["coefficients"]] <- coef(summary(model))
    model
  }),
  formula_vec
)
# Look up the fitted model for a specific formula by its formula string
glm_res[["DepVar1 ~ IndVar1"]]
# Coefficient table for DepVar1 ~ IndVar1 (the summary matrix stored in
# `coefficients` when the models were fitted, not just the estimates)
coef(glm_res[["DepVar1 ~ IndVar1"]])
# p-values (the "Pr(>|z|)" column of that table) for DepVar1 ~ IndVar2
coef(glm_res[["DepVar1 ~ IndVar2"]])[,"Pr(>|z|)"]
# Collect the p-values of every model into one data frame, one row per formula.
# Values are placed by coefficient position (intercept first, then the model's
# own terms in order), NOT matched by variable name, and the column names come
# from the first model. Rows for models with fewer terms are padded on the
# right with NA up to length(ind_vars) + 1 columns.
p_values <- local({
  n_cols <- length(ind_vars) + 1
  p_rows <- lapply(glm_res, function(model) {
    # Column 4 of the stored summary table holds the p-values.
    p <- coef(model)[, 4]
    rbind(c(p, rep(NA, n_cols - length(p))))
  })
  cbind(formula_vec, as.data.frame(do.call(rbind, p_rows)))
})
结果(注意:各列按系数位置填充,而不是按变量名对齐,因此例如“DepVar1 ~ IndVar2”这一行中“IndVar1”列的值实际上是IndVar2的p值):
formula_vec (Intercept) IndVar1 V3 V4
1 DepVar1 ~ IndVar1 1.00000000 1.00000000 NA NA
2 DepVar2 ~ IndVar1 0.65526203 0.29437334 NA NA
3 DepVar1 ~ IndVar2 0.29307777 0.19121066 NA NA
4 DepVar2 ~ IndVar2 0.07298241 0.03858791 NA NA
5 DepVar1 ~ IndVar3 0.99950535 0.99940963 NA NA
6 DepVar2 ~ IndVar3 0.52105212 0.44715614 NA NA
7 DepVar1 ~ IndVar1+IndVar2 0.31112860 0.76310468 0.18416266 NA
8 DepVar2 ~ IndVar1+IndVar2 0.06488501 0.08833369 0.03031766 NA
9 DepVar1 ~ IndVar1+IndVar3 0.99952006 0.99999188 0.99940957 NA
10 DepVar2 ~ IndVar1+IndVar3 0.38508258 0.29593637 0.45010697 NA
11 DepVar1 ~ IndVar2+IndVar3 0.28167430 0.15753070 0.54363164 NA
12 DepVar2 ~ IndVar2+IndVar3 0.22644873 0.04654188 0.84059019 NA
13 DepVar1 ~ IndVar1+IndVar2+IndVar3 0.27858393 0.71600105 0.14812808 0.5222330
14 DepVar2 ~ IndVar1+IndVar2+IndVar3 0.15634739 0.08611677 0.02889574 0.7449513