我有一个带有2个参数(因变量的数据帧和列名)的函数,并且我希望输出是向量(因变量)和不包括因变量的数据帧。
df_handler = function(dataframe_allVars, dependent){
depend.var = eval(substitute(dependent))
pars <- as.list(match.call()[-1])
dependent = pars$dependent
indep.vars = dataframe_allVars[,-which(names(dataframe_allVars) == as.character(pars$dependent))]
return(list(depend.var, indep.vars))
}
从库MASS应用于Aids2数据框时
library(MASS)
head(Aids2)
attach(Aids2)
使用因变量“状态”,我得到了预期的结果。包含2个元素的列表,第一个是一个因子(状态为死或活着),第二个是初始的Aids2数据帧,没有“状态”列
df_handler(Aids2, status)
[[1]]
[1] D D D D D D D D D D D D D D A D D A A D D A A A D A A A A A A D
A D A A D D D D D D D D D D D D D D D D D D D D D
[58] D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D
D D D D D D D D D D D D D D D D D D D D D D D D D
[115] D D D D D D D D D D D D D D D D D D D D D D D D A D D D D D D D
[ reached getOption("max.print") -- omitted 1843 entries ]
Levels: A D
[[2]]
state sex diag death T.categ age
1 NSW M 10905 11081 hs 35
2 NSW M 11029 11096 hs 53
3 NSW M 9551 9983 hs 42
4 NSW M 9577 9654 haem 44
5 NSW M 10015 10290 hs 39
最初的daraframe是
╔════════════════════════════════════════════╗
║ state sex diag death status T.categ age ║
╠════════════════════════════════════════════╣
║ 1 NSW M 10905 11081 D hs 35 ║
║ 2 NSW M 11029 11096 D hs 53 ║
║ 3 NSW M 9551 9983 D hs 42 ║
║ 4 NSW M 9577 9654 D haem 44 ║
║ 5 NSW M 10015 10290 D hs 39 ║
║ 6 NSW M 9971 10344 D hs 36 ║
╚════════════════════════════════════════════╝
接下来,我尝试将df_handler函数嵌套到另一个名为multi_log_reg的函数中,以便对数据框的列执行逻辑回归
# glm modified for use in lapply
glm_for_apply = function(x, depend.var){
mod = glm(depend.var ~ x, family = binomial(link = "logit"))
}
# run log regression on dataframe, given the dependent variable
multi_log_reg = function(dataframe_allVars, dependent){
depend.var = df_handler(dataframe_allVars, dependent)[1]
indep.vars = df_handler(dataframe_allVars, dependent)[2]
print(depend.var)
print(indep.vars)
mod = lapply(indep.vars, FUN = glm_for_apply, depend.var)
}
运行multi_log_reg(Aids2,状态)时出现以下错误:
[[1]]
[1] D D D D D D D D D D D D D D A D D A A D D A A A D A A A A A A D
A D A A D D D D D D D D D D D D D D D D D D D D D
[58] D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D
D D D D D D D D D D D D D D D D D D D D D D D D D
[115] D D D D D D D D D D D D D D D D D D D D D D D D A D D D D D D D
[ reached getOption("max.print") -- omitted 1843 entries ]
Levels: A D
[[1]]
data frame with 0 columns and 2843 rows
Error in model.frame.default(formula = depend.var ~ x,drop.unused.levels = TRUE) :
invalid type (list) for variable 'depend.var'
有帮助吗?
答案 0 :(得分:0)
我会避免使用attach
,而会创建更简单的东西:
library(MASS)
# separate dependent variable from rest of variables
df_handler = function(dataframe_allVars, dependent){
d1 = dataframe_allVars[,dependent]
dataframe_allVars[,dependent] = NULL
return(list(d1, dataframe_allVars))
}
# build your model
glm_for_apply = function(x, depend.var){
mod = glm(depend.var ~ x, family = binomial(link = "logit"))
}
# combine previous functions and return all models created
multi_log_reg = function(dataframe_allVars, dependent){
d = df_handler(dataframe_allVars, dependent)
lapply(as.list(d[[2]]), FUN = glm_for_apply, d[[1]])
}
multi_log_reg(Aids2, "status")
哪个返回:
# $`state`
#
# Call: glm(formula = depend.var ~ x, family = binomial(link = "logit"))
#
# Coefficients:
# (Intercept) xOther xQLD xVIC
# 0.51922 -0.23623 0.12128 -0.09814
#
# Degrees of Freedom: 2842 Total (i.e. Null); 2839 Residual
# Null Deviance: 3777
# Residual Deviance: 3773 AIC: 3781
#
# $sex
#
# Call: glm(formula = depend.var ~ x, family = binomial(link = "logit"))
#
# Coefficients:
# (Intercept) xM
# 0.3868 0.1036
#
# Degrees of Freedom: 2842 Total (i.e. Null); 2841 Residual
# Null Deviance: 3777
# Residual Deviance: 3777 AIC: 3781
#
# $diag
#
# Call: glm(formula = depend.var ~ x, family = binomial(link = "logit"))
#
# Coefficients:
# (Intercept) x
# 31.225441 -0.002867
#
# Degrees of Freedom: 2842 Total (i.e. Null); 2841 Residual
# Null Deviance: 3777
# Residual Deviance: 2704 AIC: 2708
#
# $death
#
# Call: glm(formula = depend.var ~ x, family = binomial(link = "logit"))
#
# Coefficients:
# (Intercept) x
# 1171.6449 -0.1022
#
# Degrees of Freedom: 2842 Total (i.e. Null); 2841 Residual
# Null Deviance: 3777
# Residual Deviance: 296.9 AIC: 300.9
#
# $T.categ
#
# Call: glm(formula = depend.var ~ x, family = binomial(link = "logit"))
#
# Coefficients:
# (Intercept) xhsid xid xhet xhaem xblood xmother xother
# 0.49592 0.01490 -0.91878 -0.84076 0.03816 0.94444 -0.78361 -0.20824
#
# Degrees of Freedom: 2842 Total (i.e. Null); 2835 Residual
# Null Deviance: 3777
# Residual Deviance: 3743 AIC: 3759
#
# $age
#
# Call: glm(formula = depend.var ~ x, family = binomial(link = "logit"))
#
# Coefficients:
# (Intercept) x
# 0.11182 0.01006
#
# Degrees of Freedom: 2842 Total (i.e. Null); 2841 Residual
# Null Deviance: 3777
# Residual Deviance: 3771 AIC: 3775