将结果从一个函数传递到另一个不起作用

时间:2018-11-27 11:33:10

标签: r function dataframe

我有一个带有2个参数(数据框,以及因变量的列名)的函数,我希望它的输出是一个向量(因变量)和一个不包含因变量的数据框。

# Split a data frame into its dependent variable and the remaining columns.
#
# dataframe_allVars: data frame holding every variable.
# dependent: the dependent variable, written as a bare (unquoted) name.
#
# Returns an unnamed list: element 1 is the evaluated dependent variable,
# element 2 is `dataframe_allVars` without the column whose name equals the
# text of the expression supplied for `dependent`.
df_handler <- function(dataframe_allVars, dependent) {
    # `dependent` is evaluated as ordinary code, not looked up inside
    # `dataframe_allVars`, so the name has to be visible from here
    # (for example through attach()).
    depend.var <- eval(substitute(dependent))
    # The column to drop is identified by the argument exactly as it was
    # written in the call to this function.
    call_args <- as.list(match.call()[-1])
    target_name <- as.character(call_args$dependent)
    drop_idx <- which(names(dataframe_allVars) == target_name)
    indep.vars <- dataframe_allVars[, -drop_idx]
    list(depend.var, indep.vars)
}

将该函数应用于 MASS 库中的 Aids2 数据框时:

# Load MASS, the library the Aids2 data frame is taken from.
library(MASS)
# Show the first rows of Aids2.
head(Aids2)
# Put the columns of Aids2 on the search path so that they can be referred to
# by bare name (df_handler() evaluates its `dependent` argument that way).
attach(Aids2)

以 status 作为因变量时,我得到了预期的结果:一个包含2个元素的列表,第一个元素是一个因子(status,取值为 A 或 D,即存活或死亡),第二个元素是去掉 status 列之后的原始 Aids2 数据框。

# Direct call with the bare column name; `status` can be evaluated because
# attach(Aids2) made the columns visible.
df_handler(Aids2, status)

[[1]]
  [1] D D D D D D D D D D D D D D A D D A A D D A A A D A A A A A A D 
      A D A A D D D D D D D D D D D D D D D D D D D D D
 [58] D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D 
      D D D D D D D D D D D D D D D D D D D D D D D D D
[115] D D D D D D D D D D D D D D D D D D D D D D D D A D D D D D D D 
[ reached getOption("max.print") -- omitted 1843 entries ]
Levels: A D

[[2]]
      state sex  diag death T.categ age
1      NSW   M 10905 11081      hs  35
2      NSW   M 11029 11096      hs  53
3      NSW   M  9551  9983      hs  42
4      NSW   M  9577  9654    haem  44
5      NSW   M 10015 10290      hs  39

原始的数据框(dataframe)是:

╔════════════════════════════════════════════╗
║  state sex  diag death status T.categ age  ║
╠════════════════════════════════════════════╣
║ 1   NSW   M 10905 11081      D      hs  35 ║
║ 2   NSW   M 11029 11096      D      hs  53 ║
║ 3   NSW   M  9551  9983      D      hs  42 ║
║ 4   NSW   M  9577  9654      D    haem  44 ║
║ 5   NSW   M 10015 10290      D      hs  39 ║
║ 6   NSW   M  9971 10344      D      hs  36 ║
╚════════════════════════════════════════════╝

接下来,我尝试将df_handler函数嵌套到另一个名为multi_log_reg的函数中,以便对数据框的列执行逻辑回归

# Logistic regression of `depend.var` on a single predictor. The predictor is
# the first argument so that the function can be handed directly to lapply().
#
# x: values of the independent variable.
# depend.var: values of the dependent variable.
#
# Returns the fitted glm object invisibly.
glm_for_apply <- function(x, depend.var) {
    invisible(glm(depend.var ~ x, family = binomial(link = "logit")))
}

# Run a logistic regression of the dependent variable on each remaining
# column of a data frame.
#
# dataframe_allVars: data frame holding every variable.
# dependent: the dependent variable, handed on unchanged to df_handler().
#
# Prints the two pieces obtained from df_handler() and returns the result of
# the lapply() over the predictors invisibly.
multi_log_reg <- function(dataframe_allVars, dependent) {
    pieces <- df_handler(dataframe_allVars, dependent)
    # Single-bracket indexing: each piece stays wrapped in a one-element list.
    response_part <- pieces[1]
    predictor_part <- pieces[2]
    print(response_part)
    print(predictor_part)
    invisible(lapply(predictor_part, FUN = glm_for_apply, response_part))
}

运行 multi_log_reg(Aids2, status) 时出现以下错误:

[[1]]
  [1] D D D D D D D D D D D D D D A D D A A D D A A A D A A A A A A D 
      A D A A D D D D D D D D D D D D D D D D D D D D D
 [58] D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D D 
      D D D D D D D D D D D D D D D D D D D D D D D D D
[115] D D D D D D D D D D D D D D D D D D D D D D D D A D D D D D D D 
[ reached getOption("max.print") -- omitted 1843 entries ]
Levels: A D

[[1]]
data frame with 0 columns and 2843 rows

Error in model.frame.default(formula = depend.var ~ x,drop.unused.levels = TRUE) : 
invalid type (list) for variable 'depend.var' 

有帮助吗?

1 个答案:

答案 0 :(得分:0)

我会避免使用attach,而会创建更简单的东西:

library(MASS)

# Separate the dependent variable from the rest of the variables.
#
# dataframe_allVars: data frame holding every variable.
# dependent: column selector for the dependent variable (the example call
#   passes the column name as a string).
#
# Returns an unnamed list: element 1 is the selected column, element 2 is the
# data frame with that column removed.
df_handler <- function(dataframe_allVars, dependent) {
  response <- dataframe_allVars[, dependent]
  predictors <- dataframe_allVars
  predictors[, dependent] <- NULL
  list(response, predictors)
}

# Build the model: logistic regression of `depend.var` on one predictor.
#
# x: values of the independent variable.
# depend.var: values of the dependent variable.
#
# Returns the fitted glm object invisibly.
glm_for_apply <- function(x, depend.var) {
  invisible(glm(depend.var ~ x, family = binomial(link = "logit")))
}

# Combine the previous functions and return every model that was created.
#
# dataframe_allVars: data frame holding every variable.
# dependent: column selector for the dependent variable, forwarded to
#   df_handler().
#
# Returns a list with one fitted model per remaining column.
multi_log_reg <- function(dataframe_allVars, dependent) {
  parts <- df_handler(dataframe_allVars, dependent)
  response <- parts[[1]]
  predictors <- as.list(parts[[2]])
  lapply(predictors, FUN = glm_for_apply, response)
}

# Fit one logistic regression per remaining column of Aids2, using the
# "status" column as the dependent variable.
multi_log_reg(Aids2, "status")

它返回:

# $`state`
# 
# Call:  glm(formula = depend.var ~ x, family = binomial(link = "logit"))
# 
# Coefficients:
#   (Intercept)       xOther         xQLD         xVIC  
#       0.51922     -0.23623      0.12128     -0.09814  
# 
# Degrees of Freedom: 2842 Total (i.e. Null);  2839 Residual
# Null Deviance:        3777 
# Residual Deviance: 3773   AIC: 3781
# 
# $sex
# 
# Call:  glm(formula = depend.var ~ x, family = binomial(link = "logit"))
# 
# Coefficients:
#   (Intercept)           xM  
#        0.3868       0.1036  
# 
# Degrees of Freedom: 2842 Total (i.e. Null);  2841 Residual
# Null Deviance:        3777 
# Residual Deviance: 3777   AIC: 3781
# 
# $diag
# 
# Call:  glm(formula = depend.var ~ x, family = binomial(link = "logit"))
# 
# Coefficients:
#   (Intercept)            x  
#     31.225441    -0.002867  
# 
# Degrees of Freedom: 2842 Total (i.e. Null);  2841 Residual
# Null Deviance:        3777 
# Residual Deviance: 2704   AIC: 2708
# 
# $death
# 
# Call:  glm(formula = depend.var ~ x, family = binomial(link = "logit"))
# 
# Coefficients:
#   (Intercept)            x  
#     1171.6449      -0.1022  
# 
# Degrees of Freedom: 2842 Total (i.e. Null);  2841 Residual
# Null Deviance:        3777 
# Residual Deviance: 296.9  AIC: 300.9
# 
# $T.categ
# 
# Call:  glm(formula = depend.var ~ x, family = binomial(link = "logit"))
# 
# Coefficients:
#   (Intercept)        xhsid          xid         xhet        xhaem       xblood      xmother       xother  
#       0.49592      0.01490     -0.91878     -0.84076      0.03816      0.94444     -0.78361     -0.20824  
# 
# Degrees of Freedom: 2842 Total (i.e. Null);  2835 Residual
# Null Deviance:        3777 
# Residual Deviance: 3743   AIC: 3759
# 
# $age
# 
# Call:  glm(formula = depend.var ~ x, family = binomial(link = "logit"))
# 
# Coefficients:
#   (Intercept)            x  
#      0.11182      0.01006  
# 
# Degrees of Freedom: 2842 Total (i.e. Null);  2841 Residual
# Null Deviance:        3777 
# Residual Deviance: 3771   AIC: 3775