处理purrr

时间:2017-05-14 16:10:04

标签: r dplyr purrr broom

我目前有以下R代码,这些代码在不同的子集中运行具有不同预测变量的多个回归模型,并使用broom包返回整理的输出。

library(dplyr) 
library(purrr)
library(broom)
cars <- mtcars
preds<-c("disp", "drat", "wt")
model_fits <- map_df(preds, function(pred) {
  model_formula <- sprintf("mpg ~ %s", pred)  
  cars %>%
    group_by(cyl) %>%
      do(tidy(lm(model_formula, data = .), conf.int = T)) %>% 
      filter(term == pred) %>%
      mutate(outcome = "mpg") %>%
    select(outcome, cyl:estimate, starts_with("conf."))
})

这导致以下数据框:

> model_fits
Source: local data frame [9 x 6]
Groups: cyl [3]

  outcome   cyl  term     estimate    conf.low     conf.high
    <chr> <dbl> <chr>        <dbl>       <dbl>         <dbl>
1     mpg     4  disp -0.135141815 -0.21018121 -0.0601024237
2     mpg     6  disp  0.003605119 -0.03638572  0.0435959552
3     mpg     8  disp -0.019634095 -0.03993175  0.0006635639
4     mpg     4  drat  5.235016267 -3.19097359 13.6610061249
5     mpg     6  drat  0.350268953 -3.13669610  3.8372340053
6     mpg     8  drat  0.329543608 -3.98975120  4.6488384177
7     mpg     4    wt -5.647025261 -9.83228414 -1.4617663781
8     mpg     6    wt -2.780105939 -6.21162010  0.6514082171
9     mpg     8    wt -2.192437926 -3.80310208 -0.5817737772

在不使用循环函数的情况下,将结果向量(例如outcomes<-c("mpg", "qsec"))合并到此脚本中的最佳方法是什么?我已经考虑了map2_df包中的purrr函数,但它要求两个向量具有相同的长度。我想要的数据框如下所示:

   outcome   cyl  term     estimate     conf.low     conf.high
     <chr> <dbl> <chr>        <dbl>        <dbl>         <dbl>
1      mpg     4  disp -0.135141815 -0.210181205 -0.0601024237
2      mpg     6  disp  0.003605119 -0.036385718  0.0435959552
3      mpg     8  disp -0.019634095 -0.039931754  0.0006635639
4      mpg     4  drat  5.235016267 -3.190973592 13.6610061249
5      mpg     6  drat  0.350268953 -3.136696100  3.8372340053
6      mpg     8  drat  0.329543608 -3.989751201  4.6488384177
7      mpg     4    wt -5.647025261 -9.832284144 -1.4617663781
8      mpg     6    wt -2.780105939 -6.211620095  0.6514082171
9      mpg     8    wt -2.192437926 -3.803102076 -0.5817737772
10    qsec     4  disp  0.020522320 -0.024081106  0.0651257460
11    qsec     6  disp  0.032395786  0.003380046  0.0614115258
12    qsec     8  disp  0.003443553 -0.007442996  0.0143301028
13    qsec     4  drat -1.304473000 -4.633470581  2.0245245810
14    qsec     6  drat -2.234114977 -5.457932913  0.9897029580
15    qsec     8  drat -2.645047137 -3.791162337 -1.4989319372
16    qsec     4    wt  1.884663596  0.169516461  3.5998107312
17    qsec     6    wt  4.147883561  1.394030756  6.9017363651
18    qsec     8    wt  0.845029716  0.009104550  1.6809548809

1 个答案:

答案 0 :(得分:1)

我不知道这是否是最好的方式,但我会使用map2()interaction()进行一些灵活的准备工作:

preds<-c("disp", "drat", "wt")
outs <- c("mpg", "qsec") # new vector

# probably start function wrap here

# see how many model objects we need based on above
total_models <- interaction(outs, preds, sep = " ~ ") %>% levels() %>%
    interaction(., unique(mtcars$cyl)) %>% levels() %>% # ignore warning
    length()

# set up for map2()
formulas <- interaction(outs, preds, sep = " ~ ") %>% levels() %>%
    rep(times = total_models / length(.)) # ignore warning
groups <- split(mtcars, mtcars$cyl) %>%
    rep(each = total_models / length(.))

# use names for map's .id
names(formulas) <- formulas
#groups is names nicely by defualt split() behavior

# you could also start wrap here
map2(formulas, groups, ~ lm(.x, data = .y)) %>% # list of models here
    map_df(~ tidy(., conf.int = T) %>% # no more do()
               filter(term %in% preds), .id = "outcome") %>% # one big df here
    mutate(cyl = names(groups), # add cyl back in; polish for consistency
           outcome = gsub(" ~.*", "", outcome)) %>% 
    select(cyl, everything(), -(std.error:p.value)) %>%
    arrange(outcome, term, cyl)

   cyl outcome term     estimate     conf.low     conf.high
1    4     mpg disp -0.135141815 -0.210181205 -0.0601024237
2    6     mpg disp  0.003605119 -0.036385718  0.0435959552
3    8     mpg disp -0.019634095 -0.039931754  0.0006635639
4    4     mpg drat  5.235016267 -3.190973592 13.6610061249
5    6     mpg drat  0.350268953 -3.136696100  3.8372340053
6    8     mpg drat  0.329543608 -3.989751201  4.6488384177
7    4     mpg   wt -5.647025261 -9.832284144 -1.4617663781
8    6     mpg   wt -2.780105939 -6.211620095  0.6514082171
9    8     mpg   wt -2.192437926 -3.803102076 -0.5817737772
10   4    qsec disp  0.020522320 -0.024081106  0.0651257460
11   6    qsec disp  0.032395786  0.003380046  0.0614115258
12   8    qsec disp  0.003443553 -0.007442996  0.0143301028
13   4    qsec drat -1.304473000 -4.633470581  2.0245245810
14   6    qsec drat -2.234114977 -5.457932913  0.9897029580
15   8    qsec drat -2.645047137 -3.791162337 -1.4989319372
16   4    qsec   wt  1.884663596  0.169516461  3.5998107312
17   6    qsec   wt  4.147883561  1.394030756  6.9017363651
18   8    qsec   wt  0.845029716  0.009104550  1.6809548809