运行回归并将模型估计提取到R中的数据框

时间:2018-05-14 14:30:25

标签: r linear-regression

我有3个曝光变量x1-x3,10个结果变量y1-y10和3个协变量cv1-cv3

我想对针对所有协变量调整的每次曝光的每个结果进行回归。然后我想要模型估计,即放置在数据帧中的β,SE,p值。有没有办法在R中自动执行此操作。谢谢!

我想要运行的模型如下所示:

y1 ~ x1+cv1+cv2+cv3 ... y10 ~ x1+cv1+cv2+cv3

y1 ~ x2+cv1+cv2+cv3 ... y10 ~ x2+cv1+cv2+cv3

y1 ~ x3+cv1+cv2+cv3 ... y10 ~ x3+cv1+cv2+cv3

1 个答案:

答案 0 :(得分:0)

没有数据和可重复的示例,很难帮助您,但这是模拟数据的示例。首先,创建一个名为data的虚假数据集:

library(tidyverse)

make_df <- function(y_i) {
  data_frame(y_var = y_i, y_i = rnorm(100),
              x1 = rnorm(100),  x2 = rnorm(100),  x3 = rnorm(100),
             cv1 = runif(100), cv2 = runif(100), cv3 = runif(100))
}

ys <- paste0("Y_", sprintf("%02d", 1:10))
ys
#>  [1] "Y_01" "Y_02" "Y_03" "Y_04" "Y_05" "Y_06" "Y_07" "Y_08" "Y_09" "Y_10"

data <-
ys %>%
  map_dfr(make_df)

data
#> # A tibble: 1,000 x 8
#>    y_var    y_i      x1      x2      x3    cv1     cv2    cv3
#>    <chr>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>  <dbl>
#>  1 Y_01   0.504  0.892  -0.806  -1.56   0.145  0.436   0.701 
#>  2 Y_01   0.967  1.24   -1.19    0.920  0.866  0.00100 0.567 
#>  3 Y_01  -0.824 -0.729  -0.0855 -1.06   0.0665 0.780   0.471 
#>  4 Y_01   0.294  2.37   -0.514  -0.955  0.397  0.0462  0.209 
#>  5 Y_01  -0.893  0.0298  0.0369  0.0787 0.640  0.709   0.0485
#>  6 Y_01   0.670 -0.347   1.56    2.11   0.843  0.542   0.793 
#>  7 Y_01  -1.59   1.04    0.228   0.573  0.185  0.151   0.558 
#>  8 Y_01  -2.04   0.289  -0.435  -0.113  0.833  0.0898  0.653 
#>  9 Y_01  -0.637  0.818  -0.454   0.606  0.294  0.378   0.315 
#> 10 Y_01  -1.61  -0.628  -2.75    1.06   0.353  0.0863  0.332 
#> # ... with 990 more rows

此时,你有选择权。一种方法是使用group_by%&gt;%do(tidy(*))食谱:

data %>%
  gather(x_var, x_value, -c(y_var, y_i, cv1:cv3)) %>%
  group_by(y_var, x_var) %>%
  do(broom::tidy(lm(y_i ~ x_value + cv1 + cv2 + cv3, data = .)))
#> # A tibble: 150 x 7
#> # Groups:   y_var, x_var [30]
#>    y_var x_var term        estimate std.error statistic p.value
#>    <chr> <chr> <chr>          <dbl>     <dbl>     <dbl>   <dbl>
#>  1 Y_01  x1    (Intercept)  -0.111      0.344   -0.324    0.747
#>  2 Y_01  x1    x_value      -0.0440     0.111   -0.396    0.693
#>  3 Y_01  x1    cv1           0.286      0.372    0.769    0.444
#>  4 Y_01  x1    cv2           0.0605     0.379    0.160    0.873
#>  5 Y_01  x1    cv3          -0.0690     0.378   -0.182    0.856
#>  6 Y_01  x2    (Intercept)  -0.146      0.336   -0.434    0.665
#>  7 Y_01  x2    x_value       0.117      0.105    1.12     0.265
#>  8 Y_01  x2    cv1           0.287      0.362    0.793    0.430
#>  9 Y_01  x2    cv2           0.0564     0.376    0.150    0.881
#> 10 Y_01  x2    cv3           0.0125     0.379    0.0330   0.974
#> # ... with 140 more rows

另一种方法是使用split变量,然后使用map中的purrr函数:

data %>%
  gather(x_var, x_value, -c(y_var, y_i, cv1:cv3)) %>%
  mutate(y_var_x_var = paste0(y_var, x_var)) %>%
  split(.$y_var_x_var) %>%
  map(~ lm(y_i ~ x_value + cv1 + cv2 + cv3, data = .))
#> $Y_01x1
#> 
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#> 
#> Coefficients:
#> (Intercept)      x_value          cv1          cv2          cv3  
#>    -0.11144     -0.04396      0.28585      0.06051     -0.06896  
#> 
#> 
#> $Y_01x2
#> 
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#> 
#> Coefficients:
#> (Intercept)      x_value          cv1          cv2          cv3  
#>    -0.14562      0.11732      0.28726      0.05642      0.01249  
#> 
#> 
# ...and so on...
#> 
#> 
#> $Y_10x2
#> 
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#> 
#> Coefficients:
#> (Intercept)      x_value          cv1          cv2          cv3  
#>    -0.45689     -0.02530      0.61375      0.34377     -0.02357  
#> 
#> 
#> $Y_10x3
#> 
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#> 
#> Coefficients:
#> (Intercept)      x_value          cv1          cv2          cv3  
#>    -0.44423     -0.18377      0.64739      0.27688     -0.02013