Question

我有一个包含310个不同列的大数据框，这里我创建了一个类似的小示例。我想用for循环选出名称以“sp”开头的列。

# Small example standing in for the real 310-column data frame.
# Ten rows: ID, COD, SP, four "sp..." columns and six "*_bio_*" columns.
ex <- data.frame(
  ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  COD = c(1, 8, 4, 5, 6, 8, 7, 2, 8, 10),
  SP = c(10, 20, 40, 50, 60, 70, 70, 100, 50, 40),
  sp010_hd = c(100, 200, 350, 500, 500, 250, 240, 680, 700, 300),
  sp300_he = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_hg = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_cc = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  hd_bio_4 = c(208.92, 248.10, 151.95, 130.01, 90.01,
               140.01, 150.09, 400.25, 300.00, 100.50),
  hd_bio_6 = c(207.92, 208.10, 161.95, 170.01, 190.01,
               120.01, 155.09, 200.25, 100.00, 100.50),
  hd_bio_7 = c(227.92, 218.10, 161.95, 170.01, 150.01,
               150.71, 160.09, 220.25, 120.00, 108.50),
  he_bio_4 = c(208.92, 248.10, 151.95, 140.01, 60.01,
               160.01, 157.09, 420.25, 300.00, 100.50),
  he_bio_6 = c(257.92, 238.10, 131.95, 130.01, 160.01,
               125.01, 155.09, 220.25, 100.00, 100.50),
  he_bio_7 = c(227.92, 218.10, 161.95, 130.01, 150.01,
               100.71, 165.09, 220.25, 120.00, 108.50)
)

我想对其中每一列应用lm()函数。我做了一些尝试，但没有成功。谁能帮我？谢谢

Answer 1

您可以使用purrr::map：

library(tidyverse)

# Predictor names: every column of `ex` except ID, COD and SP. The vector is
# named by its own values so that `.id` can label each model's rows.
ivs <- setNames(nm = colnames(select(ex, -ID, -COD, -SP)))

# Fit SP against one column at a time and stack the tidied coefficient tables.
purrr::map_dfr(
  ivs,
  function(.x) broom::tidy(lm(ex$SP ~ ex[, .x])),
  .id = "iv"
)

注意：如果让term列显示实际使用的变量名，输出会更具可读性。为此，请在调用lm()时使用data参数：

# Same per-column regressions, but each model gets a two-column data frame
# (the predictor plus SP), so `term` shows the predictor's column name.
map_dfr(
  ivs,
  function(iv) {
    fit <- lm(SP ~ ., data = ex[, c(iv, "SP")])
    broom::tidy(fit)
  },
  .id = "iv"
)

输出：

# A tibble: 20 x 6
   iv       term        estimate std.error statistic p.value
   <chr>    <chr>          <dbl>     <dbl>     <dbl>   <dbl>
 1 sp010_hd (Intercept)  22.0      15.5        1.42   0.192 
 2 sp010_hd sp010_hd      0.0758    0.0362     2.10   0.0694
 3 sp300_he (Intercept)  42.6      22.1        1.93   0.0896
 4 sp300_he sp300_he      0.0199    0.0479     0.415  0.689 
 5 sp330_hg (Intercept)  42.6      22.1        1.93   0.0896
 6 sp330_hg sp330_hg      0.0199    0.0479     0.415  0.689 
 7 sp330_cc (Intercept)  42.6      22.1        1.93   0.0896
 8 sp330_cc sp330_cc      0.0199    0.0479     0.415  0.689 
 9 hd_bio_4 (Intercept)  36.7      19.1        1.92   0.0913
10 hd_bio_4 hd_bio_4      0.0742    0.0897     0.828  0.432 
11 hd_bio_6 (Intercept)  65.0      35.9        1.81   0.108 
12 hd_bio_6 hd_bio_6     -0.0866    0.216     -0.401  0.699 
13 hd_bio_7 (Intercept)  65.5      38.2        1.72   0.124 
14 hd_bio_7 hd_bio_7     -0.0860    0.220     -0.390  0.707 
15 he_bio_4 (Intercept)  36.0      18.2        1.98   0.0833
16 he_bio_4 he_bio_4      0.0772    0.0831     0.929  0.380 
17 he_bio_6 (Intercept)  68.6      26.7        2.57   0.0332
18 he_bio_6 he_bio_6     -0.109     0.156     -0.695  0.507 
19 he_bio_7 (Intercept)  67.8      31.6        2.15   0.0640
20 he_bio_7 he_bio_7     -0.105     0.190     -0.553  0.596

Answer 2

使用dplyr包和starts_with()函数可以轻松地选择带有“sp”前缀的列：

# Reduced example: ID, COD, SP and the four "sp..." columns only.
ex <- local({
  # sp300_he, sp330_hg and sp330_cc hold the same ten values.
  shared_vals <- c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300)
  data.frame(
    ID = as.numeric(1:10),
    COD = c(1, 8, 4, 5, 6, 8, 7, 2, 8, 10),
    SP = c(10, 20, 40, 50, 60, 70, 70, 100, 50, 40),
    sp010_hd = c(100, 200, 350, 500, 500, 250, 240, 680, 700, 300),
    sp300_he = shared_vals,
    sp330_hg = shared_vals,
    sp330_cc = shared_vals
  )
})


library(dplyr)
# Keep only the lower-case "sp..." columns. starts_with() ignores case by
# default, so without ignore.case = FALSE the upper-case SP column would be
# selected as well.
select(ex, starts_with("sp", ignore.case = FALSE))

如果您想扩展它以对每个变量进行回归，则可以像这样使用lapply()或purrr::map()：

# Reshape the lower-case "sp..." columns to long form (column name in
# `sp_number`, value in `whatever_units`), nest the rows belonging to each
# original column, fit one linear model per column and unnest the tidied
# coefficient tables.
# tidyr/purrr functions are namespace-qualified because only dplyr is attached
# in this answer; pivot_longer() replaces the superseded gather().
ex %>%
  tidyr::pivot_longer(
    cols = starts_with("sp", ignore.case = FALSE),
    names_to = "sp_number",
    values_to = "whatever_units"
  ) %>%
  group_by(sp_number) %>%
  tidyr::nest() %>%
  # Drop the grouping once the per-column data sets exist so that the result
  # is a plain tibble.
  ungroup() %>%
  mutate(
    lm_results = purrr::map(data, function(d) lm(whatever_units ~ SP, data = d)),
    tidy_results = purrr::map(lm_results, broom::tidy)
  ) %>%
  tidyr::unnest(tidy_results)

如何在for循环中选择列

2 个答案: