如何使用扫帚套件R

时间:2020-10-23 18:11:49

标签: r tidyverse

我正在尝试Hadley Wickham在此视频中描述的整洁方法:https://www.youtube.com/watch?v=rz3_FDVt9eg&t=1902s。只要经过整理的数据帧中只有1行,就可以通过这种方式直接获取某些统计信息,但是每个线性模型的斜率都将埋在使用Broom制作的经过整理的数据帧的第二行中。我的代码与Hadley的代码非常相似,看起来像这样。

    library(tidyverse)
    corn_by_county <- corn_final_long %>% group_by(County) %>% nest()
    
    # define & run linear models for each county
    corn_county <- function(df){
      lm(Yield ~ Year, data = df)}
    
    corn_models <- corn_by_county %>% mutate(model = map(data, corn_county))
    corn_output <- corn_models %>% mutate(tidy = map(model, broom::tidy),
                                          glance = map(model, broom::glance),
                                          augment = map(model, broom::augment),
                                          rsq = glance %>% map_dbl('r.squared'),
                                          slope = tidy %>% map_dbl('estimate')) ## slope not working

“斜率”位于“ corn_output”的“ tidy”列中嵌套的“ tidy”数据框的第二行。我已经尝试过此代码

slope = tidy %>% filter(term == 'Year') %>% map_dbl('estimate')

但是,这不起作用。如何提取斜率?谢谢。

这是我的数据样本。

corn_final_long <- structure(list(Year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 
1980L, 1980L, 1980L, 1980L), County = c("Aurora", "Beadle", "Bennett", 
"Bon Homme", "Brookings", "Brown", "Brule", "Buffalo", "Butte", 
"Campbell", "Charles Mix", "Clark", "Clay", "Codington", "Corson", 
"Custer", "Davison", "Day", "Deuel", "Dewey", "Douglas", "Edmunds", 
"Fall River", "Faulk", "Grant", "Gregory", "Haakon", "Hamlin", 
"Hand", "Hanson", "Harding", "Hughes", "Hutchinson", "Hyde", 
"Jackson", "Jerauld", "Jones", "Kingsbury", "Lake", "Lawrence", 
"Lincoln", "Lyman", "Marshall", "Mccook", "Mcpherson", "Meade", 
"Mellette", "Miner", "Minnehaha", "Moody", "Oglala Lakota", "Pennington", 
"Perkins", "Potter", "Roberts", "Sanborn", "Spink", "Stanley", 
"Sully", "Todd", "Tripp", "Turner", "Union", "Walworth", "Yankton", 
"Ziebach", "Aurora", "Beadle", "Bennett", "Bon Homme", "Brookings", 
"Brown", "Brule", "Buffalo", "Butte", "Campbell", "Charles Mix", 
"Clark", "Clay", "Codington", "Corson", "Custer", "Davison", 
"Day", "Deuel", "Dewey", "Douglas", "Edmunds", "Fall River", 
"Faulk", "Grant", "Gregory", "Haakon", "Hamlin", "Hand", "Hanson", 
"Harding", "Hughes", "Hutchinson", "Hyde"), Yield = c(47.3, 58.9, 
103.8, 71.4, 71.7, 65.3, 53.9, 72.8, 84.8, 61, 59, 63.4, 92.4, 
75.2, 41, 94.4, 62.7, 63.6, 74, 47.7, 57.7, 51.5, 102.1, 57.6, 
72.4, 58, 39.1, 68.2, 68.6, 66, 73.3, 85, 78.8, 52.7, 45, 40.9, 
76.7, 63.6, 80.6, 85, 96.3, 87, 65.8, 74.2, 55.9, 78.8, 47.8, 
66.2, 92.6, 93.1, 60, 62.9, 53.5, 60.2, 70.5, 64.8, 68.9, 60, 
59, 94.8, 42.2, 89.5, 105.1, 68.4, 78.9, 45, 25.4, 35.8, 43.5, 
27.3, 63.2, 46, 32.3, NA, 83.3, 80.8, 34.2, 53.8, 68.1, 66.2, 
16, 100, 26.3, 44.5, 70.6, 16.7, 27.2, 29.2, 93.7, 33.5, 64.4, 
30.9, 30, 60.1, 30.7, 34.5, NA, 41.1, 38.9, 28.2)), row.names = c(NA, 
-100L), class = c("tbl_df", "tbl", "data.frame"))

2 个答案:

答案 0 :(得分:1)

您可以只选择broom输出的“估计”列的第二个元素:

corn_output <- corn_models %>% 
               mutate(tidy = map(model, broom::tidy),
                      glance = map(model, broom::glance),
                      augment = map(model, broom::augment),
                      rsq = glance %>% map_dbl('r.squared'),
                      slope = tidy %>% map_dbl(function(x) x$estimate[2]))

答案 1 :(得分:1)

仅供参考,一种使用filterpull来获取斜率的方法:

library(tidyverse)
library(broom)

corn_by_county <- corn_final_long %>% group_by(County) %>% nest()

# define & run linear models for each county
corn_county <- function(df){
  lm(Yield ~ Year, data = df)}

corn_models <- corn_by_county %>% mutate(model = map(data, corn_county))
corn_output <- corn_models %>% mutate(tidy = map(model, broom::tidy),
                                      glance = map(model, broom::glance),
                                      augment = map(model, broom::augment),
                                      rsq = glance %>% map_dbl('r.squared'),
                                      slope = tidy %>% map_dbl(~ filter(.x, term == "Year") %>% pull(estimate))) ## slope not working

head(corn_output)
#> # A tibble: 6 x 8
#> # Groups:   County [6]
#>   County   data         model tidy        glance       augment        rsq  slope
#>   <chr>    <list>       <lis> <list>      <list>       <list>       <dbl>  <dbl>
#> 1 Aurora   <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -21.9 
#> 2 Beadle   <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -23.1 
#> 3 Bennett  <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -60.3 
#> 4 Bon Hom~ <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -44.1 
#> 5 Brookin~ <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1  -8.50
#> 6 Brown    <tibble [2 ~ <lm>  <tibble [2~ <tibble [1 ~ <tibble [2 ~     1 -19.3