从嵌套数据系数中为每个图形绘制一条回归线

时间:2019-02-15 19:35:32

标签: r ggplot2 tidyr

我已经在嵌套的小块上添加了一些绘图代码以绘制两行,其中一条使用goem_smooth,另一条使用geom_abline使用回归系数绘制(意图是嵌套小块中的每一行记录的一条平滑线和一条回归线)。< / p>

geo_smooth中的行正常工作(行记录中每组数据的唯一平滑线)。但是geom_abline为每个记录向每个图形添加了一条回归线。在这种情况下,我有66行记录,因此每个图都用66条不同的回归线进行绘制。这是将gg代码添加到嵌套标题的代码:

> by_watershed_figs
# A tibble: 66 x 7
   loc_major_basin watershed_data censmodel watershed_pvalue watershed_intercept watershed_slope `map(...)`
             <int> <list> <list> <dbl> <dbl> <dbl> <list>    
 1 7080 <tibble [2,562 x 18]>  <S3: survreg> 0. -171. 0.867 <S3: gg>  
 2 7040 <tibble [12,512 x 18]> <S3: survreg> 0. -719. 0.379 <S3: gg>  
 3 7010 <tibble [3,669 x 18]> <S3: survreg> 0. -233. .18  <S3: gg>  
 4 9020 <tibble [839 x 18]> <S3: survreg> 0. -425. 2.13  <S3: gg>  
 5 9021 <tibble [1,658 x 18]> <S3: survreg> 0. 933. -.46 <S3: gg>  
 6 9022 <tibble [1,10 x 18]> <S3: survreg> 1.44e -258. .30  <S3: gg>  
 7 7030 <tibble [3,619 x 18]> <S3: survreg> 0. -2499. 1.27  <S3: gg>  
 8 4010 <tibble [857 x 18]> <S3: survreg> 0. 3496. -.71  <S3: gg>  
 9 7041 <tibble [11,353 x 18]> <S3: survreg> 0. -995. .512 <S3: gg>  
10 7020 <tibble [8,076 x 18]> <S3: survreg> 2.08 -329. .185 <S3: gg>  
# ... with 56 more rows
> # make some figures

这是将绘图代码添加到嵌套标题的代码

# make some figures
by_watershed_figs <- by_watershed_full %>% 
  mutate(
    map(watershed_data, ~ ggplot(., aes(x = year, y = combined_stube_conv100_conv60)) + 
          geom_point(alpha = 0.4, size = 0.25) + 
          labs(x = 'Year', 
               y = 'Clarity (cm)',
               title = 'Minnesota stream Secchi tube measures',
               subtitle = loc_major_basin) + 
          ylim(0, 130) +
          geom_smooth(se = TRUE, color = 'purple') +
          geom_abline(intercept = watershed_intercept, slope = watershed_slope, color = 'orange')
    )
  )

在测试中,我可以使用[[[1]]这样在第一行记录中建立单行购买索引:

 geom_abline(intercept = watershed_intercept[[1]], slope = watershed_slope[[1]], color = 'orange')

但是,正如预期的那样,这将为所有66个图形增加一个类似于第一条记录数据的回归,而不是从每行记录的数据中恢复。

添加带有虚拟数据的可复制示例:

library(tidyverse)
library(lubridate)
library(gam)
library(broom)
library(purrr)

tb <- structure(list(
  dt = structure(c(
    14245, 14276, 14304, 14335, 14365,
    14396, 14426, 14457, 14488, 14518, 14549, 14579, 14610, 14641,
    14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883, 14914,
    14944, 14975, 15006, 15034, 15065, 15095, 15126, 15156, 15187,
    15218, 15248, 15279, 15309, 15340, 15371, 15400, 15431, 15461,
    15492, 15522, 15553, 15584, 15614, 15645, 15675, 15706, 15737,
    15765, 15796, 15826, 15857, 15887, 15918, 15949, 14245, 14276,
    14304, 14335, 14365, 14396, 14426, 14457, 14488, 14518, 14549,
    14579, 14610, 14641, 14669, 14700, 14730, 14761, 14791, 14822,
    14853, 14883, 14914, 14944, 14975, 15006, 15034, 15065, 15095,
    15126, 15156, 15187, 15218, 15248, 15279, 15309, 15340, 15371,
    15400, 15431, 15461, 15492, 15522, 15553, 15584, 15614, 15645,
    15675, 15706, 15737, 15765, 15796, 15826, 15857, 15887, 15918,
    15949
  ), class = "Date"),
  averagetemperature = c(
    -4.299, 1.454, 4.808, 7.623, 12.627, 17.305, 19.792, 21.724,
    19.502, 11.22, 10.261, 1.563, -0.595, 0.771, 6.489, 10.935,
    13.803, 19.055, 24.106, 24.948, 19.229, 14.582, 8.539, -0.071,
    -1.582, 0.276, 3.474, 7.383, 12.133, 18.011, 24.412, 23.414,
    18.331, 13.837, 9.555, 5.327, 2.67, 3.698, 12.145, 8.383, 14.956,
    19.532, 25.909, 22.778, 18.693, 12.229, 7.27, 5.592, 1.056, -0.509,
    1.323, 6.644, 13.734, 17.913, 21.914, 22.23, 19.977, -5.36, -0.372,
    3.579, 10.478, 15.447, 19.058, 21.103, 22.769, 17.043, 10.364, 8.217,
    -0.624, -2.359, -1.456, 6.715, 12.076, 17.119, 21.943, 24.789,
    22.67, 19.172, 11.911, 5.876, -2.165, -4.463, -1.244, 3.474,
    10.555, 16.917, 21.032, 24.564, 22.13, 19.301, 12.001, 8.013,
    2.987, -0.0410000000000004, 2.185, 8.734, 10.324, 17.779, 20.165,
    24.479, 22.731, 18.177, 12.436, 4.103, 2.586, -0.968, -1.365,
    2.518, 9.723, 15.544, 20.892, 24.722, 21.001, 17.408
  ),
  averagetemperatureuncertainty = c(
    0.336,
    0.328, 0.247, 0.348, 0.396, 0.554, 0.481, 0.315, 0.225, 0.162,
    0.372, 0.348, 0.348, 0.364, 0.357, 0.538, 0.892, 0.33, 0.325,
    0.36, 0.322, 0.241, 0.307, 0.326, 0.522, 0.446, 0.279, 0.265,
    0.733, 0.773, 0.255, 0.404, 0.173, 0.154, 0.334, 0.483, 0.727,
    0.567, 0.369, 0.347, 0.835, 0.519, 0.516, 0.42, 0.329, 0.333,
    0.263, 0.537, 0.528, 0.473, 0.275, 0.462, 0.863, 0.669, 0.322,
    0.373, 1.033, 0.288, 0.214, 0.14, 0.259, 0.267, 0.452, 0.348,
    0.277, 0.22, 0.153, 0.181, 0.228, 0.314, 0.319, 0.235, 0.135,
    0.2, 0.387, 0.28, 0.257, 0.165, 0.154, 0.174, 0.436, 0.355, 0.33,
    0.167, 0.222, 0.312, 0.42, 0.438, 0.163, 0.16, 0.23, 0.298, 0.466,
    0.493, 0.253, 0.276, 0.258, 0.301, 0.39, 0.403, 0.224, 0.269,
    0.344, 0.298, 0.257, 0.29, 0.241, 0.255, 0.355, 0.281, 0.273,
    0.279, 0.323, 1.048
  ), city = c(
    "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York", "New York", "New York", "New York",
    "New York", "New York", "New York"
  ), country = c(
    "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States", "United States", "United States", "United States",
    "United States"
  ), latitude = c(
    "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
    "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
    "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N"
  ),
  longitude = c(
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
    "87.27W", "87.27W", "87.27W", "87.27W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
    "74.56W"
  )
), row.names = c(NA, -114L), class = c(
  "tbl_df",
  "tbl", "data.frame"
), spec = structure(list(cols = list(
  dt = structure(list(
    format = ""
  ), class = c("collector_date", "collector")),
  AverageTemperature = structure(list(), class = c(
    "collector_double",
    "collector"
  )), AverageTemperatureUncertainty = structure(list(), class = c(
    "collector_double",
    "collector"
  )), City = structure(list(), class = c(
    "collector_character",
    "collector"
  )), Country = structure(list(), class = c(
    "collector_character",
    "collector"
  )), Latitude = structure(list(), class = c(
    "collector_character",
    "collector"
  )), Longitude = structure(list(), class = c(
    "collector_character",
    "collector"
  ))
), default = structure(list(), class = c(
  "collector_guess",
  "collector"
))), class = "col_spec"))

# Load the data from dput() and take a look
summary(tb)
str(tb)
names(tb)

# make lowercasae
tb <- rename_all(tb, tolower)
names(tb)

# nest data, 100 major cities
by_city_month <- tb %>% 
  filter(year(dt) >= 1900) %>%
  mutate(month = month(dt)) %>%
  mutate(yr1900 = year(dt) - 1900) %>%
  group_by(city, country, month) %>%
  nest()

by_city_month

# define function for linear model
city_model_lm <- function(df) {
  lm(averagetemperature ~ yr1900, data = df)
}

# create columns for the models
cmodels <- by_city_month %>%
  mutate(model = map(data, city_model_lm)
  )

# add tidy and glance to list
cmodels <- cmodels %>% 
  mutate(tidy = map(model, tidy),
         glance = map(model, glance)
         )

# unnest glance list
cmodels_g <- cmodels %>%
  unnest(glance) %>%
  select(city, country, month, data, model, p.value)
cmodels_g

# unnest and spread the tidy list [4x7] into 28 rows for each watershed
cmodels_t <- cmodels %>%
  unnest(tidy) %>%
  select(city, country, month, term, estimate) %>%
  spread(key = term, value = estimate) %>%
  select(city, country, month, `(Intercept)`, yr1900)
cmodels_t

# join tables to get pvalues and slopes in a single table, rename variables to
# make jointing with stream table easier to follow
cmodels_all <- left_join(cmodels_g, cmodels_t) %>%
  rename(intercept = `(Intercept)`, slope = yr1900)
cmodels_all

# add ggplot to list
cmodels_figs <- cmodels_all %>%
  mutate(
    map(data, ~ ggplot(., aes(x = yr1900, y = averagetemperature)) +
          geom_point() +
          ylab('Average Temperature') +
          xlab('Years past 1900') +
          geom_smooth(se = TRUE, color = 'purple') +
          geom_abline(intercept = intercept, slope = slope, color = 'orange') +
          ggtitle(label = city, subtitle = month)
    )
  ) %>%
  rename(plots =`map(...)`)

# draw figures
cmodels_figs$plots

0 个答案:

没有答案