我想从tbl索引值,该tbl由一些常规列和一些小标题列表组成。基本结构是这样:
# A tibble: 1,200 x 13
city country month data model modelgam glance_lm rsq glance_gam aic_gam tidy_lm augment_lm res
<chr> <chr> <dbl> <list> <list> <list> <list> <dbl> <list> <dbl> <list> <list> <list>
1 Abidjan Côte D'Ivoire 1 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.233 <tibble [1 × 6]> 154. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
2 Abidjan Côte D'Ivoire 2 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.342 <tibble [1 × 6]> 164. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
3 Abidjan Côte D'Ivoire 3 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.274 <tibble [1 × 6]> 152. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
4 Abidjan Côte D'Ivoire 4 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.376 <tibble [1 × 6]> 145. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
5 Abidjan Côte D'Ivoire 5 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.451 <tibble [1 × 6]> 112. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
6 Abidjan Côte D'Ivoire 6 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.419 <tibble [1 × 6]> 87.4 <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
7 Abidjan Côte D'Ivoire 7 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.249 <tibble [1 × 6]> 129. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
8 Abidjan Côte D'Ivoire 8 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.228 <tibble [1 × 6]> 153. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
9 Abidjan Côte D'Ivoire 9 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.230 <tibble [1 × 6]> 124. <tibble [2 × 5]> <tibble [113 × 9]> <dbl [113]>
10 Abidjan Côte D'Ivoire 10 <tibble [113 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.292 <tibble [1 × 6]> 98.7 <tibble [2 × 5]> <tibble [113 × 9]> <dbl [113]>
# ... with 1,190 more rows
例如,如果要从glance_lm中提取p.value作为第一条记录,则可以使用双括号(因为它是一个列表),如下所示:
> cmodels_details$glance_lm[[1]]$p.value
[1] 5.430896e-08
但是,如果我想要所有的p.values,这将不起作用:
> cmodels_details$glance_lm[[]]$p.value
NULL
就我而言,我特别想将这两个模型的估计值从它们的列表中拉出,并在每条记录的图形上绘制回归线。因此,能够像这样抓取截距和坡度(一次记录一条记录):
cmodels_details$tidy_lm[[1]]$estimate
[1] 26.379383982 0.007833912
或只是拦截:
> cmodels_details$tidy_lm[[1]]$estimate[1]
[1] 26.37938
不是很有帮助。最终,我想在数据结构中添加一些ggplot代码,为每个记录绘制一个图形,但是为了对两个模型(lm和gam)做出回归线,我需要从每个记录的小标题中获取估算值(或小标题列表)
这是我正在使用的数据的一部分,以及使代码正确显示的代码。
> dput(small_tb)
structure(list(dt = structure(c(14245, 14276, 14304, 14335, 14365,
14396, 14426, 14457, 14488, 14518, 14549, 14579, 14610, 14641,
14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883, 14914,
14944, 14975, 15006, 15034, 15065, 15095, 15126, 15156, 15187,
15218, 15248, 15279, 15309, 15340, 15371, 15400, 15431, 15461,
15492, 15522, 15553, 15584, 15614, 15645, 15675, 15706, 15737,
15765, 15796, 15826, 15857, 15887, 15918, 15949, 14245, 14276,
14304, 14335, 14365, 14396, 14426, 14457, 14488, 14518, 14549,
14579, 14610, 14641, 14669, 14700, 14730, 14761, 14791, 14822,
14853, 14883, 14914, 14944, 14975, 15006, 15034, 15065, 15095,
15126, 15156, 15187, 15218, 15248, 15279, 15309, 15340, 15371,
15400, 15431, 15461, 15492, 15522, 15553, 15584, 15614, 15645,
15675, 15706, 15737, 15765, 15796, 15826, 15857, 15887, 15918,
15949), class = "Date"), averagetemperature = c(-4.299, 1.454,
4.808, 7.623, 12.627, 17.305, 19.792, 21.724, 19.502, 11.22,
10.261, 1.563, -0.595, 0.771, 6.489, 10.935, 13.803, 19.055,
24.106, 24.948, 19.229, 14.582, 8.539, -0.071, -1.582, 0.276,
3.474, 7.383, 12.133, 18.011, 24.412, 23.414, 18.331, 13.837,
9.555, 5.327, 2.67, 3.698, 12.145, 8.383, 14.956, 19.532, 25.909,
22.778, 18.693, 12.229, 7.27, 5.592, 1.056, -0.509, 1.323, 6.644,
13.734, 17.913, 21.914, 22.23, 19.977, -5.36, -0.372, 3.579,
10.478, 15.447, 19.058, 21.103, 22.769, 17.043, 10.364, 8.217,
-0.624, -2.359, -1.456, 6.715, 12.076, 17.119, 21.943, 24.789,
22.67, 19.172, 11.911, 5.876, -2.165, -4.463, -1.244, 3.474,
10.555, 16.917, 21.032, 24.564, 22.13, 19.301, 12.001, 8.013,
2.987, -0.0410000000000004, 2.185, 8.734, 10.324, 17.779, 20.165,
24.479, 22.731, 18.177, 12.436, 4.103, 2.586, -0.968, -1.365,
2.518, 9.723, 15.544, 20.892, 24.722, 21.001, 17.408), averagetemperatureuncertainty = c(0.336,
0.328, 0.247, 0.348, 0.396, 0.554, 0.481, 0.315, 0.225, 0.162,
0.372, 0.348, 0.348, 0.364, 0.357, 0.538, 0.892, 0.33, 0.325,
0.36, 0.322, 0.241, 0.307, 0.326, 0.522, 0.446, 0.279, 0.265,
0.733, 0.773, 0.255, 0.404, 0.173, 0.154, 0.334, 0.483, 0.727,
0.567, 0.369, 0.347, 0.835, 0.519, 0.516, 0.42, 0.329, 0.333,
0.263, 0.537, 0.528, 0.473, 0.275, 0.462, 0.863, 0.669, 0.322,
0.373, 1.033, 0.288, 0.214, 0.14, 0.259, 0.267, 0.452, 0.348,
0.277, 0.22, 0.153, 0.181, 0.228, 0.314, 0.319, 0.235, 0.135,
0.2, 0.387, 0.28, 0.257, 0.165, 0.154, 0.174, 0.436, 0.355, 0.33,
0.167, 0.222, 0.312, 0.42, 0.438, 0.163, 0.16, 0.23, 0.298, 0.466,
0.493, 0.253, 0.276, 0.258, 0.301, 0.39, 0.403, 0.224, 0.269,
0.344, 0.298, 0.257, 0.29, 0.241, 0.255, 0.355, 0.281, 0.273,
0.279, 0.323, 1.048), city = c("Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York"), country = c("United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States"), latitude = c("42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N"),
longitude = c("87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W")), row.names = c(NA, -114L), class = c("tbl_df",
"tbl", "data.frame"), spec = structure(list(cols = list(dt = structure(list(
format = ""), class = c("collector_date", "collector")),
AverageTemperature = structure(list(), class = c("collector_double",
"collector")), AverageTemperatureUncertainty = structure(list(), class = c("collector_double",
"collector")), City = structure(list(), class = c("collector_character",
"collector")), Country = structure(list(), class = c("collector_character",
"collector")), Latitude = structure(list(), class = c("collector_character",
"collector")), Longitude = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector"))), class = "col_spec"))
和一些代码:
# load libraries
library(tidyverse)
library(lubridate)
library(gam)
# library(broom)
# library(purrr)
# Load the data from dput() and take a look
summary(gltd)
str(gltd)
names(gltd)
# make lowercasae
gltd <- rename_all(gltd, tolower)
names(gltd)
# nest data, 100 major cities
by_city_month <- gltd %>%
filter(year(dt) >= 1900) %>%
mutate(month = month(dt)) %>%
mutate(yr1900 = year(dt) - 1900) %>%
group_by(city, country, month) %>%
nest()
by_city_month
# define function for linear model
city_model_lm <- function(df) {
lm(averagetemperature ~ yr1900, data = df)
}
# define function for GAM
city_model_gam <- function(df) {
gam(averagetemperature ~ s(yr1900), data = df)
}
# create columns for the models
cmodels <- by_city_month %>%
mutate(model = map(data, city_model_lm),
modelgam = map(data, city_model_gam)
)
cmodels_details <- cmodels %>%
mutate(
glance_lm = model %>% map(glance), #model summary: rsquared...
rsq = glance_lm %>% map_dbl("r.squared"), #extract rsquared
glance_gam = modelgam %>% map(broom::glance), #GAM model summary
aic_gam = glance_gam %>% map_dbl("AIC"), #extract AIC
tidy_lm = model %>% map(tidy), #model estimate: coeff...
augment_lm = model %>% map(augment), #observation stats: resid,hat...
res = augment_lm %>% map(".resid") #extract resid
)
任何建议将不胜感激。
答案 0 :(得分:1)
这是一个tidyverse
解决方案,可以为您提供所需的内容:
library(tidyverse)
cmodels_details %>%
transmute(
city = city,
month = month,
p_val = map_dbl(glance_lm, "p.value"),
coefs = map(tidy_lm, "estimate")
) %>%
unnest(coefs) %>%
mutate(x = rep(c("intercept", "x1"), nrow(.) / 2))) %>%
spread(x, coefs)
# A tibble: 24 x 5
# city month p_val intercept x1
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 Chicago 1 0.0790 -156. 1.40
# 2 Chicago 2 0.875 12.2 -0.0999
# 3 Chicago 3 0.935 20.2 -0.131
# 4 Chicago 4 0.468 58.3 -0.451
# 5 Chicago 5 0.411 -23.9 0.337
# 6 Chicago 6 0.630 -0.429 0.169
# 7 Chicago 7 0.505 -43.9 0.605
# 8 Chicago 8 0.814 35.9 -0.116
# 9 Chicago 9 0.872 14.6 0.0414
#10 Chicago 10 0.807 -12.2 0.228
# ... with 14 more rows
答案 1 :(得分:0)
作为对以下内容的回应:“最终,我想向数据结构中添加一些ggplot代码,以便为每个记录绘制一个图形,但是为了对两个模型(lm和gam)制作回归线,我需要从每个记录的小标题(或小标题列表)中获取估计值”,请尝试geom_smooth()
。您无需拟合所有模型即可绘制LM和GAM模型!希望这对您有所帮助!
by_city_month %>% unnest() %>% # your data from above
ggplot(aes(x = yr1900, y = averagetemperature, group = interaction(city,month))) +
geom_smooth(method = "lm", se = F, color = "black", alpha = .7) +
geom_smooth(method = "gam", se = F, color = "pink", alpha = .7, size = .5, linetype = 2)