Question

我有50个州从1997年至今的每周数据，格式如下：

# Sample of the wide-format input as dput() output: 15 weekly rows
# (1/1/2011 through 4/9/2011), a character `date` column in m/d/Y form and
# one integer column per state (CT, MA, ME, NH, RI, VT, NJ, NY, PR).
# NOTE(review): the integer values are presumably weekly claim counts -- confirm.
structure(list(date = c("1/1/2011", "1/8/2011", "1/15/2011", 
"1/22/2011", "1/29/2011", "2/5/2011", "2/12/2011", "2/19/2011", 
"2/26/2011", "3/5/2011", "3/12/2011", "3/19/2011", "3/26/2011", 
"4/2/2011", "4/9/2011"), CT = c(8593L, 14629L, 8084L, 6986L, 
6050L, 6368L, 5408L, 5416L, 6098L, 5260L, 4428L, 3823L, 3808L, 
4366L, 4697L), MA = c(13656L, 14779L, 9462L, 8565L, 9575L, 8548L, 
9248L, 7569L, 11373L, 8891L, 7113L, 6775L, 4524L, 7099L, 7390L
), ME = c(2521L, 3811L, 3239L, 2306L, 2381L, 2000L, 1878L, 1745L, 
1582L, 2008L, 1887L, 1676L, 1707L, 1898L, 1843L), NH = c(3155L, 
2892L, 1983L, 1961L, 1948L, 1596L, 1533L, 1534L, 1905L, 1819L, 
1434L, 1366L, 1288L, 1376L, 1477L), RI = c(3446L, 3159L, 2428L, 
2221L, 2010L, 1891L, 2351L, 2107L, 2883L, 1763L, 1595L, 1434L, 
1094L, 1154L, 1496L), VT = c(2173L, 1547L, 946L, 838L, 883L, 
838L, 704L, 890L, 1230L, 761L, 1019L, 854L, 830L, 848L, 1205L
), NJ = c(18871L, 21451L, 16872L, 15175L, 12138L, 13015L, 12777L, 
10372L, 10528L, 10036L, 9582L, 8551L, 9477L, 9141L, 9750L), NY = c(37620L, 
61983L, 33269L, 27656L, 28151L, 26433L, 26553L, 22283L, 20990L, 
38916L, 21999L, 20928L, 20657L, 21008L, 24132L), PR = c(2961L, 
2437L, 2123L, 3423L, 4364L, 4525L, 4088L, 3765L, 3181L, 2857L, 
3366L, 2985L, 2730L, 2382L, 2720L)), .Names = c("date", "CT", 
"MA", "ME", "NH", "RI", "VT", "NJ", "NY", "PR"), row.names = c(NA, 
-15L), class = c("tbl_df", "tbl", "data.frame"))

我想为过去5年的每个州制作年度图表。这就是我到目前为止所做的事情：

library(tidyverse)
library(lubridate)
# library() stops with an error when plotly is missing; require() would only
# return FALSE and let the script carry on until a later, more obscure failure.
library(plotly)

# Read the wide table (one column per state), parse the m/d/Y date strings,
# derive day-of-year and year, then reshape to one row per date/state pair.
df <- read_csv('icclaims.csv') %>%
  mutate(date = as.Date(date, format = '%m/%e/%Y'),
         doy = as.numeric(format(date, '%j')),
         Year = as.factor(year(date))) %>%
  # pivot_longer() replaces the superseded gather(); every column except the
  # three derived/identifier columns is treated as a state.
  pivot_longer(cols = -c(date, doy, Year),
               names_to = 'state', values_to = 'claims') %>%
  arrange(date)

df %>%
  filter(state %in% c('CA', 'MA')) %>%
  # Make sample plot: one facet per state, one line per year over day-of-year.
  # NOTE: group/colour = Year are set in the top-level aes(), so every layer
  # inherits them -- including geom_smooth(), which therefore fits one
  # smoother (and one confidence band) per year rather than across all years.
  ggplot(aes(x = doy, y = claims, group = Year, colour = Year)) +
  geom_path() + geom_point() +
  facet_wrap(~ state, ncol = 1) +
  geom_smooth(span = 1, level = .99) +  #THANKS TO @JIMBOU
  theme_bw()

这是输出的样子：

最终目的是看看来自特定州的新的每周数据是否超出该州的99％置信区间，这取决于我决定回顾过去多久。

剩下的问题是如何让geom_smooth（）使用所有年份的数据来计算置信区间而不是一年。简而言之，我希望在过去5年中逐年打印，但显示1997年至2017年整个数据范围的置信区间。

非常感谢你的帮助。

Answer 1

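简单的技巧是：只在 geom_point()/geom_path() 各自的 aes() 中使用分组因子（Year），而不要放在 ggplot() 的全局 aes() 中，这样 geom_smooth() 就会使用所有年份的数据。假设您的数据保存在 d 中，我使用 melt() 将其转换为长格式。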

library(reshape2)
library(ggplot2)

# Reshape the first three columns of d (date plus two states) to long format.
# The id column is named explicitly so melt() does not have to guess it
# (and does not emit its "Using ... as id variables" message).
dl <- melt(d[, 1:3], id.vars = "date")
# the date object
dl$date2 <- as.Date(dl$date, format = '%m/%e/%Y')

# add a second year: a copy shifted by 365 days with offset values, so the
# demo has two years to smooth over
dl2 <- dl
dl2$date2 <- dl2$date2 + 365
dl2$value <- dl2$value + 10000
dl <- rbind(dl, dl2)

# Your x value Doy and the grouping factor
# (format() already returns character, so no as.character() is needed)
dl$Doy <- as.numeric(format(dl$date2, format = "%j"))
dl$Year <- as.factor(format(dl$date2, format = "%Y"))

# the plot: Year is mapped only inside the point/path layers, so geom_smooth()
# inherits just aes(Doy, value) and is fitted on all years together per facet.
# geom_smooth() uses its default level (0.95); pass level = 0.99 for a 99% band.
ggplot(dl, aes(Doy, value)) +
  geom_point(aes(color = Year)) + geom_path(aes(color = Year)) +
  facet_wrap(~ variable, ncol = 1) +
  geom_smooth()

R -> ggplot2 -> 以置信区间绘制每周时间序列数据 -> 多年/多州

1 个答案: