如何绘制类似于此处所附示例的图?
其中粗线表示平均值,阴影表示数据的可变性。
示例数据df
和代码。
>dput(df)
structure(list(yr = c(1989L, 1990L, 1991L, 1992L, 1993L, 1994L,
1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L,
1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L,
1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L,
1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L,
1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L,
1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L,
1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L,
1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L,
1990L, 1991L, 1992L, 1993L, 1994L, 1995L), no = c(11L, 12L, 13L,
17L, 14L, 17L, 15L, 12L, 12L, 18L, 7L, 10L, 10L, 6L, 7L, 11L,
8L, 7L, 11L, 6L, 8L, 9L, 12L, 15L, 14L, 10L, 18L, 13L, 15L, 14L,
14L, 11L, 15L, 7L, 11L, 6L, 5L, 10L, 9L, 8L, 5L, 6L, 12L, 8L,
17L, 18L, 14L, 15L, 16L, 18L, 18L, 15L, 13L, 12L, 9L, 12L, 5L,
5L, 7L, 5L, 9L, 7L, 5L, 5L, 6L, 10L, 12L, 5L, 13L, 8L, 13L, 12L,
11L, 4L, 12L, 6L, 6L, 10L, 6L, 11L, 8L, 6L, 3L, 6L), lval = c("l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661"), CCR = c("CR1",
"CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1",
"CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1",
"CR1", "CR1", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2",
"CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2",
"CR2", "CR2", "CR2", "CR2", "CR2", "CR3", "CR3", "CR3", "CR3",
"CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3",
"CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR4",
"CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4",
"CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4",
"CR4", "CR4")), .Names = c("yr", "no", "lval", "CCR"), row.names = c(NA,
-84L), vars = "Year", drop = TRUE, indices = list(c(0L, 7L, 14L,
21L, 28L, 35L, 42L, 49L, 56L, 63L, 70L, 77L), c(1L, 8L, 15L,
22L, 29L, 36L, 43L, 50L, 57L, 64L, 71L, 78L), c(2L, 9L, 16L,
23L, 30L, 37L, 44L, 51L, 58L, 65L, 72L, 79L), c(3L, 10L, 17L,
24L, 31L, 38L, 45L, 52L, 59L, 66L, 73L, 80L), c(4L, 11L, 18L,
25L, 32L, 39L, 46L, 53L, 60L, 67L, 74L, 81L), c(5L, 12L, 19L,
26L, 33L, 40L, 47L, 54L, 61L, 68L, 75L, 82L), c(6L, 13L, 20L,
27L, 34L, 41L, 48L, 55L, 62L, 69L, 76L, 83L)), group_sizes = c(12L,
12L, 12L, 12L, 12L, 12L, 12L), biggest_group_size = 12L, labels = structure(list(
Year = 1989:1995), row.names = c(NA, -7L), class = "data.frame", vars = "Year", drop = TRUE, .Names = "Year"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
library(dplyr)   # group_by() / summarise() for the per-group mean
library(ggplot2) # ggplot(), geom_point(), geom_line()

# Mean of `no` for every (yr, lval) combination, i.e. averaged over the CCR
# levels. ungroup() so the summary does not carry residual grouping around.
df_mn <- df %>%
  group_by(yr, lval) %>%
  summarise(meanno = mean(no)) %>%
  ungroup()
df_mn
df

# Raw observations as points, with the per-group mean overlaid as a line.
# `lty` is not passed to geom_point(): points have no linetype, so ggplot2
# ignores it (with an "unknown parameters" warning).
plot <- ggplot() +
  geom_point(
    data = df,
    aes(x = yr, y = no, group = lval, color = lval),
    size = 1
  ) +
  geom_line(
    data = df_mn,
    aes(x = yr, y = meanno, color = lval, group = lval),
    size = 1, lty = "solid"
  )
plot
答案 0 :(得分:2)
我假设您在追求可变性的最小值和最大值。我可能错了。
library(tidyverse)

# Collapse to one row per (yr, lval): the mean drives the line, and the
# observed minimum and maximum bound the shaded ribbon. summarise() is used
# rather than mutate() so each summary value is not repeated once per
# original row. Columns are named lower/upper to avoid shadowing min()/max().
df %>%
  group_by(yr, lval) %>%
  summarise(
    value = mean(no),
    lower = min(no),
    upper = max(no)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = yr, y = value, group = lval, fill = lval)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  geom_line(aes(color = lval))
我认为只要在上面的代码末尾再添加下面这一行,就可以把它们分别绘制在不同的分面(facet)中,效果会更好
# Fragment: append to the end of the ggplot chain above so that each `lval`
# is drawn in its own panel row.
+facet_grid(lval~.)
答案 1 :(得分:2)
# Raw observations as points plus a smoothed trend through the yearly means.
# `se` is only an on/off switch for drawing the confidence band; the band's
# confidence level is set by `level` (default 0.95), so the narrow band is
# requested through `level`, not `se`.
ggplot() +
  geom_point(
    data = df,
    aes(x = yr, y = no, group = lval, color = lval),
    size = 1
  ) +
  geom_smooth(
    data = df_mn,
    aes(x = yr, y = meanno, color = lval, group = lval, fill = lval),
    se = TRUE, level = 0.05
  )
它看起来并不像你发布的样本那么漂亮,但那是因为数据中的回归线重叠很多,你的样本量在你想要的图中非常小,这使得变化非常大。
置信水平由 `level` 参数控制,默认值为 `level = 0.95`(行业标准);`se` 只是决定是否绘制置信带的逻辑开关(`TRUE`/`FALSE`),因此应设置 `level = 0.05` 而不是 `se = 0.05`。`level` 设置得越低,阴影就越贴合(尽管更紧密的拟合会使您的数据实际落在阴影内的准确性/置信度低得多)。
另外,如果你真的希望它看起来像你的样本图,你可以消除+ geom_point()行。
答案 2 :(得分:1)
使用geom_line
代表您的平均值,geom_ribbon
代表您的CI。
例如,下面的示例来自 the excellent reference material(优秀的参考资料):
# Yearly Lake Huron levels from the built-in `LakeHuron` time series,
# reshaped into a plain data frame for plotting.
huron <- data.frame(
  year = 1875:1972,
  level = as.vector(LakeHuron)
)

# Grey band of +/- 1 around the level, with the level itself drawn on top.
ggplot(data = huron, mapping = aes(x = year)) +
  geom_ribbon(
    mapping = aes(ymin = level - 1, ymax = level + 1),
    fill = "grey70"
  ) +
  geom_line(mapping = aes(y = level))
如有必要,您可以将不同的数据集传递给每个geom
。