ggplot折线图,并为超过某一分位数的时段添加条带

时间:2017-11-19 10:43:25

标签: r ggplot2 quantile

我有这个ggplot线图(实际上有两个y轴): enter image description here

对于指数曲线(红线),我想为指数超过0.9分位数(即第90百分位数)的每个时间段添加灰色水平条(也就是指数处于高位的时段)。粗略估计,这可能对应三个条带:大约在2002-2003年、2008-2010年和2011-2012年之间。

我设想的灰色条类似于这个问题中的效果:Adding Shaded region to histogram between quintiles(即从 y = 0 一直向上延伸到 y 的最大值)。

我也尝试借鉴那里的做法,但无法将其应用到我的具体问题上。用ggplot究竟能否实现这一点?

以下是我目前用来创建该图的代码:

library(ggplot2)
# Line chart of Count and Index against Date, built as a single chain.
# Both series are drawn on the same y scale; the secondary axis only
# mirrors it (identity transform `. / 1`) under the label "Index".
p2 <- ggplot(data, aes(x = Date)) +
  geom_line(aes(y = Count, colour = "Count"), size = 1) +
  geom_line(aes(y = Index, colour = "Index"), size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 1, name = "Index")) +
  # Two colours for the two legend keys "Count" and "Index".
  scale_colour_manual(values = c("#56B4E9", "#CC6666")) +
  labs(x = "", y = "Count", colour = "") +
  theme(
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 20, hjust = 0.5)
  )
# Print the finished plot.
p2

以下是用于重现问题的数据:

   # Reproducible example data (looks like dput() output): a 205-row data frame
   # with columns Date (POSIXct, tzone "UTC", given as epoch seconds), Count and
   # Index. The plotting code in this file reads these three columns.
   data<-structure(list(Date = structure(c(959817600, 962409600, 965088000, 
967766400, 970358400, 973036800, 975628800, 978307200, 980985600, 
983404800, 986083200, 988675200, 991353600, 993945600, 996624000, 
999302400, 1001894400, 1004572800, 1007164800, 1009843200, 1012521600, 
1014940800, 1017619200, 1020211200, 1022889600, 1025481600, 1028160000, 
1030838400, 1033430400, 1036108800, 1038700800, 1041379200, 1044057600, 
1046476800, 1049155200, 1051747200, 1054425600, 1057017600, 1059696000, 
1062374400, 1064966400, 1067644800, 1070236800, 1072915200, 1075593600, 
1078099200, 1080777600, 1083369600, 1086048000, 1088640000, 1091318400, 
1093996800, 1096588800, 1099267200, 1101859200, 1104537600, 1107216000, 
1109635200, 1112313600, 1114905600, 1117584000, 1120176000, 1122854400, 
1125532800, 1128124800, 1130803200, 1133395200, 1136073600, 1138752000, 
1141171200, 1143849600, 1146441600, 1149120000, 1151712000, 1154390400, 
1157068800, 1159660800, 1162339200, 1164931200, 1167609600, 1170288000, 
1172707200, 1175385600, 1177977600, 1180656000, 1183248000, 1185926400, 
1188604800, 1191196800, 1193875200, 1196467200, 1199145600, 1201824000, 
1204329600, 1207008000, 1209600000, 1212278400, 1214870400, 1217548800, 
1220227200, 1222819200, 1225497600, 1228089600, 1230768000, 1233446400, 
1235865600, 1238544000, 1241136000, 1243814400, 1246406400, 1249084800, 
1251763200, 1254355200, 1257033600, 1259625600, 1262304000, 1264982400, 
1267401600, 1270080000, 1272672000, 1275350400, 1277942400, 1280620800, 
1283299200, 1285891200, 1288569600, 1291161600, 1293840000, 1296518400, 
1298937600, 1301616000, 1304208000, 1306886400, 1309478400, 1312156800, 
1314835200, 1317427200, 1320105600, 1322697600, 1325376000, 1328054400, 
1330560000, 1333238400, 1335830400, 1338508800, 1341100800, 1343779200, 
1346457600, 1349049600, 1351728000, 1354320000, 1356998400, 1359676800, 
1362096000, 1364774400, 1367366400, 1370044800, 1372636800, 1375315200, 
1377993600, 1380585600, 1383264000, 1385856000, 1388534400, 1391212800, 
1393632000, 1396310400, 1398902400, 1401580800, 1404172800, 1406851200, 
1409529600, 1412121600, 1414800000, 1417392000, 1420070400, 1422748800, 
1425168000, 1427846400, 1430438400, 1433116800, 1435708800, 1438387200, 
1441065600, 1443657600, 1446336000, 1448928000, 1451606400, 1454284800, 
1456790400, 1459468800, 1462060800, 1464739200, 1467331200, 1470009600, 
1472688000, 1475280000, 1477958400, 1480550400, 1483228800, 1485907200, 
1488326400, 1491004800, 1493596800, 1496275200), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), Count = c(35, 42, 67, 25, 23, 22, 
8, 3, 9, 8, 3, 10, 15, 8, 5, 0, 8, 13, 12, 1, 9, 7, 8, 15, 9, 
6, 1, 0, 10, 10, 6, 0, 4, 1, 0, 2, 3, 7, 7, 6, 12, 16, 22, 5, 
19, 16, 12, 15, 28, 28, 17, 15, 31, 16, 30, 9, 25, 7, 10, 14, 
27, 23, 35, 17, 17, 20, 22, 16, 22, 17, 21, 17, 20, 10, 9, 15, 
26, 28, 36, 10, 18, 16, 17, 34, 24, 20, 16, 7, 33, 38, 23, 12, 
7, 5, 5, 7, 3, 4, 3, 0, 0, 1, 0, 0, 1, 0, 3, 3, 6, 2, 5, 8, 14, 
12, 7, 6, 8, 13, 14, 14, 11, 11, 12, 7, 22, 21, 21, 7, 19, 8, 
21, 24, 13, 16, 4, 0, 3, 17, 11, 3, 19, 22, 18, 12, 4, 12, 7, 
8, 24, 9, 8, 11, 9, 14, 14, 28, 19, 19, 20, 22, 32, 29, 13, 18, 
23, 29, 29, 21, 31, 36, 14, 14, 32, 26, 15, 10, 15, 5, 14, 19, 
26, 22, 10, 6, 16, 12, 2, 0, 5, 4, 7, 15, 7, 13, 8, 17, 21, 2, 
7, 9, 8, 13, 20, 20, 21), Index = c(21.54, 19.9216666666667, 
18.0886956521739, 19.6392857142857, 25.2, 26.4043181818182, 26.6071428571429, 
25.0708695652174, 23.41125, 28.4968181818182, 28.0430952380952, 
22.875652173913, 20.9404761904762, 22.1884090909091, 21.8617391304348, 
34.92825, 32.7213043478261, 26.4986363636364, 23.6597619047619, 
22.3317391304348, 22.902, 18.9411904761905, 19.9004545454545, 
20.0667391304348, 25.271, 33.7980434782609, 33.7427272727273, 
37.5835714285714, 35.2430434782609, 28.1369047619048, 28.1522727272727, 
27.3721739130435, 32.20075, 30.6342857142857, 23.8888636363636, 
20.2095454545455, 20.3623809523809, 19.1858695652174, 19.2742857142857, 
19.5002272727273, 18.0234782608696, 17.34475, 16.8363043478261, 
16.1540909090909, 15.973, 17.6873913043478, 15.7018181818182, 
17.6190476190476, 15.3661363636364, 15.5097727272727, 16.6840909090909, 
14.0768181818182, 14.9738095238095, 13.5395454545455, 12.4258695652174, 
13.3909523809524, 11.7315, 13.1463043478261, 14.4590476190476, 
13.9109090909091, 11.8686363636364, 11.072380952381, 12.9513043478261, 
12.6563636363636, 14.937619047619, 12.0918181818182, 11.2445454545455, 
11.9952272727273, 12.458, 11.6939130434783, 11.879, 14.5417391304348, 
16.9186363636364, 15.2438095238095, 13.3517391304348, 12.1897619047619, 
11.3063636363636, 10.8002272727273, 10.9809523809524, 11.0823913043478, 
11.104, 15.1627272727273, 12.945, 13.3036956521739, 14.947619047619, 
17.1790909090909, 25.0260869565217, 22.243, 19.115652173913, 
25.6115909090909, 21.5071428571429, 25.8291304347826, 25.4488095238095, 
27.0592857142857, 21.5627272727273, 18.3620454545455, 22.1104761904762, 
24.3630434782609, 20.6957142857143, 29.8331818181818, 61.1773913043478, 
62.2625, 52.0306521739131, 44.7561363636364, 45.58175, 44.7954545454545, 
38.0231818181818, 31.9611904761905, 29.1404545454545, 26.2632608695652, 
25.3371428571429, 24.95, 24.2522727272727, 23.7276190476191, 
21.1726086956522, 20.5154761904762, 22.5375, 17.7673913043478, 
17.4161363636364, 32.0188095238095, 29.9163636363636, 25.7615909090909, 
24.7459090909091, 22.5193181818182, 20.3733333333333, 20.1318181818182, 
17.5478260869565, 17.2369047619048, 17.48925, 20.7234782608696, 
16.1957142857143, 16.8570454545455, 19.1531818181818, 19.0721428571429, 
35.0291304347826, 36.4813636363636, 32.8290476190476, 32.0456818181818, 
24.8781818181818, 20.4227272727273, 18.3997619047619, 16.1672727272727, 
17.8197619047619, 21.0189130434783, 21.1309523809524, 17.5431818181818, 
15.6895652173913, 15.40675, 16.4439130434783, 16.6347727272727, 
17.3714285714286, 13.5828260869565, 13.98825, 13.0366666666667, 
13.9672727272727, 13.5258695652174, 17.2715, 14.0428260869565, 
14.21, 14.7928571428571, 15.4078260869565, 12.9438095238095, 
14.1111363636364, 14.1604347826087, 15.3825, 14.8366666666667, 
14.1575, 12.4279545454545, 11.5414285714286, 12.2323913043478, 
13.4919047619048, 13.4052272727273, 18.0586956521739, 13.3775, 
16.2115217391304, 19.1502272727273, 15.87125, 14.8127272727273, 
13.5497727272727, 13.3269047619048, 14.3395454545455, 14.4634782608696, 
19.4280952380952, 24.4668181818182, 16.7895454545455, 16.1597619047619, 
17.9515217391304, 23.6490476190476, 22.6235714285714, 15.8117391304348, 
14.3004761904762, 14.7984090909091, 17.7745454545455, 13.2545238095238, 
12.3995652173913, 14.1186363636364, 14.5852380952381, 15.1065909090909, 
12.4379545454545, 11.6893181818182, 11.5305, 11.8978260869565, 
13.245, 10.8289130434783, 10.5136363636364)), .Names = c("Date", 
"Count", "Index"), row.names = c(NA, -205L), class = "data.frame")

1 个答案:

答案 0 :(得分:2)

这是一种可能的方法:首先标记出应该绘制条带的日期(以下代码假设问题中的数据框已赋值给df,例如 df <- data):

# Copy the input and flag each row: quant is 1 where Index lies above the
# 0.9 quantile of Index, otherwise 0.
# NOTE(review): `df` is not defined in the visible code; presumably it holds
# the question's data frame (columns Date, Count, Index) — confirm.
df3 <- df
df3[["quant"]] <- as.numeric(df3[["Index"]] > quantile(df3[["Index"]], probs = 0.9))

然后按连续相同取值的游程(run)对数据进行分组。

# Give every run of consecutive identical `quant` values its own group id
# (1, 2, 3, ...), so each contiguous stretch can be handled separately.
# rle() is evaluated once, and seq_along() stays correct for an empty input,
# where 1:length(...) would produce c(1, 0) and make rep() fail.
run_lengths <- rle(df3$quant)$lengths
df3$group <- rep(seq_along(run_lengths), times = run_lengths)

或使用tidyverse:

library(tidyverse)
# Tidyverse version of the same two steps, stored in df2:
#   quant - 1 where Index exceeds the 0.9 quantile of Index, else 0
#   group - id of the run of consecutive identical quant values
# NOTE(review): `df` is not defined in the visible code; presumably it holds
# the question's data frame — confirm.
df2 <- df %>%
  mutate(
    quant = ifelse(Index > quantile(Index, probs = 0.9), 1, 0),
    # Inside with(), `lengths` is the run-length component of the rle result.
    # rle() runs once, and seq_along() avoids the 1:0 trap on empty input.
    group = with(rle(quant), rep(seq_along(lengths), times = lengths))
  )

在geom_ribbon中只使用数据的子集,从而仅绘制quant == 1的日期,并使用通过rle调用得到的分组来限定每条色带的范围。

# Same two-line chart, with shaded bands behind the lines for the rows
# flagged quant == 1.
ggplot(df2, aes(x = Date)) +
  # Ribbon layer comes first so the lines are drawn on top of it. It inherits
  # x = Date from the plot; `group` keeps separate runs from being joined, and
  # ymax = Inf stretches each band upward from y = 0.
  # NOTE(review): a run consisting of a single date may have no visible
  # width as a ribbon — verify on the rendered plot.
  geom_ribbon(
    data = subset(df2, quant == 1),
    mapping = aes(ymin = 0, ymax = Inf, group = group),
    alpha = 0.2
  ) +
  geom_line(aes(y = Count, colour = "Count"), size = 1) +
  geom_line(aes(y = Index, colour = "Index"), size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 1, name = "Index")) +
  scale_colour_manual(values = c("#56B4E9", "#CC6666")) +
  labs(x = "", y = "Count", colour = "") +
  theme(
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 20, hjust = 0.5)
  )

enter image description here