使用此代码
# Stacked histogram of run distances in 1 km bins, filled by pace category,
# with every stacked segment labelled by the number of runs it contains.
ggplot(dfrunning, aes(x = distance / 1000)) +
  geom_histogram(aes(fill = catpace), binwidth = 1) +
  stat_bin(
    aes(label = ..count.., group = catpace),
    binwidth = 1, geom = "text", colour = "white", size = 3.5,
    # vjust = 0.5 centres each label vertically inside its own segment.
    position = position_stack(vjust = 0.5)
  ) +
  # The x aesthetic is distance / 1000 (km), so the break sequence has to be
  # built in km too; using the raw metre value generated thousands of breaks
  # far outside the plotted range.
  scale_x_continuous(
    breaks = seq(0, max(dfrunning$distance, na.rm = TRUE) / 1000, 1)
  ) +
  labs(
    title = "Running distribution",
    x = "Distance in km",
    y = "Count",
    fill = "Pace in sec/km"
  )
我希望不显示每个分箱(bin)的计数,而是将每个分箱缩放到 100%。相应地,填充区域和文本标签也应按比例调整。数据框如下所示:
> dfrunning
# A tibble: 2,201 x 11
date time type distance duration paceInMin paceInSec latitude longitude catpace catdistance
<date> <time> <chr> <dbl> <dbl> <time> <int> <dbl> <dbl> <fct> <fct>
1 2012-04-16 10:24 running 13680 4192 05:06 306 50.8 6.10 (300,330] (1.3e+04,1.4e+04]
2 2012-04-18 10:47 running 7239 2115 04:52 292 50.8 6.10 (270,300] (7e+03,8e+03]
3 2012-04-22 14:09 running 28536 10571 06:10 370 50.8 6.10 (360,1e+04] (2.8e+04,2.9e+04]
4 2012-05-05 13:11 running 16168 7308 07:31 451 50.7 6.08 (360,1e+04] (1.6e+04,1.7e+04]
5 2012-05-06 13:39 running 25033 9180 06:06 366 50.8 6.10 (360,1e+04] (2.5e+04,2.6e+04]
IMO,此问题与“ Create stacked barplot where each stack is scaled to sum to 100%”不同,因为我没有明确显示每个组的数量。
编辑:正如jaySf所指出的,这是dput(head(dfrunning,50))
structure(list(date = structure(c(15446, 15448, 15452, 15465,
15466, 15483, 15506, 15506, 15561, 15566, 15566, 15591, 15598,
15599, 15602, 15605, 15606, 15608, 15611, 15612, 15613, 15614,
15615, 15616, 15617, 15618, 15618, 15619, 15747, 15621, 15621,
15622, 15623, 15627, 15752, 15769, 15770, 15772, 15774, 15775,
15776, 15778, 15780, 15781, 15782, 15782, 15783, 15783, 15785,
15785), class = "Date"), time = structure(c(37475, 38822, 50949,
47508, 49193, 55739, 56611, 59442, 56185, 69657, 72709, 36513,
64961, 51622, 49059, 59999, 50660, 72043, 65558, 41359, 38752,
68144, 70312, 68611, 64509, 61189, 68135, 34764, 63827, 59209,
69285, 69202, 69029, 41600, 31455, 61002, 61247, 58883, 47413,
63764, 64603, 60993, 34642, 37138, 62160, 65013, 61298, 63556,
61877, 65543), class = c("hms", "difftime"), units = "secs"),
type = c("running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running", "running", "running", "running",
"running", "running", "running"), distance = c(13680, 7238.54607310699,
28535.7961841139, 16168.3259995435, 25033, 10714.1336730768,
2363.75157921817, 8432.38615603382, 13261.9315631379, 14914.0942764589,
4064.49780742219, 8958.01394358889, 8416.04241820714, 27086.4898199381,
8454.4109033314, 13078.5350969731, 9976.05213811295, 7522.5914405498,
20333.7962161682, 13657.8724960625, 19632.1287324509, 19161.0107979676,
10175, 18363.9547260094, 45.9799995422363, 17249.8509124987,
19079.0453104679, 9720.46644444582, 13963.9852885433, 19564.2037933423,
9233.03482250782, 14047.3764062267, 17000.9929225885, 19796.3029324504,
7986.10636548276, 16579.832919954, 9793.81660451401, 11942.1530615798,
19049.4113915166, 8852.04351847768, 7891.32210952351, 5995.21855763869,
1301.91869595747, 13996.0654474524, 2100.09008789062, 9869.70660238926,
2028.71832491649, 7433.78777880617, 15151.3936450139, 11741.0316277532
), duration = c(4192, 2115, 10571, 7308, 9180, 3292, 760,
2640, 4436, 6646, 1371, 3405, 2438, 8477, 2588, 3968, 3271,
2826, 5652, 4330, 6410, 6255, 3682, 6193, 13, 83077, 6689,
3954, 4963, 106805, 2846, 6340, 5348, 6123, 3605, 88125,
3166, 3518, 14506, 3118, 2780, 86832, 633, 4388, 662, 2796,
581, 2191, 86195, 3629), paceInMin = structure(c(18360, 17520,
22200, 27060, 21960, 18420, 19260, 18780, 20040, 26700, 20220,
22800, 17340, 18720, 18360, 18180, 19620, 22500, 16620, 19020,
19560, 19560, 21660, 20220, 16920, 72960, 21000, 24360, 21300,
NA, 18480, 27060, 18840, 18540, 27060, NA, 19380, 17640,
45660, 21120, 21120, 4980, 29160, 18780, 18900, 16980, 17160,
17640, NA, 18540), class = c("hms", "difftime"), units = "secs"),
paceInSec = c(306L, 292L, 370L, 451L, 366L, 307L, 321L, 313L,
334L, 445L, 337L, 380L, 289L, 312L, 306L, 303L, 327L, 375L,
277L, 317L, 326L, 326L, 361L, 337L, 282L, 1216L, 350L, 406L,
355L, 1859L, 308L, 451L, 314L, 309L, 451L, 1715L, 323L, 294L,
761L, 352L, 352L, 83L, 486L, 313L, 315L, 283L, 286L, 294L,
2088L, 309L), latitude = c(50.78088236, 50.78210075, 50.77468025,
50.74850298, 50.77482007, 50.78003285, 50.78238624, 50.78864819,
51.33017446, 50.77988517, 50.74921084, 51.32995008, 51.32999836,
51.33013314, 51.32992619, 48.71562467, 48.71430603, 48.7143813,
48.714214, 48.71429463, 48.717048, 48.71553859, 48.7142808,
48.71094162, 48.71536257, 48.71526475, 48.71454718, 48.71594159,
48.7144186, 48.71531186, 48.71480333, 48.70992154, 48.70286641,
48.71461113, 48.71444383, 48.71446428, 48.7146807, 48.71469336,
48.72847723, 48.71530138, 48.70127678, 48.70118726, 48.7013119,
48.73173444, 48.71487223, 48.72272649, 48.71518764, 48.72266807,
48.71527171, 48.71515763), longitude = c(6.09665447, 6.09782727,
6.09629815, 6.08372496, 6.09631483, 6.10046044, 6.09614795,
6.07149736, 7.86466297, 6.10240906, 6.08444153, 7.86465433,
7.86348933, 7.86398814, 7.86355178, 11.48824135, 11.48822618,
11.48825015, 11.48808285, 11.48821939, 11.4908933, 11.49234362,
11.48826499, 11.49619108, 11.49016634, 11.49094141, 11.48822375,
11.49247371, 11.48828653, 11.48861426, 11.48820028, 11.49726229,
11.5088289, 11.48817371, 11.48823674, 11.48811696, 11.48788319,
11.48822375, 11.56643034, 11.48987599, 11.50984076, 11.50986775,
11.50983674, 11.56601745, 11.48822492, 11.51100417, 11.48894744,
11.51097458, 11.48799551, 11.48825208), catpace = c("(300,330]",
"(270,300]", "(360,1e+04]", "(360,1e+04]", "(360,1e+04]",
"(300,330]", "(300,330]", "(300,330]", "(330,360]", "(360,1e+04]",
"(330,360]", "(360,1e+04]", "(270,300]", "(300,330]", "(300,330]",
"(300,330]", "(300,330]", "(360,1e+04]", "(270,300]", "(300,330]",
"(300,330]", "(300,330]", "(360,1e+04]", "(330,360]", "(270,300]",
"(360,1e+04]", "(330,360]", "(360,1e+04]", "(330,360]", "(360,1e+04]",
"(300,330]", "(360,1e+04]", "(300,330]", "(300,330]", "(360,1e+04]",
"(360,1e+04]", "(300,330]", "(270,300]", "(360,1e+04]", "(330,360]",
"(330,360]", "(0,180]", "(360,1e+04]", "(300,330]", "(300,330]",
"(270,300]", "(270,300]", "(270,300]", "(360,1e+04]", "(300,330]"
), catdistance = c("(1.3e+04,1.4e+04]", "(7e+03,8e+03]",
"(2.8e+04,2.9e+04]", "(1.6e+04,1.7e+04]", "(2.5e+04,2.6e+04]",
"(1e+04,1.1e+04]", "(2e+03,3e+03]", "(8e+03,9e+03]", "(1.3e+04,1.4e+04]",
"(1.4e+04,1.5e+04]", "(4e+03,5e+03]", "(8e+03,9e+03]", "(8e+03,9e+03]",
"(2.7e+04,2.8e+04]", "(8e+03,9e+03]", "(1.3e+04,1.4e+04]",
"(9e+03,1e+04]", "(7e+03,8e+03]", "(2e+04,2.1e+04]", "(1.3e+04,1.4e+04]",
"(1.9e+04,2e+04]", "(1.9e+04,2e+04]", "(1e+04,1.1e+04]",
"(1.8e+04,1.9e+04]", "(0,1e+03]", "(1.7e+04,1.8e+04]", "(1.9e+04,2e+04]",
"(9e+03,1e+04]", "(1.3e+04,1.4e+04]", "(1.9e+04,2e+04]",
"(9e+03,1e+04]", "(1.4e+04,1.5e+04]", "(1.7e+04,1.8e+04]",
"(1.9e+04,2e+04]", "(7e+03,8e+03]", "(1.6e+04,1.7e+04]",
"(9e+03,1e+04]", "(1.1e+04,1.2e+04]", "(1.9e+04,2e+04]",
"(8e+03,9e+03]", "(7e+03,8e+03]", "(5e+03,6e+03]", "(1e+03,2e+03]",
"(1.3e+04,1.4e+04]", "(2e+03,3e+03]", "(9e+03,1e+04]", "(2e+03,3e+03]",
"(7e+03,8e+03]", "(1.5e+04,1.6e+04]", "(1.1e+04,1.2e+04]"
)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"
))
答案 0 :(得分:1)
这可以通过使用 position = "fill" 参数来完成。为了让文本标签的位置正确,您还需要更改 stat_bin 层中的 position 参数。下面的代码应该可以实现:
library(scales) # provides percent() for the percentage y-axis labels

# Same stacked histogram as in the question, but every 1 km bin is rescaled
# to a total height of 1 (100%) via position = "fill". The text labels still
# show the raw run counts; only their vertical placement is rescaled.
ggplot(dfrunning, aes(x = distance / 1000)) +
  geom_histogram(aes(fill = catpace), binwidth = 1, position = "fill") +
  stat_bin(
    aes(label = ..count.., group = catpace),
    binwidth = 1, geom = "text", colour = "white", size = 3.5,
    # position_fill() must mirror the histogram's "fill" position so that the
    # labels land in the middle (vjust = 0.5) of the rescaled segments.
    position = position_fill(vjust = 0.5)
  ) +
  # The x aesthetic is distance / 1000 (km), so the breaks are built in km too.
  scale_x_continuous(
    breaks = seq(0, max(dfrunning$distance, na.rm = TRUE) / 1000, 1)
  ) +
  # Pass the labelling function itself: a fixed vector of five labels only
  # works when the scale happens to compute exactly five matching breaks.
  scale_y_continuous(labels = percent) +
  labs(
    title = "Running distribution",
    x = "Distance in km",
    y = "Percentage",
    fill = "Pace in sec/km"
  )
这将生成如下图形:
编辑:以百分比标签代替计数
我不知道如何用上述方法在图中显示百分比标签。这也许无法实现,也可能只是我不知道方法而已。无论如何,如果您希望在图表中显示百分比标签,建议您先对数据进行预处理,例如:
library(dplyr) # provides %>%, mutate(), count(), group_by(), ungroup()

# Pre-aggregate the runs so that the percentage share of each pace category
# within every (rounded) kilometre is available as a column, then draw the
# shares as stacked bars with a percentage label inside each segment.
dfrunning %>%
  mutate(dist = round(distance / 1000)) %>%
  # One row per (dist, catpace) combination; the run count lands in column `n`.
  count(dist, catpace) %>%
  group_by(dist) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ungroup() %>%
  ggplot(aes(x = dist, y = pct)) +
  geom_col(aes(fill = catpace)) +
  geom_text(
    # group = catpace makes the labels stack in the same order as the bars;
    # without it the implicit group is derived from the label text and the
    # labels can end up in the wrong segment.
    aes(label = paste0(round(pct, 0), "%"), group = catpace),
    colour = "white", size = 3.5, angle = 90,
    position = position_stack(vjust = 0.5)
  ) +
  labs(
    title = "Running distribution",
    x = "Distance in km",
    y = "Percentage",
    fill = "Pace in sec/km"
  )
这将生成如下图形(我使用 angle 参数旋转了标签,以便标签能放进柱内):