R ggplot2 geom_area()无效

时间:2015-08-27 16:08:44

标签: r debugging ggplot2

我有一个如下所示的数据集:

head():

                          ViolDesc RESULTDTTM count cumsum
1  Adequate Handwashing/Where/When/How 2007-03-05     1      1
2  Adequate Handwashing/Where/When/How 2007-07-09     1      2
3  Adequate Handwashing/Where/When/How 2007-07-16     1      3
4  Adequate Handwashing/Where/When/How 2007-07-17     3      6
5  Adequate Handwashing/Where/When/How 2007-07-23     3      9
6  Adequate Handwashing/Where/When/How 2007-07-27     1     10
7  Adequate Handwashing/Where/When/How 2007-07-30     1     11
8  Adequate Handwashing/Where/When/How 2007-07-31     1     12
9  Adequate Handwashing/Where/When/How 2007-08-07     1     13
10 Adequate Handwashing/Where/When/How 2007-08-13     2     15

前50行的dput()输出:

structure(list(ViolDesc = c("Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How"), RESULTDTTM = structure(c(1173052800, 
1183939200, 1184544000, 1184630400, 1185148800, 1185494400, 1185753600, 
1185840000, 1186444800, 1186963200, 1187049600, 1187136000, 1187654400, 
1187827200, 1187913600, 1188172800, 1188432000, 1188864000, 1188950400, 
1189036800, 1189468800, 1189641600, 1189987200, 1190246400, 1190332800, 
1190764800, 1190851200, 1190937600, 1191283200, 1191369600, 1191542400, 
1192406400, 1192492800, 1192665600, 1193011200, 1193270400, 1193702400, 
1193788800, 1193875200, 1193961600, 1194220800, 1194307200, 1194393600, 
1194480000, 1194566400, 1194912000, 1195084800, 1195171200, 1195430400, 
1195516800), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    count = c(1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 3L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 2L, 
    2L, 1L, 1L, 3L, 3L, 1L, 1L, 2L), cumsum = c(1L, 2L, 3L, 6L, 
    9L, 10L, 11L, 12L, 13L, 15L, 17L, 18L, 19L, 22L, 23L, 24L, 
    27L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 
    40L, 42L, 43L, 44L, 47L, 48L, 49L, 50L, 52L, 53L, 55L, 58L, 
    60L, 62L, 64L, 65L, 66L, 69L, 72L, 73L, 74L, 76L)), .Names = c("ViolDesc", 
"RESULTDTTM", "count", "cumsum"), row.names = c(NA, 50L), class = "data.frame")

最后50行的dput()输出:

structure(list(ViolDesc = c("Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's."
), RESULTDTTM = structure(c(1342656000, 1344556800, 1345680000, 
1350518400, 1350950400, 1351036800, 1354147200, 1360022400, 1360195200, 
1363305600, 1365638400, 1366070400, 1369872000, 1369958400, 1376265600, 
1376524800, 1383696000, 1385942400, 1388016000, 1389744000, 1396224000, 
1400803200, 1401235200, 1403568000, 1405641600, 1406505600, 1407283200, 
1407801600, 1410739200, 1411603200, 1411689600, 1413849600, 1414368000, 
1415145600, 1416787200, 1417651200, 1418860800, 1420675200, 1424908800, 
1425254400, 1425427200, 1425600000, 1426118400, 1428451200, 1429056000, 
1430438400, 1430870400, 1433376000, 1438214400, 1438819200), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), count = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), cumsum = c(92L, 
93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 
105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 
116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 
127L, 128L, 129L, 130L, 131L, 132L, 134L, 135L, 136L, 137L, 138L, 
139L, 140L, 142L, 143L)), .Names = c("ViolDesc", "RESULTDTTM", 
"count", "cumsum"), row.names = 15178:15227, class = "data.frame")

列类:

> sapply(r, class)
$ViolDesc
[1] "character"

$RESULTDTTM
[1] "POSIXct" "POSIXt" 

$count
[1] "integer"

$cumsum
[1] "integer"

15种不同的ViolDesc

如果我运行 ggplot(data=r, aes(x=RESULTDTTM, y=cumsum)) + geom_line(aes(color=ViolDesc)) %>% print

我得到:

enter image description here

我只想把这个折线图改成堆积面积图。但是,当我这样做时:ggplot(data=r, aes(x=RESULTDTTM, y=cumsum, fill=ViolDesc)) + geom_area() %>% print,我得到:

enter image description here

如你所见,图形变得参差不齐、满是尖峰,完全不像geom_line()的结果。有人知道为什么吗?代码如此简单,我觉得一定是数据框本身有问题?

1 个答案:

答案 0 :(得分:2)

您的数据中,各个变量的x值并不总是一致。当某个变量在某个时间点没有值、而另一个变量在该时间点有测量值时,ggplot会假定缺失的那个变量在该时间点的值为0。

我们可以在每个时间点上用每个变量的累积最大值进行插值来解决这个问题。

以下是使用dplyr和tidyr执行此操作的方式。

首先加载包:

library(tidyr)
library(dplyr)
library(ggplot2)

然后读入数据并修复日期格式:

# Load the sample data; RESULTDTTM is parsed below as month/day/year text.
z <- read.csv("sample.csv")
# Parse the dates in UTC rather than the session's local time zone, so the
# result matches the question's data (tzone = "UTC") on any machine and
# midnight can never land in a local daylight-saving gap.
z$RESULTDTTM <- as.POSIXct(z$RESULTDTTM, format = "%m/%d/%Y", tz = "UTC")

现在我们想把数据变成"宽"格式——每个时间点一行,并用每列的累积最大值填充各个时间点,然后再把它变回"长"格式以便绘图:

# Give every series a value at every time point: go wide, collapse duplicate
# time points, carry the running maximum forward, then go long again.

# Wide format: one column per ViolDesc holding its cumsum (NA where absent).
wide <- spread(z, ViolDesc, cumsum)

# One row per time point. Rows sharing a time point are summed column by
# column with NAs ignored (some time points had two measures); `count` is
# excluded from the summary and so is not carried forward.
per_time <- summarise_each(
  group_by(wide, RESULTDTTM),
  funs(sum(., na.rm = TRUE)),
  -RESULTDTTM, -count
)

# Cumulative maximum down each series column fills the gaps left by the sum.
filled <- mutate_each(per_time, funs(cummax(.)), -RESULTDTTM)

# Long format again (RESULTDTTM, var, value), ready for plotting.
z <- gather(filled, var, value, -RESULTDTTM)

现在我们可以将其绘制出来:

# Stacked area chart of `value` over time, one filled and outlined band per
# series in `var`.
ggplot(z, aes(RESULTDTTM, value)) +
  geom_area(aes(fill = var, colour = var), position = "stack")

这给了我这张图: enter image description here