ggplot + boxplot:按组绘制时间序列值的箱线图

时间:2015-08-13 15:29:27

标签: r ggplot2

这是 this question 的一个变体——我无法把数据结构整理成正确的维度,从而画出数值正确的箱线图。

我想要的效果:x 轴为小时,y 轴为区域,每个区域都有一个箱线图,用来显示该区域每小时收入的分布。

我目前能得到的最接近的结果如下,但它并不正确。如何创建一个以两个因子(其中一个是时间序列)作为坐标轴、并由数值分布来填充的箱线图?

数据:

# Sample data (dput output): a data.frame with 33 rows and three columns.
#   location    - factor with levels "east", "north", "west"; 11 rows each,
#                 stored in the order north, west, east.
#   hour        - POSIXlt date-times: hours 0 through 10 of mday 13,
#                 mon 7 (zero-based, i.e. August), year 115 (2015), zone "CDT".
#   hour_income - integer income value for that location and hour.
# Each location/hour combination occurs exactly once.
regions <- structure(list(location = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("east", 
"north", "west"), class = "factor"), hour = structure(list(sec = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L), mday = c(13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L), mon = c(7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), year = c(115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 
115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 
115L, 115L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(224L, 224L, 224L, 224L, 
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 
224L, 224L, 224L, 224L, 224L, 224L, 224L), isdst = c(1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
    zone = c("CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", 
    "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", 
    "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", 
    "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT", "CDT"), 
    gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec", 
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", 
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), hour_income = c(67L, 
98L, 89L, 75L, 75L, 89L, 70L, 97L, 52L, 94L, 80L, 84L, 52L, 82L, 
81L, 93L, 85L, 94L, 64L, 90L, 54L, 60L, 97L, 100L, 57L, 63L, 
90L, 58L, 86L, 68L, 52L, 78L, 61L)), .Names = c("location", "hour", 
"hour_income"), row.names = c(NA, -33L), class = "data.frame")

和箱图

# Attempted plot: hour on the x axis, hourly income on the y axis, with the
# observations grouped by location.
ggplot(
  data = regions,
  mapping = aes(x = hour, y = hour_income, group = location)
) +
  geom_boxplot()

1 个答案:

答案 0 :(得分:0)

首先,我们将日期时间转换为字符格式,然后创建箱线图。

# Turn the POSIXlt timestamps into "HH:MM:SS" labels so that the hour acts as
# a discrete x axis, then draw one box per hour across all locations.
regions[["hour"]] <- strftime(regions[["hour"]], format = "%H:%M:%S")
ggplot(regions, aes(x = hour, y = hour_income)) +
  geom_boxplot()

enter image description here

但是,由于每个区域在每个小时只有一个观测值,当你尝试让箱线图同时按区域区分时,得到的只是一条条线段而不是箱体,这样的图意义不大:

# Same hourly plot, with the boxes split and coloured by location through the
# fill aesthetic.
ggplot(regions, aes(x = hour, y = hour_income, fill = location)) +
  geom_boxplot()

enter image description here