ggplot geom_bar按月分组并显示计数

时间:2018-03-10 10:44:02

标签: r ggplot2

我发现这个问题出乎意料地棘手。我想创建一个趋势条形图,根据日期时间列按月统计并显示观察数量。

一些数据:

# Sample data (looks like `dput()` output): a 100-row data.frame with two
# columns -- `id` (character) and `project_submitted_datetime` (POSIXct
# seconds since the epoch, tzone "UTC").
# NOTE(review): the snippets further down refer to this object as `df`; the
# assignment itself is not shown here.
structure(list(id = c("p036502", "p039565", "p233823", "p185307", 
"p013780", "p063374", "p103285", "p181781", "p114989", "p191410", 
"p030093", "p226941", "p225747", "p173555", "p055350", "p060293", 
"p199435", "p074849", "p232007", "p037127", "p230221", "p116615", 
"p106275", "p070918", "p144291", "p116102", "p070029", "p107356", 
"p031939", "p044085", "p081434", "p010969", "p156550", "p048313", 
"p139731", "p048657", "p234430", "p000139", "p240738", "p047586", 
"p006436", "p211511", "p220000", "p183686", "p045806", "p246581", 
"p228935", "p176012", "p229479", "p040831", "p091593", "p151958", 
"p123997", "p017606", "p009578", "p245173", "p130035", "p207223", 
"p069404", "p186381", "p056807", "p131333", "p164867", "p250930", 
"p051288", "p183614", "p141508", "p240150", "p253358", "p055462", 
"p100203", "p216335", "p215118", "p123326", "p216043", "p190639", 
"p035608", "p194533", "p005330", "p217173", "p243195", "p204162", 
"p088899", "p218286", "p024688", "p148544", "p061723", "p062414", 
"p150704", "p064902", "p160114", "p054727", "p233164", "p164497", 
"p164688", "p226536", "p037347", "p052732", "p254241", "p041330"
), project_submitted_datetime = structure(c(1479480359, 1493222248, 
1483311464, 1471016531, 1470474551, 1478340111, 1472603443, 1470230761, 
1473806157, 1474742339, 1489079773, 1473103712, 1481203503, 1479248019, 
1486043216, 1479492186, 1473029329, 1485382089, 1461751084, 1463859966, 
1471643202, 1475223157, 1468792461, 1466087276, 1477175132, 1472419257, 
1484329230, 1476213378, 1477941424, 1477399077, 1481059184, 1483645109, 
1490904308, 1472674295, 1484011440, 1485462104, 1472038851, 1470601996, 
1487232805, 1480968888, 1484044249, 1482944283, 1488401525, 1474729990, 
1470225344, 1475788577, 1482791082, 1464016378, 1488307956, 1476242919, 
1485345902, 1487935618, 1469702976, 1488271812, 1488463835, 1481566737, 
1491245287, 1484054316, 1485724308, 1474716986, 1479078128, 1487459508, 
1463426662, 1487970888, 1470611130, 1485939126, 1462535198, 1477697562, 
1490993867, 1492697645, 1472712448, 1482328987, 1490272369, 1464291065, 
1492533451, 1469229589, 1481555129, 1470575655, 1472198934, 1476538940, 
1472054115, 1487785453, 1481195539, 1485205528, 1465419012, 1488271923, 
1471527805, 1463708265, 1466429074, 1469121081, 1474506890, 1471146637, 
1486636925, 1485953226, 1480664635, 1464224006, 1474481164, 1477663071, 
1464264088, 1484220504), class = c("POSIXct", "POSIXt"), tzone = "UTC")), .Names = c("id", 
"project_submitted_datetime"), row.names = c(NA, 100L), class = "data.frame")

尝试:

library(tidyverse)

# Failing attempt from the question: `ggplot()` is given only the `aes()`
# mapping (no data frame), and `y = n` does not name a column of the sample
# data, whose columns are `id` and `project_submitted_datetime`.
ggplot(aes(x = project_submitted_datetime, y = n)) +
    geom_bar(stat = "identity")
  

错误:列y必须是1d原子向量或列表

我还尝试过 y = Count、y = count 等写法,都得到了类似的错误。

我知道我可以使用dplyr操作数据框:

# Count rows per month label (formatted with "%b-%Y") and draw one bar per
# label. `format()` yields character labels, so the discrete x axis is ordered
# alphabetically by label rather than chronologically.
df %>%
  group_by(month = format(project_submitted_datetime, "%b-%Y")) %>%
  summarise(Count = n()) %>%
  ggplot(mapping = aes(month, Count)) +
  geom_bar(stat = "identity") +
  # Rotate the month labels so they do not overlap.
  theme(axis.text.x = element_text(hjust = 0, angle = -90))

但是这样一来,图表上的日期没有按时间顺序排列。

我想知道有什么选择来实现这个目标?求一个趋势条形图,沿水平轴按月分组,每个条形是每个月的观察数量?

2 个答案:

答案 0 :(得分:3)

zoo 包提供了 yearmon 类(通过 as.yearmon() 转换),可以把日期转换为"月-年"格式。您可以根据自己的情况使用它。

library(tidyverse)
library(zoo)

# Count submissions per calendar month and draw one bar per month.
# `as.yearmon()` (zoo) collapses each timestamp to its year-month, and
# `as.factor()` turns those values into a discrete axis whose levels follow
# the sorted (chronological) year-month order.
df %>%
  mutate(yrmn = as.factor(as.yearmon(project_submitted_datetime))) %>%
  group_by(yrmn) %>%
  summarise(cnt = n()) %>%
  # Map the bare column: writing it as `(yrmn)` would leak the parentheses
  # into the default x-axis title.
  ggplot(aes(x = yrmn, y = cnt)) +
  geom_bar(stat = "identity") +
  # Rotate the month labels so they do not overlap.
  theme(axis.text.x = element_text(angle = -90, hjust = 0))

Here is a screenshot of the output and I have to add text for it to work

如果不使用 zoo,我是这样实现的(其中 month() 和 year() 来自 lubridate 包):

# Build one row per (year, month) with the number of submissions in `cnt`
# and a display label `yr_mn` such as "<month abbreviation> <year>".
# `month()` / `year()` come from lubridate; they are namespace-qualified so
# the snippet does not depend on lubridate being attached.
df1 <- df %>%
  mutate(
    mn = lubridate::month(project_submitted_datetime, label = TRUE),
    yr = lubridate::year(project_submitted_datetime)
  ) %>%
  group_by(yr, mn) %>%
  summarise(cnt = n()) %>%
  # `summarise()` only peels off the last grouping level; drop the remaining
  # `yr` grouping so the label factor is built over the whole table.
  ungroup() %>%
  # Rows are sorted by `yr` then `mn` at this point, and `as_factor()` keeps
  # levels in order of first appearance, so the levels are chronological.
  mutate(yr_mn = as_factor(paste(mn, yr)))

# One bar per `yr_mn` level, bar height taken directly from `cnt`.
ggplot(df1, mapping = aes(yr_mn, cnt)) +
  geom_bar(stat = "identity") +
  # Rotate the month labels so they do not overlap.
  theme(axis.text.x = element_text(hjust = 0, angle = -90))

答案 1 :(得分:2)

无需更改原始 data.frame 的解决方案(只需在 ggplot2 的 aes 中格式化日期列,并使用 geom_bar() 的 "count" 统计):

library(ggplot2)
# Format the timestamp to "YYYY-MM" inside the mapping and let `geom_bar()`
# count the rows per label; `df` itself is not modified.
ggplot(
  data = df,
  mapping = aes(x = format(project_submitted_datetime, "%Y-%m"))
) +
  geom_bar(stat = "count") +
  xlab("Month")

enter image description here

作为替代方案,您可以使用正则表达式:sub("(\\d{4}-\\d{2}).*", "\\1", project_submitted_datetime)