我通过 twitteR 包从 Twitter 获取数据,得到了下面这个数据框(tweets_platform):
id source created
7,71627E+17 iPhone 02/09/2016 08:34
7,71627E+17 iPhone 02/09/2016 08:34
7,71627E+17 Android 02/09/2016 08:34
7,71627E+17 Android 02/09/2016 08:34
7,71627E+17 iPhone 02/09/2016 08:34
7,71627E+17 iPhone 02/09/2016 08:34
我想绘制一张折线图,以突出显示推文发生在一天中的哪个小时:
library(lubridate)
library(scales)
# Plot the share of tweets posted in each hour of the day, one line per source.
# NOTE(review): assumes `created` is a date-time (POSIXct) column -- TODO confirm;
# the sample rows above show it only as printed text.
tweets_platform %>%
# One row per (source, hour) pair; the hour is extracted after converting
# `created` to the "EST" time zone.
count(source, hour = hour(with_tz(created, "EST"))) %>%
# Each count divided by the sum of all counts. There is no group_by() here, so
# (presumably, if the input is ungrouped -- verify) shares are not per source.
mutate(percent = n / sum(n)) %>%
ggplot(aes(hour, percent, color = source)) +
# NOTE(review): the six sample rows all share one timestamp, so each source
# would contribute a single (hour, percent) point; that is consistent with the
# reported "only one observation" message from geom_path.
geom_line() +
# Show the y axis as percentages.
scale_y_continuous(labels = percent_format()) +
labs(x = "Hour of day (EST)",
y = "% of tweets",
color = "")
但是,当我运行代码时,控制台会返回此错误:
geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?
并且它仅绘制没有线条的图形。我该如何解决这个问题?
答案 0 :(得分:0)
我没有您的实际数据,所以自己创建了一份示例数据,其中有些步骤对您来说可能是不必要的。我认为下面的代码就是您想要的结果。我想处理某些小时没有任何推文的情况,这就是添加 left_join 和第二个 mutate 的原因;如果您的数据不存在这种情况,可以忽略这两步。希望这对您有所帮助。
# Build a per-source table of hourly tweet shares (in percent) and plot it as
# one line per source. Needs `mydf` (defined in the DATA section) with columns
# `source` (character) and `created` (POSIXct).
library(dplyr)
library(ggplot2)

temp <- mydf %>%
  # Tweets per source and hour of day; the hour is read from `created` via
  # format(), so it follows the time zone the column is printed in.
  count(source, hour = as.numeric(format(created, "%H"))) %>%
  # Percentages are computed within each source, so every source sums to 100.
  group_by(source) %>%
  mutate(percent = n / sum(n) * 100) %>%
  ungroup() %>%
  # Join onto the full 2 x 24 grid of (source, hour) so hours without tweets
  # still get a row. `source` stays character (stringsAsFactors = FALSE) to
  # match the type of the key in the counted data, and the keys are explicit.
  left_join(
    data.frame(
      source = rep(c("iPhone", "Android"), each = 24),
      hour = rep(0:23, times = 2),
      stringsAsFactors = FALSE
    ),
    .,
    by = c("source", "hour")
  ) %>%
  # Hours with no tweets come back as NA after the join; plot them as 0 %.
  mutate(percent = coalesce(percent, 0))

# `group = source` gives each platform its own line across the 24 hours.
ggplot(
  data = temp,
  aes(x = hour, y = percent, group = source, color = source)
) +
  geom_line() +
  scale_x_continuous(limits = c(0, 23), breaks = 0:23) +
  scale_y_continuous(limits = c(0, 100))
DATA
# Example data for the answer: 20 tweets (ids 1 and 2, ten rows each) from two
# platforms. `created` holds POSIXct times given as seconds since the epoch,
# with an empty `tzone` attribute.
mydf <- data.frame(
  id = rep(1:2, each = 10L),
  source = c(
    "iPhone", "Android", "iPhone", "iPhone", "Android",
    "iPhone", "Android", "Android", "iPhone", "Android",
    "iPhone", "Android", "iPhone", "iPhone", "Android",
    "iPhone", "Android", "Android", "iPhone", "Android"
  ),
  created = structure(
    c(
      1472772840, 1472780040, 1472772840, 1472780040, 1472794440,
      1472787240, 1472769240, 1472774160, 1472780040, 1472805240,
      1472808840, 1472812440, 1472808840, 1472816040, 1472819640,
      1472819640, 1472812440, 1472813760, 1472812440, 1472813820
    ),
    class = c("POSIXct", "POSIXt"),
    tzone = ""
  ),
  stringsAsFactors = FALSE
)