如何使用R绘制折线图以进行时间序列分析

时间:2020-05-13 07:19:58

标签: r ggplot2 visualization linechart

我正在尝试在R中绘制折线图:横轴为日期和时间,纵轴为该日期和时间段内的推文数量。

library(ggplot2)
# Sample of the tweet data in dput() form; Date, Time and Text are all factors.
# The Text levels must be unique for the factor to be valid; together with the
# integer codes below they reproduce the Text column of the table shown in the
# question.
df1 <- structure(
  list(
    Date = structure(
      c(1L, 1L, 2L, 1L, 1L, 1L),
      .Label = c("2020-03-12", "2020-03-13"),
      class = "factor"
    ),
    Time = structure(
      c(1L, 1L, 2L, 3L, 4L, 5L),
      .Label = c(
        "00:00:00Z", "00:00:01Z", "00:10:04Z", "00:25:12Z", "01:00:02Z"
      ),
      class = "factor"
    ),
    Text = structure(
      c(5L, 3L, 6L, 4L, 2L, 1L),
      .Label = c(
        "Government launch new policy to different sector improvement",
        "Launches of rocket",
        "Premium policy get activate by company abc",
        "Technology makes trend",
        "The images of demonstrations and gatherings",
        "Weather forecasting by xyz"
      ),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)
# Asker's failing attempt, kept verbatim: the outer ggplot( call is never
# closed, and df1 has no `count` or `variable` column (only Date, Time, Text).
ggplot(df1, aes(x = Date, y = text(count)) + geom_line(aes(color = variable), size = 1)

我尝试用上面的代码绘制所需的结果,但出现错误。数据集以csv格式给出,如下所示。

Date         Time                     Text
2020-03-12   00:00:00Z                The images of demonstrations and gatherings
2020-03-12   00:00:00Z                Premium policy get activate by company abc
2020-03-12   00:00:01Z                Weather forecasting by xyz 
2020-03-12   00:10:04Z                Technology makes trend
2020-03-12   00:25:12Z                Launches of rocket 
2020-03-12   01:00:02Z                Government launch new policy to different sector improvement

我有将近15天的数据,想要绘制折线图来可视化推文的数量(推文内容在Text列中给出),以查看不同日期和时间的推文趋势。

2 个答案:

答案 0 :(得分:0)

# Example data for this answer: six tweets, every column stored as a factor.
# Dates are in month/day/year form; X holds the overflow of the last Text value.
df1 <- data.frame(
  Date = factor(
    c(
      "3/12/2020", "3/12/2020", "3/13/2020",
      "3/12/2020", "3/12/2020", "3/12/2020"
    ),
    levels = c("3/12/2020", "3/13/2020")
  ),
  Time = factor(
    c(
      "00:00:00Z", "00:00:00Z", "00:00:01Z",
      "00:10:04Z", "00:25:12Z", "01:00:02Z"
    ),
    levels = c(
      "00:00:00Z", "00:00:01Z", "00:10:04Z", "00:25:12Z", "01:00:02Z"
    )
  ),
  Text = factor(
    c(
      "The images of demonstrations and gatherings",
      "Premium policy get activate by company abc",
      "Weather forecasting by xyz",
      "Technology makes trend",
      "Launches of rocket",
      "Government launch new policy to different sector"
    ),
    levels = c(
      "Government launch new policy to different sector",
      "Launches of rocket",
      "Premium policy get activate by company abc",
      "Technology makes trend",
      "The images of demonstrations and gatherings",
      "Weather forecasting by xyz"
    )
  ),
  X = factor(
    c("", "", "", "", "", "improvement"),
    levels = c("", "improvement")
  )
)

按上述方法创建数据集df1,然后运行下面的代码,即可得到所需的按小时统计的折线图。

library(tidyverse)
library(lubridate)

# Parse Time and Date with lubridate, extract the clock hour, tally the rows
# per hour, and draw the tallies as a line with a point at each hour.
df1 %>%
  mutate(
    Time = hms(Time),
    Date = mdy(Date),
    hour = hour(Time)
  ) %>%
  count(hour) %>%
  ggplot(mapping = aes(x = hour, y = n, group = 1)) +
  geom_line() +
  geom_point()

答案 1 :(得分:0)

这是您想要的结果吗?


library(dplyr)
library(lubridate)
library(stringr)
library(ggplot2)

针对您的数据的回答

演示数据整理(data wrangling)。


# your data, in dput() form; Date, Time and Text are all factors.
# The Text levels must be unique for the factor to be valid; together with the
# integer codes below they reproduce the Text column of the table shown in the
# question.
df1 <- structure(
  list(
    Date = structure(
      c(1L, 1L, 2L, 1L, 1L, 1L),
      .Label = c("2020-03-12", "2020-03-13"),
      class = "factor"
    ),
    Time = structure(
      c(1L, 1L, 2L, 3L, 4L, 5L),
      .Label = c(
        "00:00:00Z", "00:00:01Z", "00:10:04Z", "00:25:12Z", "01:00:02Z"
      ),
      class = "factor"
    ),
    Text = structure(
      c(5L, 3L, 6L, 4L, 2L, 1L),
      .Label = c(
        "Government launch new policy to different sector improvement",
        "Launches of rocket",
        "Premium policy get activate by company abc",
        "Technology makes trend",
        "The images of demonstrations and gatherings",
        "Weather forecasting by xyz"
      ),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)

# data wrangle: build one datetime per tweet, bucket it by hour, count per bucket
df2 <-
  df1 %>%
  mutate(
    # Date and Time are factors in df1; convert only the columns used here
    Date = as.character(Date),
    # remove the trailing 'Z' from Time values
    Time = str_remove(as.character(Time), "Z$"),
    # change text into datetime format using lubridate::ymd_hms
    dt = ymd_hms(paste(Date, Time, sep = " ")),
    # round to the end of the named hour; change_on_boundary = TRUE makes
    # stamps that sit exactly on the hour (e.g. 00:00:00) roll up as well,
    # so they land in the same bucket as the rest of that hour
    dt = ceiling_date(dt, unit = "hour", change_on_boundary = TRUE)
  ) %>%
  group_by(dt) %>%
  summarise(nr_tweets = n())

# plot: hourly tweet counts from df2 as a line, with one x-axis break per day
# labelled as day/month

p1 <- ggplot(data = df2, mapping = aes(x = dt, y = nr_tweets)) +
  geom_line() +
  scale_x_datetime(date_labels = "%d/%m", date_breaks = "1 day") +
  ggtitle("Data from question `df1`")


针对大型数据集的回答

# Made-up data: 10000 timestamps drawn with replacement from a per-second grid
# between ISOdate(2020, 5, 1) and ISOdate(2020, 5, 15), each paired with a
# random single-letter "tweet" text.
tib <- tibble(
  dt = sample(
    seq(from = ISOdate(2020, 5, 1), to = ISOdate(2020, 5, 15), by = "sec"),
    size = 10000,
    replace = TRUE
  ),
  text = sample(c(letters, LETTERS), size = 10000, replace = TRUE)
)


# Round each timestamp to the nearest hour and count the rows in each hour.
tib1 <-
  tib %>%
  group_by(dt = round_date(dt, unit = "hour")) %>%
  summarise(nr_tweets = n())


# Hourly counts from tib1 as a line, one x-axis break per day (day/month).
p2 <- ggplot(data = tib1, mapping = aes(x = dt, y = nr_tweets)) +
  geom_line() +
  scale_x_datetime(date_labels = "%d/%m", date_breaks = "1 day") +
  ggtitle("Result using `tib` data made up to answer the question")
  

# Show p1 stacked above p2.
# NOTE(review): `/` between two ggplot objects is presumably the patchwork
# package's stacking operator; patchwork is not among the library() calls
# shown in this answer, so attach it first (library(patchwork)) — confirm.
p1/p2

由 reprex 包(v0.3.0)于 2020-05-13 创建