重叠R中原始系列的预测时间序列

时间:2018-04-21 15:15:44

标签: r dataframe ggplot2 time-series forecasting

我执行预测

w=read.csv("C:/Users/admin/Documents/aggrmonth.csv", sep=";",dec=",")
w
#create time series object
w=ts(w$new,frequency = 12,start=c(2015,1)) 
w
#timeplot
plot.ts(w)

#forecast for the next months
library("forecast")
m <- stats::HoltWinters(w)
test=forecast:::forecast.HoltWinters(m,h=4) #h is how much month do you want to predict
test

现在我想提前4个月预测。 从01.2017-04.2017。我知道原始价值。

1-Jan-17    1020
1-Feb-17    800
1-Mar-17    1130
1-Apr-17    600

但我需要获得情节,其中显示CI的预测值与原始值重叠。 当然,如果我没有明确解释,我附上了情节。 needed plot 绿色曲线是该系列的初始值(我的4个月) 绿色虚线是预测值与原始值重叠。 预测的虚线曲线上的破折号是置信区间。

如何制作此类情节

w=

structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
"new"), class = "data.frame", row.names = c(NA, -24L))

1 个答案:

答案 0 :(得分:3)

我们可以使用ggfortify创建数据框,然后使用ggplot2

绘制两个时间序列
# Load required libraries
library(lubridate)
library(magrittr)
library(tidyverse)
library(scales)
library(forecast)
library(ggfortify)

w <- structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
  11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
  4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
  "1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
  "1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
  "1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
  "1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
  8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
  14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
  2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
  "new"), class = "data.frame", row.names = c(NA, -24L))

# create time series object
w = ts(w$new, frequency = 12, start=c(2015, 1)) 
w

#>        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
#> 2015  8575  8215 16399 16415 15704 19805 17484 18116 19977 14439  9258
#> 2016  4909  9539  8802 11253 11971  7838  2095  4157  3910  1306  3429
#>        Dec
#> 2015 12259
#> 2016  1390

# forecast for the next months
m <- stats::HoltWinters(w)

# h is how much month do you want to predict
pred = forecast:::forecast.HoltWinters(m, h=4) 
pred

#>          Point Forecast     Lo 80     Hi 80      Lo 95     Hi 95
#> Jan 2017    -5049.00381 -9644.003 -454.0045 -12076.449  1978.441
#> Feb 2017       37.44605 -5599.592 5674.4843  -8583.660  8658.552
#> Mar 2017     -256.41474 -6770.890 6258.0601 -10219.444  9706.615
#> Apr 2017     2593.09445 -4693.919 9880.1079  -8551.431 13737.620

# plot
plot(pred, include = 24, showgap = FALSE)

# Convert pred from list to data frame object
df1 <- fortify(pred) %>% as_tibble()

# Create Date column, remove Index column and rename other columns 
df1 %<>% 
  mutate(Date = as.Date(Index, "%Y-%m-%d")) %>% 
  select(-Index) %>% 
  rename("Low95" = "Lo 95",
         "Low80" = "Lo 80",
         "High95" = "Hi 95",
         "High80" = "Hi 80",
         "Forecast" = "Point Forecast")
df1

#> # A tibble: 28 x 8
#>     Data Fitted Forecast Low80 High80 Low95 High95 Date      
#>    <int>  <dbl>    <dbl> <dbl>  <dbl> <dbl>  <dbl> <date>    
#>  1  8575     NA       NA    NA     NA    NA     NA 2015-01-01
#>  2  8215     NA       NA    NA     NA    NA     NA 2015-02-01
#>  3 16399     NA       NA    NA     NA    NA     NA 2015-03-01
#>  4 16415     NA       NA    NA     NA    NA     NA 2015-04-01
#>  5 15704     NA       NA    NA     NA    NA     NA 2015-05-01
#>  6 19805     NA       NA    NA     NA    NA     NA 2015-06-01
#>  7 17484     NA       NA    NA     NA    NA     NA 2015-07-01
#>  8 18116     NA       NA    NA     NA    NA     NA 2015-08-01
#>  9 19977     NA       NA    NA     NA    NA     NA 2015-09-01
#> 10 14439     NA       NA    NA     NA    NA     NA 2015-10-01
#> # ... with 18 more rows

### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData, 
    !(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14)

修改:包含来自&#34; 2017-01-01&#34;的已知值到&#34; 2017-04-01&#34;

# Create new column which has known values
df1$Obs <- NA
df1$Obs[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14) +
  geom_line(aes(y = Obs, group = 4, colour = "Obs"), linetype = "dotted", size = 0.75)

或者将这些值直接放入Data

df1$Data[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 3) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14)

reprex package(v0.2.0)创建于2018-04-21。