在R中按组使用auto.arima()时,如何将数字形式的日期转换为正确的日期格式

时间:2018-12-25 10:46:55

标签: r dplyr timestamp lubridate arima

让我们处理这个数据样本

# Sample data in dput() form: a 40-row data.frame with four columns.
#   Data   - factor whose labels look like dd.mm.yyyy strings, always the
#            first day of a month (presumably monthly observations; the
#            labels are stored in alphabetical, not chronological, order)
#   client - factor with levels "Horns" and "Kornev"
#   stuff  - factor with levels "chickens", "hooves" and "Oysters"
#   Sales  - integer sales figures
timeseries<-structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L, 
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L, 
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L, 
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018", 
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020", 
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018", 
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019", 
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020", 
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017", 
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019", 
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018", 
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"), 
    client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens", 
    "hooves", "Oysters"), class = "factor"), Sales = c(374L, 
    12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L, 
    117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L, 
    67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L, 
    441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data", 
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA, 
-40L))

我想按组使用auto.arima进行预测

# Per-group forecasting pipeline: build a group key, split the data by it,
# turn each group's Sales into a monthly ts, fit auto.arima() to each series
# and forecast each fitted model.

# Build the grouping key by concatenating client and stuff (no separator),
# e.g. "Horns" + "chickens" -> "Hornschickens".
timeseries$group <- paste0(timeseries$client,timeseries$stuff)

# Split the data frame into a named list with one data frame per group.
listed <- split(timeseries,timeseries$group)

library("forecast")
library("lubridate")

# Convert each group's Sales column into a ts with 12 observations per cycle.
# NOTE(review): `start` is given a Date object here. ts() documents `start`
# as a single number or a vector of two integers, so the Date's underlying
# day count (as.numeric(ymd("2017-01-04")) is 17170) ends up as the starting
# "year" label of every series instead of a calendar year.
listed_ts <- lapply(listed,
                    function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12)  ) 

# Print the per-group series to inspect their time labels.
listed_ts

# Fit one automatically selected ARIMA model per group.
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
# Forecast each fitted model; the second positional argument (2) is
# presumably the horizon, i.e. two periods ahead.
listed_forecast <- lapply(listed_arima,function(x) forecast(x,2) )
listed_forecast
# Bind the per-group forecast objects together row-wise.
do.call(rbind,listed_forecast)

运行listed_forecast之后,我得到如下输出:

  Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
    17170 374  12 120 242 227 268 280 419  12 172 336

    $Hornshooves
          Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
    17170 497 522  65 268 441 247 348 445 477  62 226 476
    17171 306                                            

    $KornevOysters
          Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
    17170 117 108 150  90 117 116 146 120 211 213  67 146
    17171 118 152 122 201

输出中的17170并不是正确的日期格式。

这是因为as.numeric(ymd("2017-01-04"))的结果正是17170。

如何以正确的日期格式获取输出? 我想要这样的普通日期格式

$Hornschickens
          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
Dec 2017       223.8182 50.98365 396.6527 -40.50942 488.1458
Jan 2018       223.8182 50.98365 396.6527 -40.50942 488.1458

所以我推测预测结果中的17170表示2017年,17171表示2018年,但我在把它转换为可读的日期格式时遇到了麻烦。

我观察到如下规律,是否可以据此将这些数值自动转换为日期?

其中

1970-01-01(Y-m-d)为0

1970-01-02是1

“2018-12-25”是17890

但是,当输入数据是按月份汇总时(如我的示例),

只需简单地转换为“2018年12月”即可,因为17890落在2018年12月的范围内。

1 个答案:

答案 0 :(得分:1)

listed_ts中的start参数不应该是日期:

  

start——第一个观测值的时间。可以是单个数字,也可以是由两个整数组成的向量,分别指定自然时间单位以及该时间单位内(从1开始计数的)样本序号。第二种形式的用法请参阅示例。

(来自?ts。)因此,应改用start = c(2017, 1)来完成这项工作。