我有以下时间序列:
# Sample data: ten consecutive daily observations of the series A and B.
ts <- data.frame(
  Date = sprintf("2017-01-%02d", 1:10),
  A = c(15, 37, 29, 18, 12, 8, 2, 24, 42, 10),
  B = c(16, 22, 5, 6, 22, 12, 13, 7, 20, 36)
)
ts
Date A B
1 2017-01-01 15 16
2 2017-01-02 37 22
3 2017-01-03 29 5
4 2017-01-04 18 6
5 2017-01-05 12 22
6 2017-01-06 8 12
7 2017-01-07 2 13
8 2017-01-08 24 7
9 2017-01-09 42 20
10 2017-01-10 10 36
我想对时间序列 A 和 B 逐一应用 forecast 包中的 auto.arima 函数。
我希望采用函数式的做法,并需要帮助:先编写一个按以下步骤工作的预测函数(该函数将遍历多个序列):
1. splits data into train:test in 80:20 ratio
2. Trains auto.arima model on the train set
3. Model evaluation using the test set (rmse metric)
4. optional----> cross-validation with 1 time step
5. generates forecast (horizon=2) with the error metric as below:
series Date rmse pt_forecast_1 pt_forecast_2
1 A 2017-01-11 0.21 12 13
2 B 2017-01-12 0.11 36 34
需要帮助。谢谢
答案 0 :(得分:1)
我写了 data_gen_func() 来完成您需要的事情,希望对您有所帮助。它的输出与您期望的输出几乎相同。您需要安装 forecast 和 CombMSC 软件包(函数内部还会用到 tidyr 和 dplyr)。如果您尚未安装,下面的代码会替您安装。
我还展示了如何使用它,并描述了需要传递给data_gen_func()的参数。
# Install any package used by data_gen_func() that is not available yet.
# requireNamespace() only checks availability; it does not attach anything.
for (pkg in c("forecast", "CombMSC", "tidyr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# Attach the two packages explicitly so they are loaded even right after a
# fresh install; library() fails loudly if the installation did not succeed.
library(forecast)
library(CombMSC)
#' Forecast each series of a multiple time series and score a hold-out set
#'
#' For every column of `dta` the last `test_size` observations are held out,
#' an `auto.arima()` model is fitted to the remaining observations and its
#' hold-out forecasts are scored with the accuracy measure named by `error`.
#' A second `auto.arima()` model is then fitted to the complete series to
#' produce the final `h`-step point forecasts.
#'
#' @param dta A multiple time series (an object inheriting from `"mts"`).
#' @param h Final forecast horizon (number of periods to forecast).
#' @param test_size Number of trailing observations held out for testing.
#' @param start_fc_date A `Date`: the date of the first forecast. It is only
#'   used to label the forecast columns of the result.
#' @param ts_frequency Step between forecast dates, passed as `by` to `seq()`.
#'   A character string containing one of "day", "week", "month", "quarter"
#'   or "year". This can optionally be preceded by a (positive or negative)
#'   integer and a space, or followed by "s".
#' @param error Name of the accuracy measure to report.
#'   It can be one of the following: ME, RMSE, MAE, MPE, MAPE, MASE, ACF1.
#' @return A tibble with one row per series: a `series` column, one column
#'   per forecast date holding the point forecast, and one column named after
#'   `error` holding the hold-out accuracy.
data_gen_func <- function(dta, h, test_size, start_fc_date, ts_frequency,
                          error = "RMSE") {
  if (!inherits(start_fc_date, "Date")) {
    stop(" 'start_fc_date' must have class of 'Date'")
  }
  if (!inherits(dta, "mts")) {
    stop("dta must be an mts")
  }
  if (!is.numeric(test_size) || length(test_size) != 1L || is.na(test_size) ||
      test_size < 1 || test_size >= nrow(dta)) {
    stop("'test_size' must be a single number between 1 and nrow(dta) - 1")
  }
  if (!is.numeric(h) || length(h) != 1L || is.na(h) || h < 1) {
    stop("'h' must be a single number >= 1")
  }
  if (!is.character(error) || length(error) != 1L || is.na(error)) {
    stop("'error' must be a single character string")
  }

  n_series <- ncol(dta)
  series_names <- colnames(dta)
  train_length <- nrow(dta) - test_size

  fc <- data.frame(matrix(nrow = h, ncol = n_series))
  acc_values <- numeric(n_series)

  for (i in seq_len(n_series)) {
    parts <- CombMSC::splitTrainTest(dta[, i], train_length)

    # Score the hold-out forecasts. The whole forecast object (not just its
    # point forecasts) is handed to accuracy() so that measures which need
    # the training data, such as MASE, are available as well.
    holdout_fc <- forecast::forecast(
      forecast::auto.arima(parts$train),
      h = test_size
    )
    scores <- forecast::accuracy(holdout_fc, parts$test)
    if (!error %in% colnames(scores)) {
      stop("'error' must be one of: ", paste(colnames(scores), collapse = ", "))
    }
    acc_values[i] <- scores["Test set", error]

    # The final forecast uses a model refitted on the complete series.
    fc[, i] <- forecast::forecast(forecast::auto.arima(dta[, i]), h = h)$mean
  }
  colnames(fc) <- series_names

  acc_long <- data.frame(
    series = series_names,
    value = acc_values,
    stringsAsFactors = FALSE
  )
  names(acc_long)[2] <- error

  # Reshape the forecasts to one row per series and one column per date.
  fc$date <- seq(from = start_fc_date, length.out = h, by = ts_frequency)
  fc_long <- tidyr::pivot_longer(fc, -date, names_to = "series",
                                 values_to = "fc")
  fc_wide <- tidyr::pivot_wider(fc_long, names_from = "date",
                                values_from = "fc")

  dplyr::left_join(fc_wide, acc_long, by = "series")
}
# usage -------------------
library(forecast)
library(CombMSC)

# Fix the RNG seed so the simulated series, and therefore the forecasts and
# error values, are identical on every run.
set.seed(123)

# Five simulated AR(1) series with 50 monthly observations each, starting in
# January 2010.
my_data <- ts(
  data.frame(
    AA = arima.sim(list(order = c(1, 0, 0), ar = 0.5), n = 50, mean = 12),
    AB = arima.sim(list(order = c(1, 0, 0), ar = 0.5), n = 50, mean = 12),
    AC = arima.sim(list(order = c(1, 0, 0), ar = 0.5), n = 50, mean = 11),
    BA = arima.sim(list(order = c(1, 0, 0), ar = 0.5), n = 50, mean = 10),
    BB = arima.sim(list(order = c(1, 0, 0), ar = 0.5), n = 50, mean = 14)
  ),
  start = c(2010, 1),
  frequency = 12
)

# Last observed period; the first forecast date below is the month after it.
end(my_data)

out1 <- data_gen_func(
  dta = my_data,
  h = 2,
  test_size = 1,
  start_fc_date = as.Date("2014-03-01"),
  ts_frequency = "month",
  error = "MAPE"
)
out1
5个时间序列的输出如下所示
# A tibble: 5 x 4
series `2014-03-01` `2014-04-01` MAPE
<chr> <dbl> <dbl> <dbl>
1 AA 23.6 23.4 3.38
2 AB 24.2 24.4 1.18
3 AC 21.1 21.3 4.31
4 BA 19.9 20.1 1.47
5 BB 27.3 27.7 3.54
如果设置error = "RMSE"
,结果将如下所示:
# A tibble: 5 x 4
series `2014-03-01` `2014-04-01` RMSE
<chr> <dbl> <dbl> <dbl>
1 AA 24.0 24.0 1.05
2 AB 23.2 23.3 0.160
3 AC 22.2 22.2 0.851
4 BA 19.4 19.7 1.59
5 BB 27.5 27.9 1.04
下面使用问题中的示例数据:该序列很短,因此运行时会出现一些警告
# The question's sample data: ten daily observations of the series A and B.
my_ts <- data.frame(
  Date = c("2017-01-01", "2017-01-02", "2017-01-03", "2017-01-04",
           "2017-01-05", "2017-01-06", "2017-01-07", "2017-01-08",
           "2017-01-09", "2017-01-10"),
  A = c(15, 37, 29, 18, 12, 8, 2, 24, 42, 10),
  B = c(16, 22, 5, 6, 22, 12, 13, 7, 20, 36)
)
# Drop the Date column and build a multiple time series with frequency 7.
my_ts <- stats::ts(my_ts[, -1], start = c(2017, 1), frequency = 7)
# The last observation is dated 2017-01-10, so the first forecast belongs to
# 2017-01-11: the forecast columns are labelled 2017-01-11 and 2017-01-12.
# (Sample output printed with the earlier start date 2017-01-10 shows the
# labels shifted one day back.)
out2 <- data_gen_func(dta = my_ts, h = 2, test_size = 2,
                      start_fc_date = as.Date("2017-01-11"),
                      ts_frequency = "day", error = "MAPE")
out2
输出:
# A tibble: 2 x 4
series `2017-01-10` `2017-01-11` MAPE
<chr> <dbl> <dbl> <dbl>
1 A 19.7 19.7 69.0
2 B 15.9 15.9 49.9
如果您不喜欢这种输出格式,也可以把结果转换为长格式。
# Reshape the wide result to long format: one row per series and forecast
# date, keeping the series name and its MAPE on every row.
tidyr::pivot_longer(
  out2,
  cols = c(-series, -MAPE),
  names_to = "date",
  values_to = "point_fc"
)
转换为长格式后的输出
# A tibble: 4 x 4
series MAPE date point_fc
<chr> <dbl> <chr> <dbl>
1 A 69.0 2017-01-10 19.7
2 A 69.0 2017-01-11 19.7
3 B 49.9 2017-01-10 15.9
4 B 49.9 2017-01-11 15.9