我的数据集包含以下3列:
date client_id sales
01/01/2012 client 1 $1000
02/01/2012 client 1 $900
...
...
12/01/2014 client 1 $1000
01/01/2012 client 2 $300
02/01/2012 client 2 $450
...
..
12/01/2014 client 2 $375
等98个其他客户(每个客户24个月数据点)
我有多个客户(大约100个)...每个客户的数据都是时间序列格式(24个月度数据点)
如何在R中使用auto.arima自动预测全部100个客户的销售额? 是否有类似“by”分组语句的选项? 还是我必须使用循环?
谢谢
答案 0 :(得分:4)
您始终可以使用lapply()
:
# Fit an automatically selected ARIMA model to each series, then forecast it.
lapply(tsMat, function(series) {
  fit <- auto.arima(series)
  forecast(fit)
})
下面是一个小例子:
library(forecast)
# Generate some example time series: 100 clients x 24 monthly observations.
set.seed(1)  # make the simulated data reproducible
sales <- replicate(
  100,
  arima.sim(n = 24, list(ar = c(0.8), ma = c(-0.2)), sd = sqrt(0.1))
)
dates <- seq(as.Date("2012/1/1"), by = "month", length.out = 24)
df <- data.frame(
  date = rep(dates, 100),
  client_id = rep(1:100, each = 24),
  sales = c(sales)
)

# Reshape to wide format: one row per month, one column per client.
# `value.var` is named explicitly so dcast() does not have to guess it.
wide <- reshape2::dcast(df, date ~ client_id, value.var = "sales")

# dcast() keeps `date` as the first column; drop it so that it is not
# treated as one more series (and forecast) alongside the client columns.
tsMat <- ts(wide[, -1], start = 2012, frequency = 12)

# Forecast each client's series with an automatically selected ARIMA model.
output <- lapply(tsMat, function(x) forecast(auto.arima(x)))
答案 1 :(得分:1)
您还可以在forecast调用中使用“h = 期数”参数,指定要向未来预测多少期
# Forecast every series 67 periods ahead (`h` is the forecast horizon) and
# combine the results into one data frame.
# R is case-sensitive: the function is forecast(), not Forecast().
Forecast.allStates <- as.data.frame(
  lapply(ts.allStates, function(x) forecast(auto.arima(x), h = 67))
)
答案 2 :(得分:0)
另一种选择可能是tsibble
和fable
:
library(tsibble)
library(fable)
library(dplyr)
# Fit one ARIMA model per client and forecast 12 months ahead.
df %>%
  # Convert the index to a year-month class *before* building the tsibble,
  # so the tsibble is created directly on a monthly index instead of first
  # being built on a daily `Date` index.
  mutate(date = yearmonth(date)) %>%
  as_tsibble(key = client_id, index = date) %>%
  model(arima = ARIMA(sales)) %>%
  forecast(h = "1 year")
#> # A fable: 1,200 x 5 [1M]
#> # Key: client_id, .model [100]
#> client_id .model date sales .mean
#> <int> <chr> <mth> <dist> <dbl>
#> 1 1 arima 2014 gen N(0.072, 0.089) 0.0718
#> 2 1 arima 2014 feb N(0.28, 0.11) 0.281
#> 3 1 arima 2014 mar N(0.35, 0.12) 0.351
#> 4 1 arima 2014 apr N(0.024, 0.12) 0.0242
#> 5 1 arima 2014 mag N(-0.16, 0.12) -0.162
#> 6 1 arima 2014 giu N(0.029, 0.12) 0.0292
#> 7 1 arima 2014 lug N(0.24, 0.12) 0.243
#> 8 1 arima 2014 ago N(0.11, 0.12) 0.110
#> 9 1 arima 2014 set N(0.37, 0.12) 0.374
#> 10 1 arima 2014 ott N(0.37, 0.12) 0.369
#> # ... with 1,190 more rows
其中df
是:
# Reproducible toy data: 100 simulated ARMA(1,1) series of 24 monthly points,
# stacked into a long data frame with one row per (client, month).
set.seed(1)
sales <- replicate(
  100,
  arima.sim(model = list(ar = 0.8, ma = -0.2), n = 24, sd = sqrt(0.1))
)
dates <- seq(from = as.Date("2012/1/1"), by = "month", length.out = 24)
df <- data.frame(
  date = rep(dates, times = 100),
  client_id = rep(seq_len(100), each = 24),
  sales = as.vector(sales)
)