可复制的示例
df=structure(list(group = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), year = c(1973L, 1974L, 1975L, 1976L, 1977L, 1978L,
1973L, 1974L, 1975L, 1976L, 1977L, 1978L), Jan = c(9007L, 7750L,
8162L, 7717L, 7792L, 7836L, 9007L, 7750L, 8162L, 7717L, 7792L,
7836L), Feb = c(8106L, 6981L, 7306L, 7461L, 6957L, 6892L, 8106L,
6981L, 7306L, 7461L, 6957L, 6892L), Mar = c(8928L, 8038L, 8124L,
7767L, 7726L, 7791L, 8928L, 8038L, 8124L, 7767L, 7726L, 7791L
), Apr = c(9137L, 8422L, 7870L, 7925L, 8106L, 8192L, 9137L, 8422L,
7870L, 7925L, 8106L, 8192L), May = c(10017L, 8714L, 9387L, 8623L,
8890L, 9115L, 10017L, 8714L, 9387L, 8623L, 8890L, 9115L), Jun = c(10826L,
9512L, 9556L, 8945L, 9299L, 9434L, 10826L, 9512L, 9556L, 8945L,
9299L, 9434L), Jul = c(11317L, 10120L, 10093L, 10078L, 10625L,
10484L, 11317L, 10120L, 10093L, 10078L, 10625L, 10484L), Aug = c(10744L,
9823L, 9620L, 9179L, 9302L, 9827L, 10744L, 9823L, 9620L, 9179L,
9302L, 9827L), Sep = c(9713L, 8743L, 8285L, 8037L, 8314L, 9110L,
9713L, 8743L, 8285L, 8037L, 8314L, 9110L), Oct = c(9938L, 9129L,
8466L, 8488L, 8850L, 9070L, 9938L, 9129L, 8466L, 8488L, 8850L,
9070L), Nov = c(9161L, 8710L, 8160L, 7874L, 8265L, 8633L, 9161L,
8710L, 8160L, 7874L, 8265L, 8633L), Dec = c(8927L, 8680L, 8034L,
8647L, 8796L, 9240L, 8927L, 8680L, 8034L, 8647L, 8796L, 9240L
)), .Names = c("group", "year", "Jan", "Feb", "Mar", "Apr", "May",
"Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), class = "data.frame", row.names = c(NA,
-12L))
Perforcat按组分组
library(forecast)
ld <- split(df[, -1], df$group)
ld <- lapply(ld, function(x) {ts(c(t(x[,-1])), start = min(x[,1]), frequency = 12)})
lts <- lapply(ld, ets, model = "ZZZ")
结果
$`1`
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Jan 1979 8397.497 8022.399 8772.595 7823.834 8971.160
Feb 1979 7599.221 7162.825 8035.616 6931.812 8266.630
Mar 1979 8396.595 7906.510 8886.679 7647.075 9146.115
Apr 1979 8646.510 8108.063 9184.957 7823.026 9469.994
从1979年开始,它是预测值,我想要得到残差的结果 1973-1978。(初始值)
res <- lapply(lts, residuals)
和结果
$`1`
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
1973 497.69233 99.50607 64.44947 -15.20925 77.85009 390.89045 -277.67369 26.92614 72.42590 -85.69894 -338.10035
以此类推
df[df$year == 1973,]
,但全年如何...
然后从原始值中减去残差并得到预测值(也许我使任务复杂化了,但是否则我不知道如何获得所需的输出)
不需要更改名称point forecast
,lo80
和hi80
,我会记住,对于初始值,它们表示残差,原始值和预测值。
是否可以使用dplyr或data.table解决方案?
# Tidy-up the splits
ld <- lapply(ld, function(x) {
x %>%
gather(key, value, -year) %>%
unite(date, year, key, sep = "-") %>%
mutate(date = paste0(date, "-01")) %>%
mutate(date = as.Date(date, format = "%Y-%b-%d"))
})
结果
$`1`
date value
1 <NA> 9007
2 <NA> 7750
3 <NA> 8162
4 <NA> 7717
5 <NA> 7792
6 <NA> 7836
7 <NA> 8106
8 <NA> 6981
9 <NA> 7306
10 <NA> 7461
11 <NA> 6957
12 <NA> 6892
ld=dput()
ld <- lapply(ld, function(x) {
yr <- lubridate::year(min(x$date))
mth <- lubridate::month(min(x$date))
timetk::tk_ts(data = x, select = value, frequency = 12,
start = c(yr, mth))
})
错误
Error in x$date : $ operator is invalid for atomic vectors
> lts_all <- lapply(names(lts), function(x, lts) {
+ output_fit <- lts[[x]][["res_fit_tbl"]] %>%
+ mutate(group = x)
+ output_fcst <- lts[[x]][["res_fcst_tbl"]] %>%
+ mutate(group = x)
+
+ return(list(output_fit=output_fit, output_fcst=output_fcst))
+ }, lts)
> lts_all
[[1]]
[[1]]$output_fit
# A tibble: 72 x 6
date value residuals CI95_upper CI95_lower group
<date> <dbl> <dbl> <dbl> <dbl> <chr>
1 1973-01-01 8509 498 9083 7936 value
2 1973-02-01 8006 99.5 8580 7433 value
3 1973-03-01 8864 64.4 9437 8290 value
4 1973-04-01 9152 - 15.2 9726 8579 value
5 1973-05-01 9939 77.9 10513 9365 value
6 1973-06-01 10435 391 11009 9861 value
7 1973-07-01 11595 -278 12168 11021 value
8 1973-08-01 10717 26.9 11291 10143 value
9 1973-09-01 9641 72.4 10214 9067 value
10 1973-10-01 10024 - 85.7 10597 9450 value
# ... with 62 more rows
答案 0 :(得分:1)
从df
开始,这是一个完整的解决方案,可重复的示例:
# load libraries
load_pkgs <- c("forecast", "zoo", "timetk", "tidyverse")
sapply(load_pkgs, function(x) suppressPackageStartupMessages(library(x, character.only = T)))
第1步:预处理
# perform split by group
ld <- split(df[, -1], df$group)
# Tidy-up the splits
ld <- lapply(ld, function(x) {
x %>%
gather(key, value, -year) %>%
unite(date, year, key, sep = "-") %>%
mutate(date = paste0(date, "-01")) %>%
mutate(date = as.Date(date, format = "%Y-%b-%d"))
})
dput
首先提示:
structure(list(date = structure(c(1096, 1461, 1826, 2191, 2557,
2922, 1127, 1492, 1857, 2222, 2588, 2953, 1155, 1520, 1885, 2251,
2616, 2981, 1186, 1551, 1916, 2282, 2647, 3012, 1216, 1581, 1946,
2312, 2677, 3042, 1247, 1612, 1977, 2343, 2708, 3073, 1277, 1642,
2007, 2373, 2738, 3103, 1308, 1673, 2038, 2404, 2769, 3134, 1339,
1704, 2069, 2435, 2800, 3165, 1369, 1734, 2099, 2465, 2830, 3195,
1400, 1765, 2130, 2496, 2861, 3226, 1430, 1795, 2160, 2526, 2891,
3256), class = "Date"), value = c(9007L, 7750L, 8162L, 7717L,
7792L, 7836L, 8106L, 6981L, 7306L, 7461L, 6957L, 6892L, 8928L,
8038L, 8124L, 7767L, 7726L, 7791L, 9137L, 8422L, 7870L, 7925L,
8106L, 8192L, 10017L, 8714L, 9387L, 8623L, 8890L, 9115L, 10826L,
9512L, 9556L, 8945L, 9299L, 9434L, 11317L, 10120L, 10093L, 10078L,
10625L, 10484L, 10744L, 9823L, 9620L, 9179L, 9302L, 9827L, 9713L,
8743L, 8285L, 8037L, 8314L, 9110L, 9938L, 9129L, 8466L, 8488L,
8850L, 9070L, 9161L, 8710L, 8160L, 7874L, 8265L, 8633L, 8927L,
8680L, 8034L, 8647L, 8796L, 9240L)), class = "data.frame", row.names = c(NA,
-72L))
然后
# Transform time series to ts objects
ld <- lapply(ld, function(x) {
yr <- lubridate::year(min(x$date))
mth <- lubridate::month(min(x$date))
timetk::tk_ts(data = x, select = value, frequency = 12,
start = c(yr, mth))
})
dput
首先提示:
structure(c(9007L, 8106L, 8928L, 9137L, 10017L, 10826L, 11317L,
10744L, 9713L, 9938L, 9161L, 8927L, 7750L, 6981L, 8038L, 8422L,
8714L, 9512L, 10120L, 9823L, 8743L, 9129L, 8710L, 8680L, 8162L,
7306L, 8124L, 7870L, 9387L, 9556L, 10093L, 9620L, 8285L, 8466L,
8160L, 8034L, 7717L, 7461L, 7767L, 7925L, 8623L, 8945L, 10078L,
9179L, 8037L, 8488L, 7874L, 8647L, 7792L, 6957L, 7726L, 8106L,
8890L, 9299L, 10625L, 9302L, 8314L, 8850L, 8265L, 8796L, 7836L,
6892L, 7791L, 8192L, 9115L, 9434L, 10484L, 9827L, 9110L, 9070L,
8633L, 9240L), .Dim = c(72L, 1L), .Dimnames = list(NULL, "value"), index = structure(c(94694400,
97372800, 99792000, 102470400, 105062400, 107740800, 110332800,
113011200, 115689600, 118281600, 120960000, 123552000, 126230400,
128908800, 131328000, 134006400, 136598400, 139276800, 141868800,
144547200, 147225600, 149817600, 152496000, 155088000, 157766400,
160444800, 162864000, 165542400, 168134400, 170812800, 173404800,
176083200, 178761600, 181353600, 184032000, 186624000, 189302400,
191980800, 194486400, 197164800, 199756800, 202435200, 205027200,
207705600, 210384000, 212976000, 215654400, 218246400, 220924800,
223603200, 226022400, 228700800, 231292800, 233971200, 236563200,
239241600, 241920000, 244512000, 247190400, 249782400, 252460800,
255139200, 257558400, 260236800, 262828800, 265507200, 268099200,
270777600, 273456000, 276048000, 278726400, 281318400), tzone = "UTC", tclass = "Date"), .indexCLASS = "Date", tclass = "Date", .indexTZ = "UTC", tzone = "UTC", class = "ts", .Tsp = c(1973,
1978.91666666667, 12))
第2步:使用ets
您需要一个帮助功能来将输出转换为数据框:
# helping function
make_df <- function(ts_obj) {
ts_df <- timetk::tk_tbl(preserve_index = TRUE, ts_obj) %>%
mutate(index = zoo::as.Date(x = .$index, frac = 0)) %>%
dplyr::rename(date = index)
return(ts_df)
}
以下函数训练ets
并预测接下来的12个月;然后,准备具有拟合值和预测值的表:
lts <- lapply(ld, function(ts_obj) {
# train ets model and get fitted results
res_model <- ets(ts_obj, model = "ZZZ")
res_fit <- ts(as.numeric(res_model$fitted), start = start(ts_obj), frequency = 12)
# add extra metrics you may be interested in
model <- res_model[["method"]]
mse <- res_model[["mse"]]
# get forecasts for the next 12 months
res_fct <- forecast(res_model, h = 12)
res_fcst <- ts(res_fct$mean, start = end(ts_obj) + 1/12, frequency = 12)
# transform results to tbl
# for fitted output we keep the residuals and the 95% CI
res_fit_tbl <- make_df(res_fit) %>%
mutate(residuals = as.numeric(res_model[["residuals"]])) %>%
mutate(CI95_upper = value + 1.96*sqrt(res_model$sigma2),
CI95_lower = value - 1.96*sqrt(res_model$sigma2))
# the forecast output does not have residuals
res_fcst_tbl <- make_df(res_fcst)
return(list(res_fit_tbl = res_fit_tbl, res_fcst_tbl = res_fcst_tbl, model = model, mse = mse)) # don't forget to pass the extra metrics as output
})
第3步:将不同组的拟合结果和预测结果汇总在一起
# add groups back + other metrics of interest
lts_all <- lapply(names(lts), function(x, lts) {
output_fit <- lts[[x]][["res_fit_tbl"]] %>%
mutate(group = x,
model = lts[[x]][["model"]],
mse = lts[[x]][["mse"]])
output_fcst <- lts[[x]][["res_fcst_tbl"]] %>%
mutate(group = x)
return(list(output_fit=output_fit, output_fcst=output_fcst))
}, lts)
示例输出:
> lts_all[[1]][["output_fit"]]
# A tibble: 72 x 6
date value residuals CI95_upper CI95_lower group
<date> <dbl> <dbl> <dbl> <dbl> <chr>
1 1973-01-01 8509. 498. 9083. 7936. 1
2 1973-02-01 8006. 99.5 8580. 7433. 1
3 1973-03-01 8864. 64.4 9437. 8290. 1
4 1973-04-01 9152. -15.2 9726. 8579. 1
5 1973-05-01 9939. 77.9 10513. 9365. 1
6 1973-06-01 10435. 391. 11009. 9861. 1
7 1973-07-01 11595. -278. 12168. 11021. 1
8 1973-08-01 10717. 26.9 11291. 10143. 1
9 1973-09-01 9641. 72.4 10214. 9067. 1
10 1973-10-01 10024. -85.7 10597. 9450. 1
# ... with 62 more rows
> lts_all[[2]][["output_fit"]]
# A tibble: 72 x 6
date value residuals CI95_upper CI95_lower group
<date> <dbl> <dbl> <dbl> <dbl> <chr>
1 1973-01-01 8509. 498. 9083. 7936. 2
2 1973-02-01 8006. 99.5 8580. 7433. 2
3 1973-03-01 8864. 64.4 9437. 8290. 2
4 1973-04-01 9152. -15.2 9726. 8579. 2
5 1973-05-01 9939. 77.9 10513. 9365. 2
6 1973-06-01 10435. 391. 11009. 9861. 2
7 1973-07-01 11595. -278. 12168. 11021. 2
8 1973-08-01 10717. 26.9 11291. 10143. 2
9 1973-09-01 9641. 72.4 10214. 9067. 2
10 1973-10-01 10024. -85.7 10597. 9450. 2
# ... with 62 more rows
然后
# bring together the fitted respectively forecasting results
output_fit_all <- lapply(lts_all, function(x) x[[1]])
output_fit_all <- bind_rows(output_fit_all)
output_fcst_all <- lapply(lts_all, function(x) x[[2]])
output_fcst_all <- bind_rows(output_fcst_all)