我有数据集,我必须执行按组分割的每日预测。 该组是客户端+东西
ts <- read.csv("C:/Users/Admin/Desktop/mydat.csv",sep=";", dec=",")
这里是mydat
structure(list(Data = structure(c(1L, 3L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 2L, 4L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L), .Label = c("01.04.2017",
"01.06.2017", "02.04.2017", "02.06.2017", "03.04.2017", "04.04.2017",
"05.04.2017", "06.04.2017", "07.04.2017", "08.04.2017", "09.04.2017",
"10.04.2017", "11.04.2017", "12.05.2017", "13.05.2017", "14.05.2017",
"15.05.2017", "16.05.2017", "17.05.2017", "18.05.2017", "19.05.2017",
"20.05.2017", "21.05.2017", "22.05.2017", "23.05.2017", "24.05.2017",
"25.05.2017", "26.05.2017", "27.05.2017", "28.05.2017", "29.05.2017",
"30.05.2017", "31.05.2017"), class = "factor"), client = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Horns and hooves", "Kornev & Co."
), class = "factor"), stuff = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("chickens", "hooves", "Oysters"), class = "factor"),
Продажи = c(374L, 12L, 120L, 242L, 227L, 268L, 280L, 419L,
12L, 172L, 336L, 117L, 108L, 150L, 90L, 117L, 116L, 146L,
120L, 211L, 213L, 67L, 146L, 118L, 152L, 122L, 201L, 497L,
522L, 65L, 268L, 441L, 247L, 348L, 445L, 477L, 62L, 226L,
476L, 306L)), .Names = c("Data", "client", "stuff", "Продажи"
), class = "data.frame", row.names = c(NA, -40L))
当然我可以手动分离三个数据集
horns and hooves + hooves
Horns and hooves + chickens
Kornev & Co. + oysters
但是当我拥有庞大的数据集并且有数百个组时,该怎么做。不要手动拆分。 是否可以将它在R中分成组然后执行预测?
预测代码很简单
第一个我这样做
library(forecast)
library(lubridate)
msts <- msts(ts$sales,seasonal.periods = c(7,365.25),start = decimal_date(as.Date("2017-05-12")))
plot(msts, main="sales", xlab="Year", ylab="sales")
tbats <- tbats(msts)
plot(tbats, main="Multiple Season Decomposition")
sp<- predict(tbats,h=14) #14 days forecast
plot(sp, main = "TBATS Forecast", include=14)
print(sp)
如果结果不适合我,我会通过虚拟变量执行预测
tsw <- ts(ts$Sales, start = decimal_date(as.Date("2017-05-12")), frequency = 7)
View(tsw)
mytslm <- tslm(tsw ~ trend + season)
print(mytslm)
residarima1 <- auto.arima(mytslm$residuals)
residualsArimaForecast <- forecast(residarima1, h=14)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(mytslm,h=14)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF+residualsF
print(forecastR)
答案 0 :(得分:1)
您可以使用split
通过多种因素将数据拆分为组,在这种情况下,列为client
和stuff
。
group_list <- split(mydat, list(mydat$client, mydat$stuff))
group_list <- group_list[sapply(group_list, function(x) nrow(x) != 0)]
然后您可以使用此列表和lapply
您想要的任何功能。以下是您执行第一次预测的方法。请注意,我已将预测代码与图表代码分开,并且预测的每个步骤都由一个函数完成,首先应用函数msts
并生成此类对象的列表,然后应用函数tbats
和制作另一份清单。
fun_msts <- function(ts){
msts(ts$Sales, seasonal.periods = c(7,365.25), start = decimal_date(as.Date("2017-05-12")))
}
fun_sp <- function(m){
tbats <- tbats(m)
predict(tbats, h=14) #14 days forecast
}
msts_list <- lapply(group_list, fun_msts)
sp_list <- lapply(msts_list, fun_sp)
现在,如果您愿意,可以绘制结果。为此,请将另外两个函数定义为lapply
ed。
plot_msts <- function(m, new.window = TRUE){
if(new.window) windows()
plot(m, main="Sales", xlab="Year", ylab="Sales")
}
plot_sp <- function(sp, new.window = TRUE){
if(new.window) windows()
plot(sp, main = "TBATS Forecast", include = 14)
}
lapply(msts_list, plot_msts)
lapply(sp_list, plot_sp)
在这些功能中,打开一个功能为windows
的新图形设备。如果您不使用Microsoft Windows或者想要打开其他类型的设备,请更改该说明,但保留if(new.window)
。
编辑。
对于使用虚拟变量的回归,您可以执行以下操作。
fun_tslm <- function(x, start = "2017-05-12", freq = 7){
tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
#View(tsw)
mytslm <- tslm(tsw ~ trend + season)
mytslm
}
fun_forecast <- function(x, h = 14){
residarima1 <- auto.arima(x[["residuals"]])
residualsArimaForecast <- forecast(residarima1, h = h)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(x, h = h)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF + residualsF
forecastR
}
tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)