R:将数据集拆分为组以进行预测

时间:2018-02-11 15:16:07

标签: r time-series forecasting

我有数据集,我必须执行按组分割的每日预测。 该组是客户端+东西

ts <- read.csv("C:/Users/Admin/Desktop/mydat.csv",sep=";", dec=",")

这里是mydat

structure(list(Data = structure(c(1L, 3L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 13L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 32L, 33L, 2L, 4L, 14L, 15L, 16L, 17L, 18L, 19L, 
20L, 21L, 22L, 23L, 24L, 25L, 26L), .Label = c("01.04.2017", 
"01.06.2017", "02.04.2017", "02.06.2017", "03.04.2017", "04.04.2017", 
"05.04.2017", "06.04.2017", "07.04.2017", "08.04.2017", "09.04.2017", 
"10.04.2017", "11.04.2017", "12.05.2017", "13.05.2017", "14.05.2017", 
"15.05.2017", "16.05.2017", "17.05.2017", "18.05.2017", "19.05.2017", 
"20.05.2017", "21.05.2017", "22.05.2017", "23.05.2017", "24.05.2017", 
"25.05.2017", "26.05.2017", "27.05.2017", "28.05.2017", "29.05.2017", 
"30.05.2017", "31.05.2017"), class = "factor"), client = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Horns and hooves", "Kornev & Co."
), class = "factor"), stuff = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("chickens", "hooves", "Oysters"), class = "factor"), 
    Продажи = c(374L, 12L, 120L, 242L, 227L, 268L, 280L, 419L, 
    12L, 172L, 336L, 117L, 108L, 150L, 90L, 117L, 116L, 146L, 
    120L, 211L, 213L, 67L, 146L, 118L, 152L, 122L, 201L, 497L, 
    522L, 65L, 268L, 441L, 247L, 348L, 445L, 477L, 62L, 226L, 
    476L, 306L)), .Names = c("Data", "client", "stuff", "Продажи"
), class = "data.frame", row.names = c(NA, -40L))

当然我可以手动分离三个数据集

horns and hooves + hooves
Horns and hooves + chickens
Kornev & Co. + oysters

但是当我拥有庞大的数据集并且有数百个组时,该怎么做。不要手动拆分。 是否可以将它在R中分成组然后执行预测?

预测代码很简单

第一个我这样做

library(forecast)
library(lubridate)

msts <- msts(ts$sales,seasonal.periods = c(7,365.25),start = decimal_date(as.Date("2017-05-12")))
plot(msts, main="sales", xlab="Year", ylab="sales")

tbats <- tbats(msts)
plot(tbats, main="Multiple Season Decomposition")
sp<- predict(tbats,h=14) #14 days forecast
plot(sp, main = "TBATS Forecast", include=14)
print(sp)

如果结果不适合我,我会通过虚拟变量执行预测

tsw <- ts(ts$Sales, start = decimal_date(as.Date("2017-05-12")), frequency = 7)
View(tsw)
mytslm <- tslm(tsw ~ trend + season)
print(mytslm)


residarima1 <- auto.arima(mytslm$residuals)
residualsArimaForecast <- forecast(residarima1, h=14)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(mytslm,h=14)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF+residualsF
print(forecastR)

1 个答案:

答案 0 :(得分:1)

您可以使用split通过多种因素将数据拆分为组,在这种情况下,列为clientstuff

group_list <- split(mydat, list(mydat$client, mydat$stuff))
group_list <- group_list[sapply(group_list, function(x) nrow(x) != 0)]

然后您可以使用此列表和lapply您想要的任何功能。以下是您执行第一次预测的方法。请注意,我已将预测代码与图表代码分开,并且预测的每个步骤都由一个函数完成,首先应用函数msts并生成此类对象的列表,然后应用函数tbats和制作另一份清单。

fun_msts <- function(ts){
    msts(ts$Sales, seasonal.periods = c(7,365.25), start = decimal_date(as.Date("2017-05-12")))
}

fun_sp <- function(m){
    tbats <- tbats(m)
    predict(tbats, h=14) #14 days forecast
}

msts_list <- lapply(group_list, fun_msts)
sp_list <- lapply(msts_list, fun_sp)

现在,如果您愿意,可以绘制结果。为此,请将另外两个函数定义为lapply ed。

plot_msts <- function(m, new.window = TRUE){
    if(new.window) windows()
    plot(m, main="Sales", xlab="Year", ylab="Sales")
}

plot_sp <- function(sp, new.window = TRUE){
    if(new.window) windows()
    plot(sp, main = "TBATS Forecast", include = 14)
}

lapply(msts_list, plot_msts)
lapply(sp_list, plot_sp)

在这些功能中,打开一个功能为windows的新图形设备。如果您不使用Microsoft Windows或者想要打开其他类型的设备,请更改该说明,但保留if(new.window)

编辑。

对于使用虚拟变量的回归,您可以执行以下操作。

fun_tslm <- function(x, start = "2017-05-12", freq = 7){
    tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
    #View(tsw)
    mytslm <- tslm(tsw ~ trend + season)
    mytslm
}

fun_forecast <- function(x, h = 14){
    residarima1 <- auto.arima(x[["residuals"]])
    residualsArimaForecast <- forecast(residarima1, h = h)
    residualsF <- as.numeric(residualsArimaForecast$mean)
    regressionForecast <- forecast(x, h = h)
    regressionF <- as.numeric(regressionForecast$mean)
    forecastR <- regressionF + residualsF
    forecastR
}

tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)