我有两年的每日销售数据,并使用美国的特殊假期作为外部回归变量:这些变量在假期期间取 1,其余日期取 0,因此绝大多数取值都是 0。我先为数据生成傅立叶项,再把数据传入 auto.arima 以及 forecastHybrid 混合预测模型。但遗憾的是,我得到的误差很高(见下方输出中的 MAPE)。
这是我的代码和数据。
require("ggplot2")
library("tseries")
library("forecast")
library(chron)
library(Holidays)
library(HolidayCalendars)
# Load the raw sales history from CSV and add a parsed Date column.
# NOTE(review): the working directory below is machine-specific; adjust it
# before running this script elsewhere.
setwd("D://Users/Shivam/Desktop/go2venky/")
#Load sales data -- columns ds-date and y-sales volume
sales_data <- read.csv("indirect.csv", header = TRUE, stringsAsFactors = FALSE)
head(sales_data)
#format the date into Date column as we are reading from CSV
# Try month/day/year first, then the ISO layouts. A bare as.Date() only tries
# the ISO layouts, so m/d/Y strings either fail or are silently mis-read
# (e.g. "1/2/2016" is parsed as a date in year 1).
sales_data$Date <- as.Date(
  sales_data$ds,
  tryFormats = c("%m/%d/%Y", "%Y-%m-%d", "%Y/%m/%d")
)
#ggplot(sales_data, aes(x=Date, y=y))+geom_line() + scale_x_date('month') + ylab ('Daily Sales Volume')
# Clean the sales series: wrap the y column in a ts object and pass it through
# tsclean(); the result is stored as the smoothvolume column.
# Despite its name, No_outliers holds the series *before* cleaning.
No_outliers <- ts(sales_data[["y"]])
sales_data[["smoothvolume"]] <- tsclean(No_outliers)
# Line chart of the cleaned series with a fitted linear trend on top; the
# visible y range is limited to 0-300 via coord_cartesian().
sales_plot <- ggplot(sales_data, aes(x = Date, y = smoothvolume)) +
  geom_line() +
  geom_smooth(method = "lm") +
  scale_x_date("month") +
  ylab("Daily Sales Volume") +
  coord_cartesian(ylim = c(0, 300))
sales_plot
# STL decomposition (seasonal / trend / remainder) of the cleaned series,
# declared with a seasonal period of 30 observations.
clean_volume <- na.omit(sales_data[["smoothvolume"]])
sales_ma <- ts(clean_volume, frequency = 30)
decomp_sales <- stl(sales_ma, s.window = "periodic")
plot(decomp_sales)
# Seasonally adjusted series; this is what the models later in the script fit.
adj_sales <- seasadj(decomp_sales)
#Build Holidays set to see in Arima
# Builds one 0/1 indicator vector per special day, with one entry per row of
# sales_data, and binds them column-wise into the `special_days` matrix.
# NOTE(review): holiday() is not defined in this script; the call shape
# matches timeDate::holiday(year, Holiday) -- confirm that package is attached.

# One entry per observation. Taken from the data instead of a literal row
# count so the regressors cannot drift out of sync with the CSV.
end <- nrow(sales_data)
thanksgiving <- rep(0, end)
christmas <- rep(0, end)
newyear <- rep(0, end)
memorial <- rep(0, end)
independence <- rep(0, end)
labor <- rep(0, end)
veterans <- rep(0, end)
goodfriday <- rep(0, end)
easter1 <- rep(0, end)
Product1Launch <- rep(0, end)

# Dates come from the first column of sales_data, parsed once up front.
# Month/day/year is tried first, then the ISO layouts.
dates <- as.Date(
  sales_data[[1]],
  tryFormats = c("%m/%d/%Y", "%Y-%m-%d", "%Y/%m/%d")
)
if (anyNA(dates)) {
  # Fail with a clear message instead of an obscure error inside if().
  stop("sales_data contains dates that could not be parsed", call. = FALSE)
}
year <- as.numeric(format(dates, "%Y"))
Product1LaunchDay1 <- as.Date("2017-09-14", format = "%Y-%m-%d") #date hardcoded for the year

# Set flags[(i - before):(i + after)] to 1, clipped to the bounds of `flags`.
# Clipping matters near either end of the data: an unclipped range would mix
# negative and positive subscripts (an error) or silently lengthen the vector,
# which would break the later cbind().
mark_window <- function(flags, i, before = 0, after = 0) {
  lo <- max(1, i - before)
  hi <- min(length(flags), i + after)
  flags[lo:hi] <- 1
  flags
}

# TRUE when `day` is the holiday `name` in year `yr`. Both sides are compared
# through as.numeric(), as this script has always done.
is_holiday <- function(day, yr, name) {
  as.numeric(day) == as.numeric(holiday(yr, Holiday = name))
}

#USNewYearsDay, USInaugurationDay, USMLKingsBirthday, USLincolnsBirthday, USWashingtonsBirthday,
#USMemorialDay, USIndependenceDay, USLaborDay, USColumbusDay, USElectionDay, USVeteransDay
#USThanksgivingDay, USChristmasDay, USCPulaskisBirthday, USGoodFriday
for (i in seq_len(end)) {
  # `date` stays a top-level variable under its original name (it shadows
  # base::date(), but the script treats it as a global).
  date <- dates[i]
  yr <- year[i]
  if (is_holiday(date, yr, "USThanksgivingDay")) {
    #consider 4 days after Thanksgiving as holiday peak
    thanksgiving <- mark_window(thanksgiving, i, after = 4)
  }
  if (is_holiday(date, yr, "USChristmasDay")) {
    #consider days before and after christmas also as holiday peak
    christmas <- mark_window(christmas, i, before = 10, after = 5)
  }
  if (is_holiday(date, yr, "USNewYearsDay")) {
    newyear[i] <- 1
  }
  if (is_holiday(date, yr, "USMemorialDay")) {
    memorial[i] <- 1
  }
  if (is_holiday(date, yr, "USIndependenceDay")) {
    independence[i] <- 1
  }
  if (is_holiday(date, yr, "USLaborDay")) {
    labor[i] <- 1
  }
  if (is_holiday(date, yr, "USVeteransDay")) {
    veterans[i] <- 1
  }
  if (is_holiday(date, yr, "USGoodFriday")) {
    goodfriday[i] <- 1
  }
  if (is_holiday(date, yr, "Easter")) {
    easter1[i] <- 1
  }
  if (date == Product1LaunchDay1) {
    Product1Launch[i] <- 1
  }
}
special_days <- cbind(thanksgiving, christmas, newyear, memorial, independence, labor, veterans, goodfriday, easter1, Product1Launch)
# The data viewer is only useful (and only reliably available) interactively.
if (interactive()) {
  View(special_days)
}
#-----------
# Holiday indicators for the forecast horizon: the `endf` days that follow the
# last row of sales_data. The result, `special_daysf`, has one row per future
# day and one 0/1 column per special day.
endf <- 5
thanksgivingf <- rep(0, endf)
christmasf <- rep(0, endf)
newyearf <- rep(0, endf)
memorialf <- rep(0, endf)
independencef <- rep(0, endf)
laborf <- rep(0, endf)
veteransf <- rep(0, endf)
goodfridayf <- rep(0, endf)
easterf <- rep(0, endf)
Product1Launchf <- rep(0, endf)

# The horizon starts the day after the last observation. Reading rows 1..endf
# of sales_data would describe the START of the history, not the future.
# Assumes one row per calendar day in ascending order -- TODO confirm.
hist_dates <- as.Date(
  sales_data[[1]],
  tryFormats = c("%m/%d/%Y", "%Y-%m-%d", "%Y/%m/%d")
)
last_obs <- hist_dates[length(hist_dates)]
if (length(last_obs) != 1 || is.na(last_obs)) {
  stop("cannot determine the last observed date in sales_data", call. = FALSE)
}
future_dates <- last_obs + seq_len(endf)
yearf <- as.numeric(format(future_dates, "%Y"))
Product1LaunchDayf <- as.Date("2017-09-14", format = "%Y-%m-%d") #date hardcoded for the year

# TRUE when `day` lies between `before` days ahead of and `after` days past
# the holiday `name` of year `yr`. Testing each future day by its offset keeps
# every indicator exactly `endf` long and still flags days that fall inside a
# holiday window whose central day lies outside the horizon.
# NOTE(review): holiday() is not defined in this script; the call shape
# matches timeDate::holiday(year, Holiday) -- confirm that package is attached.
near_holiday <- function(day, yr, name, before = 0, after = 0) {
  offset <- as.vector(as.numeric(day) - as.numeric(holiday(yr, Holiday = name)))
  offset >= -before && offset <= after
}

#USNewYearsDay, USInaugurationDay, USMLKingsBirthday, USLincolnsBirthday, USWashingtonsBirthday,
#USMemorialDay, USIndependenceDay, USLaborDay, USColumbusDay, USElectionDay, USVeteransDay
#USThanksgivingDay, USChristmasDay, USCPulaskisBirthday, USGoodFriday.
for (i in seq_len(endf)) {
  # Compare the FUTURE date; a leftover variable from another loop must not
  # leak into these tests.
  datef <- future_dates[i]
  yrf <- yearf[i]
  # Thanksgiving day plus the 4 days after it.
  if (near_holiday(datef, yrf, "USThanksgivingDay", after = 4)) {
    thanksgivingf[i] <- 1
  }
  # 10 days before Christmas through 5 days after it.
  if (near_holiday(datef, yrf, "USChristmasDay", before = 10, after = 5)) {
    christmasf[i] <- 1
  }
  if (near_holiday(datef, yrf, "USNewYearsDay")) {
    newyearf[i] <- 1
  }
  if (near_holiday(datef, yrf, "USMemorialDay")) {
    memorialf[i] <- 1
  }
  if (near_holiday(datef, yrf, "USIndependenceDay")) {
    independencef[i] <- 1
  }
  if (near_holiday(datef, yrf, "USLaborDay")) {
    laborf[i] <- 1
  }
  if (near_holiday(datef, yrf, "USVeteransDay")) {
    veteransf[i] <- 1
  }
  if (near_holiday(datef, yrf, "USGoodFriday")) {
    goodfridayf[i] <- 1
  }
  if (near_holiday(datef, yrf, "Easter")) {
    easterf[i] <- 1
  }
  if (datef == Product1LaunchDayf) {
    Product1Launchf[i] <- 1
  }
}
special_daysf <- cbind(thanksgivingf, christmasf, newyearf, memorialf, independencef, laborf, veteransf, goodfridayf, easterf, Product1Launchf)
#Build the Auto Arima with z - full dataset, zf - future regressors
#Auto.Arima with external regression variables (holidays as covariates)
# The seasonally adjusted series is declared with two seasonal periods
# (7 and 365.25); seasonality is modelled with Fourier terms passed as xreg,
# which is why auto.arima() is called with seasonal = FALSE.
x <- msts(adj_sales, seasonal.periods = c(7, 365.25))
# Fourier terms for the training window: K = 2 for period 7, K = 5 for 365.25.
z <- fourier(x, K = c(2, 5))
# Horizon length follows the future holiday matrix so the two xreg parts
# always have the same number of rows.
horizon <- nrow(special_daysf)
# fourier(..., h = ) generates the terms for the forecast horizon; it replaces
# the deprecated fourierf().
zf <- fourier(x, K = c(2, 5), h = horizon)
fit <- auto.arima(x, xreg = cbind(z, special_days), seasonal = FALSE)
fc <- forecast(fit, xreg = cbind(zf, special_daysf), h = horizon)
fc
plot(fc)
# With no test data supplied, the pasted output below contains training-set
# measures only.
accuracy(fc)
# ME RMSE MAE MPE
# Training set 0.8078107 8610.461 6407.322 79.02104
# MAPE MASE ACF1
# Training set 247.9248 0.4788379 0.003789123
##########################################
####### Forecast Hybrid Model ############
##########################################
library(forecastHybrid)
# Regressor matrices: Fourier terms side by side with the holiday indicators,
# once for the training window and once for the forecast horizon.
train_xreg <- cbind(z, special_days)
future_xreg <- cbind(zf, special_daysf)
# Create the model
# models = "at" selects the component models; a.args forwards the training
# regressors to the component that a.args configures.
hy_model <- hybridModel(
  x,
  models = "at",
  a.args = list(xreg = train_xreg)
)
# Forecast future values
hy_model_fc <- forecast(hy_model, xreg = future_xreg)
plot(hy_model_fc)
accuracy(hy_model_fc)
# ME RMSE MAE MPE MAPE MASE
# Training set -175.1741 7948.943 5668.673 31.12881 167.3496 0.4236366
# ACF1
# Training set 0.007117223
数据:https://drive.google.com/open?id=1wcKKeldFfrPEOx_6fHf2rMCRGKpPGFPy xreg(训练集):https://drive.google.com/open?id=1X39bMZGLWL5L3NrVLTu8JUfEWeGNyXnM xreg(测试集):https://drive.google.com/open?id=11IX-VoVV4C_zd8XCCtbNzhY0UVBZmJrt