使用假日作为Auto.ARIMA中的外部回归变量,但产生非常MAPE和错误

时间:2018-07-20 17:46:17

标签: r dataframe arima forecast

我有2年的销售数据,并且我使用美国的特殊假期作为外部回归变量,这些假期都为零,在假期期间填充为1,所以我可以说所有值本身都是零,使用傅立叶变换并将我的数据传递到AUTO.ARIMA以及预测混合模型中。但是,我很不幸,mt错误很高。

这是我的代码和数据。

require("ggplot2")
library("tseries")
library("forecast")
library(chron)
library(Holidays)
library(HolidayCalendars)

setwd("D://Users/Shivam/Desktop/go2venky/")
#Load sales data -- columns ds-date and y-sales volume
sales_data <- read.csv("indirect.csv",header=TRUE, stringsAsFactors = FALSE)
head(sales_data)


#format the date into Date column as we are reading from CSV
sales_data$Date <- as.Date(sales_data$ds)
#ggplot(sales_data, aes(x=Date, y=y))+geom_line() + scale_x_date('month') + ylab ('Daily Sales Volume')

#remove any outliers from the data set
No_outliers <- ts(sales_data[,c('y')])
sales_data$smoothvolume <- tsclean(No_outliers)

#plot the chart
ggplot(sales_data, aes(x=Date, y=smoothvolume)) + geom_line() + 
  scale_x_date('month') + ylab('Daily Sales Volume') + coord_cartesian(ylim=c(0,300)) +
  geom_smooth(method="lm")

#Decompose data and see STL - Season, Trend, Reminder
sales_ma <- ts(na.omit(sales_data$smoothvolume),frequency = 30)
decomp_sales <- stl(sales_ma, s.window = "periodic")
plot(decomp_sales)

adj_sales = seasadj(decomp_sales)

#Build Holidays set to see in Arima
#end <- length(adj_sales[,1])
end <- 895  #count data is hardcoded to number of rows in the data file 

thanksgiving <- rep(0,end)
christmas <- rep(0,end)
newyear <- rep(0,end)
memorial <- rep(0,end)
independence <- rep(0,end)
labor <- rep(0,end)
veterans <- rep(0,end)
goodfriday <- rep(0,end)
easter1 <- rep(0,end)
Product1Launch <- rep(0,end)
year <- rep(0,end)

for (i in 1:end) {
  date <- as.Date(sales_data[i,1],format="%m/%d/%Y")
  year[i] <- as.numeric(format(date, "%Y"))
  thanksgivingday <- holiday(as.numeric(format(date, "%Y")), Holiday="USThanksgivingDay")
  christmasday <- holiday(as.numeric(format(date, "%Y")), Holiday="USChristmasDay")
  newyearday <- holiday(as.numeric(format(date, "%Y")), Holiday="USNewYearsDay")
  memorialday <- holiday(as.numeric(format(date, "%Y")), Holiday="USMemorialDay")
  independenceday <- holiday(as.numeric(format(date, "%Y")), Holiday="USIndependenceDay")
  laborday <- holiday(as.numeric(format(date, "%Y")), Holiday="USLaborDay")
  veteransday <- holiday(as.numeric(format(date, "%Y")), Holiday="USVeteransDay")
  goodfridayday <- holiday(as.numeric(format(date, "%Y")), Holiday="USGoodFriday")
  easterday1 <- holiday(as.numeric(format(date, "%Y")), Holiday="Easter")
  Product1LaunchDay1 <- as.Date('2017-09-14', format="%Y-%m-%d")           #date hardcoded for the year

  #USNewYearsDay, USInaugurationDay, USMLKingsBirthday, USLincolnsBirthday, USWashingtonsBirthday, 
  #USMemorialDay, USIndependenceDay, USLaborDay, USColumbusDay, USElectionDay, USVeteransDay
  #USThanksgivingDay, USChristmasDay, USCPulaskisBirthday, USGoodFriday

  if(as.numeric(date) == as.numeric(thanksgivingday)){thanksgiving[i:(i+4)]<-1} #consider 4 days after Thanksgiving as holiday peak
  if(as.numeric(date) == as.numeric(christmasday)){christmas[(i-10):(i+5)]<-1}  #consider days before and after christmas also as holiday peak
  if(as.numeric(date) == as.numeric(newyearday)){newyear[i]<-1}
  if(as.numeric(date) == as.numeric(memorialday)){memorial[i]<-1}
  if(as.numeric(date) == as.numeric(independenceday)){independence[i]<-1}
  if(as.numeric(date) == as.numeric(laborday)){labor[i]<-1}
  if(as.numeric(date) == as.numeric(veteransday)){veterans[i]<-1}
  if(as.numeric(date) == as.numeric(goodfridayday)){goodfriday[i]<-1}
  if(as.numeric(date) == as.numeric(easterday1)){easter1[i]<-1}
  if(as.numeric(date) == as.numeric(Product1LaunchDay1)){Product1Launch[i]<-1}
}

special_days <- cbind(thanksgiving, christmas, newyear, memorial, independence, labor, veterans, goodfriday, easter1, Product1Launch)


View(special_days)
#-----------

endf <- 5 

thanksgivingf <- rep(0,endf)
christmasf <- rep(0,endf)
newyearf <- rep(0,endf)
memorialf <- rep(0,endf)
independencef <- rep(0,endf)
laborf <- rep(0,endf)
veteransf <- rep(0,endf)
goodfridayf <- rep(0,endf)
easterf <- rep(0,endf)
Product1Launchf <- rep(0,endf)
yearf <- rep(0,endf)

for (i in 1:endf) {
  datef <- as.Date(sales_data[i,1],format="%m/%d/%Y")
  yearf[i] <- as.numeric(format(datef, "%Y"))
  thanksgivingdayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USThanksgivingDay")
  christmasdayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USChristmasDay")
  newyeardayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USNewYearsDay")
  memorialdayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USMemorialDay")
  independencedayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USIndependenceDay")
  labordayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USLaborDay")
  veteransdayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USVeteransDay")
  goodfridaydayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="USGoodFriday")
  easterdayf <- holiday(as.numeric(format(datef, "%Y")), Holiday="Easter")
  Product1LaunchDayf <- as.Date('2017-09-14', format="%Y-%m-%d")           #date hardcoded for the year

  #USNewYearsDay, USInaugurationDay, USMLKingsBirthday, USLincolnsBirthday, USWashingtonsBirthday, 
  #USMemorialDay, USIndependenceDay, USLaborDay, USColumbusDay, USElectionDay, USVeteransDay
  #USThanksgivingDay, USChristmasDay, USCPulaskisBirthday, USGoodFriday.

  if(as.numeric(date) == as.numeric(thanksgivingdayf)){thanksgivingf[i:(i+4)]<-1}
  if(as.numeric(date) == as.numeric(christmasdayf)){christmasf[(i-10):(i+5)]<-1}
  if(as.numeric(date) == as.numeric(newyeardayf)){newyearf[i]<-1}
  if(as.numeric(date) == as.numeric(memorialdayf)){memorialf[i]<-1}
  if(as.numeric(date) == as.numeric(independencedayf)){independencef[i]<-1}
  if(as.numeric(date) == as.numeric(labordayf)){laborf[i]<-1}
  if(as.numeric(date) == as.numeric(veteransdayf)){veteransf[i]<-1}
  if(as.numeric(date) == as.numeric(goodfridaydayf)){goodfridayf[i]<-1}
  if(as.numeric(date) == as.numeric(easterdayf)){easterf[i]<-1}
  if(as.numeric(date) == as.numeric(Product1LaunchDayf)){Product1Launchf[i]<-1}
}

special_daysf <- cbind(thanksgivingf, christmasf, newyearf, memorialf, independencef, laborf, veteransf, goodfridayf, easterf, Product1Launchf)




#Build the Auto Arima with z - full dataset, zf - future for 5 days 

#Auto.Arima with external regression variables (holidays as covariates)
x <- msts(adj_sales,seasonal.periods=c(7,365.25))
z <- fourier(x, K=c(2,5))
zf <- fourierf(x, K=c(2,5), h=5)
fit <- auto.arima(x, xreg=cbind(z,special_days), seasonal=FALSE)
fc <- forecast(fit, xreg=cbind(zf,special_daysf), h=5)
fc
plot(fc)


accuracy(fc)
#                 ME     RMSE      MAE      MPE
# Training set 0.8078107 8610.461 6407.322 79.02104
#                MAPE      MASE        ACF1
# Training set 247.9248 0.4788379 0.003789123



##########################################
####### Forecast Hybrid Model ############
##########################################

library(forecastHybrid)   

# Create the model
hy_model <- hybridModel(x,
                        models = "at",
                        a.args = list(xreg = cbind(z,special_days)))
# Forecast future values
hy_model_fc <- forecast(hy_model, xreg = cbind(zf,special_daysf))
plot(hy_model_fc)
accuracy(hy_model_fc)
#                 ME     RMSE      MAE      MPE     MAPE      MASE
# Training set -175.1741 7948.943 5668.673 31.12881 167.3496 0.4236366
#                 ACF1
# Training set 0.007117223

数据-https://drive.google.com/open?id=1wcKKeldFfrPEOx_6fHf2rMCRGKpPGFPy xreg-火车-https://drive.google.com/open?id=1X39bMZGLWL5L3NrVLTu8JUfEWeGNyXnM xreg-测试-https://drive.google.com/open?id=11IX-VoVV4C_zd8XCCtbNzhY0UVBZmJrt

0 个答案:

没有答案