我有这个数据集。我必须使用商店的数据进行六个星期的预测。使用训练和测试样本。我可以执行的预测,但是我需要 可视化。在这里,我的部分数据集可以训练和测试样本(如果需要)
combinedTrainingData=structure(list(id = 1:19, Store = 1:19, DayOfWeek = c(5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "30.01.2015", class = "factor"),
Sales = c(5577L, 5919L, 6911L, 13307L, 5640L, 6555L, 11430L,
6401L, 8072L, 6350L, 10031L, 9156L, 7004L, 6491L, 8898L,
9546L, 7929L, 9941L, 7121L), Open = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
Promo = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), StateHoliday = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), SchoolHoliday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), WeekOfYear = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L), Weekend = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DateDiff = c(65L,
65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L,
65L, 65L, 65L, 65L, 65L, 65L)), .Names = c("id", "Store",
"DayOfWeek", "Date", "Sales", "Open", "Promo", "StateHoliday",
"SchoolHoliday", "WeekOfYear", "Weekend", "DateDiff"), class = "data.frame", row.names = c(NA,
-19L))
现在的测试样品
testingData=structure(list(Id = 1:20, Store = 1:20, DayOfWeek = c(5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "31.07.2015", class = "factor"),
Open = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Promo = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), StateHoliday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), SchoolHoliday = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 0L), WeekOfYear = c(31L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L,
31L, 31L, 31L), Weekend = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DateDiff = c(29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L)), .Names = c("Id", "Store",
"DayOfWeek", "Date", "Open", "Promo", "StateHoliday", "SchoolHoliday",
"WeekOfYear", "Weekend", "DateDiff"), class = "data.frame", row.names = c(NA,
-20L))
这是预测
regularSales <- combinedTrainingData[combinedTrainingData$Promo == 0 &
combinedTrainingData$Open == 1, ]
testingForecast <- testingData
for(store in storeData$Store) {
coeff <- lm(data = regularSales[regularSales$Store == store, ],
Sales ~ DateDiff)$coefficients
storeData[storeData$Store == store, 'reg_intercept'] <- coeff[1]
storeData[storeData$Store == store, 'reg_slope'] <- coeff[2]
combinedTrainingData[combinedTrainingData$Store == store, 'LinearRegressionForecast'] <-
coeff[1] + coeff[2] * combinedTrainingData[combinedTrainingData$Store == store, 'DateDiff']
testingForecast[testingForecast$Store == store, 'LinearRegressionForecast'] <-
coeff[1] + coeff[2] * testingForecast[testingForecast$Store == store, 'DateDiff']
}
predictors <- c('Store', 'WeekOfYear', 'DayOfWeek', 'Promo')
modelForecast <- combinedTrainingData[combinedTrainingData$Open == 1, ] %>%
group_by_(.dots=predictors) %>%
summarize(salesMinusForecast=mean(Sales - LinearRegressionForecast)) %>%
ungroup()
testingForecast <- testingForecast %>%
left_join(modelForecast, by=predictors) %>%
mutate(Sales=salesMinusForecast + LinearRegressionForecast) %>%
select(Id, Store, DayOfWeek, Date, Sales, Open, Promo, StateHoliday,
SchoolHoliday, WeekOfYear, Weekend, DateDiff, LinearRegressionForecast)
testingForecast[!is.na(testingForecast$Open) & testingForecast$Open == 0, 'Sales'] <- 0.0
index <- which(is.na(testingForecast$Sales))
for(i in index) {
iStore <- testingForecast[i, 'Store']
iWeekOfYear <- testingForecast[i, 'WeekOfYear']
iDayOfWeek <- testingForecast[i, 'DayOfWeek']
# 1 - Check to see if we have data for a previous day
iDayOfWeek <- ifelse(iDayOfWeek %in% 2:5, iDayOfWeek - 1, iDayOfWeek)
match = filter(modelForecast,
Store == iStore &
WeekOfYear == iWeekOfYear &
DayOfWeek == iDayOfWeek )
if(dim(match)[1] <= 0)
{
iDayOfWeek <- testingForecast[i, 'DayOfWeek']
# 2 - Check to see if we have data for a previous day
iDayOfWeek <- ifelse(iDayOfWeek %in% 1:4, iDayOfWeek + 1, iDayOfWeek)
match = filter(modelForecast,
Store == iStore &
WeekOfYear == iWeekOfYear &
DayOfWeek == iDayOfWeek )
}
iDayOfWeek <- testingForecast[i, 'DayOfWeek']
if(dim(match)[1] <= 0)
{
# 3 - Check to see if we have data for a previous Week
iWeekOfYear <- ifelse(iWeekOfYear > 1, iWeekOfYear - 1, iWeekOfYear)
match = filter(modelForecast,
Store == iStore &
WeekOfYear == iWeekOfYear &
DayOfWeek == iDayOfWeek )
}
iWeekOfYear <- testingForecast[i, 'WeekOfYear']
if(dim(match)[1] <= 0)
{
# 4 - Check to see if we have data for a next Week
iWeekOfYear <- ifelse(iWeekOfYear < 51, iWeekOfYear + 1, iWeekOfYear)
match = filter(modelForecast,
Store == iStore &
WeekOfYear == iWeekOfYear &
DayOfWeek == iDayOfWeek )
}
iWeekOfYear <- testingForecast[i, 'WeekOfYear']
if(dim(match)[1] <= 0)
{
# 5 - Check to see if we have data for two weeks ago
iWeekOfYear <- ifelse(iWeekOfYear > 2, iWeekOfYear - 2, iWeekOfYear)
match = filter(modelForecast,
Store == iStore &
WeekOfYear == iWeekOfYear &
DayOfWeek == iDayOfWeek )
}
iWeekOfYear <- testingForecast[i, 'WeekOfYear']
if(dim(match)[1] <= 0)
{
# 6 - Check to see if we have data for two Weeks later
iWeekOfYear <- ifelse(iWeekOfYear < 50, iWeekOfYear + 2, iWeekOfYear)
match = filter(modelForecast,
Store == iStore &
WeekOfYear == iWeekOfYear &
DayOfWeek == iDayOfWeek )
}
iWeekOfYear <- testingForecast[i, 'WeekOfYear']
if(dim(match)[1] > 0)
{
testingForecast[i, 'Sales'] <-
match[1, 'salesMinusForecast'] +
testingForecast[i, 'LinearRegressionForecast']
if(match[1, 'Promo'] == 0){
testingForecast[i, 'Sales'] <-
testingForecast[i, 'Sales'] *
avgSalesRatios[avgSalesRatios$Store == iStore, 'Ratio']
}
}
}
combinedTrainingTestingData <- rbind(combinedTrainingData[, c(1:4, 6:15)],
testingForecast[, 2:15])
combinedTrainingTestingData[combinedTrainingTestingData$Forecast == 1,
'Type'] <- "Forecast"
combinedTrainingTestingData[combinedTrainingTestingData$Imputed == 0 &
combinedTrainingTestingData$Forecast == 0,
'Type'] <- "Observed"
finalForecast <- data.frame(Id=testingForecast$Id, Sales=testingForecast$Sales)
比我想创建的预测图
# Convert finalForecast from list to data frame object
df1 <- fortify(finalForecast) %>% as_tibble()
# Create Date column, remove Index column and rename other columns
df1 %<>%
mutate(Date = as.Date(Index, "%Y-%m-%d")) %>%
select(-Index) %>%
rename("Low95" = "Lo 95",
"Low80" = "Lo 80",
"High95" = "Hi 95",
"High80" = "Hi 80",
"Forecast" = "Point Forecast")
df1
### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData,
!(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]
#To obtain a complex graph with overlapping of the forecast value of the time series by the initial values
ggplot(df1, aes(x = Date)) +
geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
geom_point(aes(y = Data, colour = "Data"), size = 4) +
geom_line(aes(y = Data, group = 1, colour = "Data"),
linetype = "dotted", size = 0.75) +
geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
scale_fill_brewer(name = "Intervals") +
guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
theme_bw(base_size = 42)
因此,我要如何保存每个商店的预测图(以这种格式,上面表示的图代码),并保存在pdf文件中。因此,作为输出,我必须具有包含1115个预测图的pdf文件(即每个存储区都有自己的图)
答案 0 :(得分:1)
我无法运行您的代码,因此这里是通用答案:
# Compute your forecasts by store
forecasts <- list()
# Create PDF
pdf(file = path_fo_file, width = your_width, height = your_height)
# Iterate over your forcasts
for (f in forecasts) {
# Plot forecast f
pl <- ggplot(f)
# Print forecast to new page in PDF file
print(pl)
}
# Cloe file connection
dev.off()