执行定制的预测图以将其保存在R中的pdf文件中

时间:2018-07-14 16:02:21

标签: r ggplot2 dplyr lm

我有这个数据集。我必须使用商店的数据进行六个星期的预测。使用训练和测试样本。我可以执行的预测,但是我需要 可视化。在这里,我的部分数据集可以训练和测试样本(如果需要)

combinedTrainingData=structure(list(id = 1:19, Store = 1:19, DayOfWeek = c(5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "30.01.2015", class = "factor"), 
    Sales = c(5577L, 5919L, 6911L, 13307L, 5640L, 6555L, 11430L, 
    6401L, 8072L, 6350L, 10031L, 9156L, 7004L, 6491L, 8898L, 
    9546L, 7929L, 9941L, 7121L), Open = c(1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
    Promo = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), StateHoliday = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L), SchoolHoliday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), WeekOfYear = c(5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L), Weekend = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DateDiff = c(65L, 
    65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 
    65L, 65L, 65L, 65L, 65L, 65L)), .Names = c("id", "Store", 
"DayOfWeek", "Date", "Sales", "Open", "Promo", "StateHoliday", 
"SchoolHoliday", "WeekOfYear", "Weekend", "DateDiff"), class = "data.frame", row.names = c(NA, 
-19L))

现在的测试样品

testingData=structure(list(Id = 1:20, Store = 1:20, DayOfWeek = c(5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "31.07.2015", class = "factor"), 
    Open = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Promo = c(1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), StateHoliday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), SchoolHoliday = c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 
    1L, 1L, 1L, 0L), WeekOfYear = c(31L, 31L, 31L, 31L, 31L, 
    31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 
    31L, 31L, 31L), Weekend = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DateDiff = c(29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
    29L, 29L, 29L, 29L, 29L, 29L, 29L)), .Names = c("Id", "Store", 
"DayOfWeek", "Date", "Open", "Promo", "StateHoliday", "SchoolHoliday", 
"WeekOfYear", "Weekend", "DateDiff"), class = "data.frame", row.names = c(NA, 
-20L))

这是预测

regularSales <- combinedTrainingData[combinedTrainingData$Promo == 0 & 
                                       combinedTrainingData$Open == 1, ]

testingForecast <- testingData


for(store in storeData$Store) {
  coeff <- lm(data = regularSales[regularSales$Store == store, ], 
              Sales ~ DateDiff)$coefficients

  storeData[storeData$Store == store, 'reg_intercept'] <- coeff[1]
  storeData[storeData$Store == store, 'reg_slope'] <- coeff[2]

  combinedTrainingData[combinedTrainingData$Store == store, 'LinearRegressionForecast'] <- 
    coeff[1] + coeff[2] * combinedTrainingData[combinedTrainingData$Store == store, 'DateDiff']

  testingForecast[testingForecast$Store == store, 'LinearRegressionForecast'] <- 
    coeff[1] + coeff[2] * testingForecast[testingForecast$Store == store, 'DateDiff']
}

predictors <- c('Store', 'WeekOfYear', 'DayOfWeek', 'Promo')
modelForecast <- combinedTrainingData[combinedTrainingData$Open == 1, ] %>% 
                  group_by_(.dots=predictors) %>% 
                  summarize(salesMinusForecast=mean(Sales - LinearRegressionForecast)) %>% 
                  ungroup()

testingForecast <-  testingForecast %>% 
                left_join(modelForecast, by=predictors) %>% 
                mutate(Sales=salesMinusForecast + LinearRegressionForecast) %>% 
                select(Id, Store, DayOfWeek, Date, Sales, Open, Promo, StateHoliday, 
                       SchoolHoliday, WeekOfYear, Weekend, DateDiff, LinearRegressionForecast)


testingForecast[!is.na(testingForecast$Open) & testingForecast$Open == 0, 'Sales'] <- 0.0



index <- which(is.na(testingForecast$Sales))

for(i in index) {
  iStore <- testingForecast[i, 'Store']
  iWeekOfYear <- testingForecast[i, 'WeekOfYear']
  iDayOfWeek <- testingForecast[i, 'DayOfWeek']

  # 1 - Check to see if we have data for a previous day
  iDayOfWeek <- ifelse(iDayOfWeek %in% 2:5, iDayOfWeek - 1, iDayOfWeek)

  match = filter(modelForecast, 
                 Store == iStore & 
                  WeekOfYear == iWeekOfYear & 
                  DayOfWeek == iDayOfWeek )

  if(dim(match)[1] <= 0)
  {
    iDayOfWeek <- testingForecast[i, 'DayOfWeek']

    # 2 - Check to see if we have data for a previous day
    iDayOfWeek <- ifelse(iDayOfWeek %in% 1:4, iDayOfWeek + 1, iDayOfWeek)

    match = filter(modelForecast, 
                   Store == iStore & 
                    WeekOfYear == iWeekOfYear & 
                    DayOfWeek == iDayOfWeek )
  }

  iDayOfWeek <- testingForecast[i, 'DayOfWeek']

  if(dim(match)[1] <= 0)
  {
    # 3 - Check to see if we have data for a previous Week
    iWeekOfYear <- ifelse(iWeekOfYear > 1, iWeekOfYear - 1, iWeekOfYear)

    match = filter(modelForecast, 
                   Store == iStore & 
                    WeekOfYear == iWeekOfYear & 
                    DayOfWeek == iDayOfWeek )
  }

  iWeekOfYear <- testingForecast[i, 'WeekOfYear']  

  if(dim(match)[1] <= 0)
  {
    # 4 - Check to see if we have data for a next Week
    iWeekOfYear <- ifelse(iWeekOfYear < 51, iWeekOfYear + 1, iWeekOfYear)

    match = filter(modelForecast, 
                   Store == iStore & 
                    WeekOfYear == iWeekOfYear & 
                    DayOfWeek == iDayOfWeek )
  } 

  iWeekOfYear <- testingForecast[i, 'WeekOfYear']

  if(dim(match)[1] <= 0)
  {
    # 5 - Check to see if we have data for two weeks ago
    iWeekOfYear <- ifelse(iWeekOfYear > 2, iWeekOfYear - 2, iWeekOfYear)

    match = filter(modelForecast, 
                   Store == iStore & 
                    WeekOfYear == iWeekOfYear & 
                    DayOfWeek == iDayOfWeek )
  }
  iWeekOfYear <- testingForecast[i, 'WeekOfYear']

  if(dim(match)[1] <= 0)
  {
    # 6 - Check to see if we have data for two Weeks later
    iWeekOfYear <- ifelse(iWeekOfYear < 50, iWeekOfYear + 2, iWeekOfYear)

    match = filter(modelForecast, 
                   Store == iStore & 
                    WeekOfYear == iWeekOfYear & 
                    DayOfWeek == iDayOfWeek )
  }     
  iWeekOfYear <- testingForecast[i, 'WeekOfYear']

  if(dim(match)[1] > 0)
  {
    testingForecast[i, 'Sales'] <- 
      match[1, 'salesMinusForecast'] + 
      testingForecast[i, 'LinearRegressionForecast']

    if(match[1, 'Promo'] == 0){
      testingForecast[i, 'Sales'] <- 
        testingForecast[i, 'Sales'] * 
        avgSalesRatios[avgSalesRatios$Store == iStore, 'Ratio']
    }
  }
}


combinedTrainingTestingData <- rbind(combinedTrainingData[, c(1:4, 6:15)], 
                                     testingForecast[, 2:15])


combinedTrainingTestingData[combinedTrainingTestingData$Forecast == 1, 
                            'Type'] <- "Forecast"

combinedTrainingTestingData[combinedTrainingTestingData$Imputed == 0 & 
                              combinedTrainingTestingData$Forecast == 0, 
                            'Type'] <- "Observed"


finalForecast <- data.frame(Id=testingForecast$Id, Sales=testingForecast$Sales) 

比我想创建的预测图

# Convert finalForecast from list to data frame object
df1 <- fortify(finalForecast) %>% as_tibble()

# Create Date column, remove Index column and rename other columns 
df1 %<>% 
  mutate(Date = as.Date(Index, "%Y-%m-%d")) %>% 
  select(-Index) %>% 
  rename("Low95" = "Lo 95",
         "Low80" = "Lo 80",
         "High95" = "Hi 95",
         "High80" = "Hi 80",
         "Forecast" = "Point Forecast")
df1

### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData, 
    !(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]


#To obtain a complex graph with overlapping of the forecast value of the time series by the initial values
ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 42)

因此,我要如何保存每个商店的预测图(以这种格式,上面表示的图代码),并保存在pdf文件中。因此,作为输出,我必须具有包含1115个预测图的pdf文件(即每个存储区都有自己的图)

1 个答案:

答案 0 :(得分:1)

我无法运行您的代码,因此这里是通用答案:

# Compute your forecasts by store
forecasts <- list()


# Create PDF 

pdf(file = path_fo_file, width = your_width, height = your_height)

  # Iterate over your forcasts

for (f in forecasts) {

    # Plot forecast f
    pl <- ggplot(f)

    # Print forecast to new page in PDF file
    print(pl)
  }

# Cloe file connection
dev.off()