R:循环遍历 Excel 文件中的各列进行 Holt-Winters 预测,并在各自独立的 Excel 工作表中输出结果(预测值和图表)

时间:2015-08-21 21:35:22

标签: r excel for-loop forecasting holtwinters

我有一个 Excel 文件(参见最左边的图像),它有两列:A 列是从 2005 年 1 月到 2014 年 12 月的期间值,B 列是 AA15 的权重值。我想对 AA15 这一列做 Holt-Winters 预测(未来 24 个月),并生成一个 Excel 文件作为输出(参见中间的图像),其中包含预测值以及图表。

R 代码让用户选择一个 .csv 文件,该文件包含用于执行 Holt-Winters 预测的输入数据。

(图片说明:Image containing input excel sheet, output excel sheet and final excel file for which looping is required in different columns)

适用于AA15 Holt Winter预测的R代码:

# Install the packages : tseries, forecast, WriteXLS, ggplot2, reshape2


library(tseries)
library(forecast)
library(WriteXLS)

# Let user select the excel (.csv) file for importing the input data
# (file.choose() is interactive; header = TRUE is spelled out because T can be reassigned)

AA15file <- read.csv(file.choose(), header = TRUE)

# convert to time series
# The second column is used as the series: monthly values, Jan 2005 - Dec 2014.
AA15 <- ts(AA15file[, 2], start = c(2005, 1), end = c(2014, 12), frequency = 12)
AA15

# convert to Holt Winter
# Multiplicative seasonality; optim.start gives the optimiser its starting
# values for alpha, beta and gamma.
AA15HW <- HoltWinters(AA15, seasonal = "multiplicative",
                      optim.start = c(alpha = 0.3, beta = 0.3, gamma = 0.3))
AA15HW
AA15HW$fitted
summary(AA15HW)

##########################################################################################



# Combine Holt Winter Forecast plots

library(ggplot2)
library(reshape2)


# Plot the actual values, the Holt-Winters fitted values and the forecast
# (with a prediction-interval ribbon) of a time series.
#
# Args:
#   AA15:         time series passed to HoltWinters() (default settings).
#   n.ahead:      kept for interface compatibility. NOTE(review): it is not
#                 used; the forecast horizon is fixed at 24 periods below.
#   CI:           kept for interface compatibility. NOTE(review): it is not
#                 used; the prediction level is fixed at 0.95 below.
#   error.ribbon: fill colour of the prediction-interval ribbon.
#   line.size:    width of the Actual / Fitted lines.
#
# Returns:
#   A ggplot object; print() it to draw on the active graphics device.
HWplot<-function(AA15,  n.ahead=12,  CI=.95,  error.ribbon='green',  line.size=1){

  hw_object <- HoltWinters(AA15)

  # predict() returns a matrix-like series with columns fit, upr and lwr.
  hw_pred <- predict(hw_object, n.ahead = 24, prediction.interval = TRUE, level = 0.95)
  pred_df <- as.data.frame(hw_pred)

  # dev is the half-width of the interval (upper bound minus point forecast).
  for_values <- data.frame(time = round(time(hw_pred), 3),
                           value_forecast = pred_df$fit,
                           dev = pred_df$upr - pred_df$fit)

  fitted_values <- data.frame(time = round(time(hw_object$fitted), 3),
                              value_fitted = as.data.frame(hw_object$fitted)$xhat)

  actual_values <- data.frame(time = round(time(hw_object$x), 3),
                              Actual = c(hw_object$x))

  # Outer-join the three tables on time so every period appears exactly once.
  graphset <- merge(actual_values, fitted_values, by = 'time', all = TRUE)
  graphset <- merge(graphset, for_values, all = TRUE, by = 'time')
  # No interval exists for historical periods: use a zero-width ribbon there.
  graphset$dev[is.na(graphset$dev)] <- 0

  # One continuous "Fitted" series: NA for the leading periods without a
  # fitted value, then the fitted values, then the forecasts.
  n_leading <- NROW(graphset) - (NROW(for_values) + NROW(fitted_values))
  graphset$Fitted <- c(rep(NA, n_leading),
                       fitted_values$value_fitted,
                       for_values$value_forecast)

  graphset.melt <- melt(graphset[, c('time', 'Actual', 'Fitted')], id = 'time')

  # geom_vline() takes its position through `xintercept`; the former `x = ...`
  # left the required aesthetic missing. The dashed line marks the last
  # observed period.
  p <- ggplot(graphset.melt, aes(x = time, y = value)) +
    geom_ribbon(data = graphset,
                aes(x = time, y = Fitted, ymin = Fitted - dev, ymax = Fitted + dev),
                alpha = .2, fill = error.ribbon) +
    geom_line(aes(colour = variable), size = line.size) +
    geom_vline(xintercept = max(actual_values$time), lty = 2) +
    xlab('Time') +
    ylab('Value') +
    theme(legend.position = 'bottom') +
    scale_colour_hue('')
  return(p)

}

# Calculate the Holt Winter Forecast values

# Call the forecast() generic instead of naming the forecast.HoltWinters method
# directly (the method is not exported by current forecast releases - confirm
# against the installed version). h = 24 spells out the 24-month horizon.
# The result keeps the name `forecast`; R still resolves forecast(...) to the
# function because call lookup skips non-function objects.
forecast <- forecast(AA15HW, h = 24)
forecast$mean

# Label every forecast with its month and year so the dates end up in the
# sheet: addDataFrame() writes row names as the first column by default.
# The small offset guards floor() against floating-point time values.
forecastperiods <- paste(month.abb[as.integer(cycle(forecast$mean))],
                         floor(as.numeric(time(forecast$mean)) + 1e-6))
forecastvalues <- data.frame(forecast.mean = as.numeric(forecast$mean),
                             row.names = forecastperiods)

forecastvalues


# Save the plot

# Build the plot first, then print it explicitly onto the png device.
# (The former ggsave() call ran before any plot existed and targeted the same
# file that png() writes, so it has been dropped.)
AA15plot <- HWplot(AA15, n.ahead=12, CI=.95, error.ribbon='blue',line.size=1)
png(filename = "zipggplotAA15.png", units = "px", width = 600, height = 600)
print(AA15plot)
dev.off()


library(xlsx)

wb<-createWorkbook(type="xlsx")

# Create a new sheet to contain the plot
sheet <-createSheet(wb, sheetName = "ggplotFORECASTAA15")

# Add title
# Written with xlsx primitives: xlsx.addTitle() and TITLE_STYLE are not defined
# in this script. The style is created from this workbook.
TITLE_STYLE <- CellStyle(wb) + Font(wb, heightInPoints = 16, isBold = TRUE)
titlerow <- createRow(sheet, rowIndex = 1)
titlecell <- createCell(titlerow, colIndex = 1)
setCellValue(titlecell[[1, 1]], "ForecastPlotsggplot2AA15")
setCellStyle(titlecell[[1, 1]], TITLE_STYLE)

#Add forecast data to excel sheet
# Starts on row 3 so the table and the title do not both target row 1, column 1.
addDataFrame(forecastvalues, sheet, startRow = 3, startColumn = 1)

# Add the plot created previously
addPicture("zipggplotAA15.png", sheet, scale = 1, startRow = 4,
           startColumn = 5)

# remove the plot from the disk
res<-file.remove("zipggplotAA15.png")

# Save the workbook to a file (once, after all content has been added)

saveWorkbook(wb, "ggplotforecastplotAA15.xlsx")

# The excel file and the sheet will be created in the working directory

getwd()

现在我想把同样的代码放进循环里调用所有这些函数,以便对输入文件中的不同列(参见最右边的图像,其中包含 AA15、AA16 等的权重值)分别计算 Holt-Winters 预测,并在工作目录中创建的同一个 Excel 工作簿里,为每一列(AA15、AA16 等)生成单独的工作表——ggplotFORECASTAA15、ggplotFORECASTAA16、ggplotFORECASTAA17 等——每张表的输出与上文所述相同。

另外在Holt Winter Forecast的输出excel表中,我能够打印未来24个月的预测值,但无法打印日期(2015年1月至2016年12月),请告诉我如何在输出文件中获取日期。

在R中创建循环的任何帮助都将受到高度赞赏。 谢谢。

1 个答案:

答案 0 :(得分:0)

考虑以下内容,基本上在AA15,AA16,AA17列表上运行for loop。所有库在一开始就被调用一次。

代码中大量使用了 assign()、paste0()、get() 函数,以便在循环中按名称传递列表中的各项。希望我没有遗漏什么。请根据需要调整!

library(tseries)
library(forecast)
library(WriteXLS)
library(ggplot2)
library(reshape2)
library(xlsx)

# Columns of the input file to forecast; each one gets its own sheet.
forecasts <- c("AA15", "AA16", "AA17")

# Forecast horizon in months.
horizon <- 24

# Let user select the excel (.csv) file for importing the input data.
# The file is chosen once, before the loop; the loop then walks its columns.
input_data <- read.csv(file.choose(), header = TRUE)

missing_cols <- setdiff(forecasts, names(input_data))
if (length(missing_cols) > 0) {
  stop("Input file has no column(s): ", paste(missing_cols, collapse = ", "),
       call. = FALSE)
}

# Plot the actual values, the Holt-Winters fitted values and the forecast
# (with a prediction-interval ribbon) of a time series.
#
# Defined once, outside the loop, with a regular parameter name.
#
# Args:
#   series:       time series passed to HoltWinters() (default settings).
#   n.ahead:      number of periods to forecast.
#   CI:           level of the prediction interval.
#   error.ribbon: fill colour of the prediction-interval ribbon.
#   line.size:    width of the Actual / Fitted lines.
#
# Returns:
#   A ggplot object; print() it to draw on the active graphics device.
HWplot <- function(series, n.ahead = 12, CI = .95, error.ribbon = 'green',
                   line.size = 1) {

  hw_object <- HoltWinters(series)

  # predict() returns a matrix-like series with columns fit, upr and lwr.
  hw_pred <- predict(hw_object, n.ahead = n.ahead, prediction.interval = TRUE,
                     level = CI)
  pred_df <- as.data.frame(hw_pred)

  # dev is the half-width of the interval (upper bound minus point forecast).
  for_values <- data.frame(time = round(time(hw_pred), 3),
                           value_forecast = pred_df$fit,
                           dev = pred_df$upr - pred_df$fit)

  fitted_values <- data.frame(time = round(time(hw_object$fitted), 3),
                              value_fitted = as.data.frame(hw_object$fitted)$xhat)

  actual_values <- data.frame(time = round(time(hw_object$x), 3),
                              Actual = c(hw_object$x))

  # Outer-join the three tables on time so every period appears exactly once.
  graphset <- merge(actual_values, fitted_values, by = 'time', all = TRUE)
  graphset <- merge(graphset, for_values, all = TRUE, by = 'time')
  # No interval exists for historical periods: use a zero-width ribbon there.
  graphset$dev[is.na(graphset$dev)] <- 0

  # One continuous "Fitted" series: NA for the leading periods without a
  # fitted value, then the fitted values, then the forecasts.
  n_leading <- NROW(graphset) - (NROW(for_values) + NROW(fitted_values))
  graphset$Fitted <- c(rep(NA, n_leading),
                       fitted_values$value_fitted,
                       for_values$value_forecast)

  graphset.melt <- melt(graphset[, c('time', 'Actual', 'Fitted')], id = 'time')

  # geom_vline() takes its position through `xintercept`, not `x`.
  ggplot(graphset.melt, aes(x = time, y = value)) +
    geom_ribbon(data = graphset,
                aes(x = time, y = Fitted, ymin = Fitted - dev, ymax = Fitted + dev),
                alpha = .2, fill = error.ribbon) +
    geom_line(aes(colour = variable), size = line.size) +
    geom_vline(xintercept = max(actual_values$time), lty = 2) +
    xlab('Time') +
    ylab('Value') +
    theme(legend.position = 'bottom') +
    scale_colour_hue('')
}

# One workbook for all columns; every column gets its own sheet.
wb <- createWorkbook(type = "xlsx")

# Title style built from this workbook with xlsx primitives
# (xlsx.addTitle() and TITLE_STYLE are not defined in this script).
title_style <- CellStyle(wb) + Font(wb, heightInPoints = 16, isBold = TRUE)

for (AA in forecasts) {

  # convert to time series (monthly, Jan 2005 - Dec 2014)
  series <- ts(input_data[[AA]], start = c(2005, 1), end = c(2014, 12),
               frequency = 12)

  # convert to Holt Winter
  hw_fit <- HoltWinters(series, seasonal = "multiplicative",
                        optim.start = c(alpha = 0.3, beta = 0.3, gamma = 0.3))

  # Keep the per-column objects (e.g. AA15, AA15HW) available by name.
  assign(AA, series)
  assign(paste0(AA, "HW"), hw_fit)

  # Values are not auto-printed inside a loop, so print explicitly.
  print(hw_fit)

  # Calculate the Holt Winter Forecast values through the forecast() generic.
  hw_forecast <- forecast(hw_fit, h = horizon)

  # Month/year labels become row names, which addDataFrame() writes as the
  # first column. The small offset guards floor() against floating-point
  # time values.
  period_labels <- paste(month.abb[as.integer(cycle(hw_forecast$mean))],
                         floor(as.numeric(time(hw_forecast$mean)) + 1e-6))
  forecastvalues <- data.frame(forecast.mean = as.numeric(hw_forecast$mean),
                               row.names = period_labels)
  print(forecastvalues)

  # Save the plot: build it, then print it onto the png device.
  plot_file <- paste0("zipggplot", AA, ".png")
  hw_plot <- HWplot(series, n.ahead = horizon, CI = .95, error.ribbon = 'blue',
                    line.size = 1)
  png(filename = plot_file, units = "px", width = 600, height = 600)
  print(hw_plot)
  dev.off()

  # Create a new sheet to contain the title, the forecast table and the plot
  sheet <- createSheet(wb, sheetName = paste0("ggplotFORECAST", AA))

  # Add title (row 1)
  title_row <- createRow(sheet, rowIndex = 1)
  title_cell <- createCell(title_row, colIndex = 1)
  setCellValue(title_cell[[1, 1]], paste0("ForecastPlotsggplot2", AA))
  setCellStyle(title_cell[[1, 1]], title_style)

  # Add forecast data to excel sheet, below the title
  addDataFrame(forecastvalues, sheet, startRow = 3, startColumn = 1)

  # Add the plot created previously
  addPicture(plot_file, sheet, scale = 1, startRow = 4, startColumn = 5)

  # remove the plot from the disk
  res <- file.remove(plot_file)
}

# Save the workbook to a file (once, after every sheet has been filled)
saveWorkbook(wb, "ggplotforecastplot.xlsx")

# The excel file will be created in the working directory
getwd()