如何使用H2O预测未来的一天

时间:2018-09-02 15:01:51

标签: python r python-3.x prediction h2o

希望你一切都好。我想对每一天分别进行预测——数据每15分钟采样一次,因此每天有96个值——并计算每一天的MAPE。下面是一个可重现的示例,请随时编辑代码或根据需要提供示例。

注意:欢迎使用 R 或 Python 中的任何 H2O 实现。

# Attach the tidyverse and the H2O R client.
library(tidyverse)
library(h2o)

# Connect to the H2O instance at localhost:54322 (per the h2o.init
# documentation a local instance is started when none is reachable).
# max_mem_size is the memory requested for that instance; lower it on
# machines with less RAM.
h2o.init(ip = "localhost", port = 54322, max_mem_size = "128g")


# Load the Boston housing data set from the MASS package.
data(Boston, package = "MASS")

# Column names, with the console output kept as comments so that the script
# still parses when run top to bottom.
names(Boston)
#>  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"     "dis"     "rad"     "tax"     "ptratio"
#> [12] "black"   "lstat"   "medv"


set.seed(4984)

# Attach a synthetic timestamp to every row: 506 points spaced 15 minutes
# apart, starting at 03:00 on 1 September 2017 (session time zone).
Boston[["date"]] <- seq(
  from = as.POSIXct("01-09-2017 03:00", format = "%d-%m-%Y %H:%M", tz = ""),
  by = "15 min",
  length.out = 506
)

# Chronological split: rows 1-333 are used for training, rows 334-506 for
# testing.
train <- Boston[seq_len(333), ]
test <- Boston[seq(334, 506), ]

# The training frame drops the timestamp column; the test frame keeps it so
# that it can be used for grouping later on.
train_data_finialized <- train[setdiff(names(train), "date")]

test_data_finialized <- test

# Upload only the training data to H2O (the test frame stays a plain
# data.frame).
train_h2o <- as.h2o(train_data_finialized)
# test_h2o <- as.h2o(test)

# Response column and predictor columns for the H2O model.
y <- "medv"
x <- setdiff(names(train_data_finialized), y)

# Number of cross-validation folds. The held-out predictions are kept so that
# the model can serve as level-one data for stacking.
nfolds <- 5

# Fit a cross-validated GBM on the training frame. GBM replaced the earlier
# random forest because it trains faster.
my_gbm <- h2o.gbm(
  x = x,
  y = y,
  training_frame = train_h2o,
  seed = 1,
  nfolds = nfolds,
  fold_assignment = "Modulo",
  keep_cross_validation_predictions = TRUE
)

# Score one slice of the test data with the global `my_gbm` model and return
# its mean absolute percentage error (MAPE).
#
# sub_df: data.frame holding the model's feature columns plus `medv` (the
#         observed target) and `date`.
# Returns a one-row data.frame with `date` (the first timestamp of the slice)
# and `mape` (in percent).
mape_calc <- function(sub_df) {
  scored <- h2o.predict(my_gbm, as.h2o(sub_df))
  # NOTE(review): assumes the prediction frame has a single column, as for a
  # regression model - confirm.
  predicted <- as.vector(scored)
  observed <- sub_df$medv
  abs_pct_err <- abs((observed - predicted) / observed)
  data.frame(date = sub_df$date[[1]], mape = 100 * mean(abs_pct_err))
}


# One-row data frame per calendar day.
# Grouping on the raw `date` column would create a separate group for every
# 15-minute timestamp, i.e. a "MAPE" computed from a single row. Grouping on
# the formatted day instead scores all samples of a day together (up to 96;
# the first and last day of the test window may be partial).
df_list <- by(
  test_data_finialized,
  format(test_data_finialized$date, "%Y-%m-%d"),
  mape_calc
)

# Stack the per-day results into one data frame (row names are the days).
final_df <- do.call(rbind, df_list)

0 个答案:

没有答案