我正在使用这个示例:H2O AutoML example with multivariate time series,并使用 R 的 nowcasting 库中的数据,以便问题可以复现。我需要帮助,使该示例能够对所给数据进行提前一步(one-step-ahead)预测。另外,是否还有其他方法可以进行提前一步预测?
# Export a reproducible sample data set built from the BRGDP example data.
library(nowcasting)
data(BRGDP)

# Data vintage dated 2015-06-01, built from the BRGDP base series and delays.
vintage <- PRTDB(mts = BRGDP$base, delay = BRGDP$delay, vintage = "2015-06-01")

# Keep the monthly observations from June 2005 onwards.
base <- window(vintage, start = c(2005, 6), frequency = 12)

# Apply the BRGDP transformation codes to the windowed panel.
x <- Bpanel(base = base, trans = BRGDP$trans)

# Convert to a data frame, drop columns 20 through 88, and write the rest to CSV.
data <- as.data.frame(x)
dataQ <- data[, -(20:88), drop = FALSE]
write.csv(dataQ, file = "dataQ.csv")
数据如下图所示;下面的示例代码可用于复现该问题。
apt-get install openjdk-8-jdk -qq > /dev/null
!pip install h2o -qq
import pandas as pd
import h2o
from h2o.automl import H2OAutoML
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
# Start (or connect to) the local H2O cluster.
h2o.init(nthreads=-1)

import io  # needed for io.StringIO below; it was used without being imported

from google.colab import files

# Ask the user to upload the CSV and report what was received.
uploaded = files.upload()
for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))

if not uploaded:
    raise ValueError('No file was uploaded')

# Keep using 'GTallcombine.csv' when it is present; otherwise fall back to the
# first uploaded file (e.g. the dataQ.csv written by the R script) instead of
# failing with a KeyError on the hard-coded name.
csv_name = 'GTallcombine.csv' if 'GTallcombine.csv' in uploaded else next(iter(uploaded))
df = pd.read_csv(io.StringIO(uploaded[csv_name].decode('utf-8')))
df.head()
# Split the rows, in file order (no shuffling), into the first 80% for training
# and the remaining 20% for testing, then hand both parts to H2O.
df.reset_index(drop=True, inplace=True)

# Use positional, end-exclusive slicing. Label-based .loc slices include the
# end label, which put the boundary row into both the train and the test set.
split_at = int(df.shape[0] * 0.8)
df_train = df.iloc[:split_at, :]
df_test = df.iloc[split_at:, :]
df_train.head()
df_train.tail()

hf_train = h2o.H2OFrame(df_train)
hf_test = h2o.H2OFrame(df_test)
问题在于如何使其进行提前一步预测:
# Build lagged and rolling-mean predictors for every column so that the value
# in row t can be forecast from information available `delay` steps earlier.
df2 = df.copy()
num_lags = 3  # number of lags and window lengths for mean aggregation
delay = 1  # predict target one step ahead
target_column = 'PIM_BI'  # series being forecast; must survive the column mask

# NOTE(review): assumes rows are in ascending time order (oldest first), as
# written by the R script — confirm. Under that ordering a positive shift
# brings PAST values into the current row; the previous negative shifts
# brought FUTURE values into the "lag" columns, leaking the answer.
# Iterate over a snapshot of the original column names, because the loop adds
# new columns to df2 while it runs.
for column in list(df2.columns):
    for lag in range(1, num_lags + 1):
        # Value observed (lag + delay - 1) rows before the current row.
        df2[column + '_lag' + str(lag)] = df2[column].shift(lag + delay - 1)
        # Mean of the (lag + 1) most recent values ending `delay` rows back.
        df2[column + '_avg_window_length' + str(lag + 1)] = (
            df2[column]
            .shift(delay)
            .rolling(window=lag + 1, center=False)
            .mean()
        )

# The first rows have no complete history; drop them.
df2.dropna(inplace=True)

# Keep the target itself plus all engineered columns. The 'pollution' pattern
# is retained for compatibility with the data set this snippet came from.
mask = (
    (df2.columns == target_column)
    | df2.columns.str.contains('pollution')
    | df2.columns.str.contains('lag')
    | df2.columns.str.contains('window')
)
df_processed = df2[df2.columns[mask]]
# the columns in the processed dataframe
df_processed.columns
# Response column; every other column of the training frame is a predictor.
y = 'PIM_BI'
X = hf_train.columns
X.remove(y)

# NOTE(review): hf_train / hf_test are built from the raw df split, not from
# df_processed, so the lag features are not used by this run — confirm intent.
# Run AutoML for at most 600 seconds with a fixed seed; hf_test is used for
# the leaderboard.
aml = H2OAutoML(seed=42, max_runtime_secs=600)
aml.train(
    x=X,
    y=y,
    training_frame=hf_train,
    leaderboard_frame=hf_test,
)