How to resolve the error "Could not determine runtime" when performing hyperparameter optimization through the Experiment Modeler in IBM Watson

Asked: 2019-03-29 17:59:53

Tags: python optimization ibm-watson

I am building a time-series forecasting model using an LSTM in Keras. For model training, I am trying to obtain the best combination of hyperparameters through HPO with the Experiment Modeler described in the following article.

https://medium.com/ibm-watson/automating-and-accelerating-hyperparameter-tuning-for-deep-learning-52184944eeb4

After several attempts I started the training, but I keep getting an error saying "Could not determine the runtime of the training". I checked the logs but did not see any errors there. Below is the code I used; according to the logs it runs successfully up to the point where "Mean absolute error" is printed.

Besides this code, I also provided the input file "Global_temperatures_final.csv" in the bucket where the training data is stored, and I created an experiment in which I supply the hyperparameters by adding them manually in the training definition.
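For reference, the script below expects config.json to contain exactly the keys Timesteps, Ep, Neurons and BatchSizes. Here is a minimal sketch of how such a file could be generated for a local test, with made-up values (during an actual HPO sub-run the Experiment Modeler supplies this file itself, as far as I understand):

import json

# hypothetical values for a local smoke test only; in a real HPO sub-run
# the service writes config.json with the values it selected
example_config = {"Timesteps": 12, "Ep": 50, "Neurons": 64, "BatchSizes": 32}
with open("config.json", "w") as f:
    json.dump(example_config, f)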

import keras
import pandas as pd
import numpy as np
import json
import os
import os.path
from os import environ
from keras.callbacks import TensorBoard
from emetrics import EMetrics
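# emetrics is the helper module distributed with IBM's HPO sample code
# for reporting metrics back to the experiment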
import h5py
from keras.layers import Dense, Input, LSTM
from keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
# input data
if __name__=="__main__":
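    # DATA_DIR is set by the Watson Machine Learning runtime and points
    # to the mounted training-data bucket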
    input_data_folder=os.environ["DATA_DIR"]
    Global_Temp=pd.read_csv(input_data_folder+'/Global_temperature_final.csv')
# file containing hyperparameters
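# for each HPO sub-run, the service writes the selected hyperparameter values into config.json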
config_file="config.json"
if os.path.exists(config_file):
    with open(config_file,'r') as f:
        json_obj=json.load(f)
    Timesteps=json_obj["Timesteps"] 
    Ep=json_obj["Ep"]
    Neurons=json_obj["Neurons"] 
    BatchSizes=json_obj["BatchSizes"]
else:
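    # fallback defaults, so the script can also be smoke-tested outside an HPO run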
    Timesteps=1
    Ep=1
    Neurons=1 
    BatchSizes=2
model_filename = "lstm_gtemp.h5"
# choose where to save the trained model
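# RESULT_DIR is set by the Watson ML training runtime; files saved under it
# are persisted when the job finishes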
if environ.get('RESULT_DIR') is not None:
    output_model_folder = os.path.join(os.environ["RESULT_DIR"], "model")
    output_model_path = os.path.join(output_model_folder, model_filename)
else:
    output_model_folder = "model"
    output_model_path = os.path.join("model", model_filename)

os.makedirs(output_model_folder, exist_ok=True)

# writing metrics
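# JOB_STATE_DIR is likewise set by the training runtime; TensorBoard logs
# written here can be monitored while the job runs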
if environ.get('JOB_STATE_DIR') is not None:
    tb_directory = os.path.join(os.environ["JOB_STATE_DIR"], "logs", "tb", "test")
else:
    tb_directory = os.path.join("logs", "tb", "test")

os.makedirs(tb_directory, exist_ok=True)
tensorboard = TensorBoard(log_dir=tb_directory)
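# SUBID identifies the current HPO sub-training; EMetrics uses it to
# attribute the reported results to the right configuration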
def getCurrentSubID():
    if "SUBID" in os.environ:
        return os.environ["SUBID"]
    else:
        return None

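# Keras callback that forwards per-epoch training/validation metrics
# to the HPO service via EMetrics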
class HPOMetrics(keras.callbacks.Callback):
    def __init__(self):
        self.emetrics = EMetrics.open(getCurrentSubID())

    def on_epoch_end(self, epoch, logs={}):
        train_results = {}
        test_results = {}

        for key, value in logs.items():
            if 'val_' in key:
                test_results.update({key: value})
            else:
                train_results.update({key: value})

        print('EPOCH ' + str(epoch))
        self.emetrics.record("train", epoch, train_results)
        self.emetrics.record(EMetrics.TEST_GROUP, epoch, test_results)

    def close(self):
        self.emetrics.close()
Global_Temp['Date']=pd.to_datetime(Global_Temp['Date'])
Global_Temp.set_index('Date',drop=True,inplace=True)
Training_data,Testing_data=train_test_split(Global_Temp,test_size=0.1,shuffle=False)
def Data_preperation(Data,BatchSize,Timesteps):
    # For a stateful LSTM, (length of training data) - timesteps must be exactly divisible
    # by the batch size, so that the whole training set can be split into complete batches
    length=int((len(Data)-2*Timesteps)/BatchSize)*BatchSize
    # clip the training data set to that length
    Data=Data[:length+Timesteps*2]
    # reshape the data for feature scaling
    Data_1=Data['Monthly_Temperature'].values.reshape(-1,1)
    # Feature scaling
    Scaling=MinMaxScaler(feature_range=(0,1))
    Data_scaled=Scaling.fit_transform(np.float64(Data_1))
    # Create x,y data for LSTM training or prediction
    x=[]
    y=[]
    for i in range(Timesteps,length+Timesteps):
        x.append(Data_scaled[i-Timesteps:i,0])
        y.append(Data_scaled[i:i+Timesteps,0])
    # reshape for the LSTM to dimensions [samples x timesteps x 1]
    # by cutting the series into windows of length `timesteps`
    x, y = np.array(x), np.array(y)
    x=np.reshape(x,(x.shape[0],x.shape[1],1))
    y=np.reshape(y,(y.shape[0],y.shape[1],1))
    return x,y
x_train,y_train=Data_preperation(Training_data,BatchSize=BatchSizes,Timesteps=Timesteps)
x_test,y_test=Data_preperation(Testing_data,BatchSize=BatchSizes,Timesteps=Timesteps)
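# a stateful LSTM requires a fixed batch size, hence batch_shape instead of shape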
input_layer=Input(batch_shape=(BatchSizes,Timesteps,1))
lstm_layer=LSTM(Neurons,stateful=True,return_sequences=True)(input_layer)
output_layer=Dense(units=1)(lstm_layer)
model=Model(inputs=input_layer,outputs=output_layer)
model.summary()
model.compile(optimizer='adam',loss='mae')

hpo = HPOMetrics()

# Because the LSTM network is stateful, I reset its internal states after each epoch.
for e in range(Ep):
    model.fit(x_train,y_train,shuffle=False,epochs=1,batch_size=BatchSizes,verbose=0,callbacks=[tensorboard, hpo])
    model.reset_states()
hpo.close()
def Model_Forecast(x,model,BatchSize):
    yhat=model.predict(x,batch_size=BatchSize)
    return yhat
def Model_evaluation(y,yhat):
    # flatten predictions and targets to 1-D before computing the error
    MAE=mean_absolute_error(np.reshape(y,(-1,)),np.reshape(yhat,(-1,)))
    return MAE
y_pred=Model_Forecast(x_test,model=model,BatchSize=BatchSizes)
MAE_score=Model_evaluation(y_test,y_pred)
print('Mean absolute error ',MAE_score)
# save the model
model.save(output_model_path)

0 Answers:

There are no answers.