How to improve LSTM predictions

Time: 2019-12-19 23:17:55

Tags: python keras lstm recurrent-neural-network

import warnings
warnings.filterwarnings("ignore")
import keras
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, LeakyReLU
from tensorflow.keras.layers import LSTM, GRU, Bidirectional
from sklearn.preprocessing import MinMaxScaler, RobustScaler, MaxAbsScaler, QuantileTransformer
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import os
import datetime
import ta
from tensorflow.keras import optimizers
import time
import math
import tensorflow
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:100% !important;}</style>"))
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.optimizers import SGD
os.chdir("C:\\Users\\pie4e\\OneDrive\\Desktop\\Analysis\\lib\\")
import json
from os import path
import random
GOOG_unreal= pd.read_csv('C:\\Users\\pie4e\\OneDrive\\Desktop\\Analysis\\get_data\\sp500_df\\MS.csv', parse_dates=True, index_col='Date')  # loaded here but not used again below
class DataLoader():

    def __init__(self, filename, split, cols):
        df= pd.read_csv(filename)
        i_split= int(len(df)*split)
        print('length of the file is: {}'.format(len(df)))
        self.data_train= df.get(cols).values[:i_split]
        self.data_test= df.get(cols).values[i_split:]
        self.len_train= len(self.data_train)
        self.len_test= len(self.data_test)
        self.len_train_windows= None

    def normalise_windows(self, windows):
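        # Each window is scaled independently: a fresh MinMaxScaler is fit per column of
        # each window, so every window is mapped to the [0, 1] range on its own.
        # Input:  windows of shape (n_windows, window_len, n_features)
        # Output: array of shape   (n_windows, window_len, n_features)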
        normalized_data= []
        for window in windows:
            scalers= {}
            normalized_window= []
            for col_i in range(window.shape[1]):
                scalers[col_i]= MinMaxScaler(feature_range=(0,1))
                normalized_col= scalers[col_i].fit_transform(window[:,col_i].reshape(-1,1))
                normalized_window.append(normalized_col)
            normalized_window = np.array(normalized_window).T 
            normalized_data.append(normalized_window)
        normalized_data= np.array(normalized_data)
        return normalized_data.reshape(normalized_data.shape[0],normalized_data.shape[2],normalized_data.shape[-1])


    def get_test_data(self, seq_len, normalise):
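        # Build sliding windows of length seq_len over the test split:
        # x_test = the first seq_len-1 rows of each window,
        # y_test = the first column (the closing price) of the last row of each window.
        # Note that the y scaler below is fit on the test targets themselves.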
        print('Starting the loading of the test_data...')
        print('...the length of the data test will be of: {}'.format(self.len_test))
        data_windows= []
        for i in range(self.len_test - seq_len):
            data_windows.append(self.data_test[i: i+seq_len])
        data_windows= np.array(data_windows).astype(float)
        x_test= data_windows[:, :-1]
        y_test= data_windows[:, -1, [0]]

        normalized_xtest= self.normalise_windows(x_test)
        scaler_ytest= MinMaxScaler(feature_range=(0,1))
        normalized_ytest= scaler_ytest.fit_transform(y_test)
        print('Test_data loaded with shape: x_test has shape {}, y_test has shape {}'.format(x_test.shape,y_test.shape))
        print('Normalized test data loaded with shape: x_test has shape {}, y_test has shape {}'.format(normalized_xtest.shape,normalized_ytest.shape))
        return x_test, y_test, normalized_xtest, normalized_ytest, scaler_ytest

    def get_train_data(self, seq_len, normalise):
        print('Starting the loading of the train_data...')
        print('...the length of the data train will be of: {}'.format(self.len_train))        
        windows=[]
        for i in range(self.len_train - seq_len):
            windows.append(self.data_train[i:i+seq_len])          
        windows= np.array(windows).astype(float)
        x_train = windows[:, :-1]
        y_train = windows[:, -1, [0]]

        normalized_xtrain= self.normalise_windows(x_train)
        scaler_ytrain= MinMaxScaler(feature_range=(0,1))
        normalized_ytrain= scaler_ytrain.fit_transform(y_train)
        print('Train_data loaded with shape: data_x has shape {}, data_y has shape {}'.format(x_train.shape,y_train.shape))
        print('Normalized train data loaded with shape: x_train has shape {}, y_train has shape {}'.format(normalized_xtrain.shape,normalized_ytrain.shape))
        return x_train, y_train, normalized_xtrain, normalized_ytrain


class Model():

    def __init__(self):
        self.model= Sequential()

    def build_model(self, configs, x_train):
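        # Three stacked LSTM layers with 10 units each (the first two return full sequences),
        # dropout of 0.2 after the first and last LSTM, and a single-unit Dense layer that
        # regresses the next normalised closing price.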

        self.model.add(LSTM(10, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[-1])))
        self.model.add(Dropout(0.2))
        self.model.add(LSTM(10, return_sequences=True))
        self.model.add(LSTM(10, return_sequences=False))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(1))

        self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer'], metrics=[configs['model']['metrics']])
        print('[Model] Model Compiled')
        print(self.model.summary())

    def train(self, x_train, y_train, epochs, batch_size, x_test, y_test):
        print('[Model] Starting to train the model...')
        print('The following model will be fitted with: %s epochs, %s batch_size' %(epochs, batch_size))
        save_fname= "C:\\Users\\pie4e\\OneDrive\\Desktop\\"
        callbacks=[
            EarlyStopping(monitor='mean_squared_error', patience=10),
            # the monitored name must match a metric actually logged by Keras
            ModelCheckpoint(filepath=save_fname, monitor='mean_squared_error', save_best_only=True)]
        history= self.model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, validation_data=(x_test, y_test), callbacks=callbacks)
        print('[Model] The model has been trained')
        return history

    def predict(self, x_test, y_test, ynormalized, history, stock_name, scaler_ytest):
        y_hat= self.model.predict(x_test)
        normalized_yhat= scaler_ytest.inverse_transform(y_hat)
        print('the model has an r2_score of: '+str(r2_score(y_test,normalized_yhat)))
        if not os.path.exists("C:\\Users\\pie4e\\OneDrive\\Desktop\\Analysis\\LSTM_logs\\imgs\\imgs_{}".format(stock_name)):
            os.makedirs("C:\\Users\\pie4e\\OneDrive\\Desktop\\Analysis\\LSTM_logs\\imgs\\imgs_{}".format(stock_name))
        fig, axs= plt.subplots(4, figsize=(25,20))
        plt.title('model metrics')
        axs[0].plot(history.history['loss'], lw=2)
        axs[0].set_ylabel('loss')
        axs[1].plot(history.history['mean_squared_error'], lw=2)
        axs[1].set_ylabel('mean_squared_error')
        axs[2].plot(normalized_yhat, label='LSTM-prediction')
        axs[2].plot(y_test, label='{} real closing price'.format(stock_name))
        axs[3].plot(y_hat, label='LSTM-prediction')
        axs[3].plot(ynormalized, label='{} real closing price'.format(stock_name))
        axs[3].legend(loc=4)
        plt.xlabel('epoch')
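        # NOTE: the imgs_{stock_name} directory created above is never written to; something like
        # fig.savefig(...) would be needed to persist the figure. The 'epoch' x-label only applies
        # to the last subplot, whose x-axis is actually test-set time steps rather than epochs.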



def main():
    configs = json.load(open('config.json', 'r'))
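    # config.json is not included in the post; a sketch of its assumed contents is
    # given near the end of the post, below.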

    data= DataLoader('C:\\Users\\pie4e\\OneDrive\\Desktop\\Analysis\\get_data\\sp500_df\\MS.csv', configs['data']['train_test_split'], configs['data']['columns'])
    x_test, y_test, normalized_xtest, normalized_ytest, scaler_ytest= data.get_test_data(configs['data']['sequence_length'], configs['data']['normalise'])
    x_train, y_train, normalized_xtrain, normalized_ytrain= data.get_train_data(configs['data']['sequence_length'], configs['data']['normalise'])
    model= Model()
    model.build_model(configs, normalized_xtrain)
    history= model.train(normalized_xtrain, normalized_ytrain, configs['training']['epochs'], configs['training']['batch_size'], normalized_xtest, normalized_ytest)
    model.predict(normalized_xtest, y_test, normalized_ytest, history, 'MS', scaler_ytest)
main()

[plots of the two runs: training loss, mean_squared_error, and LSTM prediction vs. real closing price]

I am trying to test some time series with an LSTM RNN. For the first run I used: epochs: 1000, batch size: 10, loss: mse, neurons: 10, input_timesteps: 9, inp_dim = 2, seq_len: 10, train_size: 0.8

The second plot used: epochs: 1000, batch size: 16, loss: mse, neurons: 10, input_timesteps: 49, inp_dim = 2, seq_len: 50, train_size: 0.8
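For reference, config.json is not included above; based on the keys the script reads and the first run described here, it presumably contains something along these lines (the column names, optimizer and exact file layout are assumptions):

import json

# Assumed structure of config.json, inferred from the keys accessed in main(),
# build_model() and train(); values correspond to the first run described above.
config = {
    "data": {
        "columns": ["Close", "Volume"],   # assumed: two input features (inp_dim = 2)
        "sequence_length": 10,            # seq_len of the first run
        "train_test_split": 0.8,
        "normalise": True
    },
    "model": {
        "loss": "mse",
        "optimizer": "adam",              # assumed; Adam is imported in the script
        "metrics": "mean_squared_error"   # must match the key read from history.history
    },
    "training": {
        "epochs": 1000,
        "batch_size": 10
    }
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=4)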

How can I improve the results on the test set?


0 Answers:

No answers yet