I am training models on incoming data and testing them on new data. If a model's performance is not as good as I would like, I have to generate a new model from the combined data (new_data + old_data). Since training takes time, I tried to continue training the models I already have, but because the data is rescaled (normalized) again on new_data + old_data, the old models no longer behave well in the new training run, so I cannot keep training them and end up generating new models instead. Is there a way to eliminate this normalization mismatch so that the old models can be trained effectively? When I continue training a pre-trained model, I use the following code:
import time
import warnings
import numpy as np
import random
import pandas as pd
import smtplib
import schedule
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras import metrics
from keras.layers import (Dense, LSTM, CuDNNLSTM, BatchNormalization,
                          Bidirectional, Dropout, Activation, LeakyReLU)
warnings.filterwarnings("ignore", category = FutureWarning)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)
seq_len = 50              # length of each input sequence
train_test_ratio = 0.0    # fraction of the sequences held back as a test set
df = pd.read_csv('datasets/dk1007-0810LTCabc.csv', dtype=float, header=None)
model = load_model('models/LTC4/RNN_Final-56-0.02')
df[df == 0] = 1
df1 = df.iloc[:, :]
##y_df1 = df1.iloc[:,10:11]#,asks1[1:2],askamount[2:3],bids1[3:4],bidsamount[4:5],price[5:6],price_5m_change[6:7],price_10m_change[7:8],price_15m_change[8:9],price_20m_change[9:10],price_30m_change[10:11],price_60m_change[11:12],asks1_change,bids1_change,asksamount/bidsamount,price_change_1m
y_df1 = df1.iloc[:,6:7]#,asks1[1:2],askamount[2:3],bids1[3:4],bidsamount[4:5],price[5:6],price_5m_change[6:7],price_10m_change[7:8],price_15m_change[8:9],price_20m_change[9:10],price_30m_change[10:11],price_60m_change[11:12],price_60m_change_target[12:13],price_30m_change_target[13:14],price_20m_change_target[14:15],price_15m_change_target[15:16],price_10m_change_target[16:17],price_5m_change_target[17:18],asks1_change,bids1_change,asksamount/bidsamount,price_change_1m
df = df1.iloc[:,12:]
ts = 0
scaler = MinMaxScaler()
df = scaler.fit_transform(df)        # feature scaling is re-fit on the current dataset
scaler2 = MinMaxScaler()
y_df = scaler2.fit_transform(y_df1)  # target scaling is re-fit as well
df1 = (df, y_df)                     # pack scaled features and scaled target together
seq_data = []
data_X = []
data_Y = []
for i in range(0, len(df) - seq_len):
    dft = df1[0][i:i + seq_len]                   # sequence of seq_len feature rows
    seq_data.append([dft, df1[1][i + seq_len]])   # paired with the target that follows it
random.shuffle(seq_data)
for i in range(0, len(df) - seq_len):
    ## if seq_data[i][1][0] != 0.5417594375446555:
    data_X.append(seq_data[i][0])
    data_Y.append(seq_data[i][1])
X_train = np.array(data_X)
Y_train = np.array(data_Y)
#split into train and testing data
row = round(train_test_ratio * X_train.shape[0])
x_test = X_train[:int(row), :]
x_train = X_train[int(row):,:]
y_test = Y_train[:int(row)]
y_train= Y_train[int(row):]
print("X_train:", x_train.shape)
print("y_train:", y_train.shape)
print("X_test:", x_test.shape)
print("y_test:", y_test.shape)
filepath = "RNN_Final-{epoch:02d}-{val_loss:.2f}" # unique file name that will include the epoch and the validation loss for that epoch
checkpoint = ModelCheckpoint("models/{}".format(filepath), monitor='val_loss', verbose=1, save_best_only=True, mode='min') # saves only the best ones
reduce_lr = ReduceLROnPlateau(monitor = "val_loss", factor = 0.75, patience = 15,
verbose = 1, mode = "auto", min_delta = 1e-04, cooldown = 5,
min_lr = 0)
start = time.time()
model.fit(x_train, y_train, batch_size=16, epochs=500, validation_split=0.1, callbacks=[checkpoint, reduce_lr])
model.save('0310_5m')
print('training time : ', time.time() - start)
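What I am essentially asking is whether something like the sketch below is the right way to keep the scaling fixed between training runs, i.e. fitting the MinMaxScaler once, saving it, and only calling transform() on later data. The file names and the dummy arrays here are placeholders, not part of my real pipeline:

import joblib
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# First training run: fit the scalers on the old arrays and persist them
old_x = np.random.rand(100, 6)   # stands in for df1.iloc[:, 12:] above
old_y = np.random.rand(100, 1)   # stands in for df1.iloc[:, 6:7] above
scaler = MinMaxScaler().fit(old_x)
scaler2 = MinMaxScaler().fit(old_y)
joblib.dump(scaler, 'models/scaler_x.pkl')
joblib.dump(scaler2, 'models/scaler_y.pkl')

# Later runs: load the saved scalers and call transform() only (no refit),
# so the old model keeps receiving inputs on the scale it was trained with
scaler = joblib.load('models/scaler_x.pkl')
scaler2 = joblib.load('models/scaler_y.pkl')
new_x = np.random.rand(20, 6)
x_scaled = scaler.transform(new_x)   # may fall outside [0, 1] if new data exceeds the old min/max

I am not sure this is the best approach, since new data can exceed the old min/max, so any advice on how to handle that case is also welcome.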