我尝试为时间序列预测实现LSTM模型。以下是我的试用代码。此代码运行没有错误。你也可以毫无依赖地尝试它。
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional
from sklearn.metrics import mean_squared_error, accuracy_score
from scipy.stats import linregress
from sklearn.utils import shuffle
fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print (raw.shape)
scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)
time_steps = 7
def create_ds(data, t_steps):
data = pd.DataFrame(data)
data_s = data.copy()
for i in range(time_steps):
data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
return data.values
ds = create_ds(raw, time_steps)
print (ds.shape)
n_feats = raw.shape[1]
n_obs = time_steps * n_feats
n_rows = ds.shape[0]
train_size = int(n_rows * 0.8)
train_data = ds[:train_size, :]
train_data = shuffle(train_data)
test_data = ds[train_size:, :]
x_train = train_data[:, :n_obs]
y_train = train_data[:, n_obs:]
x_test = test_data[:, :n_obs]
y_test = test_data[:, n_obs:]
x_train = x_train.reshape(1, x_train.shape[0], x_train.shape[1])
y_train = y_train.reshape(1, y_train.shape[0], y_train.shape[1])
x_test = x_test.reshape(1, x_test.shape[0], x_test.shape[1])
print (x_train.shape)
print (y_train.shape)
print (x_test.shape)
print (y_test.shape)
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(None, x_train.shape[2]), stateful=True, batch_size=1))
model.add(LSTM(32, return_sequences=True, stateful=True))
model.add(LSTM(n_feats, return_sequences=True, stateful=True))
model.compile(loss='mse', optimizer='rmsprop')
model.fit(x_train, y_train, epochs=10, batch_size=1, verbose=2)
y_predict = model.predict(x_test)
y_predict = y_predict.reshape(y_predict.shape[1], y_predict.shape[2])
y_predict = scaler.inverse_transform(y_predict)
y_test = scaler.inverse_transform(y_test)
y_test = y_test[:,0]
y_predict = y_predict[:,0]
print (y_test.shape)
print (y_predict.shape)
plt.plot(y_test, label='True')
plt.plot(y_predict, label='Predict')
plt.legend()
plt.show()
但是,预测极差。如何改善预测?你有什么想法来改善吗?
通过重新设计架构和/或层来改进预测的任何想法?
污染数据在这里: https://github.com/sirjanrocky/some-neural-tests-for-study/blob/master/pollution.csv
答案 0 :(得分:3)
您可以考虑更改模型:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional
from sklearn.metrics import mean_squared_error, accuracy_score
from scipy.stats import linregress
from sklearn.utils import shuffle
fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print (raw.shape)
scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)
time_steps = 7
def create_ds(data, t_steps):
data = pd.DataFrame(data)
data_s = data.copy()
for i in range(time_steps):
data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
return data.values
ds = create_ds(raw, time_steps)
print (ds.shape)
n_feats = raw.shape[1]
n_obs = time_steps * n_feats
n_rows = ds.shape[0]
train_size = int(n_rows * 0.8)
train_data = ds[:train_size, :]
train_data = shuffle(train_data)
test_data = ds[train_size:, :]
x_train = train_data[:, :n_obs]
y_train = train_data[:, n_obs:]
x_test = test_data[:, :n_obs]
y_test = test_data[:, n_obs:]
print (x_train.shape)
print (x_test.shape)
print (y_train.shape)
print (y_test.shape)
x_train = x_train.reshape(x_train.shape[0], time_steps, n_feats)
x_test = x_test.reshape(x_test.shape[0], time_steps, n_feats)
print (x_train.shape)
print (x_test.shape)
print (y_train.shape)
print (y_test.shape)
model = Sequential()
model.add(LSTM(64, input_shape=(time_steps, n_feats), return_sequences=True))
model.add(LSTM(32, return_sequences=False))
model.add(Dense(n_feats))
model.compile(loss='mse', optimizer='rmsprop')
model.fit(x_train, y_train, epochs=10, batch_size=1, verbose=1, shuffle=False)
y_predict = model.predict(x_test)
print (y_predict.shape)
y_predict = scaler.inverse_transform(y_predict)
y_test = scaler.inverse_transform(y_test)
y_test = y_test[:,0]
y_predict = y_predict[:,0]
print (y_test.shape)
print (y_predict.shape)
plt.plot(y_test, label='True')
plt.plot(y_predict, label='Predict')
plt.legend()
plt.show()
但我真的不知道你的实施的优点:
* both x and y are 3d (1,steps,features) rather than x in 3d (samples, time-steps, features) and y in 2d (samples, features)
* input_shape=(None, x_train.shape[2])
* last layer - model.add(LSTM(n_feats, return_sequences=True, stateful=True))
有人可能会提供更好的答案。
答案 1 :(得分:2)
如果你想在我的代码中使用该模型(你传递的链接),你需要正确塑造数据:(1序列,total_time_steps,5个特征)
重要提示:我不知道这是最佳方式还是最佳模式,但此模型预测输入前7个步骤({{1} })
time_shift=7
你的模型对于这个任务来说并不是非常强大,所以我尝试了一个更大的模型(另一方面这个功能太强大了)
fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print("raw shape:")
print (raw.shape)
#(1789,5) - 1789 time steps / 5 features
scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)
time_shift = 7 #shift is the number of steps we are predicting ahead
n_rows = raw.shape[0] #n_rows is the number of time steps of our sequence
n_feats = raw.shape[1]
train_size = int(n_rows * 0.8)
#I couldn't understand how "ds" worked, so I simply removed it because in the code below it's not necessary
#getting the train part of the sequence
train_data = raw[:train_size, :] #first train_size steps, all 5 features
test_data = raw[train_size:, :] #I'll use the beginning of the data as state adjuster
#train_data = shuffle(train_data) !!!!!! we cannot shuffle time steps!!! we lose the sequence doing this
x_train = train_data[:-time_shift, :] #the entire train data, except the last shift steps
x_test = test_data[:-time_shift,:] #the entire test data, except the last shift steps
x_predict = raw[:-time_shift,:] #the entire raw data, except the last shift steps
y_train = train_data[time_shift:, :]
y_test = test_data[time_shift:,:]
y_predict_true = raw[time_shift:,:]
x_train = x_train.reshape(1, x_train.shape[0], x_train.shape[1]) #ok shape (1,steps,5) - 1 sequence, many steps, 5 features
y_train = y_train.reshape(1, y_train.shape[0], y_train.shape[1])
x_test = x_test.reshape(1, x_test.shape[0], x_test.shape[1])
y_test = y_test.reshape(1, y_test.shape[0], y_test.shape[1])
x_predict = x_predict.reshape(1, x_predict.shape[0], x_predict.shape[1])
y_predict_true = y_predict_true.reshape(1, y_predict_true.shape[0], y_predict_true.shape[1])
print("\nx_train:")
print (x_train.shape)
print("y_train")
print (y_train.shape)
print("x_test")
print (x_test.shape)
print("y_test")
print (y_test.shape)
请注意,我必须为该模型训练2000多个时代以获得良好的结果 我添加了验证数据,因此我们可以比较火车和测试的损失。
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(None, x_train.shape[2])))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(256, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(n_feats, return_sequences=True))
model.compile(loss='mse', optimizer='adam')
重要:至于根据开头预测序列的结束,模型看到开始调整内部状态非常重要,因此我预测整个数据({{1 }),而不仅仅是测试数据。
#notice that I'm predicting from the ENTIRE sequence, including x_train
#is important for the model to adjust its states before predicting the end
model.fit(x_train, y_train, epochs=1000, batch_size=1, verbose=2, validation_data=(x_test,y_test))
请注意,此型号过度拟合,这意味着它可以学习训练数据并在测试数据中获得不良结果。
要解决此问题,您必须通过实验尝试较小的模型,使用辍学图层和其他技术来防止过度拟合。
另请注意,此数据很可能包含大量随机因素,这意味着模型无法从中学到任何有用的信息。当您制作较小的模型以避免过度拟合时,您可能还会发现模型将对训练数据提出更糟糕的预测。
找到完美的模型并非易事,这是一个悬而未决的问题,你必须进行实验。也许LSTM模型根本不是解决方案。也许您的数据根本无法预测,等等。对此没有明确的答案。
通过训练中的验证数据,您可以比较火车和测试数据的损失。
x_predict
两者应该一起走下去。当测试数据停止下降,但列车数据继续改善时,您的模型开始过度拟合。
我能做的最好的事情(但我没有真正尝试过)是使用这个模型:
y_predict_model = model.predict(x_predict)
print("\ny_predict_true:")
print (y_predict_true.shape)
print("y_predict_model: ")
print (y_predict_model.shape)
def plot(true, predicted, divider):
predict_plot = scaler.inverse_transform(predicted[0])
true_plot = scaler.inverse_transform(true[0])
predict_plot = predict_plot[:,0]
true_plot = true_plot[:,0]
plt.figure(figsize=(16,6))
plt.plot(true_plot, label='True',linewidth=5)
plt.plot(predict_plot, label='Predict',color='y')
if divider > 0:
maxVal = max(true_plot.max(),predict_plot.max())
minVal = min(true_plot.min(),predict_plot.min())
plt.plot([divider,divider],[minVal,maxVal],label='train/test limit',color='k')
plt.legend()
plt.show()
test_size = n_rows - train_size
print("test length: " + str(test_size))
plot(y_predict_true,y_predict_model,train_size)
plot(y_predict_true[:,-2*test_size:],y_predict_model[:,-2*test_size:],test_size)
当损失大约是:
Train on 1 samples, validate on 1 samples
Epoch 1/1000
9s - loss: 0.4040 - val_loss: 0.3348
Epoch 2/1000
4s - loss: 0.3332 - val_loss: 0.2651
Epoch 3/1000
4s - loss: 0.2656 - val_loss: 0.2035
Epoch 4/1000
4s - loss: 0.2061 - val_loss: 0.1696
Epoch 5/1000
4s - loss: 0.1761 - val_loss: 0.1601
Epoch 6/1000
4s - loss: 0.1697 - val_loss: 0.1476
Epoch 7/1000
4s - loss: 0.1536 - val_loss: 0.1287
Epoch 8/1000
.....
在此之后,验证损失开始上升(因此超出此点的训练完全没用)
结果:
这表明所有这些模型都可以学习非常全面的行为,例如值较高的区域。
但高频率要么太随机,要么模型不够好......
答案 2 :(得分:2)
我不确定你能做什么,数据看起来好像没有可辨别的模式。如果我看不到一个我怀疑LSTM可以。你的预测确实看起来像一条好的回归线。
答案 3 :(得分:0)
阅读原始代码后,作者似乎首先缩放了数据集,然后将其拆分为Training和Testing子集。这意味着有关“测试”子集的信息(例如,波动率等)已“泄漏”到“训练”子集中。
推荐的方法是先执行“训练/测试”拆分,仅使用“训练”子集计算缩放参数,然后使用这些参数分别对“训练”和“测试”子集进行缩放。
答案 4 :(得分:0)
我自己正在创建一个模型来预测这样的数据 我创建了一个 SMOTErnn 灵魂作为过去的数据添加,我发现在 batch_size 上使用 TimeSeriesGenrator 更高,步幅更高,它的表现要好得多。