Question

我尝试为时间序列预测实现LSTM模型。以下是我的试用代码。此代码运行没有错误。你也可以毫无依赖地尝试它。

import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional
from sklearn.metrics import mean_squared_error, accuracy_score
from scipy.stats import linregress
from sklearn.utils import shuffle

fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print (raw.shape)

scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)

time_steps = 7
def create_ds(data, t_steps):
    data = pd.DataFrame(data)
    data_s = data.copy()
    for i in range(time_steps):
        data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)   
    data.dropna(axis=0, inplace=True)
    return data.values

ds = create_ds(raw, time_steps)
print (ds.shape)
n_feats = raw.shape[1]
n_obs = time_steps * n_feats

n_rows = ds.shape[0]
train_size = int(n_rows * 0.8)

train_data = ds[:train_size, :]
train_data = shuffle(train_data)

test_data = ds[train_size:, :]

x_train = train_data[:, :n_obs]
y_train = train_data[:, n_obs:]
x_test = test_data[:, :n_obs]
y_test = test_data[:, n_obs:]

x_train = x_train.reshape(1, x_train.shape[0], x_train.shape[1])
y_train = y_train.reshape(1, y_train.shape[0], y_train.shape[1])
x_test = x_test.reshape(1, x_test.shape[0], x_test.shape[1])

print (x_train.shape)
print (y_train.shape)
print (x_test.shape)
print (y_test.shape)

model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(None, x_train.shape[2]), stateful=True, batch_size=1))
model.add(LSTM(32, return_sequences=True, stateful=True))
model.add(LSTM(n_feats, return_sequences=True, stateful=True)) 

model.compile(loss='mse', optimizer='rmsprop')
model.fit(x_train, y_train, epochs=10, batch_size=1, verbose=2)  
y_predict = model.predict(x_test)
y_predict = y_predict.reshape(y_predict.shape[1], y_predict.shape[2])

y_predict = scaler.inverse_transform(y_predict)

y_test = scaler.inverse_transform(y_test)
y_test = y_test[:,0]
y_predict = y_predict[:,0]

print (y_test.shape)
print (y_predict.shape)

plt.plot(y_test, label='True')
plt.plot(y_predict,  label='Predict')
plt.legend()
plt.show()

但是，预测极差。如何改善预测？你有什么想法来改善吗？

通过重新设计架构和/或层来改进预测的任何想法？

污染数据在这里： https://github.com/sirjanrocky/some-neural-tests-for-study/blob/master/pollution.csv

Answer 1

您可以考虑更改模型：

import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional
from sklearn.metrics import mean_squared_error, accuracy_score
from scipy.stats import linregress
from sklearn.utils import shuffle

fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print (raw.shape)

scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)

time_steps = 7
def create_ds(data, t_steps):
    data = pd.DataFrame(data)
    data_s = data.copy()
    for i in range(time_steps):
        data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)   
    data.dropna(axis=0, inplace=True)
    return data.values

ds = create_ds(raw, time_steps)
print (ds.shape)
n_feats = raw.shape[1]
n_obs = time_steps * n_feats

n_rows = ds.shape[0]
train_size = int(n_rows * 0.8)

train_data = ds[:train_size, :]
train_data = shuffle(train_data)

test_data = ds[train_size:, :]

x_train = train_data[:, :n_obs]
y_train = train_data[:, n_obs:]
x_test = test_data[:, :n_obs]
y_test = test_data[:, n_obs:]

print (x_train.shape)
print (x_test.shape)
print (y_train.shape)
print (y_test.shape)

x_train = x_train.reshape(x_train.shape[0], time_steps, n_feats)
x_test = x_test.reshape(x_test.shape[0], time_steps, n_feats)

print (x_train.shape)
print (x_test.shape)
print (y_train.shape)
print (y_test.shape)

model = Sequential()
model.add(LSTM(64, input_shape=(time_steps, n_feats), return_sequences=True))
model.add(LSTM(32, return_sequences=False))
model.add(Dense(n_feats))

model.compile(loss='mse', optimizer='rmsprop')
model.fit(x_train, y_train, epochs=10, batch_size=1, verbose=1, shuffle=False)

y_predict = model.predict(x_test)
print (y_predict.shape)
y_predict = scaler.inverse_transform(y_predict)

y_test = scaler.inverse_transform(y_test)
y_test = y_test[:,0]
y_predict = y_predict[:,0]

print (y_test.shape)
print (y_predict.shape)

plt.plot(y_test, label='True')
plt.plot(y_predict,  label='Predict')
plt.legend()
plt.show()

但我真的不知道你的实施的优点：

* both x and y are 3d (1,steps,features) rather than x in 3d (samples, time-steps, features) and y in 2d (samples, features)
* input_shape=(None, x_train.shape[2])
* last layer - model.add(LSTM(n_feats, return_sequences=True, stateful=True))

有人可能会提供更好的答案。

Answer 2

如果你想在我的代码中使用该模型（你传递的链接），你需要正确塑造数据：（1序列，total_time_steps，5个特征）

重要提示：我不知道这是最佳方式还是最佳模式，但此模型预测输入前7个步骤（{{1} }）

数据和初始变量

time_shift=7

模型

你的模型对于这个任务来说并不是非常强大，所以我尝试了一个更大的模型（另一方面这个功能太强大了）

    fi = 'pollution.csv'
raw = pd.read_csv(fi, delimiter=',')
raw = raw.drop('Dates', axis=1)
print("raw shape:")
print (raw.shape)
#(1789,5) - 1789 time steps / 5 features

scaler = MinMaxScaler(feature_range=(-1, 1))
raw = scaler.fit_transform(raw)

time_shift = 7 #shift is the number of steps we are predicting ahead
n_rows = raw.shape[0] #n_rows is the number of time steps of our sequence
n_feats = raw.shape[1]
train_size = int(n_rows * 0.8)


#I couldn't understand how "ds" worked, so I simply removed it because in the code below it's not necessary

#getting the train part of the sequence
train_data = raw[:train_size, :] #first train_size steps, all 5 features
test_data = raw[train_size:, :] #I'll use the beginning of the data as state adjuster


#train_data = shuffle(train_data) !!!!!! we cannot shuffle time steps!!! we lose the sequence doing this

x_train = train_data[:-time_shift, :] #the entire train data, except the last shift steps 
x_test = test_data[:-time_shift,:] #the entire test data, except the last shift steps
x_predict = raw[:-time_shift,:] #the entire raw data, except the last shift steps

y_train = train_data[time_shift:, :] 
y_test = test_data[time_shift:,:]
y_predict_true = raw[time_shift:,:]

x_train = x_train.reshape(1, x_train.shape[0], x_train.shape[1]) #ok shape (1,steps,5) - 1 sequence, many steps, 5 features
y_train = y_train.reshape(1, y_train.shape[0], y_train.shape[1])
x_test = x_test.reshape(1, x_test.shape[0], x_test.shape[1])
y_test = y_test.reshape(1, y_test.shape[0], y_test.shape[1])
x_predict = x_predict.reshape(1, x_predict.shape[0], x_predict.shape[1])
y_predict_true = y_predict_true.reshape(1, y_predict_true.shape[0], y_predict_true.shape[1])

print("\nx_train:")
print (x_train.shape)
print("y_train")
print (y_train.shape)
print("x_test")
print (x_test.shape)
print("y_test")
print (y_test.shape)

件

请注意，我必须为该模型训练2000多个时代以获得良好的结果我添加了验证数据，因此我们可以比较火车和测试的损失。

model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(None, x_train.shape[2])))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(256, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(n_feats, return_sequences=True)) 

model.compile(loss='mse', optimizer='adam')

预测

重要：至于根据开头预测序列的结束，模型看到开始调整内部状态非常重要，因此我预测整个数据（{{1 }），而不仅仅是测试数据。

#notice that I'm predicting from the ENTIRE sequence, including x_train      
#is important for the model to adjust its states before predicting the end
model.fit(x_train, y_train, epochs=1000, batch_size=1, verbose=2, validation_data=(x_test,y_test))

显示整个数据

显示其末尾部分以获取更多详细信息

请注意，此型号过度拟合，这意味着它可以学习训练数据并在测试数据中获得不良结果。

要解决此问题，您必须通过实验尝试较小的模型，使用辍学图层和其他技术来防止过度拟合。

另请注意，此数据很可能包含大量随机因素，这意味着模型无法从中学到任何有用的信息。当您制作较小的模型以避免过度拟合时，您可能还会发现模型将对训练数据提出更糟糕的预测。

找到完美的模型并非易事，这是一个悬而未决的问题，你必须进行实验。也许LSTM模型根本不是解决方案。也许您的数据根本无法预测，等等。对此没有明确的答案。

如何知道模型是好的

通过训练中的验证数据，您可以比较火车和测试数据的损失。

x_predict

两者应该一起走下去。当测试数据停止下降，但列车数据继续改善时，您的模型开始过度拟合。

尝试其他模型

我能做的最好的事情（但我没有真正尝试过）是使用这个模型：

y_predict_model = model.predict(x_predict)

print("\ny_predict_true:")
print (y_predict_true.shape)
print("y_predict_model: ")
print (y_predict_model.shape)


def plot(true, predicted, divider):

    predict_plot = scaler.inverse_transform(predicted[0])
    true_plot = scaler.inverse_transform(true[0])

    predict_plot = predict_plot[:,0]
    true_plot = true_plot[:,0]

    plt.figure(figsize=(16,6))
    plt.plot(true_plot, label='True',linewidth=5)
    plt.plot(predict_plot,  label='Predict',color='y')

    if divider > 0:
        maxVal = max(true_plot.max(),predict_plot.max())
        minVal = min(true_plot.min(),predict_plot.min())

        plt.plot([divider,divider],[minVal,maxVal],label='train/test limit',color='k')

    plt.legend()
    plt.show()

test_size = n_rows - train_size
print("test length: " + str(test_size))

plot(y_predict_true,y_predict_model,train_size)
plot(y_predict_true[:,-2*test_size:],y_predict_model[:,-2*test_size:],test_size)

当损失大约是：

Train on 1 samples, validate on 1 samples
Epoch 1/1000
9s - loss: 0.4040 - val_loss: 0.3348
Epoch 2/1000
4s - loss: 0.3332 - val_loss: 0.2651
Epoch 3/1000
4s - loss: 0.2656 - val_loss: 0.2035
Epoch 4/1000
4s - loss: 0.2061 - val_loss: 0.1696
Epoch 5/1000
4s - loss: 0.1761 - val_loss: 0.1601
Epoch 6/1000
4s - loss: 0.1697 - val_loss: 0.1476
Epoch 7/1000
4s - loss: 0.1536 - val_loss: 0.1287
Epoch 8/1000
.....

在此之后，验证损失开始上升（因此超出此点的训练完全没用）

结果：

这表明所有这些模型都可以学习非常全面的行为，例如值较高的区域。

但高频率要么太随机，要么模型不够好......

Answer 3

我不确定你能做什么，数据看起来好像没有可辨别的模式。如果我看不到一个我怀疑LSTM可以。你的预测确实看起来像一条好的回归线。

Answer 4

阅读原始代码后，作者似乎首先缩放了数据集，然后将其拆分为Training和Testing子集。这意味着有关“测试”子集的信息（例如，波动率等）已“泄漏”到“训练”子集中。

推荐的方法是先执行“训练/测试”拆分，仅使用“训练”子集计算缩放参数，然后使用这些参数分别对“训练”和“测试”子集进行缩放。

Answer 5

我自己正在创建一个模型来预测这样的数据我创建了一个 SMOTErnn 灵魂作为过去的数据添加，我发现在 batch_size 上使用 TimeSeriesGenrator 更高，步幅更高，它的表现要好得多。

预测极差：LSTM时间序列

5 个答案:

数据和初始变量

模型

件

预测

显示整个数据

显示其末尾部分以获取更多详细信息

如何知道模型是好的

尝试其他模型