我一直在关注如何使用多通道 CNN 进行多步时间序列预测的指南:Guide
我使用了不同的数据并试图预测 30 天而不是 7 天。
出于某种原因,其中一个函数出现“元组超出范围”错误。这不是我运行代码 7 天时发生的事情,这表明我所做的更改是我认为的问题吗?我尝试了几次修复,但没有任何效果。我可以看到,第二次通过方法 forecast() 时,它打印了历史形状 (31,),这让我很困惑,第一次打印的是 (30, 74, 7)。它应该改变形状吗?除非我错过了,否则本教程不会提及或显示这一点。为什么会发生这种情况,我该如何解决?
我的代码如下:
# split a univariate dataset into train/test sets
def split_dataset(data):
# split into standard weeks
print(len(data))
train, test = data[1:-77], data[-77:-6]#328
# restructure into windows of weekly/monthly data
print("Pre-split: ")
print(train.shape)
max_chunks = 30
max_nb_value_1 = len(train)//30 # euclidean division : a = bq+r with r < q, this means that q (max_nb_value here) is the maximum number of element that we can take
max_nb_value_2 =len(test)//30
train = array([train[i:i+max_nb_value_1] for i in range(0, max_chunks*max_nb_value_1, max_nb_value_1)])
test = array([test[i:i+max_nb_value_2] for i in range(0, max_chunks*max_nb_value_2, max_nb_value_2)])
print("Post-split: ")
print(train.shape)
print(test.shape)
return train, test
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
scores = list()
# calculate an RMSE score for each day
for i in range(actual.shape[1]):
# calculate mse
mse = mean_squared_error(actual[:, i], predicted[:, i])
# calculate rmse
rmse = sqrt(mse)
# store
scores.append(rmse)
# calculate overall RMSE
s = 0
for row in range(actual.shape[0]):
for col in range(actual.shape[1]):
s += (actual[row, col] - predicted[row, col])**2
score = sqrt(s / (actual.shape[0] * actual.shape[1]))
return score, scores
# summarize scores
def summarize_scores(name, score, scores):
s_scores = ', '.join(['%.1f' % s for s in scores])
print('%s: [%.3f] %s' % (name, score, s_scores))
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=30):
# flatten data
data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
X, y = list(), list()
in_start = 0
# step over the entire history one time step at a time
for _ in range(len(data)):
# define the end of the input sequence
in_end = in_start + n_input
out_end = in_end + n_out
# ensure we have enough data for this instance
if out_end <= len(data):
#This differs from univariate. Here we make sure to take the whole dataset.
X.append(data[in_start:in_end, :])
y.append(data[in_end:out_end, 0])
# move along one time step
in_start += 1
return array(X), array(y)
# train the model
def build_model(train, n_input):
# prepare data
train_x, train_y = to_supervised(train, n_input)
# define parameters
verbose, epochs, batch_size = 0, 70, 16
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
# define model
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=16, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
return model
# Make a forecast.
def forecast(model, history, n_input):
# flatten data
data = array(history)
print("History shape in forecast(): ")
print(data.shape)
data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
# retrieve last observations for input data
# For multivariate, make sure to use all features.
input_x = data[-n_input:, :]
# reshape into [1, n_input, 1]
# We need to change the shape as well to take all features.
input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))
# forecast the next week
yhat = model.predict(input_x, verbose=0)
# we only want the vector forecast
yhat = yhat[0]
return that
# evaluate a single model
def evaluate_model(train, test, n_input):
# fit model
model = build_model(train, n_input)
# history is a list of weekly data
history = [x for x in train]
# walk-forward validation over each week
predictions = list()
for i in range(len(test)):
# predict the week
yhat_sequence = forecast(model, history, n_input)
# store the predictions
predictions.append(yhat_sequence)
# get real observation and add to history for predicting the next week
history.append(test[i, :])
# evaluate predictions days for each week
predictions = array(predictions)
score, scores = evaluate_forecasts(test[:, :, 0], predictions)
return score, scores
# IMPORT & NORMALIZE DATA
URL = 'https://raw.githubusercontent.com/victordahl/dataset/master/MSFT_10yrs.csv'
#URL = 'https://raw.githubusercontent.com/victordahl/dataset/master/MSFT_10yrs_open.csv'
csvfile = pd.read_csv(URL, header=0, infer_datetime_format=True, parse_dates=['Date'], index_col=['Date'])
#print("cvs type", type(csvfile))
#print(csvfile.iloc[:, 1:])
min_max_scaler = preprocessing.MinMaxScaler()
# Normalize last five columns (excluding date in first columns)
x_scaled = min_max_scaler.fit_transform(csvfile.iloc[:, 0:])
df = pd.DataFrame(x_scaled)
# Rename all columns
print(df)
df.columns = ["Open","High","Low","Close","Volume"]
# Derived features
short_SMA = 50
long_SMA = 200
df["50SMA"] = df["Open"].rolling(window=short_SMA).mean()
df["200SMA"] = df["Open"].rolling(window=long_SMA).mean()
# Trim head of dataset to remove the NaN days in SMA feature head
df = df.iloc[long_SMA:len(df)]
train, test = split_dataset(df.values)
# evaluate model and get scores
n_input = 30
score, scores = evaluate_model(train, test, n_input)
# summarize scores
summarize_scores('cnn', score, scores)
# plot scores
# Plot
days = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14','15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30']
pyplot.title("$MSFT prediction")
#pyplot.plot(days, scores, marker='o', label='Predicted close')
pyplot.plot(days, scores, marker='o', label='Predicted close')
pyplot.legend()
pyplot.show()