在下面的示例中,有三个时间序列,我想预测另一个时间序列y
,这是三个时间序列的函数。如何使用四个输入来预测时间序列,其中第四个输入是前一时间步长的输出?
import tensorflow as tf
import numpy as np
import pandas as pd
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
import matplotlib.pyplot as plt
def MinMaxScaler(data):
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
# noise term prevents the zero division
return numerator / (denominator + 1e-7)
class generate_data(object):
def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum' ):
self.data_len = data_len
self.data = None
self.in_series = in_series #number of input series
self.y_pred = y_pred #number of final outputs from model
self.seq_lengths = seq_lengths
self.method = method
def _f(self, x):
y = 0
result = []
for _ in x:
result.append(y)
y += np.random.normal(scale=1)
return np.array(result)
def _runningMean(self, x, N):
return np.convolve(x, np.ones((N,))/N)[(N-1):]
def sine(self):
DATA = np.zeros((self.data_len, self.in_series))
xx = [None]
data_0 = np.sin(np.arange(self.data_len*self.in_series))
xx = data_0.reshape(self.data_len, self.in_series)
DATA[:,0: self.in_series] = xx
y = self._get_y(DATA)
return xx,y, DATA
def _get_y(self, xx):
if self.method=='sum':
yy = np.array([np.sum(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'self_mul':
yy = np.array([np.prod(xx[i,:]) for i in range(np.shape(xx)[0])])
elif self.method == 'mean_mirror':
yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
return yy
def normalize(self, xx1,yy1):
yy = [None]*len(yy1)
YMinMax = {}
xx = MinMaxScaler(xx1)
for i in range(self.y_pred):
YMinMax['ymin_' + str(i)] = np.min(yy1[0])
YMinMax['ymax_' + str(i)] = np.max(yy1[0])
yy[i] = MinMaxScaler(yy1[0])
setattr(self, 'YMinMax', YMinMax)
return xx,yy
def create_dataset(self, xx, yy):
'''creates a dataset consisting of windows for x and y data'''
dataX = self._build_input_windows(xx, self.seq_lengths)
if self.y_pred > 1:
pass
elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
pass
else:
dataY = self._build_y_windows(yy[0] , self.seq_lengths)
return dataX, dataY
def _build_input_windows(self, time_series, seq_length):
dataX = []
for i in range(0, len(time_series) - seq_length):
_x = time_series[i:i + seq_length, :]
dataX.append(_x)
return np.array(dataX)
def _build_y_windows(self, iny, seq_length):
dataY = []
for i in range(0, len(iny) - seq_length):
_y = iny[i + seq_length, ] # Next close price
dataY.append(_y)
return np.array(dataY)
def TrainTestSplit(self, dataX, dataY, train_frac):
train_size = int(len(dataY) * train_frac)
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
trainY = trainY.reshape(len(trainY), 1)
testY = testY.reshape(len(testY), 1)
return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 32
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 105 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
dataX, dataY = gen_data.create_dataset(xx1,yy1)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
with tf.variable_scope('scope0'): #defining RNN
cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
saver = tf.train.Saver(max_to_keep=41)
writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Training step
for epoch in range(tot_epochs):
total_batches = int(train_size / batch_size) ##total batches/ no. of steps in an epoch
#for batch in range(total_batches):
_, step_loss = sess.run([train, loss], feed_dict= {x_placeholders:trainX, Y:trainY, keep_prob:0.5} )
# # evaluating on test data
test_predict = sess.run(Y_pred, feed_dict= {x_placeholders:testX, Y:trainY, keep_prob:0.5} )
#evaluating on training data
train_predict = sess.run(Y_pred, feed_dict={x_placeholders:trainX, Y:trainY, keep_prob:0.5})
rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
print("RMSE: {}".format(rmse_val))
# Plot predictions
fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
fig.set_figwidth(14)
fig.set_figheight(5)
ax2.plot(testY, 'b', label='observed')
ax2.plot(test_predict, 'k', label='predicted')
ax2.legend(loc="best")
ax2.set_xlabel("Time Period")
ax2.set_title('Testing')
ax1.plot(trainY, 'b', label='observed')
ax1.plot(train_predict, 'k',label= 'predicted')
ax1.legend(loc="best")
ax1.set_xlabel("Time Period")
ax1.set_ylabel("discharge (cms)")
ax1.set_title('Training')
plt.show()
我查看了使用tf.nn.raw_rnn
的答案here和here,但在这些情况下,仅使用先前时间步长的预测输出,但我希望先前的时间步长为预测输出时间步长以及其他三个要用于预测的输入。