对于时间序列预测问题,我想在时间步t
上反馈LSTM的输出作为时间步t+1
上的输入。为此,我要覆盖BasicLSTMCell
,就像用以下修改在tensorflow/python/ops/rnn_cell_imp.py
中写的一样。
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#__all__ = ["FcLSTMCell"]
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.keras import activations
from tensorflow.python.keras.engine import input_spec
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.platform import tf_logging as logging
import tensorflow as tf
from my_rnns import LayerRNNCell, _check_supported_dtypes, LSTMStateTuple,_check_rnn_cell_input_dtypes
_BIAS_VARIABLE_NAME = "bias"
_WEIGHTS_VARIABLE_NAME = "kernel"
class BasicLSTMCell(LayerRNNCell):
"""a fully connected LSTM which feeds the output back as input.
Lines ending with #* are lines where I made changes
"""
def __init__(self,
num_units,
forget_bias=1.0,
state_is_tuple=True,
activation=None,
reuse=None,
name=None,
dtype=None,
fc_units=0, #*
**kwargs):
"""Initialize the basic LSTM cell.
if `fc_units` are more than one, than it will feed the outputs back otherwise
it will act as normal LSTM.
"""
super(BasicLSTMCell, self).__init__(
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if not state_is_tuple:
logging.warn(
"%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
if context.executing_eagerly() and context.num_gpus() > 0:
logging.warn(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)
# Inputs must be 2-dimensional.
self.input_spec = input_spec.InputSpec(ndim=2)
self._num_units = num_units #*
self._forget_bias = forget_bias
self._state_is_tuple = state_is_tuple
if activation:
self._activation = activations.get(activation)
else:
self._activation = math_ops.tanh
self.fc_nodes = fc_units #*
self.first_time_step_passed = False #*
@property
def state_size(self):
return (LSTMStateTuple(self._num_units, self._num_units)
if self._state_is_tuple else 2 * self._num_units)
@property
def output_size(self):
if self.fc_nodes>0: #*
return self.fc_nodes #*
else: #*
return self._num_units #*
@tf_utils.shape_type_conversion
def build(self, inputs_shape):
if inputs_shape[-1] is None:
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
str(inputs_shape))
_check_supported_dtypes(self.dtype)
input_depth = inputs_shape[-1]
h_depth = self._num_units
self._kernel = self.add_variable(
_WEIGHTS_VARIABLE_NAME,
shape=[input_depth + h_depth, 4 * self._num_units])
self._bias = self.add_variable(
_BIAS_VARIABLE_NAME,
shape=[4 * self._num_units],
initializer=init_ops.zeros_initializer(dtype=self.dtype))
self.built = True
def call(self, inputs, state):
"""Long short-term memory cell (LSTM)."""
if self.fc_nodes>0: #*
# we want to apply this from second timestep #*
if self.first_time_step_passed: #*
prev_outputs = self.outputs #*
inputs = prev_outputs #*
_check_rnn_cell_input_dtypes([inputs, state]) #*
print(inputs.shape, 'inputs to FcLSTMCell') #*
sigmoid = math_ops.sigmoid
one = constant_op.constant(1, dtype=dtypes.int32)
# Parameters of gates are concatenated into one multiply for efficiency.
if self._state_is_tuple:
c, h = state
else:
c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)
gate_inputs = math_ops.matmul(
array_ops.concat([inputs, h], 1), self._kernel)
gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
# i = input_gate, j = new_input, f = forget_gate, o = output_gate
i, j, f, o = array_ops.split(
value=gate_inputs, num_or_size_splits=4, axis=one)
forget_bias_tensor = constant_op.constant(self._forget_bias, dtype=f.dtype)
# Note that using `add` and `multiply` instead of `+` and `*` gives a
# performance improvement. So using those at the cost of readability.
add = math_ops.add
multiply = math_ops.multiply
new_c = add(
multiply(c, sigmoid(add(f, forget_bias_tensor))),
multiply(sigmoid(i), self._activation(j)))
new_h = multiply(self._activation(new_c), sigmoid(o))
if self._state_is_tuple:
new_state = LSTMStateTuple(new_c, new_h)
else:
new_state = array_ops.concat([new_c, new_h], 1)
if self.fc_nodes>0: #*
print('performing dnese in LSTM') #*
self.first_time_step_passed = True #*
self.outputs = new_h = tf.layers.dense(new_h, self.fc_nodes) #*
print(new_h.shape, 'outputs from FcLSTMCell')
return new_h, new_state
def get_config(self):
config = {
"num_units": self._num_units,
"forget_bias": self._forget_bias,
"state_is_tuple": self._state_is_tuple,
"activation": activations.serialize(self._activation),
"reuse": self._reuse,
}
base_config = super(BasicLSTMCell, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
我正在使用它来预测以下时间序列
from my_rnns2 import BasicLSTMCell as FcLSTMCell
fc_lstm_nodes = n_outputs
cell = FcLSTMCell(num_units=n_neurons, activation=tf.nn.relu, fc_units=fc_lstm_nodes)
rnn_outputs, states = dynamic_rnn(cell, X, dtype=tf.float32)
if fc_lstm_nodes>0:
stacked_rnn_outputs = rnn_outputs[:,-1,:]
stacked_outputs = stacked_rnn_outputs
else:
stacked_rnn_outputs = tf.reshape(rnn_outputs[:,-1,:], [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
但是,在将输出反馈到LSTM的情况下以及在不输出的情况下,我得到的损耗值降低结果几乎相似。以下是以对数标度和线性标度绘制的损耗值曲线。
对于输出反馈的情况,我希望损失的减少在开始时会非常平稳。有人可以告诉我我在哪里犯错。
输入和输出时间序列如下图所示,正如我所说的是many to one
情况。