Feeding the output of an LSTM back as its input gives the same loss in TensorFlow

Date: 2019-12-04 11:54:29

Tags: python-3.x tensorflow time-series lstm recurrent-neural-network

For a time-series prediction problem, I want to feed the output of the LSTM at time step t back as its input at time step t+1. To do so, I am overriding BasicLSTMCell as it is written in tensorflow/python/ops/rnn_cell_impl.py, with the modifications below.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

#__all__ = ["FcLSTMCell"]

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.keras import activations
from tensorflow.python.keras.engine import input_spec
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.platform import tf_logging as logging
import tensorflow as tf


from my_rnns import LayerRNNCell, _check_supported_dtypes, LSTMStateTuple,_check_rnn_cell_input_dtypes

_BIAS_VARIABLE_NAME = "bias"
_WEIGHTS_VARIABLE_NAME = "kernel"

class BasicLSTMCell(LayerRNNCell):
  """a fully connected LSTM which feeds the output back as input.
Lines ending with #* are lines where I made changes
  """

  def __init__(self,
               num_units,
               forget_bias=1.0,
               state_is_tuple=True,
               activation=None,
               reuse=None,
               name=None,
               dtype=None,
               fc_units=0,                                           #*
               **kwargs):
    """Initialize the basic LSTM cell.
if `fc_units` are more than one, than it will feed the outputs back otherwise
it will act as normal LSTM.
    """
    super(BasicLSTMCell, self).__init__(
        _reuse=reuse, name=name, dtype=dtype, **kwargs)
    _check_supported_dtypes(self.dtype)
    if not state_is_tuple:
      logging.warn(
          "%s: Using a concatenated state is slower and will soon be "
          "deprecated.  Use state_is_tuple=True.", self)
    if context.executing_eagerly() and context.num_gpus() > 0:
      logging.warn(
          "%s: Note that this cell is not optimized for performance. "
          "Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
          "performance on GPU.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = input_spec.InputSpec(ndim=2)

    self._num_units = num_units                                 #*
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    if activation:
      self._activation = activations.get(activation)
    else:
      self._activation = math_ops.tanh

    self.fc_nodes = fc_units                             #*
    self.first_time_step_passed = False                  #*


  @property
  def state_size(self):
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)


  @property
  def output_size(self):
    if self.fc_nodes>0:                       #*
      return self.fc_nodes                    #*
    else:                                     #*
      return self._num_units                  #*

  @tf_utils.shape_type_conversion
  def build(self, inputs_shape):
    if inputs_shape[-1] is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
                       str(inputs_shape))
    _check_supported_dtypes(self.dtype)
    input_depth = inputs_shape[-1]
    h_depth = self._num_units
    self._kernel = self.add_variable(
        _WEIGHTS_VARIABLE_NAME,
        shape=[input_depth + h_depth, 4 * self._num_units])
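    # Note: input_depth is taken from the real inputs, so when fc_nodes > 0
    # the fed-back output must have the same width as the input, i.e.
    # fc_units has to equal inputs_shape[-1] for the matmul in call().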
    self._bias = self.add_variable(
        _BIAS_VARIABLE_NAME,
        shape=[4 * self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True

  def call(self, inputs, state):
    """Long short-term memory cell (LSTM)."""
    if self.fc_nodes>0:                                #*
      # we want to apply this from second timestep     #*
      if self.first_time_step_passed:                  #*
        prev_outputs = self.outputs                    #*
        inputs = prev_outputs                          #*
    _check_rnn_cell_input_dtypes([inputs, state])      #*
    print(inputs.shape, 'inputs to FcLSTMCell')        #*

    sigmoid = math_ops.sigmoid
    one = constant_op.constant(1, dtype=dtypes.int32)
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

    gate_inputs = math_ops.matmul(
        array_ops.concat([inputs, h], 1), self._kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=4, axis=one)

    forget_bias_tensor = constant_op.constant(self._forget_bias, dtype=f.dtype)
    # Note that using `add` and `multiply` instead of `+` and `*` gives a
    # performance improvement. So using those at the cost of readability.
    add = math_ops.add
    multiply = math_ops.multiply
    new_c = add(
        multiply(c, sigmoid(add(f, forget_bias_tensor))),
        multiply(sigmoid(i), self._activation(j)))
    new_h = multiply(self._activation(new_c), sigmoid(o))

    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat([new_c, new_h], 1)

    if self.fc_nodes>0:                                              #*
      print('performing dense in LSTM')                               #*
      self.first_time_step_passed = True                             #*
      self.outputs = new_h = tf.layers.dense(new_h, self.fc_nodes)   #*

    print(new_h.shape, 'outputs from FcLSTMCell')

    return new_h, new_state

  def get_config(self):
    config = {
        "num_units": self._num_units,
        "forget_bias": self._forget_bias,
        "state_is_tuple": self._state_is_tuple,
        "activation": activations.serialize(self._activation),
        "reuse": self._reuse,
    }
    base_config = super(BasicLSTMCell, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

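As an aside, the usual way to express this kind of output-to-input feedback in TF 1.x is tf.nn.raw_rnn, where loop_fn chooses the next input at every step. The sketch below is not my cell above, just a minimal illustration of the same recurrence; the sizes and the projection variables (batch_size, n_steps, n_inputs, n_neurons, W_proj, b_proj) are hypothetical placeholders.

import tensorflow as tf

# hypothetical sizes, for illustration only
batch_size, n_steps, n_inputs, n_neurons = 32, 20, 1, 100

X = tf.placeholder(tf.float32, [batch_size, n_steps, n_inputs])
first_input = X[:, 0, :]  # only the first time step comes from the data

cell = tf.nn.rnn_cell.BasicLSTMCell(n_neurons)

# projection from the hidden state back to the input width, created here
# so its variables are not built inside raw_rnn's while_loop
W_proj = tf.get_variable("W_proj", [n_neurons, n_inputs])
b_proj = tf.get_variable("b_proj", [n_inputs],
                         initializer=tf.zeros_initializer())

def loop_fn(time, cell_output, cell_state, loop_state):
    if cell_output is None:        # first call, time == 0
        next_state = cell.zero_state(batch_size, tf.float32)
        next_input = first_input
        emit_output = None         # lets raw_rnn infer the emit structure
    else:
        next_state = cell_state
        # the (projected) output at step t becomes the input at step t+1
        next_input = tf.matmul(cell_output, W_proj) + b_proj
        emit_output = cell_output
    finished = tf.fill([batch_size], time >= n_steps)
    return finished, next_input, next_state, emit_output, None

outputs_ta, final_state, _ = tf.nn.raw_rnn(cell, loop_fn)
outputs = tf.transpose(outputs_ta.stack(), [1, 0, 2])  # time-major -> batch-major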
I am using this cell to predict the time series below.

import tensorflow as tf

from my_rnns2 import BasicLSTMCell as FcLSTMCell

# n_neurons, n_outputs, learning_rate and the placeholders X, y are
# defined elsewhere in my script
fc_lstm_nodes = n_outputs

cell = FcLSTMCell(num_units=n_neurons, activation=tf.nn.relu, fc_units=fc_lstm_nodes)

rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

if fc_lstm_nodes>0:
  stacked_rnn_outputs = rnn_outputs[:,-1,:]
  stacked_outputs = stacked_rnn_outputs
else:
  stacked_rnn_outputs = tf.reshape(rnn_outputs[:,-1,:], [-1, n_neurons])
  stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)

outputs = tf.reshape(stacked_outputs, [-1, n_outputs])
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
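
The training loop itself is the usual session loop; a minimal sketch, where X_train, y_train and n_epochs are hypothetical placeholders for my actual data pipeline:

# minimal training-loop sketch; X_train, y_train and n_epochs are
# hypothetical placeholders, the real data pipeline is omitted
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        _, loss_val = sess.run([training_op, loss],
                               feed_dict={X: X_train, y: y_train})
        print(epoch, "loss:", loss_val)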

However, I get almost the same loss-reduction behaviour with the outputs fed back into the LSTM as without the feedback. Below are the loss curves plotted on logarithmic and linear scales.

[Loss reduction with epochs]

For the output-feedback case, I expected the loss reduction to be much smoother at the beginning. Can someone tell me where I am making a mistake? The input and output time series are shown in the figure below; as I said, this is a many-to-one case.

[input and output time series]

0 Answers