Question

我正在尝试训练一个LSTM网络：用一种方式可以成功训练，但用另一种方式却会抛出错误。在第一个例子中，我使用numpy的reshape重塑输入数组X；在第二个例子中，我使用tensorflow的reshape来重塑它。

工作正常：

import numpy as np
import tensorflow as tf
import tensorflow.contrib.learn as learn


# Training hyper-parameters (handed to the estimator constructed further down)
learning_rate = 0.1
training_steps = 3000
batch_size = 128

# Network parameters
n_input = 4     # size of the last axis used in the reshapes
n_steps = 10    # number of time steps per sequence
n_hidden = 128  # size passed to BasicLSTMCell
n_classes = 6   # number of classes given to the estimator

# Dummy data: 1770 rows of 4 values and 177 labels (1770 = 177 * n_steps).
X = np.ones([1770,4])
y = np.ones([177])

# NUMPY RESHAPE OUTSIDE RNN_MODEL
# After this X has shape (177, n_steps, n_input), so its first axis has the
# same length as y before the data is passed to fit().
X = np.reshape(X, (-1, n_steps, n_input))

def rnn_model(X, y):
  """Build the LSTM classification graph used as the estimator's model_fn.

  In this variant X has already been reshaped with NumPy before fit(), so
  the shape comments below assume X is (batch_size, n_steps, n_input).

  Returns whatever learn.models.logistic_regression produces for the second
  return value of tf.nn.rnn and the targets y.
  """

  # TENSORFLOW RESHAPE INSIDE RNN_MODEL (disabled in this variant)
  #X = tf.reshape(X, [-1, n_steps, n_input])  # (batch_size, n_steps, n_input)

  # Permute n_steps and batch_size -> (n_steps, batch_size, n_input)
  X = tf.transpose(X, [1, 0, 2])

  # Reshape to prepare input to hidden activation
  X = tf.reshape(X, [-1, n_input])  # (n_steps*batch_size, n_input)
  # Split data because rnn cell needs a list of inputs for the RNN inner loop
  X = tf.split(0, n_steps, X)  # n_steps * (batch_size, n_input)

  # Define an LSTM cell with tensorflow
  lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
  # Run the RNN; the first return value is discarded and the second one is
  # kept as the encoding fed to the classifier.
  _, encoding = tf.nn.rnn(lstm_cell, X, dtype=tf.float32)

  return learn.models.logistic_regression(encoding, y)


# Wrap rnn_model in a contrib.learn estimator configured with the
# hyper-parameters defined earlier in this script.
classifier = learn.TensorFlowEstimator(model_fn=rnn_model, n_classes=n_classes,
                                       batch_size=batch_size,
                                       steps=training_steps,
                                       learning_rate=learning_rate)

# X was reshaped with NumPy beforehand, so X and y both have 177 entries on
# their first axis when handed to fit().
classifier.fit(X,y)

不起作用：

import numpy as np
import tensorflow as tf
import tensorflow.contrib.learn as learn


# Training hyper-parameters (handed to the estimator constructed further down)
learning_rate = 0.1
training_steps = 3000
batch_size = 128

# Network parameters
n_input = 4     # size of the last axis used in the reshapes
n_steps = 10    # number of time steps per sequence
n_hidden = 128  # size passed to BasicLSTMCell
n_classes = 6   # number of classes given to the estimator

# Dummy data: 1770 rows of 4 values and 177 labels (1770 = 177 * n_steps).
X = np.ones([1770,4])
y = np.ones([177])

# NUMPY RESHAPE OUTSIDE RNN_MODEL (disabled in this variant)
# With this line commented out, X keeps its (1770, 4) shape while y still has
# only 177 entries, so their first axes no longer match.
#X = np.reshape(X, (-1, n_steps, n_input))

def rnn_model(X, y):
  """Build the LSTM classification graph used as the estimator's model_fn.

  In this variant the reshape to (batch_size, n_steps, n_input) is done
  inside the graph with tf.reshape instead of with NumPy before fit().
  NOTE(review): per the traceback in this document, the failure is raised
  in the estimator's data feeder, i.e. outside this function.

  Returns whatever learn.models.logistic_regression produces for the second
  return value of tf.nn.rnn and the targets y.
  """

  # TENSORFLOW RESHAPE INSIDE RNN_MODEL
  X = tf.reshape(X, [-1, n_steps, n_input])  # (batch_size, n_steps, n_input)

  # Permute n_steps and batch_size -> (n_steps, batch_size, n_input)
  X = tf.transpose(X, [1, 0, 2])

  # Reshape to prepare input to hidden activation
  X = tf.reshape(X, [-1, n_input])  # (n_steps*batch_size, n_input)
  # Split data because rnn cell needs a list of inputs for the RNN inner loop
  X = tf.split(0, n_steps, X)  # n_steps * (batch_size, n_input)

  # Define an LSTM cell with tensorflow
  lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
  # Run the RNN; the first return value is discarded and the second one is
  # kept as the encoding fed to the classifier.
  _, encoding = tf.nn.rnn(lstm_cell, X, dtype=tf.float32)

  return learn.models.logistic_regression(encoding, y)


# Wrap rnn_model in a contrib.learn estimator configured with the
# hyper-parameters defined earlier in this script.
classifier = learn.TensorFlowEstimator(model_fn=rnn_model, n_classes=n_classes,
                                       batch_size=batch_size,
                                       steps=training_steps,
                                       learning_rate=learning_rate)

# Here X still has 1770 rows while y has only 177 labels; this is the call
# that ends in the IndexError ("index 974 is out of bounds for axis 0 with
# size 177") shown in the traceback of this document.
classifier.fit(X,y)

后者抛出以下错误：

WARNING:tensorflow:<tensorflow.python.ops.rnn_cell.BasicLSTMCell object at 0x7f1c67c6f750>: Using a concatenated state is slower and will soon be deprecated.  Use state_is_tuple=True.
Traceback (most recent call last):
  File "/home/blabla/test.py", line 47, in <module>
    classifier.fit(X,y)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/base.py", line 160, in fit
    monitors=monitors)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 484, in _train_model
    monitors=monitors)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 328, in train
    reraise(*excinfo)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/graph_actions.py", line 254, in train
    feed_dict = feed_fn() if feed_fn is not None else None
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/io/data_feeder.py", line 366, in _feed_dict_fn
    out.itemset((i, self.y[sample]), 1.0)
IndexError: index 974 is out of bounds for axis 0 with size 177

Answer 1

一些建议：
* 向 fit 传入 input_fn，而不是直接传入 X、Y
* 使用 learn.Estimator 而不是 learn.TensorFlowEstimator

因为你的数据量很小，所以下面的写法应该可行；否则，您需要对数据进行分批处理。

```
def _my_inputs():
    return tf.constant(np.ones([1770, 4])), tf.constant(np.ones([177]))
```

Answer 2

我能够通过一些小改动来实现这个目标：

# Training hyper-parameters (smaller than in the question so the example
# finishes quickly; handed to the estimator constructed further down)
learning_rate = 0.1
training_steps = 10
batch_size = 8

# Network parameters
n_input = 4     # size of the last axis of X
n_steps = 10    # number of time steps per sequence
n_hidden = 128  # size passed to BasicLSTMCell
n_classes = 6   # number of classes given to the estimator

# Dummy data created directly in 3-D form, so X and y both have 177 entries
# on their first axis.
X = np.ones([177, 10, 4])  # <---- Use shape [batch_size, n_steps, n_input] here.
y = np.ones([177])

def rnn_model(X, y):
  """Build the LSTM classification graph used as the estimator's model_fn.

  Assumes X is (batch_size, n_steps, n_input), matching how the data is
  created before fit(). Returns whatever
  tf.contrib.learn.models.logistic_regression produces for the last
  per-step RNN output and the targets y.
  """
  X = tf.transpose(X, [1, 0, 2])  #|
  X = tf.unpack(X)                #| These two lines do the same thing as your code, just a bit simpler ;)

  # Define a LSTM cell with tensorflow
  lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
  # Get lstm cell output
  # NOTE(review): dtype is float64 here (float32 in the question), presumably
  # to match the default dtype of np.ones -- confirm.
  outputs, _ = tf.nn.rnn(lstm_cell, X, dtype=tf.float64)  # <---- I think you want to use the first return value here.

  return tf.contrib.learn.models.logistic_regression(outputs[-1], y)  # <----uses just the last output for classification, as is typical with RNNs.


# Wrap rnn_model in a contrib.learn estimator configured with the
# hyper-parameters defined earlier in this script.
classifier = tf.contrib.learn.TensorFlowEstimator(model_fn=rnn_model,
                                                  n_classes=n_classes,
                                                  batch_size=batch_size,
                                                  steps=training_steps,
                                                  learning_rate=learning_rate)

# X is already [batch, n_steps, n_input], so X and y agree on the first axis.
classifier.fit(X,y)

我认为你遇到的核心问题是：传递给 fit(...) 时，X 的形状必须是 [batch, ...]。当您使用 numpy 在 rnn_model() 函数之外进行重塑时，X 具有这种形状，因此训练可以正常进行。

我无法保证这个解决方案得到的模型质量如何，但至少它可以运行！

Tensorflow tf.reshape() 的行为似乎与 numpy.reshape() 不同

2 个答案: