ValueError when trying to run a bidirectional MultiLSTM neural network

Date: 2017-06-28 17:57:18

Tags: python-3.x tensorflow lstm recurrent-neural-network bidirectional

Running the network produces the following traceback:

Traceback (most recent call last):
  File "train_rnn.py", line 92, in <module>
    batch_size=FLAGS.batch_size)
  File "/home/iit/sourab/conv_extractive/codes/cnn-text-classification-tf/rnn_code/text_rnn.py", line 65, in __init__
    initial_state_bw=self.rnn_tuple_state_bw)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 375, in bidirectional_dynamic_rnn
    time_major=time_major, scope=fw_scope)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 574, in dynamic_rnn
    dtype=dtype)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 737, in _dynamic_rnn_loop
    swap_memory=swap_memory)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2770, in while_loop
    result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2599, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2549, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 722, in _time_step
    (output, new_state) = call_cell()
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 708, in <lambda>
    call_cell = lambda: cell(input_t, state)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
    cur_inp, new_state = cell(cur_inp, cur_state)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 752, in __call__
    output, new_state = self._cell(inputs, state, scope)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
    concat = _linear([inputs, h], 4 * self._num_units, True)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
    initializer=kernel_initializer)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
    use_resource=use_resource, custom_getter=custom_getter)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
    use_resource=use_resource, custom_getter=custom_getter)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
    validate_shape=validate_shape, use_resource=use_resource)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
    *args, **kwargs)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
    variable = getter(*args, **kwargs)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
    variable = getter(*args, **kwargs)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
    use_resource=use_resource)
  File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 669, in _get_single_variable
    found_var.get_shape()))

**ValueError: Trying to share variable bidirectional_rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (1024, 2048) and found shape (640, 2048).**
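If I read the two shapes against my flag values below (EMBEDDING_DIM=128, STATE_SIZE=512), they seem consistent: as the traceback shows, BasicLSTMCell builds its kernel via _linear([inputs, h], 4 * num_units), i.e. a kernel of shape (input_size + num_units, 4 * num_units). A minimal sanity check of that arithmetic, assuming those flag values:

# Hedged sanity check of the two kernel shapes in the error,
# assuming EMBEDDING_DIM=128 and STATE_SIZE=512 from the flags below.
embedding_dim, state_size = 128, 512
print((embedding_dim + state_size, 4 * state_size))  # (640, 2048)  - a layer fed by the embeddings
print((state_size + state_size, 4 * state_size))     # (1024, 2048) - a layer fed by another layer's output

So one variable appears to be asked to serve both a layer whose input is the embedding and a layer whose input is another layer's output.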

Below is how I pass the parameters to the TextRNN class's constructor:

rnn = TextRNN(
    sequence_size=x.shape[1],
    truncated_backprop_length=FLAGS.truncated_backprop_length,
    state_size=FLAGS.state_size,
    num_classes=y.shape[1],
    vocab_size=len(vocab_processor.vocabulary_),
    embedding_size=FLAGS.embedding_dim,
    num_layers=FLAGS.num_layers,
    batch_size=FLAGS.batch_size)

Here is the TextRNN class:

from __future__ import print_function, division
import tensorflow as tf
import numpy as np
import os
import sys

class TextRNN(object):
    """
    An RNN for text classification
    Uses an embedding layer followed by multilayered Bi-Directional LSTMs followed by a softmax layer
    """

    def __init__(
            self, sequence_size, truncated_backprop_length, state_size, num_classes,
            vocab_size, embedding_size, num_layers, batch_size):

        #placeholders for input, output and dropout probability
        self.input_x = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length, sequence_size], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        self.fw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
        self.bw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            #embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
            self.embedded_chars_expanded = tf.reduce_mean(embedded_chars, axis=2)
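            # Note: embedding_lookup yields [batch_size, truncated_backprop_length, sequence_size, embedding_size];
            # the reduce_mean over axis=2 averages the word vectors, leaving [batch_size, truncated_backprop_length, embedding_size].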



        state_per_layer_list_fw = tf.unstack(self.fw_init_state, axis=0)
        self.rnn_tuple_state_fw = tuple(
            [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_fw[idx][0, :, :], state_per_layer_list_fw[idx][1, :, :])
             for idx in range(num_layers)])

        state_per_layer_list_bw = tf.unstack(self.bw_init_state, axis=0)
        self.rnn_tuple_state_bw = tuple(
            [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_bw[idx][0, :, :], state_per_layer_list_bw[idx][1, :, :])
             for idx in range(num_layers)])

        W2 = tf.Variable(np.random.rand(2 * state_size, num_classes), dtype=tf.float32)
        b2 = tf.Variable(np.random.rand(1, num_classes), dtype=tf.float32)
        with tf.name_scope('BiMultiLSTM'):
            with tf.name_scope('forward_cell'):
                cell_fw = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
                cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.dropout_keep_prob)
                cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * num_layers, state_is_tuple=True)
            with tf.name_scope('Backward_cell'):
                cell_bw = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
                cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.dropout_keep_prob)
                cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * num_layers, state_is_tuple=True)

            self.output_hidden_states, self.current_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=self.embedded_chars_expanded,
                initial_state_fw=self.rnn_tuple_state_fw,
                initial_state_bw=self.rnn_tuple_state_bw)

        self.outputs_concat = tf.concat(self.output_hidden_states, 2)
        self.output_series = tf.reshape(self.outputs_concat, [-1, 2 * state_size])
        self._current_state_fw = self.current_states[0]
        self._current_state_bw = self.current_states[1]
        #output
        with tf.name_scope("output"):
            self.logits = tf.matmul(self.output_series, W2) + b2  #Broadcasted addition
            self.labels = tf.reshape(self.input_y, [-1, num_classes])

        self.logits_series = tf.unstack(tf.reshape(self.logits, [batch_size, truncated_backprop_length, num_classes]), axis=1)
        self.predictions_series = [tf.nn.softmax(logit) for logit in self.logits_series]
        self.labels_series = tf.unstack(tf.reshape(self.labels, [batch_size, truncated_backprop_length, num_classes]), axis=1)

        #loss
        with tf.name_scope("loss"):
            self.losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
            self.total_loss = tf.reduce_mean(self.losses)

        #accuracy
        self.correct_predictions = []
        with tf.name_scope("accuracy"):
            for predictions, labels in zip(self.predictions_series, self.labels_series):
                self.correct_predictions.append(tf.equal(tf.argmax(predictions, axis=1), tf.argmax(labels, axis=1)))
            self.sum_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, tf.float32))

These are the parameters I pass:

Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=50
CHECKPOINT_EVERY=100
DATA_FILE=./../data/cnn_train.txt
DEV_FILE=./../data/cnn_test.txt
DROPOUT_KEEP_PROB=1.0
EMBEDDING_DIM=128
EVALUATE_EVERY=100
LOG_DEVICE_PLACEMENT=False
NUM_CHECKPOINTS=5
NUM_CLASSES=2
NUM_EPOCHS=200
NUM_LAYERS=3
STATE_SIZE=512
TRUNCATED_BACKPROP_LENGTH=10

I searched online but could not resolve the error. If I set state_size equal to embedding_size, the program works; in every other case, where state_size is not equal to embedding_size, the error above occurs.
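The only pattern I can see is that when state_size == embedding_size, the first layer's kernel shape (embedding_size + state_size, 4 * state_size) coincides with the deeper layers' (state_size + state_size, 4 * state_size), so I suspect the layers end up sharing a single kernel variable because I build the stack with [cell_fw] * num_layers, which repeats the same cell object num_layers times. A sketch of the per-layer construction I think is needed instead (untested; make_cell is a helper name I am introducing here):

def make_cell():
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.dropout_keep_prob)

# A fresh cell object per layer, so each layer gets its own kernel variable:
cell_fw = tf.contrib.rnn.MultiRNNCell([make_cell() for _ in range(num_layers)], state_is_tuple=True)
cell_bw = tf.contrib.rnn.MultiRNNCell([make_cell() for _ in range(num_layers)], state_is_tuple=True)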

0 Answers:

No answers