Traceback (most recent call last):
File "train_rnn.py", line 92, in <module>
batch_size=FLAGS.batch_size)
File "/home/iit/sourab/conv_extractive/codes/cnn-text-classification-tf/rnn_code/text_rnn.py", line 65, in __init__
initial_state_bw=self.rnn_tuple_state_bw)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 375, in bidirectional_dynamic_rnn
time_major=time_major, scope=fw_scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 574, in dynamic_rnn
dtype=dtype)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 737, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2770, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2599, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2549, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 722, in _time_step
(output, new_state) = call_cell()
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 708, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 669, in _get_single_variable
found_var.get_shape()))
**ValueError: Trying to share variable bidirectional_rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (1024, 2048) and found shape (640, 2048).**
下面我将参数传递给TextRNN类的构造函数。
rnn = TextRNN(
sequence_size=x.shape[1],
truncated_backprop_length=FLAGS.truncated_backprop_length,
state_size=FLAGS.state_size,
num_classes=y.shape[1],
vocab_size=len(vocab_processor.vocabulary_),
embedding_size=FLAGS.embedding_dim,
num_layers=FLAGS.num_layers,
batch_size=FLAGS.batch_size)
这里是TextRNN类:
from __future__ import print_function, division
import tensorflow as tf
import numpy as np
import os
import sys
class TextRNN(object):
"""
An RNN for text classification
Uses an embedding layer followed by multilayered Bi-Directional LSTMs followed by a softmax layer
"""
def __init__(
self, sequence_size, truncated_backprop_length, state_size, num_classes, vocab_size, embedding_size, num_layers, batch_size):
#placeholders for input, output and dropout probability
self.input_x = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length, sequence_size], name="input_x")
self.input_y = tf.placeholder(tf.float32, [batch_size,truncated_backprop_length, num_classes], name="input_y")
self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
self.fw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
self.bw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
# Embedding layer
with tf.device('/cpu:0'), tf.name_scope("embedding"):
self.W = tf.Variable(
tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
name="W")
embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
#embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
self.embedded_chars_expanded = tf.reduce_mean(embedded_chars, axis=2)
state_per_layer_list_fw = tf.unstack(self.fw_init_state, axis=0)
self.rnn_tuple_state_fw = tuple(
[tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_fw[idx][0,:,:], state_per_layer_list_fw[idx][1,:,:])
for idx in range(num_layers)]
)
state_per_layer_list_bw = tf.unstack(self.bw_init_state, axis=0)
self.rnn_tuple_state_bw = tuple(
[tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_bw[idx][0,:,:], state_per_layer_list_bw[idx][1,:,:])
for idx in range(num_layers)]
)
W2 = tf.Variable(np.random.rand(2*state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.random.rand(1,num_classes), dtype=tf.float32)
with tf.name_scope('BiMultiLSTM'):
with tf.name_scope('forward_cell'):
cell_fw = tf.contrib.rnn.BasicLSTMCell( num_units=state_size, state_is_tuple=True)
cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.dropout_keep_prob)
cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * num_layers, state_is_tuple=True)
with tf.name_scope('Backward_cell'):
cell_bw = tf.contrib.rnn.BasicLSTMCell( num_units=state_size, state_is_tuple=True)
cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.dropout_keep_prob)
cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * num_layers, state_is_tuple=True)
self.output_hidden_states, self.current_states = tf.nn.bidirectional_dynamic_rnn(
cell_fw=cell_fw,
cell_bw=cell_bw,
inputs=self.embedded_chars_expanded,
initial_state_fw=self.rnn_tuple_state_fw,
initial_state_bw=self.rnn_tuple_state_bw)
self.outputs_concat = tf.concat(self.output_hidden_states, 2)
self.output_series = tf.reshape(self.outputs_concat, [-1, 2*state_size])
self._current_state_fw = self.current_states[0]
self._current_state_bw = self.current_states[1]
#output
with tf.name_scope("output"):
self.logits = tf.matmul(self.output_series, W2) + b2 #Broadcasted addition
self.labels = tf.reshape(self.input_y, [-1, num_classes])
self.logits_series = tf.unstack(tf.reshape(self.logits, [batch_size, truncated_backprop_length, num_classes]), axis=1)
self.predictions_series = [tf.nn.softmax(logit) for logit in self.logits_series]
self.labels_series = tf.unstack(tf.reshape(self.labels, [batch_size, truncated_backprop_length, num_classes]), axis=1)
#loss
with tf.name_scope("loss"):
self.losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
self.total_loss = tf.reduce_mean(self.losses)
#accuracy
self.correct_predictions = []
with tf.name_scope("accuracy"):
for predictions, labels in zip(self.predictions_series, self.labels_series):
self.correct_predictions.append(tf.equal(tf.argmax(predictions, axis=1), tf.argmax(labels, axis=1)))
self.sum_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, tf.float32))
self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, tf.float32))
这些是我通过的参数:
Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=50
CHECKPOINT_EVERY=100
DATA_FILE=./../data/cnn_train.txt
DEV_FILE=./../data/cnn_test.txt
DROPOUT_KEEP_PROB=1.0
EMBEDDING_DIM=128
EVALUATE_EVERY=100
LOG_DEVICE_PLACEMENT=False
NUM_CHECKPOINTS=5
NUM_CLASSES=2
NUM_EPOCHS=200
NUM_LAYERS=3
STATE_SIZE=512
TRUNCATED_BACKPROP_LENGTH=10
我在网上搜索但未能解决错误。如果我将 state_size 与 embedding_size 相同,则该程序有效,并且对于 state_size 不等于 embedding_size的所有其他情况,会出现上述错误