我创建了一个相当复杂的seq2seq类型模型(基于“A Neural Transducer”),在最新版本的Tensorflow中,以下代码返回错误:
Cannot use 'transducer_training/while/rnn/strided_slice' as input to 'gradients/transducer_training/while/rnn/while/Select_1_grad/Select/f_acc' because 'transducer_training/while/rnn/strided_slice' is in a while loop
之前的代码可以正常工作，只是在最新版本下停止工作了。环境版本：numpy (1.14.0)、protobuf (3.5.1)、tensorflow (1.5.0)、tensorflow-gpu (1.3.0)、tensorflow-tensorboard (1.5.1)，Ubuntu 版本 16.04.3 LTS (Xenial Xerus)。
代码(要获取错误,只需复制,粘贴并运行它):
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple
from tensorflow.python.layers import core as layers_core
# NOTE: Time major
# ---------------- Constants Manager ----------------------------
class ConstantsManager(object):
    """Holds the hyper-parameters / constants shared by all model components.

    Appends the two special symbols (E_SYMBOL, GO_SYMBOL) to a *copy* of the
    vocabulary so the caller's list is never mutated.

    Raises:
        AssertionError: if encoder and transducer hidden sizes differ (the
            transducer reuses the encoder state as its initial state, so the
            sizes must match).
    """

    def __init__(self, input_dimensions, input_embedding_size, inputs_embedded, encoder_hidden_units,
                 transducer_hidden_units, vocab_ids, input_block_size, beam_width):
        # Fixed message: original read "amountof" (missing space between the
        # two concatenated string literals).
        assert transducer_hidden_units == encoder_hidden_units, 'Encoder and transducer have to have the same amount' \
                                                                ' of hidden units'
        self.input_dimensions = input_dimensions
        # Copy the list: the two appends below must not mutate the caller's argument.
        self.vocab_ids = list(vocab_ids)
        self.E_SYMBOL = len(self.vocab_ids)   # id of the end-of-block symbol
        self.vocab_ids.append('E_SYMBOL')
        self.GO_SYMBOL = len(self.vocab_ids)  # id of the decoder start symbol
        self.vocab_ids.append('GO_SYMBOL')
        self.vocab_size = len(self.vocab_ids)  # vocabulary incl. the two special symbols
        self.input_embedding_size = input_embedding_size
        self.inputs_embedded = inputs_embedded
        self.encoder_hidden_units = encoder_hidden_units
        self.transducer_hidden_units = transducer_hidden_units
        self.input_block_size = input_block_size
        self.beam_width = beam_width
        self.batch_size = 1  # Cannot be increased, see paper
        self.log_prob_init_value = 0
# ----------------- Model ---------------------------------------
class Model(object):
    """Neural Transducer seq2seq model (after "A Neural Transducer").

    The input sequence is consumed in fixed-size blocks: for each block an
    encoder LSTM is run, then an attention-based transducer decoder emits a
    variable number of output tokens. The per-block loop is a tf.while_loop
    that carries the encoder and transducer hidden states between blocks,
    each packed into a single [2 (c,h), batch, units] tensor so it can be
    passed through the loop and fed via placeholders.
    """
    def __init__(self, cons_manager):
        self.var_list = []
        self.cons_manager = cons_manager
        # build_full_transducer() returns all graph placeholders and outputs;
        # they are kept as attributes so training/inference code can feed them.
        self.max_blocks, self.inputs_full_raw, self.transducer_list_outputs, self.start_block, self.encoder_hidden_init,\
            self.trans_hidden_init, self.logits, self.encoder_hidden_state_new, \
            self.transducer_hidden_state_new, self.train_saver = self.build_full_transducer()
        self.targets, self.train_op, self.loss = self.build_training_step()

    def build_full_transducer(self):
        """Build the block-wise encoder + transducer graph.

        Returns:
            Tuple of (max_blocks, inputs_full_raw, transducer_list_outputs,
            start_block, encoder_hidden_init, trans_hidden_init) placeholders,
            the concatenated per-block logits, the final packed hidden states
            of encoder and transducer, and a tf.train.Saver.
        """
        with tf.variable_scope('transducer_training'):
            # Embedding matrix shared by encoder inputs (when not pre-embedded)
            # and the decoder's GreedyEmbeddingHelper.
            embeddings = tf.Variable(tf.random_uniform([self.cons_manager.vocab_size,
                                                        self.cons_manager.input_embedding_size], -1.0, 1.0),
                                     dtype=tf.float32,
                                     name='embedding')
            # Inputs
            max_blocks = tf.placeholder(dtype=tf.int32, name='max_blocks')  # total amount of blocks to go through
            if self.cons_manager.inputs_embedded is True:
                input_type = tf.float32
            else:
                input_type = tf.int32
            inputs_full_raw = tf.placeholder(shape=(None, self.cons_manager.batch_size,
                                                    self.cons_manager.input_dimensions), dtype=input_type,
                                             name='inputs_full_raw')  # shape [max_time, 1, input_dims]
            transducer_list_outputs = tf.placeholder(shape=(None,), dtype=tf.int32,
                                                     name='transducer_list_outputs')  # amount to output per block
            start_block = tf.placeholder(dtype=tf.int32, name='transducer_start_block')  # where to start the input
            # Hidden states are fed packed as [2 (c,h), batch=1, units].
            encoder_hidden_init = tf.placeholder(shape=(2, 1, self.cons_manager.encoder_hidden_units), dtype=tf.float32,
                                                 name='encoder_hidden_init')
            trans_hidden_init = tf.placeholder(shape=(2, 1, self.cons_manager.transducer_hidden_units), dtype=tf.float32,
                                               name='trans_hidden_init')
            # Temporary constants, maybe changed during inference
            # NOTE(review): initialized to vocab_size, i.e. an id outside the
            # vocabulary, so greedy decoding never terminates early.
            end_symbol = tf.get_variable(name='end_symbol',
                                         initializer=tf.constant_initializer(self.cons_manager.vocab_size),
                                         shape=(), dtype=tf.int32)
            # Turn inputs into tensor which is easily readable#
            # Result: [n_blocks, block_size, batch, input_dims].
            inputs_full = tf.reshape(inputs_full_raw, shape=[-1, self.cons_manager.input_block_size,
                                                             self.cons_manager.batch_size,
                                                             self.cons_manager.input_dimensions])
            # Outputs
            outputs_ta = tf.TensorArray(dtype=tf.float32, size=max_blocks)
            init_state = (start_block, outputs_ta, encoder_hidden_init, trans_hidden_init)
            # Initiate cells, NOTE: if there is a future error, put these back inside the body function
            encoder_cell = tf.contrib.rnn.LSTMCell(num_units=self.cons_manager.encoder_hidden_units)
            transducer_cell = tf.contrib.rnn.LSTMCell(self.cons_manager.transducer_hidden_units)

            def cond(current_block, outputs_int, encoder_hidden, trans_hidden):
                # Iterate over blocks [start_block, start_block + max_blocks).
                return current_block < start_block + max_blocks

            def body(current_block, outputs_int, encoder_hidden, trans_hidden):
                # --------------------- ENCODER ----------------------------------------------------------------------
                encoder_inputs = inputs_full[current_block]
                encoder_inputs_length = [tf.shape(encoder_inputs)[0]]
                encoder_hidden_state = encoder_hidden
                if self.cons_manager.inputs_embedded is True:
                    encoder_inputs_embedded = encoder_inputs
                else:
                    encoder_inputs = tf.reshape(encoder_inputs, shape=[-1, self.cons_manager.batch_size])
                    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
                # Build model
                # Build previous state: unpack [2, 1, units] into an LSTMStateTuple.
                encoder_hidden_c, encoder_hidden_h = tf.split(encoder_hidden_state, num_or_size_splits=2, axis=0)
                encoder_hidden_c = tf.reshape(encoder_hidden_c, shape=[-1, self.cons_manager.encoder_hidden_units])
                encoder_hidden_h = tf.reshape(encoder_hidden_h, shape=[-1, self.cons_manager.encoder_hidden_units])
                encoder_hidden_state_t = LSTMStateTuple(encoder_hidden_c, encoder_hidden_h)
                # encoder_outputs: [max_time, batch_size, num_units]
                # NOTE(review): dynamic_rnn builds its own inner while loop; nesting
                # it inside this tf.while_loop appears to be what triggers the
                # reported gradient error on TF 1.5 — see the answers below.
                encoder_outputs, encoder_hidden_state_new = tf.nn.dynamic_rnn(
                    encoder_cell, encoder_inputs_embedded,
                    sequence_length=encoder_inputs_length, time_major=True,
                    dtype=tf.float32, initial_state=encoder_hidden_state_t)
                # Modify output of encoder_hidden_state_new so that it can be fed back in again without problems.
                encoder_hidden_state_new = tf.concat([encoder_hidden_state_new.c, encoder_hidden_state_new.h], axis=0)
                encoder_hidden_state_new = tf.reshape(encoder_hidden_state_new,
                                                      shape=[2, -1, self.cons_manager.encoder_hidden_units])
                # --------------------- TRANSDUCER --------------------------------------------------------------------
                encoder_raw_outputs = encoder_outputs
                # Save/load the state as one tensor, use encoder state as init if this is the first block
                trans_hidden_state = tf.cond(current_block > 0, lambda: trans_hidden, lambda: encoder_hidden_state_new)
                transducer_amount_outputs = transducer_list_outputs[current_block - start_block]
                # Model building
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=embeddings,
                    start_tokens=tf.tile([self.cons_manager.GO_SYMBOL],
                                         [self.cons_manager.batch_size]),  # TODO: check if this looks good
                    end_token=end_symbol)  # vocab size, so that it doesn't prematurely end the decoding
                attention_states = tf.transpose(encoder_raw_outputs,
                                                [1, 0, 2])  # attention_states: [batch_size, max_time, num_units]
                attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                    self.cons_manager.encoder_hidden_units, attention_states)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    transducer_cell,
                    attention_mechanism,
                    attention_layer_size=self.cons_manager.transducer_hidden_units)
                projection_layer = layers_core.Dense(self.cons_manager.vocab_size, use_bias=False)
                # Build previous state: unpack [2, 1, units] into an LSTMStateTuple.
                trans_hidden_c, trans_hidden_h = tf.split(trans_hidden_state, num_or_size_splits=2, axis=0)
                trans_hidden_c = tf.reshape(trans_hidden_c, shape=[-1, self.cons_manager.transducer_hidden_units])
                trans_hidden_h = tf.reshape(trans_hidden_h, shape=[-1, self.cons_manager.transducer_hidden_units])
                trans_hidden_state_t = LSTMStateTuple(trans_hidden_c, trans_hidden_h)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell, helper,
                    decoder_cell.zero_state(1, tf.float32).clone(cell_state=trans_hidden_state_t),
                    output_layer=projection_layer)
                # dynamic_decode also creates an inner while loop (same caveat as
                # dynamic_rnn above).
                outputs, transducer_hidden_state_new, _ = tf.contrib.seq2seq.dynamic_decode(decoder,
                                                                                            output_time_major=True,
                                                                                            maximum_iterations=transducer_amount_outputs)
                logits = outputs.rnn_output  # logits of shape [max_time,batch_size,vocab_size]
                decoder_prediction = outputs.sample_id  # For debugging
                # Modify output of transducer_hidden_state_new so that it can be fed back in again without problems.
                transducer_hidden_state_new = tf.concat(
                    [transducer_hidden_state_new[0].c, transducer_hidden_state_new[0].h],
                    axis=0)
                transducer_hidden_state_new = tf.reshape(transducer_hidden_state_new,
                                                         shape=[2, -1, self.cons_manager.transducer_hidden_units])
                # Note the outputs
                outputs_int = outputs_int.write(current_block - start_block, logits)
                return current_block + 1, outputs_int, encoder_hidden_state_new, transducer_hidden_state_new

            # parallel_iterations=1: blocks are strictly sequential because each
            # iteration consumes the previous iteration's hidden states.
            _, outputs_final, encoder_hidden_state_new, transducer_hidden_state_new = \
                tf.while_loop(cond, body, init_state, parallel_iterations=1)
            # Process outputs
            outputs = outputs_final.concat()
            logits = tf.reshape(
                outputs,
                shape=(-1, 1, self.cons_manager.vocab_size))  # And now its [max_output_time, batch_size, vocab]
            # For loading the model later on: stable names for graph lookup.
            logits = tf.identity(logits, name='logits')
            encoder_hidden_state_new = tf.identity(encoder_hidden_state_new, name='encoder_hidden_state_new')
            transducer_hidden_state_new = tf.identity(transducer_hidden_state_new, name='transducer_hidden_state_new')
            train_saver = tf.train.Saver()  # For now save everything
        return max_blocks, inputs_full_raw, transducer_list_outputs, start_block, encoder_hidden_init,\
            trans_hidden_init, logits, encoder_hidden_state_new, transducer_hidden_state_new, train_saver

    def build_training_step(self):
        """Add a cross-entropy loss over self.logits and an Adam train op.

        Returns:
            Tuple of (targets placeholder, train op, scalar loss tensor).
        """
        targets = tf.placeholder(shape=(None,), dtype=tf.int32, name='targets')
        targets_one_hot = tf.one_hot(targets, depth=self.cons_manager.vocab_size, dtype=tf.float32)
        # tf.Print ops are identities that log targets and greedy predictions
        # each time the loss is evaluated (debugging aid).
        targets_one_hot = tf.Print(targets_one_hot, [targets], message='Targets: ', summarize=10)
        targets_one_hot = tf.Print(targets_one_hot, [tf.argmax(self.logits, axis=2)], message='Argmax: ', summarize=10)
        stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=targets_one_hot,
                                                                         logits=self.logits)
        loss = tf.reduce_mean(stepwise_cross_entropy)
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return targets, train_op, loss
# Script entry: build the graph once with a toy configuration.
# NOTE(review): on TF 1.5, constructing this graph (and its gradients)
# reproduces the while-loop error discussed in the question above.
constants_manager = ConstantsManager(input_dimensions=1, input_embedding_size=11, inputs_embedded=False,
                                     encoder_hidden_units=100, transducer_hidden_units=100, vocab_ids=[0, 1, 2],
                                     input_block_size=1, beam_width=5)
model = Model(cons_manager=constants_manager)
答案 0（得分：0）
在这种特殊情况下,错误似乎不正确(请参阅注释中的github问题)。但是,一般而言,此类错误意味着以下内容:
错误消息抱怨的使用模式始终是非法的。早期版本的TensorFlow对它没有很好的检查。
问题的核心在于：在 TensorFlow 的执行模型中，你不能在 while 循环之外使用在该 while 循环内部创建的张量。关于此问题的简单说明，请查看相关的测试用例（test case）。
你可以通过修改该检查函数使其立即返回来禁用此检查，但这样你的计算图将是格式错误的，从而导致未定义的行为。
正确的解决方法是：把所有需要在 while 循环之外访问的张量（即在 cond 和 body 函数之外使用的张量）加入 loop_vars，并改用 tf.while_loop 返回的对应值。
答案 1（得分：0）
我最近在把 dynamic_rnn 放进 scan（即 while 循环）中时遇到了类似的问题。该错误似乎是在 TensorFlow 1.5 中才引入的。你可以尝试把 TensorFlow 降级到 1.4 或升级到 1.6，两者应该都有效。