TensorFlow: strided_slice error with while loops

Date: 2018-02-09 20:06:04

Tags: python numpy tensorflow

I've built a fairly complex seq2seq-style model (based on "A Neural Transducer"), and in the latest version of TensorFlow the following code raises this error:

Cannot use 'transducer_training/while/rnn/strided_slice' as input to 'gradients/transducer_training/while/rnn/while/Select_1_grad/Select/f_acc' because 'transducer_training/while/rnn/strided_slice' is in a while loop

The code worked before; it only stopped working with the newest versions: numpy (1.14.0), protobuf (3.5.1), tensorflow (1.5.0), tensorflow-gpu (1.3.0), tensorflow-tensorboard (1.5.1), running on Ubuntu 16.04.3 LTS (Xenial Xerus).

The code (to reproduce the error, just copy, paste and run it):

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple
from tensorflow.python.layers import core as layers_core

# NOTE: Time major

# ---------------- Constants Manager ----------------------------
class ConstantsManager(object):
    def __init__(self, input_dimensions, input_embedding_size, inputs_embedded, encoder_hidden_units,
                 transducer_hidden_units, vocab_ids, input_block_size, beam_width):
        assert transducer_hidden_units == encoder_hidden_units, 'Encoder and transducer have to have the same amount ' \
                                                                'of hidden units'
        self.input_dimensions = input_dimensions
        self.vocab_ids = vocab_ids
        self.E_SYMBOL = len(self.vocab_ids)
        self.vocab_ids.append('E_SYMBOL')
        self.GO_SYMBOL = len(self.vocab_ids)
        self.vocab_ids.append('GO_SYMBOL')
        self.vocab_size = len(self.vocab_ids)
        self.input_embedding_size = input_embedding_size
        self.inputs_embedded = inputs_embedded
        self.encoder_hidden_units = encoder_hidden_units
        self.transducer_hidden_units = transducer_hidden_units
        self.input_block_size = input_block_size
        self.beam_width = beam_width
        self.batch_size = 1  # Cannot be increased, see paper
        self.log_prob_init_value = 0

# ----------------- Model ---------------------------------------


class Model(object):
    def __init__(self, cons_manager):
        self.var_list = []
        self.cons_manager = cons_manager
        self.max_blocks, self.inputs_full_raw, self.transducer_list_outputs, self.start_block, self.encoder_hidden_init,\
            self.trans_hidden_init, self.logits, self.encoder_hidden_state_new, \
            self.transducer_hidden_state_new, self.train_saver = self.build_full_transducer()

        self.targets, self.train_op, self.loss = self.build_training_step()

    def build_full_transducer(self):
        with tf.variable_scope('transducer_training'):

            embeddings = tf.Variable(tf.random_uniform([self.cons_manager.vocab_size,
                                                        self.cons_manager.input_embedding_size], -1.0, 1.0),
                                     dtype=tf.float32,
                                     name='embedding')
            # Inputs
            max_blocks = tf.placeholder(dtype=tf.int32, name='max_blocks')  # total amount of blocks to go through
            if self.cons_manager.inputs_embedded is True:
                input_type = tf.float32
            else:
                input_type = tf.int32
            inputs_full_raw = tf.placeholder(shape=(None, self.cons_manager.batch_size,
                                                    self.cons_manager.input_dimensions), dtype=input_type,
                                             name='inputs_full_raw')  # shape [max_time, 1, input_dims]
            transducer_list_outputs = tf.placeholder(shape=(None,), dtype=tf.int32,
                                                     name='transducer_list_outputs')  # amount to output per block
            start_block = tf.placeholder(dtype=tf.int32, name='transducer_start_block')  # where to start the input

            encoder_hidden_init = tf.placeholder(shape=(2, 1, self.cons_manager.encoder_hidden_units), dtype=tf.float32,
                                                 name='encoder_hidden_init')
            trans_hidden_init = tf.placeholder(shape=(2, 1, self.cons_manager.transducer_hidden_units), dtype=tf.float32,
                                               name='trans_hidden_init')

            # Temporary constants, maybe changed during inference
            end_symbol = tf.get_variable(name='end_symbol',
                                         initializer=tf.constant_initializer(self.cons_manager.vocab_size),
                                         shape=(), dtype=tf.int32)

            # Turn the inputs into a tensor that is easy to index block by block

            inputs_full = tf.reshape(inputs_full_raw, shape=[-1, self.cons_manager.input_block_size,
                                                             self.cons_manager.batch_size,
                                                             self.cons_manager.input_dimensions])

            # Outputs
            outputs_ta = tf.TensorArray(dtype=tf.float32, size=max_blocks)

            init_state = (start_block, outputs_ta, encoder_hidden_init, trans_hidden_init)

            # Initiate cells, NOTE: if there is a future error, put these back inside the body function
            encoder_cell = tf.contrib.rnn.LSTMCell(num_units=self.cons_manager.encoder_hidden_units)
            transducer_cell = tf.contrib.rnn.LSTMCell(self.cons_manager.transducer_hidden_units)

            def cond(current_block, outputs_int, encoder_hidden, trans_hidden):
                return current_block < start_block + max_blocks

            def body(current_block, outputs_int, encoder_hidden, trans_hidden):

                # --------------------- ENCODER ----------------------------------------------------------------------
                encoder_inputs = inputs_full[current_block]
                encoder_inputs_length = [tf.shape(encoder_inputs)[0]]
                encoder_hidden_state = encoder_hidden

                if self.cons_manager.inputs_embedded is True:
                    encoder_inputs_embedded = encoder_inputs
                else:
                    encoder_inputs = tf.reshape(encoder_inputs, shape=[-1, self.cons_manager.batch_size])
                    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)

                # Build model

                # Build previous state
                encoder_hidden_c, encoder_hidden_h = tf.split(encoder_hidden_state, num_or_size_splits=2, axis=0)
                encoder_hidden_c = tf.reshape(encoder_hidden_c, shape=[-1, self.cons_manager.encoder_hidden_units])
                encoder_hidden_h = tf.reshape(encoder_hidden_h, shape=[-1, self.cons_manager.encoder_hidden_units])
                encoder_hidden_state_t = LSTMStateTuple(encoder_hidden_c, encoder_hidden_h)

                #   encoder_outputs: [max_time, batch_size, num_units]
                encoder_outputs, encoder_hidden_state_new = tf.nn.dynamic_rnn(
                    encoder_cell, encoder_inputs_embedded,
                    sequence_length=encoder_inputs_length, time_major=True,
                    dtype=tf.float32, initial_state=encoder_hidden_state_t)

                # Modify output of encoder_hidden_state_new so that it can be fed back in again without problems.
                encoder_hidden_state_new = tf.concat([encoder_hidden_state_new.c, encoder_hidden_state_new.h], axis=0)
                encoder_hidden_state_new = tf.reshape(encoder_hidden_state_new,
                                                      shape=[2, -1, self.cons_manager.encoder_hidden_units])

                # --------------------- TRANSDUCER --------------------------------------------------------------------
                encoder_raw_outputs = encoder_outputs
                # Save/load the state as one tensor, use encoder state as init if this is the first block
                trans_hidden_state = tf.cond(current_block > 0, lambda: trans_hidden, lambda: encoder_hidden_state_new)
                transducer_amount_outputs = transducer_list_outputs[current_block - start_block]

                # Model building
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=embeddings,
                    start_tokens=tf.tile([self.cons_manager.GO_SYMBOL],
                                         [self.cons_manager.batch_size]),  # TODO: check if this looks good
                    end_token=end_symbol)  # vocab size, so that it doesn't prematurely end the decoding

                attention_states = tf.transpose(encoder_raw_outputs,
                                                [1, 0, 2])  # attention_states: [batch_size, max_time, num_units]

                attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                    self.cons_manager.encoder_hidden_units, attention_states)

                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    transducer_cell,
                    attention_mechanism,
                    attention_layer_size=self.cons_manager.transducer_hidden_units)

                projection_layer = layers_core.Dense(self.cons_manager.vocab_size, use_bias=False)

                # Build previous state
                trans_hidden_c, trans_hidden_h = tf.split(trans_hidden_state, num_or_size_splits=2, axis=0)
                trans_hidden_c = tf.reshape(trans_hidden_c, shape=[-1, self.cons_manager.transducer_hidden_units])
                trans_hidden_h = tf.reshape(trans_hidden_h, shape=[-1, self.cons_manager.transducer_hidden_units])
                trans_hidden_state_t = LSTMStateTuple(trans_hidden_c, trans_hidden_h)

                decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell, helper,
                    decoder_cell.zero_state(1, tf.float32).clone(cell_state=trans_hidden_state_t),
                    output_layer=projection_layer)

                outputs, transducer_hidden_state_new, _ = tf.contrib.seq2seq.dynamic_decode(decoder,
                                                                                            output_time_major=True,
                                                                                            maximum_iterations=transducer_amount_outputs)
                logits = outputs.rnn_output  # logits of shape [max_time,batch_size,vocab_size]
                decoder_prediction = outputs.sample_id  # For debugging

                # Modify output of transducer_hidden_state_new so that it can be fed back in again without problems.
                transducer_hidden_state_new = tf.concat(
                    [transducer_hidden_state_new[0].c, transducer_hidden_state_new[0].h],
                    axis=0)
                transducer_hidden_state_new = tf.reshape(transducer_hidden_state_new,
                                                         shape=[2, -1, self.cons_manager.transducer_hidden_units])


                # Note the outputs
                outputs_int = outputs_int.write(current_block - start_block, logits)

                return current_block + 1, outputs_int, encoder_hidden_state_new, transducer_hidden_state_new

            _, outputs_final, encoder_hidden_state_new, transducer_hidden_state_new = \
                tf.while_loop(cond, body, init_state, parallel_iterations=1)

            # Process outputs
            outputs = outputs_final.concat()
            logits = tf.reshape(
                outputs,
                shape=(-1, 1, self.cons_manager.vocab_size))  # And now it's [max_output_time, batch_size, vocab]

            # For loading the model later on
            logits = tf.identity(logits, name='logits')
            encoder_hidden_state_new = tf.identity(encoder_hidden_state_new, name='encoder_hidden_state_new')
            transducer_hidden_state_new = tf.identity(transducer_hidden_state_new, name='transducer_hidden_state_new')

        train_saver = tf.train.Saver()  # For now save everything

        return max_blocks, inputs_full_raw, transducer_list_outputs, start_block, encoder_hidden_init,\
            trans_hidden_init, logits, encoder_hidden_state_new, transducer_hidden_state_new, train_saver

    def build_training_step(self):
        targets = tf.placeholder(shape=(None,), dtype=tf.int32, name='targets')
        targets_one_hot = tf.one_hot(targets, depth=self.cons_manager.vocab_size, dtype=tf.float32)

        targets_one_hot = tf.Print(targets_one_hot, [targets], message='Targets: ', summarize=10)
        targets_one_hot = tf.Print(targets_one_hot, [tf.argmax(self.logits, axis=2)], message='Argmax: ', summarize=10)

        stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=targets_one_hot,
                                                                         logits=self.logits)
        loss = tf.reduce_mean(stepwise_cross_entropy)
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return targets, train_op, loss


constants_manager = ConstantsManager(input_dimensions=1, input_embedding_size=11, inputs_embedded=False,
                                     encoder_hidden_units=100, transducer_hidden_units=100, vocab_ids=[0, 1, 2],
                                     input_block_size=1, beam_width=5)
model = Model(cons_manager=constants_manager)

2 Answers:

Answer 0 (score: 0)

In this particular case the error appears to be spurious (see the GitHub issue in the comments). In general, though, an error like this means the following:

The usage pattern the error message complains about has always been illegal; earlier versions of TensorFlow simply didn't check for it well.

The core of the problem is that in TensorFlow's execution model you cannot consume, outside a while loop, a tensor that you created inside it. For a simple illustration of this, have a look at this test case.
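
Here is a minimal sketch of the illegal pattern (illustrative toy code of my own, not the linked test case): a tensor created inside body is captured in an outer-scope list and then consumed after the loop.

import tensorflow as tf

captured = []

def body(i):
    inner = tf.square(i)    # tensor created inside the loop body
    captured.append(inner)  # leaks it out of the while-loop frame
    return i + 1.0

i0 = tf.constant(0.0)
tf.while_loop(lambda i: i < 5.0, body, [i0])

# Illegal: `captured[0]` lives inside the while-loop frame, so feeding it to
# an op built outside the loop (here, its gradient) triggers an error of the
# "... is in a while loop" kind.
grad = tf.gradients(captured[0], i0)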

You can disable the check by returning immediately from here, but your computation graph will then be malformed, which leads to undefined behavior.

The correct fix is to add every tensor that you want to access outside the while loop (outside the cond and body functions) to loop_vars, and to use the versions returned from tf.while_loop.
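
A minimal sketch of that fix on the same toy example: thread the inner tensor through loop_vars so that tf.while_loop itself returns it.

import tensorflow as tf

def body(i, _):
    inner = tf.square(i)   # created inside the loop body...
    return i + 1.0, inner  # ...but exported as a loop variable

i0 = tf.constant(0.0)
dummy = tf.constant(0.0)  # initial value for the exported slot
_, final_inner = tf.while_loop(lambda i, _: i < 5.0, body, [i0, dummy])

# Legal: `final_inner` is the version returned by tf.while_loop, so it is
# safe to consume outside the loop, including in tf.gradients.
grad = tf.gradients(final_inner, i0)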

Answer 1 (score: 0)

I recently ran into a similar problem when putting a scan inside a dynamic_rnn (i.e., inside a while loop). The error seems to have been introduced only in TensorFlow 1.5. You can try downgrading TensorFlow to 1.4 or upgrading to 1.6; both should work.
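
If you want to catch an affected build early, a simple guard along these lines (my own suggestion, not from the original post) works:

import tensorflow as tf

# The overly strict while-loop check reportedly ships only in the 1.5 releases.
if tf.__version__.startswith('1.5'):
    raise RuntimeError('TensorFlow %s hits the spurious while-loop check; '
                       'use 1.4 or 1.6 instead.' % tf.__version__)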