How to fix the "No gradients provided for any variable" error when using ctc_loss in TensorFlow

Asked: 2019-05-02 22:53:10

Tags: python-3.x tensorflow keras deep-learning tensorflow2.0

I am trying to implement Baidu's Deep Speech 2 model in TensorFlow 2.0.0alpha0. I am having trouble optimizing the model with the gradients computed from the ctc_loss output inside a tf.GradientTape() context.

I currently pass a tensor of shape (batch_size, max_step, feats) to my model and then pass the computed logits to the loss function. I have also tried passing a sparse tensor for the labels, but that did not work either.
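For reference, here is a minimal sketch of the shapes involved in the tf.nn.ctc_loss call, using dummy values (batch_size, max_step, vocab_size, and the label length of 20 are made up purely for illustration):

import tensorflow as tf

batch_size, max_step, feats, vocab_size = 4, 50, 161, 29

# tf.nn.ctc_loss expects time-major logits by default: (max_step, batch_size, vocab_size)
logits = tf.random.normal([max_step, batch_size, vocab_size])

# Dense integer labels padded to a common length, plus the true length of each example
labels = tf.random.uniform([batch_size, 20], minval=1, maxval=vocab_size, dtype=tf.int32)
label_lengths = tf.fill([batch_size], 20)
logit_lengths = tf.fill([batch_size], max_step)

loss = tf.nn.ctc_loss(labels=labels, logits=logits,
                      label_length=label_lengths, logit_length=logit_lengths,
                      logits_time_major=True, blank_index=0)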

Here is the code that creates the model:

import tensorflow as tf


class DeepSpeech2(tf.keras.Model):

    def __init__(self, vocab_size, conv_filters=[11], conv_kernel_sizes=[1280], conv_strides=[2], 
                 recur_sizes=[100], rnn_type='gru', bidirect_rnn=False, batch_norm=True, 
                 learning_rate=1e-3, name='DeepSpeech2'):

        super(DeepSpeech2, self).__init__()

        self._vocab_size = vocab_size
        self._conv_filters = conv_filters
        self._conv_kernel_sizes = conv_kernel_sizes
        self._conv_strides = conv_strides
        self._recur_sizes = recur_sizes
        self._rnn_type = rnn_type
        self._bidirect_rnn = bidirect_rnn
        self._batch_norm = batch_norm
        self._learning_rate = learning_rate
        self._name = name

        self._conv_batch_norm = None

        with tf.name_scope(self._name):

            self._convolution = [tf.keras.layers.Conv1D(filters=conv_filters[i], 
                kernel_size=conv_kernel_sizes[i], strides=conv_strides[i],
                padding='valid', activation='relu', 
                name='conv1d_{}'.format(i)) for i in range(len(self._conv_filters))]

            if self._batch_norm:
                self._conv_batch_norm = tf.keras.layers.BatchNormalization(name='bn_conv_1d')

            if self._rnn_type == 'gru':
                rnn_init = tf.keras.layers.GRU
            elif self._rnn_type == 'lstm':
                rnn_init = tf.keras.layers.LSTM
            else:
                raise Exception("Invalid rnn_type: '{}' (must be 'lstm' or 'gru')"
                                .format(self._rnn_type))

            self._rnn = []
            for i, r in enumerate(self._recur_sizes):
                layer = rnn_init(r, activation='relu', return_sequences=True,
                    name='{}_{}'.format(self._rnn_type, i))
                if self._bidirect_rnn:
                    layer = tf.keras.layers.Bidirectional(layer)
                self._rnn.append(layer)
                if self._batch_norm:
                    self._rnn.append(tf.keras.layers.BatchNormalization())

            self._fc = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(
                self._vocab_size, name='fc', activation='linear'))

            self._optimizer = tf.keras.optimizers.Adam(lr=self._learning_rate)

    def __call__(self, specs):

        with tf.name_scope(self._name):

            feats = specs
            for layer in self._convolution:
                feats = layer(feats)

            if self._conv_batch_norm:
                feats = self._conv_batch_norm(feats)

            rnn_outputs = feats
            for layer in self._rnn:
                rnn_outputs = layer(rnn_outputs)

            outputs = self._fc(rnn_outputs)

            return tf.transpose(outputs, (1, 0, 2))

    @tf.function
    def train_step(self, specs, spec_lengths, labels, label_lengths):

        with tf.GradientTape() as tape:

            logits = self.__call__(specs)

            loss = tf.nn.ctc_loss(labels=labels, logits=logits,
                label_length=label_lengths, logit_length=spec_lengths)
            cost = tf.reduce_sum(loss)

            decoded, neg_sum_logits = tf.nn.ctc_greedy_decoder(logits, label_lengths)

            gradients = tape.gradient(cost, self.trainable_variables)
            self._optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return (decoded[0].indices, decoded[0].values, decoded[0].dense_shape), cost

I currently get the following error:

ValueError: No gradients provided for any variable: ['DeepSpeech2/conv1d_0/kernel:0', 'DeepSpeech2/conv1d_0/bias:0', 'DeepSpeech2/bn_conv_1d/gamma:0', 'DeepSpeech2/bn_conv_1d/beta:0', 'DeepSpeech2/gru_0/kernel:0', 'DeepSpeech2/gru_0/recurrent_kernel:0', 'DeepSpeech2/gru_0/bias:0', 'DeepSpeech2/batch_normalization_v2/gamma:0', 'DeepSpeech2/batch_normalization_v2/beta:0', 'DeepSpeech2/time_distributed/kernel:0', 'DeepSpeech2/time_distributed/bias:0'].

The error occurs on the line that applies the gradients to the optimizer. When I print out the gradients variable, it is just a list of None values.
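To pair each missing gradient with its variable, a quick debugging loop (hypothetical, run inside train_step in place of apply_gradients) would be:

for var, grad in zip(self.trainable_variables, gradients):
    print(var.name, None if grad is None else grad.shape)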

As I understand it, this error means that there is no path in the graph from the trainable variables to the loss, but I am not sure why I am getting it. Any help would be greatly appreciated!
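As a sanity check, something like the following stripped-down version (a single Dense layer standing in for the full model; all shapes are dummies) should show whether gradients flow through tf.nn.ctc_loss at all:

import tensorflow as tf

batch_size, max_step, feats, vocab_size = 4, 50, 161, 29

dense = tf.keras.layers.Dense(vocab_size)
specs = tf.random.normal([batch_size, max_step, feats])
labels = tf.random.uniform([batch_size, 20], minval=1, maxval=vocab_size, dtype=tf.int32)

with tf.GradientTape() as tape:
    # Transpose to time-major, as in the model's __call__
    logits = tf.transpose(dense(specs), (1, 0, 2))
    loss = tf.reduce_sum(tf.nn.ctc_loss(
        labels=labels, logits=logits,
        label_length=tf.fill([batch_size], 20),
        logit_length=tf.fill([batch_size], max_step)))

# Compute gradients outside the tape context, then look for None entries
gradients = tape.gradient(loss, dense.trainable_variables)
print([g is None for g in gradients])  # all False when gradients flow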

0 Answers
