I am trying to run a program from GitHub using the syntax given in its README.md file. After running it, I get the following error:
>>> from deepsphinx.api import Predict
>>> ds = Predict(Predict.default_flags(),'batch-21937.data-00000-of-00001')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/aims-whm/deepsphinx-master/deepsphinx/api.py", line 24, in __init__
1.0)
File "/home/aims-whm/deepsphinx-master/deepsphinx/seq2seq_model.py", line 241, in seq2seq_model
keep_prob)
File "/home/aims-whm/deepsphinx-master/deepsphinx/seq2seq_model.py", line 51, in encoding_layer
dtype=tf.float32)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/rnn.py", line 396, in bidirectional_dynamic_rnn
seq_dim=time_dim, batch_dim=batch_dim)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/rnn.py", line 389, in _reverse
seq_dim=seq_dim, batch_dim=batch_dim)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 2355, in reverse_sequence
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2633, in reverse_sequence
batch_dim=batch_dim, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 589, in apply_op
param_name=input_name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'seq_lengths' has DataType float64 not in list of allowed values: int32, int64
So I tried to find out what the problem is and looked at the seq2seq_model.py file. Line 51 passes dtype=tf.float32, which looks correct to me, so I could not find the fault there.
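While digging further, I did notice that input_lengths gets divided later in encoding_layer (I have marked the line in the listing below). To check whether plain division can silently change a tensor's dtype, I ran this small standalone test (the variable name is mine, not from the repo):

import tensorflow as tf

lengths = tf.placeholder(tf.int32, shape=[1])
print(((lengths + 1) / 2).dtype)   # <dtype: 'float64'> - true division upcasts int32
print(((lengths + 1) // 2).dtype)  # <dtype: 'int32'>   - floor division keeps the dtype

If I read this right, by the second encoder layer input_lengths would already be float64, and as far as I understand the dtype=tf.float32 on line 51 only sets the dtype of the RNN outputs and state, not of the sequence lengths.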
Here are the two files I am working with:
seq2seq_model.py
"""Tensorflow model for speech recognition"""
import tensorflow as tf
from deepsphinx.vocab import VOCAB_SIZE, VOCAB_TO_INT
from deepsphinx.utils import FLAGS
from deepsphinx.lm import LMCellWrapper
from deepsphinx.attention import BahdanauAttentionCutoff
def encoding_layer(
        input_lengths,
        rnn_inputs,
        keep_prob):
    """ Encoding layer for the model.
    Args:
        input_lengths (Tensor): A tensor of input lengths of instances in
            batches
        rnn_inputs (Tensor): Inputs
    Returns:
        Encoding output, LSTM state, output length
    """
    for layer in range(FLAGS.num_layers):
        with tf.variable_scope('encoder_{}'.format(layer)):
            cell_fw = tf.contrib.rnn.LSTMCell(
                FLAGS.rnn_size,
                initializer=tf.random_uniform_initializer(
                    -0.1, 0.1, seed=2))
            cell_fw = tf.contrib.rnn.DropoutWrapper(
                cell_fw,
                output_keep_prob=keep_prob,
                variational_recurrent=True,
                dtype=tf.float32,
                input_size=rnn_inputs.get_shape()[2])
            cell_bw = tf.contrib.rnn.LSTMCell(
                FLAGS.rnn_size,
                initializer=tf.random_uniform_initializer(
                    -0.1, 0.1, seed=2))
            cell_bw = tf.contrib.rnn.DropoutWrapper(
                cell_bw,
                output_keep_prob=keep_prob,
                variational_recurrent=True,
                dtype=tf.float32,
                input_size=rnn_inputs.get_shape()[2])
            enc_output, enc_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                rnn_inputs,
                input_lengths,
                dtype=tf.float32)
            if layer != FLAGS.num_layers - 1:
                rnn_inputs = tf.concat(enc_output, 2)
                # Keep only every second element in the sequence
                rnn_inputs = rnn_inputs[:, ::2, :]
                input_lengths = (input_lengths + 1) / 2
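                # (my note) this is the division I tested above; it seems to
                # turn input_lengths from int32 into float64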
    # Join outputs since we are using a bidirectional RNN
    enc_output = tf.concat(enc_output, 2)
    return enc_output, enc_state, input_lengths
def get_dec_cell(
        enc_output,
        enc_output_lengths,
        use_lm,
        fst,
        tile_size,
        keep_prob):
    """Decoding cell for attention based model
    Return:
        `RNNCell` Instance
    """
    lstm = tf.contrib.rnn.LSTMCell(
        FLAGS.rnn_size,
        initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
    dec_cell_inp = tf.contrib.rnn.DropoutWrapper(
        lstm,
        output_keep_prob=keep_prob,
        variational_recurrent=True,
        dtype=tf.float32)
    lstm = tf.contrib.rnn.LSTMCell(
        FLAGS.rnn_size,
        initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
    dec_cell = tf.contrib.rnn.DropoutWrapper(
        lstm,
        output_keep_prob=keep_prob,
        variational_recurrent=True,
        dtype=tf.float32)
    dec_cell_out = tf.contrib.rnn.LSTMCell(
        FLAGS.rnn_size,
        num_proj=VOCAB_SIZE,
        initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
    dec_cell = tf.contrib.rnn.MultiRNNCell(
        [dec_cell_inp] +
        [dec_cell] * (FLAGS.num_decoding_layers - 2) +
        [dec_cell_out])
    enc_output = tf.contrib.seq2seq.tile_batch(
        enc_output,
        tile_size)
    enc_output_lengths = tf.contrib.seq2seq.tile_batch(
        enc_output_lengths,
        tile_size)
    attn_mech = BahdanauAttentionCutoff(
        FLAGS.rnn_size,
        enc_output,
        enc_output_lengths,
        normalize=True,
        name='BahdanauAttention')
    dec_cell = tf.contrib.seq2seq.AttentionWrapper(
        dec_cell,
        attn_mech,
        VOCAB_SIZE,
        output_attention=True)
    if use_lm:
        dec_cell = LMCellWrapper(dec_cell, fst, 5)
    return dec_cell
#pylint: disable-msg=too-many-arguments
def training_decoding_layer(
        target_data,
        target_lengths,
        enc_output,
        enc_output_lengths,
        fst,
        keep_prob):
    """ Training decoding layer for the model.
    Returns:
        Training logits
    """
    target_data = tf.concat(
        [tf.fill([FLAGS.batch_size, 1], VOCAB_TO_INT['<s>']),
         target_data[:, :-1]], 1)
    dec_cell = get_dec_cell(
        enc_output,
        enc_output_lengths,
        FLAGS.use_train_lm,
        fst,
        1,
        keep_prob)
    initial_state = dec_cell.zero_state(
        dtype=tf.float32,
        batch_size=FLAGS.batch_size)
    target_data = tf.nn.embedding_lookup(
        tf.eye(VOCAB_SIZE),
        target_data)
    training_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=target_data,
        sequence_length=target_lengths,
        time_major=False)
    training_decoder = tf.contrib.seq2seq.BasicDecoder(
        dec_cell,
        training_helper,
        initial_state)
    training_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(
        training_decoder,
        output_time_major=False,
        impute_finished=True)
    return training_logits
def inference_decoding_layer(
        enc_output,
        enc_output_lengths,
        fst,
        keep_prob):
    """ Inference decoding layer for the model.
    Returns:
        Predictions
    """
    dec_cell = get_dec_cell(
        enc_output,
        enc_output_lengths,
        FLAGS.use_inference_lm,
        fst,
        FLAGS.beam_width,
        keep_prob)
    initial_state = dec_cell.zero_state(
        dtype=tf.float32,
        batch_size=FLAGS.batch_size * FLAGS.beam_width)
    start_tokens = tf.fill(
        [FLAGS.batch_size],
        VOCAB_TO_INT['<s>'],
        name='start_tokens')
    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        dec_cell,
        tf.eye(VOCAB_SIZE),
        start_tokens,
        VOCAB_TO_INT['</s>'],
        initial_state,
        FLAGS.beam_width)
    predictions, _, _ = tf.contrib.seq2seq.dynamic_decode(
        inference_decoder,
        output_time_major=False,
        maximum_iterations=FLAGS.max_output_len)
    return predictions
def seq2seq_model(
        input_data,
        target_data,
        input_lengths,
        target_lengths,
        fst,
        keep_prob):
    """ Attention based model
    Returns:
        Logits, Predictions, Training operation, Cost, Step, Scores of beam
        search
    """
    enc_output, _, enc_lengths = encoding_layer(
        input_lengths,
        input_data,
        keep_prob)
    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(
            target_data,
            target_lengths,
            enc_output,
            enc_lengths,
            fst,
            keep_prob)
    with tf.variable_scope("decode", reuse=True):
        predictions = inference_decoding_layer(
            enc_output,
            enc_lengths,
            fst,
            keep_prob)
    # Create tensors for the training logits and predictions
    training_logits = tf.identity(
        training_logits.rnn_output,
        name='logits')
    scores = tf.identity(
        predictions.beam_search_decoder_output.scores,
        name='scores')
    predictions = tf.identity(
        predictions.predicted_ids,
        name='predictions')
    # Create the weights for sequence_loss
    masks = tf.sequence_mask(
        target_lengths,
        tf.reduce_max(target_lengths),
        dtype=tf.float32,
        name='masks')
    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(
            training_logits,
            target_data,
            masks)
        tf.summary.scalar('cost', cost)
        step = tf.contrib.framework.get_or_create_global_step()
        # Optimizer
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [
            (tf.clip_by_value(grad, -5., 5.), var)
            for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients, step)
    return training_logits, predictions, train_op, cost, step, scores
I think the other file that might be involved is
api.py
""" A minimal API for the prediction for an audio file """
from deepsphinx.seq2seq_model import seq2seq_model
from deepsphinx.utils import FLAGS
from deepsphinx.vocab import VOCAB, VOCAB_SIZE
from deepsphinx.data import get_features
import tensorflow as tf
class Predict(object):
    """ Set flags and restore from checkpoint """
    def __init__(self, flags, checkpoint_path, lm_fst=None):
        self.graph = tf.Graph()
        with self.graph.as_default():
            flags['batch_size'] = 1
            # TODO: Use higher level API
            FLAGS.__dict__['__flags'] = flags
            self.input_length = tf.placeholder(tf.int32, shape=[1])
            self.input = tf.placeholder(tf.float32, shape=[1, None, FLAGS.nfilt * 3 + 1])
            _, self.predictions, _, _, _, _ = seq2seq_model(
                self.input,
                tf.placeholder(tf.float32, shape=[1, VOCAB_SIZE]),
                self.input_length,
                tf.placeholder(tf.int32, shape=[1]),
                lm_fst,
                1.0)
            self.sess = tf.Session(graph=self.graph)
            tf.train.Saver().restore(self.sess, checkpoint_path)

    @staticmethod
    def default_flags():
        return {'nfilt': 40,
                'max_output_len': 250,
                'rnn_size': 256,
                'num_layers': 3,
                'num_decoding_layers': 3,
                'batch_size': 1,
                'beam_width': 16,
                'cutoff_range': 200,
                'use_train_lm': False,
                'use_inference_lm': False,
                'learning_rate': 0.0
               }

    def predict(self, audio_file):
        """ Predict and return string output by beam search """
        feat = get_features(audio_file)
        pred = self.sess.run(self.predictions, feed_dict={
            self.input: [feat], self.input_length: [feat.shape[0]]})
        return ''.join([VOCAB[l] for l in pred[0, :, 0]])
Please help me with this, as I have not been able to solve it myself. Any suggestion that would help me fix it is welcome.
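In case it helps, this is the one-line change in encoding_layer that I was planning to try (untested; the names are from the file above):

input_lengths = (input_lengths + 1) // 2

Would floor division be the right fix here, or should the result be cast back explicitly with tf.cast(..., tf.int32)?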