Question

我正在学习 TensorFlow，并从 GitHub 上获取了一个语音识别项目的代码。它是一个基于 TensorFlow 的 speech2text seq2seq 模型。

from __future__ import print_function
import numpy as np
import tensorflow as tf
#import sugartensor as tf
import sugartensor
import layer
import speech_data
from speech_data import Source,Target
from layer import net

# --- Training hyperparameters ---------------------------------------------
learning_rate = 0.00001
training_iters = 300000  # number of training steps
batch_size = 64



# --- Input / output dimensions ---------------------------------------------
input_classes=20 # mfcc features per frame
max_input_length=80 # (max) length of utterance
max_output_length=20 # (max) length of the target sequence
output_classes=32 # dimensions: characters


# Build the batch generator and pull one batch from it.
# The original remark read: Target.word just returns the filename
# "1_STEFFI_160.wav" = digit_speaker_words-per-minute.wav nicely 'encoded' ;)
# NOTE(review): that remark talks about Target.word, but the call below passes
# Target.hotword -- confirm which target encoding is intended.
batch=word_batch=speech_data.mfcc_batch_generator(batch_size, source=Source.DIGIT_WAVES, target=Target.hotword)
# X, Y are whatever the generator yields; nothing here converts them to tensors.
X,Y=next(batch)

# Unused sequence markers and decoder helper, kept for reference:
# EOS='\n' # end of sequence symbol todo use how?
# GO=1       # start symbol 0x01 todo use how?
# def decode(bytes):
#   return "".join(map(chr, bytes)).replace('\x00', '').replace('\n', '')

vocab_size=input_classes
target_vocab_size=output_classes
# Single bucket of (max_input_length, max_output_length), i.e. (80, 20) with the values above.
buckets=[(max_input_length, max_output_length)]
# (1000,1000) Takes 6 minutes on the Mac, half on Nvidia
PAD=[0] # padding value (zeroes) -- presumably for sequences shorter than the bucket size; TODO confirm

# x / y are just additional names for the raw batch objects X / Y.
input_data    = x= X
target_data   = y= Y
# Each row is 50 ones followed by (max_input_length - 50) zeros; the 50 is hard-coded.
# NOTE(review): `[row] * batch_size` repeats a reference to the SAME inner list
# batch_size times, so mutating one row would change all of them.
target_weights= [[1.0]*50 + [0.0]*(max_input_length-50)] *batch_size # mask padding. todo: redundant --
encoder_size = max_input_length
decoder_size = max_output_length #self.buckets[bucket_id]

# Channel count used as the default `dim` of res_block and for the first 1x1 convolution.
num_dim=input_classes # NOTE(review): the original author marked this value as uncertain ("?")

# gated residual block
def res_block(tensor, size, rate, dim=num_dim):
    """Apply one gated convolution block and return its two outputs.

    Two ``sg_aconv1d`` branches are computed from ``tensor`` with the same
    ``size`` and ``rate``: one with ``tanh`` activation (the filter) and one
    with ``sigmoid`` activation (the gate). Their element-wise product is
    passed through a ``size=1`` ``sg_conv1d`` with ``dim`` output channels.

    Args:
        tensor: input object exposing ``sg_aconv1d`` (presumably a tensor
            patched by sugartensor -- confirm against the caller).
        size: kernel size forwarded to both ``sg_aconv1d`` calls.
        rate: rate forwarded to both ``sg_aconv1d`` calls.
        dim: ``dim`` argument of the final ``size=1`` convolution.

    Returns:
        A pair ``(residual, skip)`` where ``skip`` is the projected gated
        output and ``residual`` is ``skip + tensor``.
    """
    # Options shared by the filter and the gate branch.
    branch_opts = dict(size=size, rate=rate, bn=True)

    # Filter branch first, gate branch second (same creation order as before).
    filter_branch = tensor.sg_aconv1d(act='tanh', **branch_opts)
    gate_branch = tensor.sg_aconv1d(act='sigmoid', **branch_opts)

    # Gate the filter output, then project it with a size-1 convolution.
    gated = filter_branch * gate_branch
    projected = gated.sg_conv1d(size=1, dim=dim, act='tanh', bn=True)

    residual = projected + tensor
    return residual, projected

# expand dimension
# NOTE(review): `x` is the same object as `X`, which came straight from
# `next(batch)`; the reported AttributeError shows it is a plain Python list,
# which has no `sg_conv1d` method. It presumably has to be turned into a
# tensor first -- confirm the expected shape/dtype before converting.
# NOTE(review): the first positional argument `sg_conv1d` is a bare name that
# is not defined anywhere in the visible script.
z = x.sg_conv1d(sg_conv1d, size=1, dim=num_dim, act='tanh', bn=True)

# dilated conv block loop
# NOTE(review): `num_blocks` is not defined in the visible script.
skip = 0  # running sum of the second value returned by each res_block call
for i in range(num_blocks):
    for r in [1, 2, 4, 8, 16]:
        # res_block returns (residual output, skip output); the residual
        # output feeds the next block, the skip output is accumulated.
        z, s = res_block(z, size=7, rate=r)
        skip += s

# final logit layers
# NOTE(review): `voca_size` is not defined in the visible script; the names
# defined earlier are `vocab_size` and `target_vocab_size`.
logit = (skip
         .sg_conv1d(size=1, act='tanh', bn=True)
         .sg_conv1d(size=1, dim=voca_size))

# CTC loss
# NOTE(review): `seq_len` is not defined in the visible script.
loss = logit.sg_ctc(target=y, seq_len=seq_len)
# The op returned by minimize() is not stored or run anywhere in the visible script.
tf.train.AdamOptimizer(learning_rate).minimize(loss)
saver = tf.train.Saver(tf.global_variables())

# train
# NOTE(review): `tf` is bound to `tensorflow` here (the `import sugartensor as tf`
# line is commented out), so `tf.sg_train` may not resolve -- confirm.
# This call passes its own lr=0.0001 rather than `learning_rate` (0.00001).
tf.sg_train(log_interval=30, lr=0.0001, loss=loss, ep_size=1000, max_ep=200, early_stop=False)

问题是，当我运行这段代码时，会出现下面的错误。我在代码中没有看到任何名为 list 的对象，也尝试了各种办法，但都没有解决。

File "C:/Users/User1/Desktop/tensorflow-speech-recognition-master/speech2text-seq2seq.py", line 61, in <module>
    z = x.sg_conv1d(sg_conv1d, size=1, dim=num_dim, act='tanh', bn=True)
AttributeError: 'list' object has no attribute 'sg_conv1d'

我一直在尝试解决这个问题，但每次尝试都没有成功。有人能帮我解决这个问题吗？谢谢！

Tensorflow Speech2Text错误

0 个答案: