我正在研究Tensorflow,我有来自github的代码,用于语音识别项目。它是一个基于tensorflow的speech2text seq2seq模型。
from __future__ import print_function
import numpy as np
import tensorflow as tf
#import sugartensor as tf
import sugartensor
import layer
import speech_data
from speech_data import Source,Target
from layer import net
learning_rate = 0.00001
training_iters = 300000 #steps
batch_size = 64
input_classes=20 # mfcc features
max_input_length=80 # (max) length of utterance
max_output_length=20
output_classes=32 # dimensions: characters
# Target.word here just returns the filename "1_STEFFI_160.wav" = digit_speaker_words-per-minute.wav nicely 'encoded' ;)
batch=word_batch=speech_data.mfcc_batch_generator(batch_size, source=Source.DIGIT_WAVES, target=Target.hotword)
X,Y=next(batch)
# EOS='\n' # end of sequence symbol todo use how?
# GO=1 # start symbol 0x01 todo use how?
# def decode(bytes):
# return "".join(map(chr, bytes)).replace('\x00', '').replace('\n', '')
vocab_size=input_classes
target_vocab_size=output_classes
buckets=[(max_input_length, max_output_length)] # our input and response words can be up to 10 characters long
# (1000,1000) Takes 6 minutes on the Mac, half on Nvidia
PAD=[0] # fill words shorter than 10 characters with 'padding' zeroes
input_data = x= X
target_data = y= Y
target_weights= [[1.0]*50 + [0.0]*(max_input_length-50)] *batch_size # mask padding. todo: redundant --
encoder_size = max_input_length
decoder_size = max_output_length #self.buckets[bucket_id]
num_dim=input_classes #?
# residual block
def res_block(tensor, size, rate, dim=num_dim):
# filter convolution
conv_filter = tensor.sg_aconv1d(size=size, rate=rate, act='tanh', bn=True)
# gate convolution
conv_gate = tensor.sg_aconv1d(size=size, rate=rate, act='sigmoid', bn=True)
# output by gate multiplying
out = conv_filter * conv_gate
# final output
out = out.sg_conv1d(size=1, dim=dim, act='tanh', bn=True)
# residual and skip output
return out + tensor, out
# expand dimension
z = x.sg_conv1d(sg_conv1d, size=1, dim=num_dim, act='tanh', bn=True)
# dilated conv block loop
skip = 0 # skip connections
for i in range(num_blocks):
for r in [1, 2, 4, 8, 16]:
z, s = res_block(z, size=7, rate=r)
skip += s
# final logit layers
logit = (skip
.sg_conv1d(size=1, act='tanh', bn=True)
.sg_conv1d(size=1, dim=voca_size))
# CTC loss
loss = logit.sg_ctc(target=y, seq_len=seq_len)
tf.train.AdamOptimizer(learning_rate).minimize(loss)
saver = tf.train.Saver(tf.global_variables())
# train
tf.sg_train(log_interval=30, lr=0.0001, loss=loss, ep_size=1000, max_ep=200, early_stop=False)
问题是,当我运行此代码时,我收到以下错误。我没有看到任何名为list的对象。我试图在所有方面做一些伎俩。
File "C:/Users/User1/Desktop/tensorflow-speech-recognition-master/speech2text-seq2seq.py", line 61, in <module>
z = x.sg_conv1d(sg_conv1d, size=1, dim=num_dim, act='tanh', bn=True)
AttributeError: 'list' object has no attribute 'sg_conv1d'
我一直在努力解决这个问题,但每次都不会对我有用。 谁能帮我解决这个问题?感谢