我试图用张量流来制作语音识别系统。
输入数据是一个大小为50000 X 1的numpy数组。
输出数据(映射数据)是一个大小为400 X 1的numpy数组。
输入和映射数据在列表中以2批次传递。
我已经使用this教程来设计神经网络。以下是代码段:
对于RNN:
input_data = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_DATA, sound_constants.MAX_COLUMN_SIZE_IN_DATA], name="train_input")
target = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT], name="train_output")
fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
# creating one backward cell
bkwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
# creating bidirectional RNN
val, _, _ = tf.nn.static_bidirectional_rnn(fwd_cell, bkwd_cell, tf.unstack(input_data), dtype=tf.float32)
用于提供数据:
feed = {g['input_data'] : trb[0], g['target'] : trb[1], g['dropout'] : 0.6}
accuracy_, _ = sess.run([g['accuracy'], g['ts']], feed_dict=feed)
accuracy += accuracy_
当我运行代码时,我收到了这个错误:
Traceback (most recent call last):
File "/home/wolborg/PycharmProjects/speech-to-text-rnn/src/rnn_train_1.py", line 205, in <module>
tr_losses, te_losses = train_network(g)
File "/home/wolborg/PycharmProjects/speech-to-text-rnn/src/rnn_train_1.py", line 177, in train_network
accuracy_, _ = sess.run([g['accuracy'], g['ts']], feed_dict=feed)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1102, in _run
raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Tensor Tensor("Const:0", shape=(), dtype=float32) may not be fed.
Process finished with exit code 1
之前,我在使用tf.sparse_placeholder时遇到了这个问题,然后在浏览之后,我将输入类型更改为tf.placeholder并进行了相关更改。现在我对我犯错误的地方毫无头绪。
请建议我应该如何提供数据。
整个代码:
import tensorflow as tf
# for taking MFCC and label input
import numpy as np
import rnn_input_data_1
import sound_constants
# input constants
# Training Parameters
num_input = 10 # mfcc data input
training_data_size = 8 # determines number of files in training and testing module
testing_data_size = num_input - training_data_size
# Network Parameters
learning_rate = 0.0001 # for large training set, it can be set 0.001
num_hidden = 200 # number of hidden layers
num_classes = 28 # total alphabet classes (a-z) + extra symbols (', ' ')
epoch = 1 # number of iterations
batch_size = 2 # number of batches
mfcc_coeffs, text_data = rnn_input_data_1.mfcc_and_text_encoding()
class DataGenerator:
def __init__(self, data_size):
self.ptr = 0
self.epochs = 0
self.data_size = data_size
def next_batch(self):
self.ptr += batch_size
if self.ptr > self.data_size:
self.epochs += 1
self.ptr = 0
return mfcc_coeffs[self.ptr-batch_size : self.ptr], text_data[self.ptr-batch_size : self.ptr]
def reset_graph():
if 'sess' in globals() and sess:
sess.close()
tf.reset_default_graph()
def struct_network():
print ('Inside struct network !!')
reset_graph()
input_data = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_DATA, sound_constants.MAX_COLUMN_SIZE_IN_DATA], name="train_input")
target = tf.placeholder(tf.float32, [batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT], name="train_output")
keep_prob = tf.constant(1.0)
fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
# creating one backward cell
bkwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)
# creating bidirectional RNN
val, _, _ = tf.nn.static_bidirectional_rnn(fwd_cell, bkwd_cell, tf.unstack(input_data), dtype=tf.float32)
# adding dropouts
val = tf.nn.dropout(val, keep_prob)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
# creating bidirectional RNN
print ('BiRNN created !!')
print ('Last Size: ', last.get_shape())
weight = tf.Variable(tf.truncated_normal([num_hidden * 2, sound_constants.MAX_ROW_SIZE_IN_TXT]))
bias = tf.Variable(tf.constant(0.1, shape=[sound_constants.MAX_ROW_SIZE_IN_TXT]))
# mapping to 28 output classes
logits = tf.matmul(last, weight) + bias
prediction = tf.nn.softmax(logits)
prediction = tf.reshape(prediction, shape = [batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT])
# getting probability distribution
mat1 = tf.cast(tf.argmax(prediction,1),tf.float32)
correct = tf.equal(prediction, target)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
logits = tf.reshape(logits, shape=[batch_size, sound_constants.MAX_ROW_SIZE_IN_TXT, sound_constants.MAX_COLUMN_SIZE_IN_TXT])
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target))
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
# returning components as dictionary elements
return {'input_data' : input_data,
'target' : target,
'dropout': keep_prob,
'loss': loss,
'ts': train_step,
'preds': prediction,
'accuracy': accuracy
}
def train_network(graph):
# initialize tensorflow session and all variables
# tf_gpu_config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = True)
# tf_gpu_config.gpu_options.allow_growth = True
# with tf.Session(config = tf_gpu_config) as sess:
with tf.Session() as sess:
train_instance = DataGenerator(training_data_size)
test_instance = DataGenerator(testing_data_size)
print ('Training data size: ', train_instance.data_size)
print ('Testing data size: ', test_instance.data_size)
sess.run(tf.global_variables_initializer())
print ('Starting session...')
step, accuracy = 0, 0
tr_losses, te_losses = [], []
current_epoch = 0
while current_epoch < epoch:
step += 1
trb = train_instance.next_batch()
feed = {g['input_data'] : trb[0], g['target'] : trb[1], g['dropout'] : 0.6}
accuracy_, _ = sess.run([g['accuracy'], g['ts']], feed_dict=feed)
accuracy += accuracy_
if train_instance.epochs > current_epoch:
current_epoch += 1
tr_losses.append(accuracy / step)
step, accuracy = 0, 0
#eval test set
te_epoch = test_instance.epochs
while test_instance.epochs == te_epoch:
step += 1
print ('Testing round ', step)
trc = test_instance.next_batch()
feed = {g['input_data']: trc[0], g['target']: trc[1]}
accuracy_ = sess.run([g['accuracy']], feed_dict=feed)[0]
accuracy += accuracy_
te_losses.append(accuracy / step)
step, accuracy = 0,0
print("Accuracy after epoch", current_epoch, " - tr:", tr_losses[-1], "- te:", te_losses[-1])
return tr_losses, te_losses
g = struct_network()
tr_losses, te_losses = train_network(g)
答案 0 :(得分:2)
您将keep_prob
定义为tf.constant
,但之后尝试将值提供给它。将keep_prob = tf.constant(1.0)
替换为keep_prob = tf.placeholder(tf.float32,[])
或keep_prob = tf.placeholder_with_default(1.0,[])