我目前正在尝试实施RNN进行回归。 我需要创建一个能够将音频样本转换为mfcc特征向量的神经网络。我已经知道每个音频样本的功能是什么,所以它自己的任务是创建一个能够将音频样本列表转换为所需MFCC特征的神经网络。
我面临的第二个问题是,由于我采样的音频文件长度不同,带有音频样本的列表也会有不同的长度,这会导致我需要输入的输入数量有问题。神经网络。我发现this post关于如何处理变量序列长度,并尝试将其纳入我的RNN实现中,但由于无法解释的原因似乎无法得到很多错误。
有人能看出我的实施出了什么问题吗?
以下是代码:
def length(sequence): ##Zero padding to fit the max lenght... Question whether that is a good idea.
used = tf.sign(tf.reduce_max(tf.abs(sequence), reduction_indices=2))
length = tf.reduce_sum(used, reduction_indices=1)
length = tf.cast(length, tf.int32)
return length
def cost(output, target):
# Compute cross entropy for each frame.
cross_entropy = target * tf.log(output)
cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
cross_entropy *= mask
# Average over actual sequence lengths.
cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
cross_entropy /= tf.reduce_sum(mask, reduction_indices=1)
return tf.reduce_mean(cross_entropy)
def last_relevant(output):
max_length = int(output.get_shape()[1])
relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1)
return relevant
files_train_path = [dnn_train+f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_path = [dnn_test+f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
os.chdir(dnn_train)
train_name,train_data = generate_list_of_names_data(files_train_path)
train_data, train_names, train_output_data, train_class_output = load_sound_files(files_train_path,train_name,train_data)
max_length = 0 ## Used for variable sequence input
for element in train_data:
if element.size > max_length:
max_length = element.size
NUM_EXAMPLES = len(train_data)/2
test_data = train_data[NUM_EXAMPLES:]
test_output = train_output_data[NUM_EXAMPLES:]
train_data = train_data[:NUM_EXAMPLES]
train_output = train_output_data[:NUM_EXAMPLES]
print("--- %s seconds ---" % (time.time() - start_time))
#----------------------------------------------------------------------#
#----------------------------Main--------------------------------------#
### Tensorflow neural network setup
batch_size = None
sequence_length_max = max_length
input_dimension=1
data = tf.placeholder(tf.float32,[batch_size,sequence_length_max,input_dimension])
target = tf.placeholder(tf.float32,[None,14])
num_hidden = 24 ## Hidden layer
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True) ## Long short term memory
output, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32,sequence_length = length(data)) ## Creates the Rnn skeleton
last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = cost(output,target)# How far am I from correct value?
optimizer = tf.train.AdamOptimizer() ## TensorflowOptimizer
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
## Training ##
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_data)/batch_size)
epoch = 5000
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_data[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out})
print "Epoch - ",str(i)
incorrect = sess.run(error,{data: test_data, target: test_output})
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
sess.close()
错误讯息:
Traceback (most recent call last):
File "tensorflow_test.py", line 177, in <module>
last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last
File "tensorflow_test.py", line 132, in last_relevant
relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 2778, in one_hot
name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1413, in _one_hot
axis=axis, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 454, in apply_op
as_ref=input_arg.is_ref)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 621, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 180, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 163, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_util.py", line 421, in make_tensor_proto
tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/compat.py", line 45, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got <function length at 0x7f51a7a3ede8>
修改:
更改tf.one_hot(lenght(输出),max_length)给出了以下错误消息:
Traceback (most recent call last):
File "tensorflow_test.py", line 184, in <module>
cross_entropy = cost(output,target)# How far am I from correct value?
File "tensorflow_test.py", line 121, in cost
cross_entropy = target * tf.log(output)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 754, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 903, in _mul_dispatch
return gen_math_ops.mul(x, y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1427, in mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2312, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1704, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1801, in _BroadcastShape
% (shape_x, shape_y))
ValueError: Incompatible shapes for broadcasting: (?, 14) and (?, 138915, 24)
答案 0 :(得分:0)
tf.one_hot(length, ...)
这里长度是一个函数,而不是张量。尝试长度(某事)。