使用Tensorflow进行RNN回归?

时间:2016-10-23 13:32:35

标签: python-2.7 audio tensorflow regression recurrent-neural-network

我目前正在尝试实施RNN进行回归。 我需要创建一个能够将音频样本转换为mfcc特征向量的神经网络。我已经知道每个音频样本的功能是什么,所以它自己的任务是创建一个能够将音频样本列表转换为所需MFCC特征的神经网络。

我面临的第二个问题是,由于我采样的音频文件长度不同,带有音频样本的列表也会有不同的长度,这会导致我需要输入的输入数量有问题。神经网络。我发现this post关于如何处理变量序列长度,并尝试将其纳入我的RNN实现中,但由于无法解释的原因似乎无法得到很多错误。

有人能看出我的实施出了什么问题吗?

以下是代码:

def length(sequence): ##Zero padding to fit the max lenght... Question whether that is a good idea.
    used = tf.sign(tf.reduce_max(tf.abs(sequence), reduction_indices=2))
    length = tf.reduce_sum(used, reduction_indices=1)
    length = tf.cast(length, tf.int32)
    return length

def cost(output, target):
    # Compute cross entropy for each frame.
    cross_entropy = target * tf.log(output)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
    mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
    cross_entropy *= mask
    # Average over actual sequence lengths.
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
    cross_entropy /= tf.reduce_sum(mask, reduction_indices=1)
    return tf.reduce_mean(cross_entropy)

def last_relevant(output):
    max_length = int(output.get_shape()[1])
    relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1)
    return relevant

files_train_path = [dnn_train+f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_path = [dnn_test+f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]

files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]

os.chdir(dnn_train)

train_name,train_data = generate_list_of_names_data(files_train_path)
train_data, train_names, train_output_data, train_class_output = load_sound_files(files_train_path,train_name,train_data)

max_length = 0 ## Used for variable sequence input

for element in train_data:
    if element.size > max_length:
        max_length = element.size

NUM_EXAMPLES = len(train_data)/2

test_data = train_data[NUM_EXAMPLES:]
test_output = train_output_data[NUM_EXAMPLES:]

train_data = train_data[:NUM_EXAMPLES]
train_output = train_output_data[:NUM_EXAMPLES]
print("--- %s seconds ---" % (time.time() - start_time))

#----------------------------------------------------------------------#
#----------------------------Main--------------------------------------#
### Tensorflow neural network setup

batch_size = None
sequence_length_max = max_length
input_dimension=1

data = tf.placeholder(tf.float32,[batch_size,sequence_length_max,input_dimension])
target = tf.placeholder(tf.float32,[None,14])

num_hidden = 24 ## Hidden layer
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True)  ## Long short term memory

output, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32,sequence_length = length(data))  ## Creates the Rnn skeleton

last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last

weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

cross_entropy = cost(output,target)# How far am I from correct value?

optimizer = tf.train.AdamOptimizer() ## TensorflowOptimizer
minimize = optimizer.minimize(cross_entropy)

mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

## Training ##

init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)

batch_size = 1000
no_of_batches = int(len(train_data)/batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_data[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
        ptr+=batch_size
        sess.run(minimize,{data: inp, target: out})
    print "Epoch - ",str(i)
incorrect = sess.run(error,{data: test_data, target: test_output})
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
sess.close()

错误讯息:

Traceback (most recent call last):
  File "tensorflow_test.py", line 177, in <module>
    last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last
  File "tensorflow_test.py", line 132, in last_relevant
    relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 2778, in one_hot
    name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1413, in _one_hot
    axis=axis, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 454, in apply_op
    as_ref=input_arg.is_ref)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 621, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 180, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 163, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_util.py", line 421, in make_tensor_proto
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values])
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/compat.py", line 45, in as_bytes
    (bytes_or_text,))
TypeError: Expected binary or unicode string, got <function length at 0x7f51a7a3ede8>

修改:

更改tf.one_hot(lenght(输出),max_length)给出了以下错误消息:

Traceback (most recent call last):
  File "tensorflow_test.py", line 184, in <module>
    cross_entropy = cost(output,target)# How far am I from correct value?
  File "tensorflow_test.py", line 121, in cost
    cross_entropy = target * tf.log(output)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 754, in binary_op_wrapper
    return func(x, y, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 903, in _mul_dispatch
    return gen_math_ops.mul(x, y, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1427, in mul
    result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2312, in create_op
    set_shapes_for_outputs(ret)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1704, in set_shapes_for_outputs
    shapes = shape_func(op)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1801, in _BroadcastShape
    % (shape_x, shape_y))
ValueError: Incompatible shapes for broadcasting: (?, 14) and (?, 138915, 24)

1 个答案:

答案 0 :(得分:0)

tf.one_hot(length, ...) 

这里长度是一个函数,而不是张量。尝试长度(某事)。