我在运行这段将音频信号转换为 STFT 的示例代码时遇到了困难。我使用的是 label_wav.py,并修改了其中的 run_graph 函数。
def run_graph(wav_data, labels, input_layer_name, output_layer_name,
num_top_predictions):
"""Runs the audio data through the graph and prints predictions."""
# Interpret the raw bytes as int16 samples and keep only the last 16000.
data = np.fromstring(wav_data, dtype='int16')[-16000:]
# Normalize the data
# NOTE: multiplying the int16 array by 1.0 promotes it to float64, which is
# the dtype reported for the tensor in the traceback below.
data = data * 1.0 / (1 << 15) # Range [-1 1]
# Add a leading dimension of size 1; the result is a float64 tensor.
data = tf.reshape(data, (1, 16000))
# NOTE(review): the cast below is commented out, so nothing converts data
# to float32 before the stft call.
# tf.cast(data, tf.float32)
# stft ends in an rfft that requests float32 input; with the float64 data
# built above this call raises the ValueError shown in the traceback.
stfts = tf.contrib.signal.stft(data, frame_length=1024, frame_step=256,fft_length=1024)
错误
Traceback (most recent call last):
File "mfcc_label_wav.py", line 149, in <module>
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py", line 126, in run
_sys.exit(main(argv))
File "mfcc_label_wav.py", line 121, in main
FLAGS.output_name, FLAGS.how_many_labels)
File "mfcc_label_wav.py", line 115, in label_wav
run_graph(wav_data, labels_list, input_name, output_name, how_many_labels)
File "mfcc_label_wav.py", line 71, in run_graph
fft_length=1024)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/signal/python/ops/spectral_ops.py", line 91, in stft
return spectral_ops.rfft(framed_signals, [fft_length])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/spectral_ops.py", line 130, in _rfft
input_tensor = _ops.convert_to_tensor(input_tensor, _dtypes.float32)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 950, in convert_to_tensor
as_ref=False)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1040, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 883, in _TensorTensorConversionFunction
(dtype.name, t.dtype.name, str(t)))
ValueError: Tensor conversion requested dtype float32 for Tensor with dtype float64: 'Tensor("stft/mul:0", shape=(1, 59, 1024), dtype=float64)'
答案 0 :(得分:0)
仅执行 tf.cast(data, tf.float32) 并不起作用(注意:tf.cast 不会原地修改张量,而是返回一个新的张量,必须接收其返回值,例如 data = tf.cast(data, tf.float32))。因此,我先将 numpy 数组从 float64 转换为 float32,然后再对数据进行 reshape。
# Decode the raw bytes as 16-bit PCM samples and keep the last 16000 of them.
# np.frombuffer is used because the binary mode of np.fromstring is deprecated.
samples = np.frombuffer(wav_data, dtype=np.int16)[-16000:]
# Convert to float32 *before* scaling so the array never becomes float64:
# the rfft inside stft only accepts float32 input and raises ValueError
# otherwise. Dividing by 2**15 is exact, so the values match the float64 path.
data = samples.astype(np.float32) / (1 << 15)  # Range [-1, 1)
# stft expects a leading batch dimension: (batch, samples).
data = tf.reshape(data, (1, 16000))
stfts = tf.contrib.signal.stft(data, frame_length=1024,
                               frame_step=256, fft_length=1024)