我正在使用LSTM进行语音分类。我想创建一些预测声音的方法。但是,我收到以下错误
"ValueError: Input 0 of layer sequential is incompatible with the layer:
expected ndim=3, found ndim=2. Full shape received: [None, 40]"
LSTM模型
model = Sequential()
model.add(LSTM(128, input_shape=x_train.shape[1:], return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(20, activation='softmax'))
预测功能:
import librosa
import numpy as np
def extract_feature(file_name):
try:
audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
mfccsscaled = np.mean(mfccs.T,axis=0)
except Exception as e:
print("Error encountered while parsing file: ", file)
return None, None
return np.array([mfccsscaled])
def print_prediction(file_name):
prediction_feature = extract_feature(file_name)
predicted_vector = model.predict_classes(prediction_feature)
predicted_class = le.inverse_transform(predicted_vector)
print("The predicted class is:", predicted_class[0], '\n')
predicted_proba_vector = model.predict_proba(prediction_feature)
predicted_proba = predicted_proba_vector[0]
for i in range(len(predicted_proba)):
category = le.inverse_transform(np.array([i]))
print(category[0], "\t\t : ", format(predicted_proba[i], '.32f') )
filename = 'sound1.wav'
print(extract_features(file_name).shape)
print_prediction(filename)
有人知道原因和解决方法吗?