Question

我正在使用LSTM进行语音分类。我想创建一些预测声音的方法。但是，我收到以下错误

"ValueError: Input 0 of layer sequential is incompatible with the layer: 
expected ndim=3, found ndim=2. Full shape received: [None, 40]"

LSTM模型

model = Sequential()
model.add(LSTM(128, input_shape=x_train.shape[1:], return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(20, activation='softmax'))

预测功能：

import librosa 
import numpy as np 

def extract_feature(file_name):
   
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T,axis=0)
        
    except Exception as e:
        print("Error encountered while parsing file: ", file)
        return None, None

    return np.array([mfccsscaled])

def print_prediction(file_name):
    prediction_feature = extract_feature(file_name) 

    predicted_vector = model.predict_classes(prediction_feature)
    predicted_class = le.inverse_transform(predicted_vector) 
    print("The predicted class is:", predicted_class[0], '\n') 

    predicted_proba_vector = model.predict_proba(prediction_feature) 
    predicted_proba = predicted_proba_vector[0]
    for i in range(len(predicted_proba)): 
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f') )

filename = 'sound1.wav' 
print(extract_features(file_name).shape)
print_prediction(filename)

有人知道原因和解决方法吗？

ValueError：图层顺序的输入0与图层不兼容：预期ndim = 3，找到的ndim = 2

0 个答案: