我正在尝试用不同的训练单词分别训练多个GMM模型,然后用一个模型未见过的测试词来测试这些模型,结果得到的分数是负值。知道我哪里做错了吗?
# Read the sample rate and signal of every training WAV file.
# The pattern is a raw string so the backslashes are taken literally instead
# of relying on unrecognised escape sequences. The matches are sorted so the
# file order (and with it the model numbering printed at the end of the
# script) is the same on every run.
rate = []
sig = []
for filename in sorted(glob.glob(r'Data\Training\*.wav')):
    sr_value, x_value = wav.read(filename)
    rate.append(sr_value)
    sig.append(x_value)
读取所有训练文件
# Compute the MFCC matrix of every training signal.
# Each signal is analysed with the sample rate read from its own file rather
# than an assumed 16 kHz, so the 25 ms window / 10 ms step are measured
# against the real audio.
# NOTE(review): nfft=512 presumably requires frames of at most 512 samples
# (i.e. sample rates up to ~20 kHz at winlen=0.025) -- confirm for other rates.
all_mfcc_feat = [
    mfcc(signal=audio, samplerate=sr, winlen=0.025, winstep=0.01,
         nfilt=26, nfft=512, numcep=13, preemph=0.97, ceplifter=22,
         appendEnergy=False)
    for audio, sr in zip(sig, rate)
]
计算每个信号的mfcc
# Delta and delta-delta features for each training MFCC matrix.
# The loop variable is deliberately NOT named `mfcc`: doing so rebinds the
# mfcc() function used elsewhere in this script to an array, and the later
# mfcc(...) call on the test file then fails with "object is not callable".
delta_oneT = []
double_deltaT = []
for mfcc_feat in all_mfcc_feat:
    delta1 = delta(mfcc_feat, 2)  # first-order delta of the MFCCs
    delta_oneT.append(delta1)
    double_deltaT.append(delta(delta1, 2))  # delta of the delta
# Build one feature matrix per training signal by placing the MFCC, delta and
# delta-delta columns side by side, one output row per MFCC row.
# A single np.hstack per signal replaces the row-by-row DataFrame.append loop,
# which re-copied the growing frame on every iteration and no longer exists
# in pandas 2.0.
training_feat = [
    np.hstack([feats, first_delta, second_delta])
    for feats, first_delta, second_delta
    in zip(all_mfcc_feat, delta_oneT, double_deltaT)
]
# Extract MFCC + delta + delta-delta features for the unseen test word,
# using the same recipe as for the training signals.
(sr_valueX, x_valueX) = wav.read(r'Data\Testing\wiehedT.wav')

# The training features are extracted with appendEnergy=False, so the test
# features must be too; otherwise the first coefficient is not comparable.
# NOTE(review): the other training arguments are presumably the library
# defaults -- verify against the installed python_speech_features version.
mfcc_test = mfcc(x_valueX, sr_valueX, appendEnergy=False)

delta1T = delta(mfcc_test, 2)
delta_oneTest = [delta1T]
# The double delta must come from the TEST delta (delta1T), not from a delta
# left over from the training loop.
double_deltaTest = [delta(delta1T, 2)]

# One row per MFCC row: [mfcc | delta | delta-delta].
testingFeat = np.hstack([mfcc_test, delta_oneTest[0], double_deltaTest[0]])
# Fit one independent mixture model per training feature matrix.
# Every model is constructed with the library defaults (weights, means, ...).
allmodels = [
    GaussianMixture().fit(train_matrix)
    for train_matrix in training_feat
]
# Score the test word against every trained model, numbering models from 1.
# score() is a log-likelihood, so negative values are expected; the model
# with the highest (least negative) score fits the test word best.
# Single-argument print(...) yields the same output on Python 2 and Python 3.
for model_no, gmm in enumerate(allmodels, 1):
    print('Model  %d' % model_no)
    scores = gmm.score(testingFeat)
    print(scores)
计算每个信号的增量
substring_index
答案 0 :(得分:0)
该代码按预期工作。函数 gmm.score(testingFeat)
返回输入数据各样本的平均对数似然(如需每个样本各自的对数似然,请使用 score_samples())。详见 score() 的官方文档(the documentation of score())。
对数概率只是概率的对数——概率属于区间 (0,1)——因此它们是负数。要解决这个问题,您可以像这篇帖子(post)中那样对结果应用指数函数。
但是,这样得到的结果仍然不是真正意义上的概率分数,因为您的数据可能分布不均匀。这里(here)对此进行了详细说明。