使用Python 3.6和Spyder&Pycharm框架,我想从wav文件中提取特征,我编写了PLP的代码来提取特征,执行后发现此错误,请帮助我检查一下。这是代码:
#!usr/bin/python
import numpy.matlib
import scipy
from scipy.fftpack.realtransforms import dct
from sidekit.frontend.vad import pre_emphasis
from sidekit.frontend.io import *
from sidekit.frontend.normfeat import *
from sidekit.frontend.features import *
import scipy.io.wavfile as wav
import numpy as np
def readWavFile(wav):
#given a path from the keyboard to read a .wav file
#wav = raw_input('Give me the path of the .wav file you want to read: ')
inputWav = 'C:/Speech_Processing/2-Speech_Signal_Processing_and_Classification-master/feature_extraction_techniques'+wav
return inputWav
#reading the .wav file (signal file) and extract the information we need
def initialize(inputWav):
rate , signal = wav.read(readWavFile(inputWav)) # returns a wave_read object , rate: sampling frequency
sig = wave.open(readWavFile(inputWav))
# signal is the numpy 2D array with the date of the .wav file
# len(signal) number of samples
sampwidth = sig.getsampwidth()
print ('The sample rate of the audio is: ',rate)
print ('Sampwidth: ',sampwidth)
return signal , rate
def PLP():
folder = input('Give the name of the folder that you want to read data: ')
amount = input('Give the number of samples in the specific folder: ')
for x in range(1,int(amount)+1):
wav = '/'+folder+'/'+str(x)+'.wav'
print (wav)
#inputWav = readWavFile(wav)
signal,rate = initialize(wav)
#returns PLP coefficients for every frame
plp_features = plp(signal,rasta=True)
meanFeatures(plp_features[0])
#compute the mean features for one .wav file (take the features for every frame and make a mean for the sample)
def meanFeatures(plp_features):
#make a numpy array with length the number of plp features
mean_features=np.zeros(len(plp_features[0]))
#for one input take the sum of all frames in a specific feature and divide them with the number of frames
for x in range(len(plp_features)):
for y in range(len(plp_features[x])):
mean_features[y]+=plp_features[x][y]
mean_features = (mean_features / len(plp_features))
print (mean_features)
def main():
PLP()
main()
运行上面的代码后:
Give the name of the folder that you want to read data: samples
Give the number of samples in the specific folder: 5
/samples/1.wav
The sample rate of the audio is: 16000
Sampwidth: 2
Traceback (most recent call last):
File "<ipython-input-1-b9bf30e2ed19>", line 1, in <module>
runfile('C:/Speech_Processing/2-Speech_Signal_Processing_and_Classification-master/feature_extraction_techniques/plp.py', wdir='C:/Speech_Processing/2-Speech_Signal_Processing_and_Classification-master/feature_extraction_techniques')
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 704, in runfile
execfile(filename, namespace)
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Speech_Processing/2-Speech_Signal_Processing_and_Classification-master/feature_extraction_techniques/plp.py", line 54, in <module>
main()
File "C:/Speech_Processing/2-Speech_Signal_Processing_and_Classification-master/feature_extraction_techniques/plp.py", line 52, in main
PLP()
File "C:/Speech_Processing/2-Speech_Signal_Processing_and_Classification-master/feature_extraction_techniques/plp.py", line 38, in PLP
plp_features = plp(signal,rasta=True)
File "C:\ProgramData\Anaconda3\lib\site-packages\sidekit\frontend\features.py", line 921, in plp
powspec, log_energy = power_spectrum(input_sig, fs, nwin, shift, prefac)
File "C:\ProgramData\Anaconda3\lib\site-packages\sidekit\frontend\features.py", line 399, in power_spectrum
ahan = framed[start:stop, :] * window
ValueError: operands could not be broadcast together with shapes (400,2) (400,)