我正在研究Takuya Fujishima提出的Pitch Class Profile。我已经尽力实现这个等式(使用scipy和numpy);但是,我得到了一些相当奇怪的结果。我已经讨论过如何将它放在DSP上,但我认为这更像是一个编码问题,而不是方程理解问题。
无论如何,这是我的代码。
import scipy.io.wavfile
import numpy as np
import math
import sys
class PCP:
def __init__(self):
self.note_references = [16.35, 17.32, 18.35, 19.45, 20.60, 21.83, 23.12, 24.50, 25.96, 27.50, 29.14, 30.87]
self.results = {}
def create_fft(self, filename):
self.rate, self.data = scipy.io.wavfile.read('fmin.wav')
print "Data from the File: \n", self.data
self.frames = self.data.size
print "Number of Frames: ", self.frames
print "Rate: ", self.rate
self.fft_results = np.fft.rfft(self.data) ##fft computing and normalization
print "Results from the FFT: \n", self.fft_results
# The work of the following classes was almost entirely based on a
# thread in DSP. Here is the link to the particular article
# http://dsp.stackexchange.com/questions/13722/pitch-class-profiling
# This function returns the values of the notes given the spectrograph
def m_func(self, l, p):
#M(l) = round(12 * log_2( (f_s*l)/(N*f_ref) ) ) % 12
#print "L: ", l
#print "Note: ", p
a = self.rate * l
b = self.frames * self.note_references[p]
c = 12 * np.log2(a/b)
d = np.round(c)
e = np.mod(d.all(), 12)
#print "Result: ", e
#raw_input()
return e
def pcp(self, p):
r = 0
for l in self.fft_results:
result = self.m_func(l[0], p)
#print "actual returned result", result
if result == p:
r+=1
#print "There was a match! Add it!"
return r
def calculate_PCP(self):
for p in range(0,11): #for all 12 notes
self.results[p] = self.pcp(p)
def print_results(self):
for i in self.results.keys():
print i , ":" , self.results[i]
def main():
m = PCP()
m.create_fft("fmin.wav")
m.calculate_PCP()
m.print_results()
if __name__ == '__main__':
main()
以下是输出:
Data from the File:
[[16 15]
[ 9 9]
[15 15]
...,
[ 0 0]
[ 0 0]
[ 0 0]]
Number of Frames: 352800
Rate: 44100
Results from the FFT:
[[ 31.+0.j 1.+0.j]
[ 18.+0.j 0.+0.j]
[ 30.+0.j 0.+0.j]
...,
[ 0.+0.j 0.+0.j]
[ 0.+0.j 0.+0.j]
[ 0.+0.j 0.+0.j]]
PCP.py:36: RuntimeWarning: divide by zero encountered in log2
c = 12 * np.log2(a/b)
PCP.py:36: RuntimeWarning: invalid value encountered in cdouble_scalars
c = 12 * np.log2(a/b)
0 : 143
1 : 176263
2 : 0
3 : 0
4 : 0
5 : 0
6 : 0
7 : 0
8 : 0
9 : 0
10 : 0
该文件包含一个弹奏F小调和弦的钢琴(在结果词典中以0,5和7作为回应)。然而,结果表明C#/ Db非常强大,我当然可以确认录音中没有C#。我非常感谢所有人的帮助!
答案 0 :(得分:0)
音高频率与频谱频率不同,因此不等于每第12个fft幅度结果箱的内容(特别是对于实际音乐声音的录音)。如果不出意外,任何强大的奇次谐波(不是2的幂)将最终出现在错误的音高等级箱中。
所引用的算法仅适用于受限制的波形类别,这些波形可能无法代表现场音乐音频。