Question

我正在尝试使用快速傅立叶变换（FFT）返回的幅度最大的前10个频率来重新合成一个音符，但合成出的声音与原始声音不匹配。我不确定问题出在没有正确找到频率，还是没有正确生成声音。这段代码的目标是尽量还原原始声音。这是我的代码：

import numpy as np
from scipy.io import wavfile
from scipy.fftpack import fft
import matplotlib.pyplot as plt

# Read the source recording: `fs` receives the first value returned by
# wavfile.read and `data` the second (the loaded audio data).
fs, data = wavfile.read('./Flute.nonvib.ff.A4.stereo.wav')

# Frame rate used when the synthesized WAV file is written at the end of
# the script.
# NOTE(review): this is a fixed constant, independent of `fs` above --
# confirm the input file really is 44100 Hz.
i_framerate = 44100

def findFrequencies(arr_data, i_framerate = 44100, i_top_n =5):
    """Return the strongest FFT components of a recording.

    Parameters
    ----------
    arr_data : array-like
        Audio samples. A 1-D array is used as-is; for a 2-D array the first
        column is analysed (assumes the (n_samples, n_channels) layout --
        confirm against the loader used by the caller).
    i_framerate : int or float
        Sampling rate of ``arr_data`` in Hz.
    i_top_n : int
        Number of strongest bins to return.

    Returns
    -------
    freq_top_n : numpy.ndarray
        Centre frequencies (Hz) of the ``i_top_n`` largest-magnitude bins,
        strongest first.
    amp_top_n : numpy.ndarray
        Magnitudes of those bins divided by the largest one, so the first
        entry is 1.0 (all zeros if the spectrum is entirely zero).

    Side effects
    ------------
    Plots the magnitude spectrum on the current matplotlib axes.
    """
    samples = np.asarray(arr_data)
    if samples.ndim > 1:
        samples = samples.T[0]  # first channel of a multi-channel recording

    n_samples = samples.shape[0]
    if n_samples == 0:
        # Nothing to analyse; avoid calling the FFT on an empty signal.
        return np.array([]), np.array([])

    spectrum = fft(samples)  # complex spectrum, one value per bin

    # Bin k of an N-point FFT sits at k * i_framerate / N Hz. The input is
    # real, so only bins 0..N//2 (DC up to Nyquist) carry independent
    # information. Deriving both axes from N keeps them the same length and
    # makes the frequencies correct for a recording of any duration.
    n_bins = n_samples // 2 + 1
    xf = np.arange(n_bins) * (i_framerate / n_samples)
    yf = np.abs(spectrum[:n_bins])

    plt.plot(xf,yf)

    # Indices of the i_top_n largest magnitudes, strongest first.
    yf_top_n = np.argsort(yf)[-i_top_n:][::-1]
    top_magnitudes = yf[yf_top_n]
    peak = np.max(top_magnitudes)
    if peak > 0:
        amp_top_n = top_magnitudes / peak
    else:
        # All-zero spectrum: return zeros instead of dividing by zero.
        amp_top_n = np.zeros_like(top_magnitudes)
    freq_top_n = xf[yf_top_n]

    return freq_top_n, amp_top_n

def createSoundData(a_freq, a_amp, i_framerate=44100, i_time = 1, f_amp = 1000.0):
    """Synthesize a two-channel signal as a sum of sine waves.

    Parameters
    ----------
    a_freq : sequence of float
        Frequencies of the partials, in Hz.
    a_amp : sequence of float
        Relative amplitude of each partial, paired positionally with
        ``a_freq`` (surplus entries in the longer sequence are ignored).
    i_framerate : int
        Samples per second of the generated signal.
    i_time : int or float
        Duration in seconds; the sample count is rounded to the nearest
        integer.
    f_amp : float
        Overall gain applied to every partial.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 2)`` holding the same waveform in both
        columns.
    """
    # Round to an integer so fractional durations do not break np.zeros.
    n_samples = int(round(i_time * i_framerate))

    # Sample instants k / i_framerate. An endpoint-inclusive linspace would
    # space the samples i_time / (n_samples - 1) apart, slightly detuning
    # every partial relative to the nominal frame rate.
    t = np.arange(n_samples) / i_framerate

    y = np.zeros(n_samples)
    for freq, amp in zip(a_freq, a_amp):
        y += np.sin(2 * np.pi * freq * t) * f_amp * amp

    data2 = np.c_[y,y] # 2 Channel sound
    return data2

# Use the sample rate reported by the WAV file (`fs`) for analysis, synthesis
# and writing, so the pipeline stays consistent for files that are not 44.1 kHz.
top_freq , top_freq_amp = findFrequencies(data, i_framerate = fs , i_top_n = 200)

print('Frequencies: ',top_freq)
print('Amplitudes : ',top_freq_amp) 

# Scale the overall gain by the number of partials; max(..., 1) avoids a
# division by zero if no components were returned.
soundData = createSoundData(top_freq, top_freq_amp, i_framerate = fs, i_time = 2, f_amp = 50 / max(len(top_freq), 1))
wavfile.write('createsound_A4_v6.wav',fs,soundData)

Answer 1

音符频谱中最强的10个频率，并不等同于幅度最大的10个FFT频点（bin）的中心频率。实际的频谱峰值可能落在两个FFT频点之间。

不仅频率峰值信息可能位于FFT频点之间，重现音符瞬态（起音、衰减等）所需的相位信息也可能位于频点之间。位于FFT频点之间的频谱信息，是由复数FFT结果中的一段范围（最多可达整个频谱宽度）共同承载的。

无法使用FFT重建相同的声音

1 个答案: