道歉,如果答案在某个地方的某个地方(无法找到)。我是一个业余爱好者,他试图加载WAV文件,获取它的幅度和相位数据(用于修改),生成频谱图,然后将其保存为新的WAV文件。 我使用C ++(Qt)和FFTW库。
我的问题是,即使没有进行任何修改,产生的WAV也与原始WAV不同。如果对整个样本序列执行FFT运算,那么它看起来就像原始样本一样。但我必须使用具有重叠窗口的STFT。在这种情况下,我会受到扭曲,导致周期性的开裂/节流声音,音频波形也会发生显着变化。
这可以在以下示例中看到(在Audacity中查看):
原创/在一个块中处理: original
已处理(windowSize = 2048,hopSize = 1024,无窗口功能): processed ws=2048, hs=1024, wf=none
我无法发布更多具有声望的示例,但是在ISTFT之后(不是在STFT之前)使用我用来组合结果窗口样本的方法执行汉明窗函数可以提供良好的声音。但波形仍然存在很大差异,主要是观察到峰值的显着损失。
我认为将ISTFT的结果合并到新的样本序列中的方法就是问题。这样做的正确方法是什么? C ++中的示例将非常感激。
以下是相关来源:
// getSampleNormalized(n) returns sample n of 1 channel in -1.0 to 1.0 range
// getSampleCount() returns sample count of 1 channel
// quint32 is just unsigned int
quint32 windowSize = 2048;
quint32 windowSizeHalf = windowSize / 2 + 1;
quint32 slideWindowBy = 1024; // hopSize
quint32 windowCount = getSampleCount() / slideWindowBy;
if ( (windowCount * slideWindowBy) < getSampleCount()){
windowCount += 1;
}
quint32 newSampleCount = windowCount * slideWindowBy + ( windowSize - slideWindowBy );
double *window = new double[windowSize];
fftw_complex *fftResult = new fftw_complex[windowSizeHalf];
fftw_complex *fftWindow = new fftw_complex[windowSizeHalf];
double *result = new double[windowSize];
double **magnitudes = new double*[windowCount];
double **phases = new double*[windowCount];
double **signalWindows = new double*[windowCount];
for (int i = 0; i < windowCount; ++i){
magnitudes[i] = new double[windowSizeHalf];
phases[i] = new double[windowSizeHalf];
signalWindows[i] = new double[windowSize];
}
double *sampleSignals = new double[newSampleCount];
fftw_plan fftPlan = fftw_plan_dft_r2c_1d( windowSize, window, fftResult, FFTW_ESTIMATE );
fftw_plan ifftPlan = fftw_plan_dft_c2r_1d( windowSize, fftWindow, result, FFTW_ESTIMATE );
// STFT
for ( int currentWindow = 0; currentWindow < windowCount; ++currentWindow ){
for (int i = 0; i < windowSize; ++i){
quint32 currentSample = currentWindow * slideWindowBy + i;
if ( ( currentSample ) < getSampleCount() ){
window[i] = getSampleNormalized( currentSample ); // * ( windowHamming( i, windowSize ) );
}
else{
window[i] = 0.0;
}
}
fftw_execute(fftPlan);
for (int i = 0; i < windowSizeHalf; ++i){
magnitudes[currentWindow][i] = sqrt( fftResult[i][0]*fftResult[i][0] + fftResult[i][1]*fftResult[i][1] );
phases[currentWindow][i] = atan2( fftResult[i][1], fftResult[i][0] );
}
}
// INVERSE STFT
for ( int currentWindow = 0; currentWindow < windowCount; ++currentWindow ){
for ( int i = 0; i < windowSizeHalf; ++i ){
fftWindow[i][0] = magnitudes[currentWindow][i] * cos( phases[currentWindow][i] ); // Real
fftWindow[i][1] = magnitudes[currentWindow][i] * sin( phases[currentWindow][i] ); // Imaginary
}
fftw_execute(ifftPlan);
for ( int i = 0; i < windowSize; ++i ){
signalWindows[currentWindow][i] = result[i] / windowSize; // getting normalized result
//signalWindows[currentWindow][i] *= (windowHamming( i, windowSize )); // applying Hamming window function
}
}
quint32 pos;
// HERE WE COMBINE RESULTED WINDOWS
// COMBINE AND AVERAGE
// 1st window should be full replace
for ( int i = 0; i < windowSize; ++i ){
sampleSignals[i] = signalWindows[0][i];
}
// 2nd window and onwards: combine with previous ones
for ( int currentWindow = 1; currentWindow < windowCount; ++currentWindow ){
// combine and average with data from previous window
for ( int i = 0; i < (windowSize - slideWindowBy); ++i ){
pos = currentWindow * slideWindowBy + i;
sampleSignals[pos] = (sampleSignals[pos] + signalWindows[currentWindow][i]) * 0.5;
}
// simply replace for the rest
for ( int i = (windowSize - slideWindowBy); i < windowSize; ++i ){
pos = currentWindow * slideWindowBy + i;
sampleSignals[pos] = signalWindows[currentWindow][i];
}
}
// then just save the wav file...