我正在尝试通过FFT对两个数组进行卷积,使用C ++中的FFTW库并实现重叠添加算法。矢量数据特别包含使用sndfile从WAV文件加载的双打文件-其中一个是用于处理的WAV文件,另一个是具有虚假响应(IR)的WAV文件,因此本质上的目标是卷积混响。
我以前已经使用Python成功实现了此功能,但是涉及通过scipy.signal使用FFT卷积,因此我不必自己实现重叠添加。但是从那次经验中,我可以确认我正在处理一个脉冲响应,该响应应该通过FFT与输入信号整齐地卷积,并产生与Python输出相当的可识别回声效果。我还可以确认两个WAV文件具有匹配的比特率(16位)和采样率(44.1 kHz)。
一些错误的启动只会产生平坦的噪声作为输出,这是一个熟悉的绊脚石。现在,我产生的WAV输出几乎类似于脉冲响应的变化抽头,但每个感知的“抽头”只有噪声脉冲。
我正在遵循两个有关实现重叠添加的指南; https://www.dspguide.com/ch18/1.htm
http://blog.robertelder.org/overlap-add-overlap-save/
我还将关注一些在线示例和其他StackOverflow线程,并阅读FFTW官方文档(http://www.fftw.org/fftw2_doc/fftw_2.html)。
这是我的代码,其过程说明如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <cstring>
#include <vector>
#include <complex.h>
#include <fftw3.h>
using namespace std;
#define ARRAY_LEN(x) ((int) (sizeof (x) / sizeof (x [0])))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#define MIN(x,y) ((x) < (y) ? (x) : (y))
fftw_complex* fftw_complex_transform(vector<double >);
fftw_complex* ifft_from_complex_vector(vector<vector<double> >);
vector<double> convolved_block(vector<double>, vector<complex<double> >, int);
vector<complex<double> > filter_kernel_cpx_vec(vector<double>);
vector<double> padded(vector<double>, int);
void testDummyData();
int main (int argc, char ** argv) {
SNDFILE *infile, *outfile, *irfile ;
SF_INFO sfinfo ;
double buffer[1024];
sf_count_t count;
// 1 - import both WAV files
vector<double> inputWavArr;
vector<double> irWavArr;
if (argc != 4) {
printf("\nUsage :\n\n <executable name> <input signal file> <impulse response file> <output file>\n") ;
exit(0);
}
memset (&sfinfo, 0, sizeof (sfinfo)) ;
if ((infile = sf_open (argv [1], SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", argv [1]);
sf_close (infile);
exit (1) ;
}
if ((irfile = sf_open (argv [2], SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", argv [2]);
sf_close (irfile);
exit (1) ;
}
if ((outfile = sf_open (argv [3], SFM_WRITE, &sfinfo)) == NULL) {
printf ("Error : Not able to open output file '%s'\n", argv [3]);
sf_close (outfile);
exit (1);
}
while ((count = sf_read_double (infile, buffer, ARRAY_LEN (buffer))) > 0) {
for (int i = 0; i < 1024; i++)
inputWavArr.push_back(buffer[i]);
}
while ((count = sf_read_double (irfile, buffer, ARRAY_LEN (buffer))) > 0) {
for (int i = 0; i < 1024; i++)
irWavArr.push_back(buffer[i]); // max value 0.0408325
}
// 2 - Settle on a padding scheme
int irLen = irWavArr.size();
int windowSize = 262144 - irLen+1; // 262144 is a pwr of 2
const int outputLength = irLen + windowSize - 1;
sf_close(infile);
sf_close(irfile);
irWavArr = padded(irWavArr, outputLength);
int newIrLength = irWavArr.size();
// 3 and 4 - use FFTW to process IR kernel into vector of complex values
vector<complex<double> > ir_vec;
ir_vec = filter_kernel_cpx_vec(irWavArr);
// 5 - divide dry input signal into sections of length windowSize
int numSections = floor(inputWavArr.size()/windowSize);
// OVERLAP-ADD PROCEDURE
vector<vector<double> > totals;
cout << "numSections is " << numSections << "\n";
for (int j=0; j<numSections; j++) { // may be OBOB use numSections+1? or pad inputWavArr?
vector<double> total;
cout << "convolving section " << j << "\n";
vector<double> section_arr;
for (int i=j*windowSize; i<(j*windowSize + windowSize); i++) {
section_arr.push_back(inputWavArr[i]);
}
// Hanning window
for (int i = 0; i < windowSize; i++) {
double multiplier = 0.5 * (1 - cos(2*M_PI*i/(windowSize-1)));
section_arr[i] = multiplier * section_arr[i];
}
int old_section_arr_size = section_arr.size();
section_arr = padded(section_arr, outputLength);
// 6 - yield convolved block for each section
vector<double> output = convolved_block(section_arr, ir_vec, old_section_arr_size+irLen-1);
for (int i=0; i<output.size(); i++) {
total.push_back(output[i]);
}
// 7 - append convolved block to vector of resultant block vectors
totals.push_back(total);
}
vector<double> results(inputWavArr.size()+newIrLength-1, 0);
// 8 - loop though vector of convolved segments, and carry out addition of overlapping sub-segments to yield final "results" vector
for (int j=0; j<numSections; j++) {
vector<double> totals_arr = totals[j];
cout << "overlap summing section " << j << "\n";
for (int i=0; i<totals_arr.size(); i++) {
int newIdx = j*windowSize+i;
results[newIdx] += totals_arr[i];
}
}
// RESULTS MARK THE END OF OVERLAP-ADD PROCEDURE
// load result vector into buffer for writing via libsndfile
double* buff3 = (double*)malloc(results.size()*sizeof(double));
for (int idx = 0; idx < results.size(); idx++) {
buff3[idx] = results[idx]/400; // normalizing factor for these samples... output without this has amplitude going almost up to 120. input file had max around 0.15. max should be 1 about
}
long writtenFrames = sf_writef_double (outfile, buff3, results.size());
sf_close (outfile);
return 0 ;
}
fftw_complex* fftw_complex_transform(vector<double> signal_wav) {
int N = signal_wav.size();
fftw_complex *in, *out;
fftw_plan irPlan;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
for (int i = 0; i < signal_wav.size(); i++)
{
in[i][0] = signal_wav[i];
in[i][1] = (float)0; // complex component .. 0 for input of wav file
}
irPlan = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(irPlan);
fftw_destroy_plan(irPlan);
fftw_free(in);
return out;
}
vector<complex<double> > filter_kernel_cpx_vec(vector<double> input) {
fftw_complex* irFFT = fftw_complex_transform(input);
vector<complex<double> > kernel_vec;
for (int i=0; i<input.size(); i++) {
complex<double> m1 (irFFT[i][0], irFFT[i][1]);
kernel_vec.push_back(m1);
}
fftw_free(irFFT);
return kernel_vec;
}
fftw_complex* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
int N = signal_vec.size();
fftw_complex *in, *out;
fftw_plan irPlan;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
for (int i = 0; i < signal_vec.size(); i++)
{
in[i][0] = signal_vec[i][0]; // real
in[i][1] = signal_vec[i][1]; // imag
}
irPlan = fftw_plan_dft_1d(N, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(irPlan);
fftw_destroy_plan(irPlan);
fftw_free(in);
return out;
}
vector<double> convolved_block(vector<double> in_vector, vector<complex<double> > ir_cpx_vector, int size) {
fftw_complex* outputFFT = fftw_complex_transform(in_vector);
vector<vector<double> > products;
vector<complex<double> > sig_fft_cpx;
for (int i=0; i<size; i++) {
complex<double> m1 (outputFFT[i][0], outputFFT[i][1]);
sig_fft_cpx.push_back(m1);
}
fftw_free(outputFFT);
for (int j=0; j<size; j++) {
std::complex<double> complexProduct = sig_fft_cpx[j]*ir_cpx_vector[j];
double re = real(complexProduct);
double im = imag(complexProduct);
vector<double> elemVec(2);
elemVec[0] = re;
elemVec[1] = im;
products.push_back(elemVec);
}
fftw_complex* revFFT = ifft_from_complex_vector(products);
vector<double> out_vec_dbl;
for (int i=0; i<size; i++) {
out_vec_dbl.push_back(outputFFT[i][0]);
}
fftw_free(revFFT);
return out_vec_dbl;
}
vector<double> padded(vector<double> input, int total) {
vector<double> padded_vec;
for (int i = 0; i<input.size(); i++) {
padded_vec.push_back(input[i]);
}
int numZeroes = total - input.size();
for (int i = 0; i< numZeroes; i++) {
padded_vec.push_back((double)0);
}
return padded_vec;
}
void testDummyData() {
vector<double> dummyFilter;
dummyFilter.push_back(1);
dummyFilter.push_back(-1);
dummyFilter.push_back(1);
vector<double> dummySignal;
dummySignal.push_back(3);
dummySignal.push_back(-1);
dummySignal.push_back(0);
dummySignal.push_back(3);
dummySignal.push_back(2);
dummySignal.push_back(0);
dummySignal.push_back(1);
dummySignal.push_back(2);
dummySignal.push_back(1);
const int nearWindowSize=3;
const int nearIrLength=3;
const int totalLength = 5;
dummyFilter = padded(dummyFilter, totalLength);
vector<complex<double> > dummy_ir_vec = filter_kernel_cpx_vec(dummyFilter);
int localNumSections = 3;
vector<vector<double> > outputs;
for (int j=0; j<localNumSections; j++) {
vector<double> local_section;
for (int i; i<nearWindowSize; i++) {
int idx = j*nearWindowSize + i;
local_section.push_back(dummySignal[idx]);
}
local_section = padded(local_section, totalLength);
vector<double> local_output = convolved_block(local_section, dummy_ir_vec, totalLength);
outputs.push_back(local_output);
}
vector<double> local_results(11,0); // example has 11 in output
for (int j=0; j<localNumSections; j++) {
vector<double> local_totals_arr = outputs[j];
cout << "overlap summing section " << j << "\n";
for (int i=0; i<local_totals_arr.size(); i++) {
int newIdx = j*nearWindowSize+i;
local_results[newIdx] += local_totals_arr[i];
}
}
for (int i=0; i<11; i++) {
cout << "result " << i << "\n";
cout << local_results[i] << "\n";
}
}
我的过程:
由于这无法产生预期的结果,因此我尝试在我上面参考的指南(http://blog.robertelder.org/overlap-add-overlap-save/)的演示数据上运行相同的高级FFT函数(封装了FFTW调用)。
此测试是通过void函数testDummyData()实现的。
在此测试中,中间FFT复数值与指南中的值不同,“卷积”向量的最终结果也不相同(指南中通过小数据集的输入端算法确认为正确)。由于我一直认真地遵循FFTW教程(http://www.fftw.org/fftw2_doc/fftw_2.html)以便获得FFT结果,因此这种情况让我抓狂了。我已经仔细检查了重叠添加的实现,并确保避免常见的实现陷阱,例如缺少Hanning窗口或不正确的填充方案(似乎填充恰好在这里点…)。显然,这里有些我想念的东西。
关于输出的一个注释:“卷积”的输出样本的幅度比干输入信号或IR信号的最大样本幅度大得多。我使用400的除法来获得近似归一化的音量范围,尽管这当然不能解决基本问题。不过,我希望这种意想不到的幅度变化能对该问题有所了解。
环境详细信息:如果将C ++脚本另存为convolution.cpp,则可以使用以下命令进行编译:g ++ pkg-config --cflags --libs fftw3 sndfile
convolution.cpp -o outprog
只需要fftw和libsndfile库即可运行,并且我在Mac上都使用homebrew进行了安装。
它可以按以下方式运行,这就是我加载相关文件的方式(与工作的Python版本相同):./outprog ../F.wav ../spaceEchoIR.wav wetSignal.wav
答案 0 :(得分:1)
看来我已经解决了基本问题。从fft逆包装函数(ifft_from_complex_vector)返回的指针revFFT加载值后未被引用。取而代之的是,代码在outputFFT中寻找期望值,该期望值已通过fftw_free释放。
有助于消除混乱的另一件事是切换到FFTW函数fftw_plan_dft_r2c_1d和fftw_plan_dft_c2r_1d,以使用标志FFTW_BACKWARD和FFTW_FORWARD替换对fftw_plan_dft_1d的调用。这些类型现在更容易跟踪,尤其是对于需要与FFTW一起使用的实数向量。
卷积混响现在几乎可以按预期渲染。有些窗口和孤立的零件会出现锯齿和变形。正如Cris所建议的,这可能是由于我不断需要强大的标准化方案。这就是我接下来要处理的,但值得庆幸的是,基本问题已解决。
下面的工作代码:
//Give the input and output file names on the command line
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <cstring>
#include <vector>
#include <complex.h>
#include <fftw3.h>
using namespace std;
#define ARRAY_LEN(x) ((int) (sizeof (x) / sizeof (x [0])))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#define MIN(x,y) ((x) < (y) ? (x) : (y))
fftw_complex* fftw_complex_transform(vector<double >);
//fftw_complex* ifft_from_complex_vector(vector<vector<double> >);
double* ifft_from_complex_vector(vector<vector<double> >);
vector<double> convolved_block(vector<double>, vector<complex<double> >, int);
vector<complex<double> > filter_kernel_cpx_vec(vector<double>);
vector<double> padded(vector<double>, int);
void testDummyData();
int main (int argc, char ** argv) {
SNDFILE *infile, *outfile, *irfile ;
SF_INFO sfinfo ;
double buffer[1024];
sf_count_t count;
//for fftw3
vector<double> inputWavArr;
vector<double> irWavArr;
if (argc != 4) {
printf("\nUsage :\n\n <executable name> <input signal file> <impulse response file> <output file>\n") ;
exit(0);
}
memset (&sfinfo, 0, sizeof (sfinfo)) ;
if ((infile = sf_open (argv [1], SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", argv [1]);
sf_close (infile);
exit (1) ;
}
if ((irfile = sf_open (argv [2], SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", argv [2]);
sf_close (irfile);
exit (1) ;
}
if ((outfile = sf_open (argv [3], SFM_WRITE, &sfinfo)) == NULL) {
printf ("Error : Not able to open output file '%s'\n", argv [3]);
sf_close (outfile);
exit (1);
}
while ((count = sf_read_double (infile, buffer, ARRAY_LEN (buffer))) > 0) {
for (int i = 0; i < 1024; i++)
inputWavArr.push_back(buffer[i]);
}
double sumIrImpulses = 0;
while ((count = sf_read_double (irfile, buffer, ARRAY_LEN (buffer))) > 0) {
for (int i = 0; i < 1024; i++) {
double el = buffer[i];
irWavArr.push_back(el); // max value 0.0408325
sumIrImpulses += (el);
}
}
sumIrImpulses = abs(sumIrImpulses);
//const int irLen = 189440; // filter(ir) block len
int irLen = irWavArr.size();
//const int windowSize = 72705; // s.t. irLen+windowSize-1 is a pwr of 2
int windowSize = 262144 - irLen+1; // 262144 is a pwr of 2
const int outputLength = irLen + windowSize - 1;
sf_close(infile);
sf_close(irfile);
irWavArr = padded(irWavArr, outputLength);
int newIrLength = irWavArr.size();
vector<complex<double> > ir_vec;
ir_vec = filter_kernel_cpx_vec(irWavArr);
int numSections = floor(inputWavArr.size()/windowSize);
if (numSections*windowSize != inputWavArr.size()) {
inputWavArr = padded(inputWavArr, (numSections+1)*windowSize);
numSections++;
}
// OVERLAP-ADD PROCEDURE
vector<vector<double> > totals;
cout << "numSections is " << numSections << "\n";
for (int j=0; j<numSections; j++) { // may be OBOB use numSections+1? or pad inputWavArr?
vector<double> total;
cout << "convolving section " << j << "\n";
vector<double> section_arr;
for (int i=j*windowSize; i<(j*windowSize + windowSize); i++) {
section_arr.push_back(inputWavArr[i]/200.21);
}
// hanning window
for (int i = 0; i < windowSize; i++) {
double multiplier = 0.5 * (1 - cos(2*M_PI*i/(windowSize-1)));
section_arr[i] = multiplier * section_arr[i];
}
int old_section_arr_size = section_arr.size();
section_arr = padded(section_arr, outputLength);
vector<double> output = convolved_block(section_arr, ir_vec, old_section_arr_size+irLen-1);
for (int i=0; i<output.size(); i++) {
total.push_back(output[i]); // normalize
}
totals.push_back(total);
}
vector<double> results(inputWavArr.size()+newIrLength-1, 0);
for (int j=0; j<numSections; j++) {
vector<double> totals_arr = totals[j];
cout << "overlap summing section " << j << "\n";
for (int i=0; i<totals_arr.size(); i++) {
int newIdx = j*windowSize+i;
results[newIdx] += totals_arr[i]/550;
}
}
double maxVal = 0;
for (int i=0; i<results.size(); i++) {
if (results[i] > maxVal) {
maxVal = results[i];
}
}
cout << "maxval" << maxVal << "\n";
cout << "sumIrImpulses" << sumIrImpulses << "\n";
// RESULTS MARK THE END OF OVERLAP-ADD PROCEDURE
double* buff3 = (double*)malloc(results.size()*sizeof(double));
for (int idx = 0; idx < results.size(); idx++) {
buff3[idx] = results[idx]; // NO LONGER normalizing factor for these samples... output without this has amplitude going almost up to 120. input file had max around 0.15. max should be 1 about
}
long writtenFrames = sf_writef_double (outfile, buff3, results.size());
sf_close (outfile);
return 0 ;
}
fftw_complex* fftw_complex_transform(vector<double> signal_wav) {
int N = signal_wav.size();
double *in;
fftw_complex *out;
fftw_plan irPlan;
//in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
in = (double*) malloc(sizeof(double)*N);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
for (int i = 0; i < signal_wav.size(); i++)
{
//in[i][0] = signal_wav[i];
in[i] = signal_wav[i];
//in[i][1] = (float)0; // complex component .. 0 for input of wav file
}
//irPlan = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
irPlan = fftw_plan_dft_r2c_1d(N, in, out, FFTW_ESTIMATE);
fftw_execute(irPlan);
fftw_destroy_plan(irPlan);
fftw_free(in);
return out;
}
vector<complex<double> > filter_kernel_cpx_vec(vector<double> input) {
fftw_complex* irFFT = fftw_complex_transform(input);
vector<complex<double> > kernel_vec;
for (int i=0; i<input.size(); i++) {
complex<double> m1 (irFFT[i][0], irFFT[i][1]);
kernel_vec.push_back(m1);
}
fftw_free(irFFT);
return kernel_vec;
}
//fftw_complex* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
double* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
int N = signal_vec.size();
//fftw_complex *in, *out;
fftw_complex *in;
double *out;
fftw_plan irPlan;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
//out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
out = (double*) malloc(sizeof(double)*N);
for (int i = 0; i < signal_vec.size(); i++)
{
in[i][0] = signal_vec[i][0]; // real
in[i][1] = signal_vec[i][1]; // imag
}
//irPlan = fftw_plan_dft_1d(N, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
irPlan = fftw_plan_dft_c2r_1d(N, in, out, FFTW_ESTIMATE);
fftw_execute(irPlan);
fftw_destroy_plan(irPlan);
fftw_free(in);
return out;
}
vector<double> convolved_block(vector<double> in_vector, vector<complex<double> > ir_cpx_vector, int size) {
fftw_complex* outputFFT = fftw_complex_transform(in_vector);
vector<vector<double> > products;
vector<complex<double> > sig_fft_cpx;
for (int i=0; i<size; i++) {
complex<double> m1 (outputFFT[i][0], outputFFT[i][1]);
sig_fft_cpx.push_back(m1);
}
fftw_free(outputFFT);
for (int j=0; j<size; j++) {
std::complex<double> complexProduct = sig_fft_cpx[j]*ir_cpx_vector[j];
double re = real(complexProduct);
double im = imag(complexProduct);
vector<double> elemVec(2);
elemVec[0] = re;
elemVec[1] = im;
products.push_back(elemVec);
}
//fftw_complex* revFFT = ifft_from_complex_vector(products);
double* revFFT = ifft_from_complex_vector(products);
vector<double> out_vec_dbl;
for (int i=0; i<size; i++) {
//out_vec_dbl.push_back(outputFFT[i][0]);
//out_vec_dbl.push_back(revFFT[i][0]);
out_vec_dbl.push_back(revFFT[i]);
//out_vec_dbl.push_back(outputFFT[i]);
}
//fftw_free(revFFT);
free(revFFT);
return out_vec_dbl;
}
vector<double> padded(vector<double> input, int total) {
vector<double> padded_vec;
for (int i = 0; i<input.size(); i++) {
padded_vec.push_back(input[i]);
}
int numZeroes = total - input.size();
for (int i = 0; i< numZeroes; i++) {
padded_vec.push_back((double)0);
}
return padded_vec;
}
void testDummyData() {
vector<double> dummyFilter;
dummyFilter.push_back(1);
dummyFilter.push_back(-1);
dummyFilter.push_back(1);
vector<double> dummySignal;
dummySignal.push_back(3);
dummySignal.push_back(-1);
dummySignal.push_back(0);
dummySignal.push_back(3);
dummySignal.push_back(2);
dummySignal.push_back(0);
dummySignal.push_back(1);
dummySignal.push_back(2);
dummySignal.push_back(1);
const int nearWindowSize=3;
const int nearIrLength=3;
const int totalLength = 5;
dummyFilter = padded(dummyFilter, totalLength);
vector<complex<double> > dummy_ir_vec = filter_kernel_cpx_vec(dummyFilter);
int localNumSections = 3;
vector<vector<double> > outputs;
for (int j=0; j<localNumSections; j++) {
vector<double> local_section;
for (int i; i<nearWindowSize; i++) {
int idx = j*nearWindowSize + i;
local_section.push_back(dummySignal[idx]);
}
local_section = padded(local_section, totalLength);
vector<double> local_output = convolved_block(local_section, dummy_ir_vec, totalLength);
outputs.push_back(local_output);
}
vector<double> local_results(11,0); // example has 11 in output
for (int j=0; j<localNumSections; j++) {
vector<double> local_totals_arr = outputs[j];
cout << "overlap summing section " << j << "\n";
for (int i=0; i<local_totals_arr.size(); i++) {
int newIdx = j*nearWindowSize+i;
local_results[newIdx] += local_totals_arr[i];
}
}
for (int i=0; i<11; i++) {
cout << "result " << i << "\n";
cout << local_results[i] << "\n";
}
}