我正在尝试使用FFTW的Wisdom功能来更快地创建FFTW计划。智慧似乎正确地保存并正确加载,但它并没有加快后续运行的创建速度。此外,如果我尝试使用WISDOM_ONLY标志创建计划,它会返回一个NULL计划,让我知道即使加载智慧的调用工作正常,也无法用它来制定计划。
如果我多次运行以下代码,第一次无法加载智慧文件(因为它不存在),并且在后续运行中它会报告它已正确加载。在每次运行时,首先或之后,创建FFTW计划需要相同的时间。
#include <Windows.h>
#include <omp.h>
#include "fftw3.h"
#define THREADS 4
int main ()
{
LARGE_INTEGER li, li2;
__int64 init, run;
double freq;
int Xfft = 384;
int Yfft = 384;
int iterations = 1000;
fftwf_plan pFft[THREADS];
QueryPerformanceFrequency ( &li );
freq = (double)li.QuadPart / 1000000.0;
// Malloc buffer
fftwf_complex* arr[THREADS];
for ( int i = 0; i < THREADS; i++)
{
arr[i] = (fftwf_complex*) fftwf_malloc ( sizeof(fftwf_complex) * Xfft * Yfft );
}
// Setup FFT plan
QueryPerformanceCounter ( &li );
if (fftw_import_wisdom_from_filename("fftw.wis") == 1 )
{
printf("SUCCESS!\n");
}
else
{
printf("FAILURE!\n");
}
for ( int i = 0; i < THREADS; i++ )
{
pFft[i] = fftwf_plan_dft_2d ( Xfft, Yfft, arr[i], arr[i], FFTW_FORWARD, FFTW_PATIENT );
}
fftw_export_wisdom_to_filename("fftw.wis");
QueryPerformanceCounter ( &li2 );
init = li2.QuadPart - li.QuadPart;
// Init array to random numbers
for ( int i = 0; i < THREADS; i++ )
for ( int j = 0; j < Xfft * Yfft; j++ )
{
arr[i][j][0] = (float)rand();
arr[i][j][1] = (float)rand();
}
omp_set_num_threads(THREADS);
// Perform FFTs
QueryPerformanceCounter ( &li );
#pragma omp parallel
{
int ID = omp_get_thread_num();
#pragma omp for
for ( int i = 0; i < iterations; i++ )
{
fftwf_execute ( pFft[ID] );
}
}
QueryPerformanceCounter ( &li2 );
run = li2.QuadPart - li.QuadPart;
// Free Buffer / destroy plan
for ( int i = 0; i < THREADS; i++ )
{
fftwf_destroy_plan ( pFft[i] );
fftwf_free ( arr[i] );
}
printf ( "Plan creation time: %6.4f microseconds\n", (double)init / freq );
printf ( "FFT execution time: %6.4f microseconds\n", (double)run / freq / (double)iterations );
system( "pause" );
return EXIT_SUCCESS;
}