我试过做一个简单的fft,并在2d数组上比较MATLAB和CUDA之间的结果。
MATLAB: 9个数字的数组1-9
I = [1 2 3
4 5 6
7 8 9];
并使用此代码:
fft(I)
给出结果:
12.0000 + 0.0000i 15.0000 + 0.0000i 18.0000 + 0.0000i
-4.5000 + 2.5981i -4.5000 + 2.5981i -4.5000 + 2.5981i
-4.5000 - 2.5981i -4.5000 - 2.5981i -4.5000 - 2.5981i
和CUDA代码:
int FFT_Test_Function() {
int width = 3;
int height = 3;
int n = width * height;
double in[width][height];
Complex out[width][height];
for (int i = 0; i<width; i++)
{
for (int j = 0; j < height; j++)
{
in[i][j] = (i * width) + j + 1;
}
}
// Allocate the buffer
cufftDoubleReal *d_in;
cufftDoubleComplex *d_out;
unsigned int out_mem_size = sizeof(cufftDoubleComplex)*n;
unsigned int in_mem_size = sizeof(cufftDoubleReal)*n;
cudaMalloc((void **)&d_in, in_mem_size);
cudaMalloc((void **)&d_out, out_mem_size);
// Save time stamp
milliseconds timeStart = getCurrentTimeStamp();
cufftHandle plan;
cufftResult res = cufftPlan2d(&plan, width, height, CUFFT_D2Z);
if (res != CUFFT_SUCCESS) { cout << "cufft plan error: " << res << endl; return 1; }
cudaCheckErrors("cuda malloc fail");
for (int i = 0; i < width; i++)
{
cudaMemcpy(d_in + (i * width), &in[i], height * sizeof(double), cudaMemcpyHostToDevice);
cudaCheckErrors("cuda memcpy H2D fail");
}
cudaCheckErrors("cuda memcpy H2D fail");
res = cufftExecD2Z(plan, d_in, d_out);
if (res != CUFFT_SUCCESS) { cout << "cufft exec error: " << res << endl; return 1; }
for (int i = 0; i < width; i++)
{
cudaMemcpy(&out[i], d_out + (i * width), height * sizeof(Complex), cudaMemcpyDeviceToHost);
cudaCheckErrors("cuda memcpy H2D fail");
}
cudaCheckErrors("cuda memcpy D2H fail");
milliseconds timeEnd = getCurrentTimeStamp();
milliseconds totalTime = timeEnd - timeStart;
std::cout << "Total time: " << totalTime.count() << std::endl;
return 0;
}
在这个CUDA代码中我得到了结果:
我错过了什么?
非常感谢你的关注!
答案 0 :(得分:4)
cuFFT 的结果看起来是正确的,问题出在您的 MATLAB 代码上:对矩阵调用 fft 只会对每一列分别做一维 FFT,而 cuFFT 的二维计划计算的是二维 FFT,因此应当使用 fft2 进行比较:
octave:1> I = [ 1 2 3; 4 5 6; 7 8 9 ]
I =
1 2 3
4 5 6
7 8 9
octave:2> fft2(I)
ans =
45.00000 + 0.00000i -4.50000 + 2.59808i -4.50000 - 2.59808i
-13.50000 + 7.79423i 0.00000 + 0.00000i 0.00000 + 0.00000i
-13.50000 - 7.79423i 0.00000 - 0.00000i 0.00000 - 0.00000i
请注意这里使用的是 fft2。