Question

我尝试实现一个简单的 FFT，并在同一个二维数组上比较 MATLAB 和 CUDA（cuFFT）的计算结果。

MATLAB：一个由数字 1 到 9 组成的 3×3 数组

I = [1 2 3
4 5 6
7 8 9];

并使用此代码：

fft(I)

给出结果：

 12.0000 + 0.0000i  15.0000 + 0.0000i  18.0000 + 0.0000i
  -4.5000 + 2.5981i  -4.5000 + 2.5981i  -4.5000 + 2.5981i
  -4.5000 - 2.5981i  -4.5000 - 2.5981i  -4.5000 - 2.5981i

和CUDA代码：

// Runs a 3x3 double-precision real-to-complex 2D FFT (cufftPlan2d +
// cufftExecD2Z) on the values 1..9 and prints the time elapsed between the
// two getCurrentTimeStamp() calls.
//
// The input is stored row-major as
//     1 2 3
//     4 5 6
//     7 8 9
// A 2D plan transforms along both dimensions, so the result is comparable to
// MATLAB/Octave fft2(I), not fft(I) (which only transforms each column).
//
// A D2Z transform writes only the non-redundant half of the spectrum:
// `width` rows of (height / 2 + 1) complex values each. The remaining
// coefficients follow from Hermitian symmetry.
//
// Returns 0 on success, 1 if creating or executing the cuFFT plan fails.
// CUDA runtime calls are checked with cudaCheckErrors (defined elsewhere in
// the project; NOTE(review): presumably it aborts on error - confirm).
int FFT_Test_Function() {

    // const so the array bounds below are constant expressions instead of
    // relying on a variable-length-array compiler extension.
    const int width = 3;                    // nx: slowest-varying dimension
    const int height = 3;                   // ny: contiguous dimension
    const int n = width * height;
    const int out_height = height / 2 + 1;  // complex values per output row (D2Z)
    const int n_out = width * out_height;

    double in[width][height];
    // Same element type as the device buffer so the byte counts always match.
    cufftDoubleComplex out[width][out_height];

    for (int i = 0; i < width; i++)
    {
        for (int j = 0; j < height; j++)
        {
            // Row stride is `height` (elements per row), giving 1..n in row-major order.
            in[i][j] = (i * height) + j + 1;
        }
    }

    // Allocate the device buffers
    cufftDoubleReal *d_in = nullptr;
    cufftDoubleComplex *d_out = nullptr;
    const size_t in_mem_size = sizeof(cufftDoubleReal) * n;
    const size_t out_mem_size = sizeof(cufftDoubleComplex) * n_out;
    cudaMalloc((void **)&d_in, in_mem_size);
    cudaCheckErrors("cuda malloc fail");
    cudaMalloc((void **)&d_out, out_mem_size);
    cudaCheckErrors("cuda malloc fail");

    // Save time stamp
    milliseconds timeStart = getCurrentTimeStamp();

    cufftHandle plan;
    cufftResult res = cufftPlan2d(&plan, width, height, CUFFT_D2Z);
    if (res != CUFFT_SUCCESS)
    {
        std::cout << "cufft plan error: " << res << std::endl;
        cudaFree(d_in);
        cudaFree(d_out);
        return 1;
    }

    // Host and device inputs are both contiguous width x height arrays, so a
    // single copy is enough.
    cudaMemcpy(d_in, in, in_mem_size, cudaMemcpyHostToDevice);
    cudaCheckErrors("cuda memcpy H2D fail");

    res = cufftExecD2Z(plan, d_in, d_out);
    if (res != CUFFT_SUCCESS)
    {
        std::cout << "cufft exec error: " << res << std::endl;
        cufftDestroy(plan);
        cudaFree(d_in);
        cudaFree(d_out);
        return 1;
    }

    // Blocking copy of the width x (height / 2 + 1) half-spectrum; it also
    // waits for the transform issued above to finish.
    cudaMemcpy(out, d_out, out_mem_size, cudaMemcpyDeviceToHost);
    cudaCheckErrors("cuda memcpy D2H fail");

    milliseconds timeEnd = getCurrentTimeStamp();
    milliseconds totalTime = timeEnd - timeStart;
    std::cout << "Total time: " << totalTime.count() << std::endl;

    // Release the plan and the device buffers.
    cufftDestroy(plan);
    cudaFree(d_in);
    cudaFree(d_out);

    return 0;
}

在这个CUDA代码中我得到了结果：

你可以看到CUDA给出了不同的结果。

我错过了什么？

非常感谢你的关注！

Answer 1

cuFFT 的结果看起来是正确的，问题出在您的 MATLAB 代码上：fft(I) 只对矩阵的每一列分别做一维 FFT，而 cufftPlan2d 计算的是二维 FFT。正确的对照应该是：

octave:1> I = [ 1 2 3; 4 5 6; 7 8 9 ]
I =

   1   2   3
   4   5   6
   7   8   9

octave:2> fft2(I)
ans =

   45.00000 +  0.00000i   -4.50000 +  2.59808i   -4.50000 -  2.59808i
  -13.50000 +  7.79423i    0.00000 +  0.00000i    0.00000 +  0.00000i
  -13.50000 -  7.79423i    0.00000 -  0.00000i    0.00000 -  0.00000i

请注意这里使用的是 fft2（二维 FFT），而不是 fft。

CUDA（cuFFT）的二维 FFT 结果与 MATLAB 的二维 FFT 结果不同

1 个答案: