我有以下代码在进行逆FFT时有错误。正向FFT工作,因为我打印输出并验证它。但反过来似乎没有。有任何想法吗?看起来我错过了一个概念吗?
代码 - http://pastebin.com/iZYtdcqR
编辑 - 我基本上重写了CUDA工具包示例附带的代码。我试图使用FFT但使用修改后的算法(实际上是DIF)执行卷积。
EDIT2 - 为问题提供代码。
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cufft.h>
typedef enum signaltype {REAL, COMPLEX} signal;
typedef float2 Complex;
void
printData(Complex *a, int size, char *msg) {
if (msg == "") printf("\n");
else printf("%s\n", msg);
for (int i = 0; i < size; i++)
printf("%f %f\n", a[i].x, a[i].y);
}
void
normData(Complex *a, int size, float norm) {
for (int i = 0; i < size; i++) {
a[i].x /= norm;
a[i].y /= norm;
}
}
void
randomFill(Complex *h_signal, int size, int flag) {
// Real signal.
if (flag == REAL) {
for (int i = 0; i < size; i++) {
h_signal[i].x = rand() / (float) RAND_MAX;
h_signal[i].y = 0;
}
}
}
// FFT a signal that's on the _DEVICE_.
void
signalFFT(Complex *d_signal, int signal_size) {
cufftHandle plan;
if (cufftPlan1d(&plan, signal_size, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
printf("Failed to plan FFT\n");
exit(0);
}
// Execute the plan.
if (cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_FORWARD) != CUFFT_SUCCESS) {
printf ("Failed Executing FFT\n");
exit(0);
}
}
void
signalIFFT(Complex *d_signal, int signal_size) {
cufftHandle plan;
if (cufftPlan1d(&plan, signal_size, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
printf("Failed to plan IFFT\n");
exit(0);
}
// Execute the plan.
if (cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_INVERSE) != CUFFT_SUCCESS) {
printf ("Failed Executing IFFT\n");
exit(0);
}
}
int main()
{
Complex *h_signal, *d_signal1;
int alloc_size, i;
alloc_size = 16;
// Kernel Block and Grid Size.
const dim3 blockSize(16, 16, 1);
const dim3 gridSize(alloc_size / 16 + 1, alloc_size / 16 + 1, 1);
h_signal = (Complex *) malloc(sizeof(Complex) * alloc_size);
cudaMalloc(&d_signal1, sizeof(Complex) * alloc_size);
if (cudaGetLastError() != cudaSuccess){
printf("Cuda error: Failed to allocate\n");
exit(0);
}
//cudaMalloc(&d_signal2, sizeof(Complex) * alloc_size);
// Add random data to signal.
randomFill(h_signal, alloc_size, REAL);
printData(h_signal, alloc_size, "Random H1");
cudaMemcpy(d_signal1, h_signal, sizeof(Complex) * alloc_size, cudaMemcpyHostToDevice);
signalFFT(d_signal1, alloc_size);
signalIFFT(d_signal1, alloc_size);
cudaDeviceSynchronize();
cudaMemcpy(h_signal, d_signal1, sizeof(Complex) * alloc_size, cudaMemcpyDeviceToHost);
printData(h_signal, alloc_size, "IFFT");
return 0;
}
答案 0 :(得分:5)
写一个好问题的建议:
另一个注意事项:
现在关于你的问题,我运行了你的代码并获得了这样的结果:
Random H1
0.840188 0.000000
0.394383 0.000000
0.783099 0.000000
0.798440 0.000000
0.911647 0.000000
0.197551 0.000000
0.335223 0.000000
0.768230 0.000000
0.277775 0.000000
0.553970 0.000000
0.477397 0.000000
0.628871 0.000000
0.364784 0.000000
0.513401 0.000000
0.952230 0.000000
0.916195 0.000000
IFFT
13.443005 0.000000
6.310127 -0.000000
12.529589 0.000000
12.775041 0.000000
14.586359 -0.000000
3.160823 0.000000
5.363565 0.000000
12.291674 -0.000000
4.444397 -0.000000
8.863521 0.000000
7.638353 0.000000
10.061934 -0.000000
5.836554 0.000000
8.214415 -0.000000
15.235678 -0.000000
14.659121 -0.000000
您的代码中似乎唯一缺少的是您没有将结果除以转换的长度(在本例中为16)以获取原始数据(如示例代码{{中所示) 3}})。当我这样做时,我得到了我认为的预期结果:
Random H1
0.840188 0.000000
0.394383 0.000000
0.783099 0.000000
0.798440 0.000000
0.911647 0.000000
0.197551 0.000000
0.335223 0.000000
0.768230 0.000000
0.277775 0.000000
0.553970 0.000000
0.477397 0.000000
0.628871 0.000000
0.364784 0.000000
0.513401 0.000000
0.952230 0.000000
0.916195 0.000000
IFFT
0.840188 0.000000
0.394383 -0.000000
0.783099 0.000000
0.798440 0.000000
0.911647 -0.000000
0.197551 0.000000
0.335223 0.000000
0.768230 -0.000000
0.277775 -0.000000
0.553970 0.000000
0.477397 0.000000
0.628871 -0.000000
0.364785 0.000000
0.513401 -0.000000
0.952230 -0.000000
0.916195 -0.000000
顺便说一句,感谢您提供完整的,可编译的代码示例。