我对这个程序有疑问:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>
#define SIGNAL_SIZE 1024
/*
 * Round-trip test: build a complex test signal on the host, copy it to the
 * device, run a forward and then an inverse cuFFT transform in place, copy the
 * result back and print the original and round-tripped real parts side by side.
 *
 * NOTE(review): this is the version that produces wrong output. The buffers
 * hold cuDoubleComplex elements, but the plan and the exec calls below are the
 * C2C (cufftComplex) variant; a Z2Z plan/exec is what matches this data type.
 * The round-trip result is also printed without dividing by SIGNAL_SIZE.
 * No return code from the CUDA runtime or cuFFT is checked anywhere.
 */
int main(int argc, char **argv) {
// Events used to time the GPU section below.
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Allocate host memory for the signal (result of malloc is not checked)
cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
// Initialize the signal: with t = i/SIGNAL_SIZE, the real part is t on
// [0, 0.5) and t - 1 on [0.5, 1); the imaginary part is always 0.
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
if((double)i/SIGNAL_SIZE>=0 && (double)i/SIGNAL_SIZE<0.5) h_signal[i].x = (double)i/SIGNAL_SIZE;
else if((double)i/SIGNAL_SIZE>=0.5 && (double)i/SIGNAL_SIZE<1) h_signal[i].x = (double)i/SIGNAL_SIZE-1;
h_signal[i].y = 0;
}
// Allocate device memory for signal
cuDoubleComplex *d_signal;
cudaMalloc((void **) &d_signal, SIGNAL_SIZE*sizeof(cuDoubleComplex));
// Copy host memory to device
cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
// Start of the timed region: it covers plan creation, both transforms,
// the host allocation of the result buffer and the device-to-host copy.
cudaEventRecord(start, 0);
cufftHandle plan;
// NOTE(review): CUFFT_C2C is requested although d_signal is cuDoubleComplex.
cufftPlan1d(&plan, SIGNAL_SIZE , CUFFT_C2C, 1);
// FFT computation: forward transform, in place.
// NOTE(review): the casts to cufftComplex* hide the type mismatch from the compiler.
cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal,
CUFFT_FORWARD);
// Inverse transform, in place, on the output of the forward transform.
cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_INVERSE);
// Host buffer for the round-tripped signal, then copy the device result into it.
cuDoubleComplex *h_signal_inv =(cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
cudaMemcpy(h_signal_inv, d_signal, sizeof(cuDoubleComplex) * SIGNAL_SIZE, cudaMemcpyDeviceToHost);
// End of the timed region; wait for the stop event before reading the time.
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop);
printf("Elapsed Time: %3.1f ms\n", elapsedTime);
// Print original vs. round-tripped real part, one pair per line.
// NOTE(review): the round-tripped value is printed as-is, not divided by SIGNAL_SIZE.
for(int i=0;i<SIGNAL_SIZE;i++) printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x);
// Release the plan, host buffers and device buffer, then reset the device.
// The two events created at the top are not destroyed explicitly.
cufftDestroy(plan);
free(h_signal);
free(h_signal_inv);
cudaFree(d_signal);
cudaDeviceReset();
return 0;
}
我想对一个信号做正向变换,然后再逆变换回来,但输出的上半部分是错误的。
你能帮我找到错误吗?
非常感谢!
答案 0 :(得分:2)
您混用了数据类型。
cufftDoubleComplex
与 cufftComplex
不同。使用 cufftDoubleComplex
时,变换类型应为 Z2Z,而不是 C2C。
另外,使用 cuFFT 先做正向变换再做逆变换之后,若想得到与原始数据一致的结果,必须将结果除以信号长度:
cuFFT 执行的是未归一化的 FFT;也就是说,对输入数据集执行正向 FFT,再对所得结果执行逆 FFT,得到的数据等于输入数据乘以元素个数。是否以及如何按数据集大小的倒数对变换结果进行缩放,由用户自行决定。
以下代码解决了上述问题,应该会给您带来更好的结果:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>
#define SIGNAL_SIZE 1024
/* Abort with a diagnostic if a CUDA runtime call did not return cudaSuccess. */
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Abort with a diagnostic if a cuFFT call did not return CUFFT_SUCCESS. */
static void checkCufft(cufftResult status, const char *what) {
    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "cuFFT error in %s: code %d\n", what, (int) status);
        exit(EXIT_FAILURE);
    }
}

/*
 * Round-trip test for a double-precision complex FFT.
 *
 * Builds a SIGNAL_SIZE-point test signal on the host, copies it to the device,
 * runs a forward and then an inverse Z2Z transform in place, copies the result
 * back and prints the original and the round-tripped real parts side by side.
 * The round-tripped value is divided by SIGNAL_SIZE when printed, because the
 * forward+inverse pair is not normalized.
 *
 * Every CUDA runtime / cuFFT call and both host allocations are checked; any
 * failure prints a message to stderr and exits with EXIT_FAILURE.
 *
 * Returns 0 on success. argc/argv are unused.
 */
int main(int argc, char **argv) {
    (void) argc;
    (void) argv;

    const size_t bytes = sizeof(cuDoubleComplex) * SIGNAL_SIZE;

    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    // Host buffers: the input signal and the round-tripped result.
    // Both are allocated up front so a failure is reported before any GPU work
    // and the host malloc is not part of the timed region.
    cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(bytes);
    cuDoubleComplex *h_signal_inv = (cuDoubleComplex *) malloc(bytes);
    if (h_signal == NULL || h_signal_inv == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(h_signal);
        free(h_signal_inv);
        return EXIT_FAILURE;
    }

    // Test signal: with t = i/SIGNAL_SIZE in [0, 1), the real part is t on
    // [0, 0.5) and t - 1 on [0.5, 1); the imaginary part is 0.
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
        const double t = (double) i / SIGNAL_SIZE;
        h_signal[i].x = (t < 0.5) ? t : t - 1;
        h_signal[i].y = 0;
    }

    // Device buffer, transformed in place.
    cuDoubleComplex *d_signal;
    checkCuda(cudaMalloc((void **) &d_signal, bytes), "cudaMalloc");
    checkCuda(cudaMemcpy(d_signal, h_signal, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(HostToDevice)");

    // Timed region: plan creation, both transforms and the copy back to host.
    checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

    cufftHandle plan;
    // Z2Z is the plan type that matches double-precision complex data.
    checkCufft(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_Z2Z, 1), "cufftPlan1d");
    checkCufft(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_FORWARD),
               "cufftExecZ2Z(forward)");
    checkCufft(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_INVERSE),
               "cufftExecZ2Z(inverse)");

    checkCuda(cudaMemcpy(h_signal_inv, d_signal, bytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy(DeviceToHost)");

    checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize");

    float elapsedTime;
    checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
    printf("Elapsed Time: %3.1f ms\n", elapsedTime);

    // Original vs. round-tripped real part; divide by SIGNAL_SIZE to undo the
    // scaling introduced by the unnormalized forward+inverse pair.
    for (int i = 0; i < SIGNAL_SIZE; i++)
        printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x / SIGNAL_SIZE);

    // Release everything that was created above, including the timing events.
    checkCufft(cufftDestroy(plan), "cufftDestroy");
    checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    free(h_signal);
    free(h_signal_inv);
    checkCuda(cudaFree(d_signal), "cudaFree");
    checkCuda(cudaDeviceReset(), "cudaDeviceReset");
    return 0;
}