每当我把使用cuFFT的程序得到的值绘制成图,并与Matlab的结果进行比较时,两者的图形形状相同,最大值和最小值也出现在相同的位置。但是,cuFFT产生的值远大于Matlab产生的值。Matlab代码如下:
% Reference computation: multiply the 1024-point spectra of a rectangular
% pulse train and of its conjugated, reversed copy, then take the magnitude
% of the inverse transform.
fs = 1000; % sample freq
D = [0:1:4]'; % pulse delay times
t = 0 : 1/fs : 4000/fs; % signal evaluation time
w = 0.5; % width of each pulse
% Pulse train: one rectangular pulse of width w at every delay in D.
yp = pulstran(t,D,'rectpuls',w);
% Reversed, conjugated copy of the signal used as the filter.
filt = conj(fliplr(yp));
% NOTE: t has more than 1024 samples, so fft(...,1024) only transforms the
% first 1024 samples of each vector (MATLAB truncates longer inputs).
xx = fft(yp,1024).*fft(filt,1024);
% MATLAB's ifft includes the 1/N normalisation.
xx = (abs(ifft(xx)));
和具有相同输入的CUDA代码如下:
// Forward transform of the signal, in place (input and output buffers are the same).
cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD);
// Forward transform of the filter, in place.
cufftExecC2C(plan, (cufftComplex *)d_filter_signal, (cufftComplex *)d_filter_signal, CUFFT_FORWARD);
// Point-wise product of the two spectra. The kernel is not shown here;
// presumably it writes the product back into d_signal -- TODO confirm.
ComplexPointwiseMul<<<blocksPerGrid, threadsPerBlock>>>(d_signal, d_filter_signal, NX);
// Inverse transform, in place. cuFFT transforms are unnormalised, so this
// result is not divided by the transform length the way MATLAB's ifft is.
cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE);
cuFFT同样执行1024点FFT,批量大小(batch)为2。使用缩放系数NX=1024进行缩放后,得到的值仍然不正确。请告诉我该怎么做。
答案 0 :(得分:4)
这是一个迟来的回答,目的是把这个问题从未回答列表中移除。
您没有提供足够的信息来诊断问题,因为您没有说明cuFFT计划是如何设置的。您甚至没有说明Matlab和cuFFT得到的信号形状是完全相同(此时只需要缩放),还是只是大致相同。不过,让我提出以下两点意见:
1. yp向量包含4000个元素;而调用fft(yp,1024)时,信号会先被截断为1024个元素,然后才执行FFT;
2. cuFFT的逆变换不会按向量元素个数进行缩放(即不做归一化)。
为了方便起见(它可能对其他用户也有用),我在下面给出一个简单的FFT-IFFT示例,其中还包括使用CUDA Thrust库执行的缩放。
#include <cstdio>

#include <cufft.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/transform.h>
/*********************/
/* SCALE BY CONSTANT */
/*********************/
// Unary functor that divides both components of a complex sample (float2)
// by a fixed constant. Intended for use with thrust::transform to apply the
// 1/N normalisation after an inverse cuFFT transform.
class Scale_by_constant
{
private:
    float c_;   // divisor applied to the real and imaginary parts
public:
    // c: the divisor. It is not checked against zero.
    Scale_by_constant(float c) : c_(c) {}

    // Returns a with both components divided by the stored constant.
    // The argument is taken by const reference so the functor can also be
    // applied to const elements and temporaries; it never modifies its input.
    __host__ __device__ float2 operator()(const float2 &a) const
    {
        float2 output;
        output.x = a.x / c_;
        output.y = a.y / c_;
        return output;
    }
};
// Round-trips a small complex vector through cuFFT (forward then inverse,
// both in place) and divides the result by N with Thrust, since cuFFT does
// not normalise its transforms. Prints the recovered samples.
// Returns 0 on success and 1 if any cuFFT call fails.
int main(void){
    const int N=4;

    // --- Setting up input device vector
    thrust::device_vector<float2> d_vec(N,make_cuComplex(1.f,2.f));
    float2 *d_data = thrust::raw_pointer_cast(d_vec.data());

    cufftHandle plan;
    cufftResult status = cufftPlan1d(&plan, N, CUFFT_C2C, 1);
    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "cufftPlan1d failed with code %d\n", (int)status);
        return 1;
    }

    // --- Perform in-place direct Fourier transform
    status = cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD);

    // --- Perform in-place inverse Fourier transform
    if (status == CUFFT_SUCCESS)
        status = cufftExecC2C(plan, d_data, d_data, CUFFT_INVERSE);

    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "cufftExecC2C failed with code %d\n", (int)status);
        cufftDestroy(plan);
        return 1;
    }

    // --- Undo the factor N introduced by the unnormalised forward/inverse pair
    thrust::transform(d_vec.begin(), d_vec.end(), d_vec.begin(), Scale_by_constant((float)(N)));

    // --- Setting up output host vector
    thrust::host_vector<float2> h_vec(d_vec);

    for (int i=0; i<N; i++) printf("Element #%i; Real part = %f; Imaginary part: %f\n",i,h_vec[i].x,h_vec[i].y);

    // --- Release the plan's resources
    cufftDestroy(plan);

    // Keep the console window open until a key is pressed.
    getchar();
    return 0;
}
答案 1 :(得分:4)
通过引入cuFFT回调功能,可以将cuFFT逆FFT所需的归一化操作定义为__device__函数,从而直接嵌入到cufftExecC2C调用中。
除了cuFFT用户指南,有关cuFFT回调功能,请参阅
CUDA Pro Tip: Use cuFFT Callbacks for Custom Data Processing
以下是通过cuFFT回调实现IFFT规范化的示例。
#include <stdio.h>
#include <assert.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
#include <cufftXt.h>
/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call and reports a failure together with the file and
// line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
// Prints the CUDA error string plus file/line to stderr when code is not
// cudaSuccess; when abort is true (the default) the process then exits with
// the error code as its status.
// file is const because it receives __FILE__, a string literal.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
/*********************/
/* CUFFT ERROR CHECK */
/*********************/
// See http://stackoverflow.com/questions/16267149/cufft-error-handling
#ifdef _CUFFT_H_
// Translates a cufftResult status code into the spelling of its enumerator.
// Codes that are not listed in the table yield "<unknown>".
static const char *_cudaGetErrorEnum(cufftResult error)
{
    struct StatusName { cufftResult code; const char *text; };
    static const StatusName kStatusNames[] = {
        { CUFFT_SUCCESS,        "CUFFT_SUCCESS" },
        { CUFFT_INVALID_PLAN,   "CUFFT_INVALID_PLAN" },
        { CUFFT_ALLOC_FAILED,   "CUFFT_ALLOC_FAILED" },
        { CUFFT_INVALID_TYPE,   "CUFFT_INVALID_TYPE" },
        { CUFFT_INVALID_VALUE,  "CUFFT_INVALID_VALUE" },
        { CUFFT_INTERNAL_ERROR, "CUFFT_INTERNAL_ERROR" },
        { CUFFT_EXEC_FAILED,    "CUFFT_EXEC_FAILED" },
        { CUFFT_SETUP_FAILED,   "CUFFT_SETUP_FAILED" },
        { CUFFT_INVALID_SIZE,   "CUFFT_INVALID_SIZE" },
        { CUFFT_UNALIGNED_DATA, "CUFFT_UNALIGNED_DATA" },
    };
    const int entryCount = (int)(sizeof(kStatusNames) / sizeof(kStatusNames[0]));

    // Linear scan: the table is tiny and this only runs on the error path.
    for (int idx = 0; idx < entryCount; ++idx)
    {
        if (kStatusNames[idx].code == error)
            return kStatusNames[idx].text;
    }
    return "<unknown>";
}
#endif
// Wraps a cuFFT call and terminates the program with a diagnostic if the
// call does not return CUFFT_SUCCESS.
#define cufftSafeCall(err) __cufftSafeCall(err, __FILE__, __LINE__)
// err:  status returned by the cuFFT call.
// file: source file of the call site (supplied by the macro).
// line: source line of the call site (supplied by the macro).
// On failure: prints the call site, the numeric code and its name to stderr,
// resets the device and terminates.
inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
{
    if (CUFFT_SUCCESS != err) {
        // Report the caller's location (file/line), not this helper's, and
        // keep the number of conversions equal to the number of arguments.
        fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
                file, line, (int)err, _cudaGetErrorEnum(err));
        cudaDeviceReset();
        assert(0);
        // assert() compiles to nothing under NDEBUG; make sure we still stop.
        exit(EXIT_FAILURE);
    }
}
// cuFFT complex store callback: instead of storing the transform output
// as-is, stores it divided by the float that callerInfo points to.
//   dataOut    - base of the output array (treated as float2 elements)
//   offset     - element index within dataOut to write
//   element    - the value cuFFT computed for that index
//   callerInfo - pointer to a single float holding the divisor
//   sharedPtr  - unused here
__device__ void IFFT_Scaling(void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPtr) {
    const float divisor = *static_cast<const float *>(callerInfo);
    float2 &slot = static_cast<float2 *>(dataOut)[offset];
    slot.x = cuCrealf(element) / divisor;
    slot.y = cuCimagf(element) / divisor;
}
// Device-side function pointer to the callback; the host copies its value
// out with cudaMemcpyFromSymbol before registering it on a plan.
__device__ cufftCallbackStoreC d_storeCallbackPtr = IFFT_Scaling;
/********/
/* MAIN */
/********/
// Demonstrates IFFT normalisation through a cuFFT store callback:
//   1. forward-transforms N complex ones and prints the spectrum;
//   2. registers IFFT_Scaling as the store callback of the same plan;
//   3. inverse-transforms, so every output sample is divided by N on store,
//      and prints the recovered signal.
// Returns 0 on success; CUDA/cuFFT failures terminate inside the check macros.
int main() {
    const int N = 16;
    const size_t bytes = N * sizeof(float2);

    cufftHandle plan;

    // --- Host buffers
    float2 *h_input   = (float2*)malloc(bytes);
    float2 *h_output1 = (float2*)malloc(bytes);
    float2 *h_output2 = (float2*)malloc(bytes);
    if (h_input == NULL || h_output1 == NULL || h_output2 == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(h_input);      // free(NULL) is a no-op
        free(h_output1);
        free(h_output2);
        return 1;
    }

    // --- Device buffers
    float2 *d_input;   gpuErrchk(cudaMalloc((void**)&d_input,   bytes));
    float2 *d_output1; gpuErrchk(cudaMalloc((void**)&d_output1, bytes));
    float2 *d_output2; gpuErrchk(cudaMalloc((void**)&d_output2, bytes));

    // --- Divisor read by the callback: the transform length, so that a
    //     forward/inverse round trip reproduces the input.
    const float h_scaling_factor = (float)N;
    float *d_scaling_factor; gpuErrchk(cudaMalloc((void**)&d_scaling_factor, sizeof(float)));
    gpuErrchk(cudaMemcpy(d_scaling_factor, &h_scaling_factor, sizeof(float), cudaMemcpyHostToDevice));

    for (int i=0; i<N; i++) {
        h_input[i].x = 1.0f;
        h_input[i].y = 0.f;
    }
    gpuErrchk(cudaMemcpy(d_input, h_input, bytes, cudaMemcpyHostToDevice));

    // --- Forward transform (no callback registered yet)
    cufftSafeCall(cufftPlan1d(&plan, N, CUFFT_C2C, 1));
    cufftSafeCall(cufftExecC2C(plan, d_input, d_output1, CUFFT_FORWARD));
    gpuErrchk(cudaMemcpy(h_output1, d_output1, bytes, cudaMemcpyDeviceToHost));
    for (int i=0; i<N; i++) printf("Direct transform - %d - (%f, %f)\n", i, h_output1[i].x, h_output1[i].y);

    // --- Fetch the device function pointer and attach it as a complex store callback
    cufftCallbackStoreC h_storeCallbackPtr;
    gpuErrchk(cudaMemcpyFromSymbol(&h_storeCallbackPtr, d_storeCallbackPtr, sizeof(h_storeCallbackPtr)));
    cufftSafeCall(cufftXtSetCallback(plan, (void **)&h_storeCallbackPtr, CUFFT_CB_ST_COMPLEX, (void **)&d_scaling_factor));

    // --- Inverse transform; scaling happens inside the store callback
    cufftSafeCall(cufftExecC2C(plan, d_output1, d_output2, CUFFT_INVERSE));
    gpuErrchk(cudaMemcpy(h_output2, d_output2, bytes, cudaMemcpyDeviceToHost));
    for (int i=0; i<N; i++) printf("Inverse transform - %d - (%f, %f)\n", i, h_output2[i].x, h_output2[i].y);

    // --- Release every resource acquired above
    cufftSafeCall(cufftDestroy(plan));
    gpuErrchk(cudaFree(d_input));
    gpuErrchk(cudaFree(d_output1));
    gpuErrchk(cudaFree(d_output2));
    gpuErrchk(cudaFree(d_scaling_factor));
    free(h_input);
    free(h_output1);
    free(h_output2);

    return 0;
}
编辑
执行回调操作的“时机”由调用cufftXtSetCallback时传入的CUFFT_CB_ST_COMPLEX指定。请注意,同一个cuFFT计划可以同时设置加载(load)回调和存储(store)回调。