我有一个包含300,000个点的数组,想对其中每600个点做一次FFT。我正在尝试使用cufftPlanMany来实现,但在下面这一行收到了一个未知错误:
cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500));
retrevialfft.cu(82) : cufftSafeCall() CUFFT error: <unknown>
这是上下文中的代码
// Reproduction snippet from the question: fills a host buffer with random
// samples, builds a batched 1D complex-to-complex cuFFT plan, runs it in place
// on d_signal and copies the result back to the host. It is kept exactly as
// posted because it is the code that produces the reported error.
// NOTE(review): none of the CUDA runtime calls below (cudaSetDevice,
// cudaMalloc, cudaMemcpy, cudaFree) have their return status checked.
cudaSetDevice(0);
// Allocate host memory for the signal
// NOTE(review): SIGNAL_SIZE is defined outside this snippet; the question text
// suggests 300,000 - confirm. The malloc result is not checked for NULL.
cufftComplex* h_signal=(cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);
// Initialize the memory for the signal: real part uniform in [0, 1], imaginary part 0
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
h_signal[i].x = rand() / (float)RAND_MAX;
h_signal[i].y = 0;
// printf("Orignal: %f %f \n", h_signal[i].x, h_signal[i].y);
}
// Size in bytes of the whole signal, used for the device allocation and the copy back
int mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;
// Allocate device memory for signal
cufftComplex* d_signal;
cudaMalloc((void**)&d_signal, mem_size);
// NOTE(review): h_signal is never copied to d_signal anywhere in this snippet,
// so the transform below operates on whatever the fresh device buffer contains.
int rank = 1; //1d plan
// NOTE(review): n[0] is the length of ONE transform. With numCols = 300000 and
// a batch count of 500 the plan describes 500 transforms of 300000 points each
// (150,000,000 complex values), while d_signal holds only SIGNAL_SIZE elements.
// The question text asks for 600-point FFTs over a 300,000-point array, which
// would correspond to a transform length of 600 with 500 batches - confirm intent.
int numCols = 300000;
int n[] = {numCols};
// batch and ostride are declared but not used: the plan call passes literals instead.
int batch = 500;
int istride = 1;
int ostride = 1;
// Distance, in elements, between the first samples of two consecutive input signals
int idist = numCols;
// CUFFT plan
// NOTE(review): inembed and onembed are both NULL here; per the cuFFT
// documentation this selects the basic contiguous layout and the stride/dist
// arguments (including the output dist of 1) are ignored - verify against the
// cuFFT version in use.
cufftHandle plan;
cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, 1,1, CUFFT_C2C, 500));
// Transform signal (in place: input and output are both d_signal)
printf("Transforming signal cufftExecC2C\n");
cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));
// Copy device memory to host
cufftComplex* h_transformed = (cufftComplex*)malloc(sizeof(cufftComplex) * SIGNAL_SIZE);;
cudaMemcpy(h_transformed, d_signal, mem_size,
cudaMemcpyDeviceToHost);
//Destroy CUFFT context
cufftDestroy(plan);
// cleanup memory
free(h_signal);
free(h_transformed);
cudaFree(d_signal);
cudaDeviceReset();
有人知道这个错误究竟是什么原因造成的吗?
答案 0 :(得分:1)
由于您没有提供关于该问题的更多细节,下面我给出一段使用cufftPlanMany()
执行批量1D FFT的完整可运行代码,希望对您有所帮助。
#include <stdio.h>
#include <stdlib.h>
#include <cufft.h>
#include <assert.h>
/********************/
/* CUDA ERROR CHECK */
/********************/
/*
 * gpuErrchk(call): evaluates a CUDA runtime call and forwards its status,
 * together with the call site's file name and line number, to gpuAssert().
 * The do/while(0) wrapper makes the macro expand to exactly one statement, so
 * it can be used inside an unbraced if/else without changing control flow.
 */
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/*
 * Reports a failed CUDA runtime call on stderr.
 *
 *   code  - status returned by the CUDA runtime call
 *   file  - source file of the call site (normally __FILE__); declared
 *           const because string literals are not convertible to char* in
 *           C++11 and later
 *   line  - source line of the call site (normally __LINE__)
 *   abort - when true (the default), wait for a key press and then exit the
 *           process using the error code as exit status
 *
 * Does nothing when code == cudaSuccess.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess) return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) {
        getchar();   /* keep the console window open until the user acknowledges */
        exit(code);
    }
}
/*********************/
/* CUFFT ERROR CHECK */
/*********************/
/*
 * Maps a cufftResult status code to the spelling of its enumerator.
 * Codes that are not listed in the table below yield "<unknown>".
 */
static const char *_cudaGetErrorEnum(cufftResult error)
{
    struct StatusName {
        cufftResult code;
        const char *text;
    };

    static const StatusName known[] = {
        { CUFFT_SUCCESS,        "CUFFT_SUCCESS"        },
        { CUFFT_INVALID_PLAN,   "CUFFT_INVALID_PLAN"   },
        { CUFFT_ALLOC_FAILED,   "CUFFT_ALLOC_FAILED"   },
        { CUFFT_INVALID_TYPE,   "CUFFT_INVALID_TYPE"   },
        { CUFFT_INVALID_VALUE,  "CUFFT_INVALID_VALUE"  },
        { CUFFT_INTERNAL_ERROR, "CUFFT_INTERNAL_ERROR" },
        { CUFFT_EXEC_FAILED,    "CUFFT_EXEC_FAILED"    },
        { CUFFT_SETUP_FAILED,   "CUFFT_SETUP_FAILED"   },
        { CUFFT_INVALID_SIZE,   "CUFFT_INVALID_SIZE"   },
        { CUFFT_UNALIGNED_DATA, "CUFFT_UNALIGNED_DATA" },
    };
    const int knownCount = (int)(sizeof(known) / sizeof(known[0]));

    /* Linear scan: the table is tiny and this only runs on the error path. */
    for (int idx = 0; idx < knownCount; ++idx) {
        if (known[idx].code == error) {
            return known[idx].text;
        }
    }
    return "<unknown>";
}
/*
 * cufftSafeCall(call): evaluates a cuFFT call and forwards its status, together
 * with the call site's file name and line number, to __cufftSafeCall().
 */
#define cufftSafeCall(err) __cufftSafeCall(err, __FILE__, __LINE__)

/*
 * Reports a failed cuFFT call on stderr, resets the device and terminates.
 *
 *   err  - status returned by the cuFFT call
 *   file - source file of the call site (normally __FILE__)
 *   line - source line of the call site (normally __LINE__)
 *
 * Does nothing when err == CUFFT_SUCCESS.
 */
inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
{
    if (CUFFT_SUCCESS == err) return;

    /* Report the caller's location (file/line parameters), and keep the number
       of conversions in the format string equal to the number of arguments:
       %s file, %d line, %d numeric status, %s status name. */
    fprintf(stderr, "CUFFT error in file '%s', line %d\nerror %d: %s\nterminating!\n",
            file, line, (int)err, _cudaGetErrorEnum(err));
    cudaDeviceReset();
    assert(0);
    /* assert() compiles to nothing when NDEBUG is defined; still terminate. */
    exit(EXIT_FAILURE);
}
/********/
/* MAIN */
/********/
/*
 * Demonstrates batched 1D complex-to-complex FFTs with cufftPlanMany().
 *
 * `batch` signals of `numCols` samples each are stored back to back in one
 * buffer, transformed in place on the device with a single cufftExecC2C call,
 * copied back and printed. Every sample is initialised to 1 + 0i, so each
 * transformed signal is expected to be numCols at bin 0 and 0 elsewhere.
 *
 * Returns 0 on success, EXIT_FAILURE if the host buffer cannot be allocated.
 * CUDA and cuFFT failures are handled by gpuErrchk / cufftSafeCall.
 */
int main() {
    const int batch       = 3;                  // --- How many transforms to be performed
    const int numCols     = 16;                 // --- Size of each transform
    const int SIGNAL_SIZE = batch * numCols;    // --- Overall size for all the signals
    const size_t mem_size = sizeof(cufftComplex) * SIGNAL_SIZE;

    // --- Allocate host memory for all the signals
    cufftComplex* h_signal = (cufftComplex*)malloc(mem_size);
    if (h_signal == NULL) {
        fprintf(stderr, "Host allocation of %d samples failed\n", SIGNAL_SIZE);
        return EXIT_FAILURE;
    }

    // --- Initialize host memory for all the signals
    for (int i = 0; i < SIGNAL_SIZE; ++i) {
        h_signal[i].x = 1.f;
        h_signal[i].y = 0.f;
    }

    // --- Allocate device memory for all the signals
    cufftComplex* d_signal;
    gpuErrchk(cudaMalloc((void**)&d_signal, mem_size));

    // --- Host to Device memcopy
    gpuErrchk(cudaMemcpy(d_signal, h_signal, mem_size, cudaMemcpyHostToDevice));

    int rank    = 1;                            // --- 1d plan
    int n[]     = {numCols};                    // --- Length of one transform
    int istride = 1;                            // --- Samples of one signal are contiguous
    int ostride = 1;
    int idist   = numCols;                      // --- Offset between consecutive input signals
    int odist   = numCols;                      // --- Offset between consecutive output signals

    // --- CUFFT plan
    // The last argument must be the number of signals actually stored in the
    // buffer; a larger count makes the execution run past the allocation.
    cufftHandle plan;
    cufftSafeCall(cufftPlanMany(&plan, rank, n, NULL, istride, idist, NULL, ostride, odist, CUFFT_C2C, batch));

    // --- Signals transformations (in place)
    cufftSafeCall(cufftExecC2C(plan, (cufftComplex*)d_signal, (cufftComplex*)d_signal, CUFFT_FORWARD));

    // --- Device to Host memcopy
    gpuErrchk(cudaMemcpy(h_signal, d_signal, mem_size, cudaMemcpyDeviceToHost));

    for (int i = 0; i < SIGNAL_SIZE; ++i)
        printf("Real part = %f; Imaginar part = %f\n", h_signal[i].x, h_signal[i].y);

    // --- Destroy CUFFT context
    cufftSafeCall(cufftDestroy(plan));

    // --- Memory cleanup
    free(h_signal);
    gpuErrchk(cudaFree(d_signal));
    gpuErrchk(cudaDeviceReset());

    return 0;
}