我的cuda fft代码有什么问题?

时间:2019-12-12 02:29:46

标签: c++ visual-studio-2010 cuda fft cufft

我一直在努力尝试一个简单的2d cufft示例。 这是我的代码的说明。

  1. 将4x4矩阵读取为16x1向量
  2. 制作cufftPlan
  3. 执行cudaMalloc、cudaMemcpy(分配显存并把输入数据拷贝到GPU)
  4. 执行2d fft
  5. 从GPU读取输出数据
  6. 显示输出数据

我试图在带有Cuda 8.0的VS2010上构建代码,但它显示了此消息。

kernel.cu.obj : error LNK2019: unresolved external symbol _cufftDestroy@4 referenced in function _main
kernel.cu.obj : error LNK2019: unresolved external symbol _cufftExecC2C@16 referenced in function _main
kernel.cu.obj : error LNK2019: unresolved external symbol _cufftPlan2d@16 referenced in function _main

所有头文件都已经包含进来了。我猜问题出在传给 cufftDestroy、cufftExecC2C、cufftPlan2d 的 cufftHandle 变量 `plan_input` 上。你们能告诉我代码中的问题在哪里吗?...

#include <stdlib.h>
#include <stdio.h>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <cufft.h>
#include <cmath>

#include <iostream>
#include <fstream>
#include <string>

using namespace std;
// Alias for the CUDA float2 vector type; it is not referenced anywhere else
// in the visible part of this file.
typedef float2 Complex;

// File-scope transform dimensions. main() declares locals with the same
// names, so these two are shadowed there.
int NX;
int NY;

// cuFFT plan handle: filled in by cufftPlan2d, used by cufftExecC2C and
// released by cufftDestroy in main().
cufftHandle plan_input;

// Host-side buffers (allocated with new[] in main()): the values read from
// the input file and the transform result copied back from the device.
float2 *ginput, *goutput;

// Status of the most recent cuFFT call and CUDA runtime call, respectively.
cufftResult result;
cudaError ErrorCode;

// Device-side buffers (allocated with cudaMalloc in main()): FFT input and
// FFT output.
cufftComplex *idata, *odata;

int main()
{
    int NX = 4;
    int NY = 4;
    int count;
    const int NUM_element = 16;

    ginput = new float2[NUM_element];
    goutput = new float2[NUM_element];

    // read File
    ifstream inputFile;
    inputFile.open("array.txt");

    count = 0;
    while (!inputFile.eof())
    {
        inputFile >> ginput[count].x;
        ginput[count].y = 0;
        count++;
    }
    inputFile.close();


    // Main process
    //------------------------------Plan-----------------------------//
    result = cufftPlan2d(&plan_input, NX, NY, CUFFT_C2C);
    if (result != CUFFT_SUCCESS)
        return result;  

    //------------Memory Allocation for input/output-----------------//
    ErrorCode = cudaMalloc((void**)&idata, sizeof(cufftComplex)*NX*NY);
    if (ErrorCode != cudaSuccess){
        fprintf(stderr, "Cuda error: Failed to allocate input\n");
        return ErrorCode;   
    }

    ErrorCode = cudaMalloc((void**)&odata, sizeof(cufftComplex)*NX*NY);
    if (ErrorCode != cudaSuccess){
        fprintf(stderr, "Cuda error: Failed to allocate output\n");
        return ErrorCode;   
    }

    size_t sizeComplex = sizeof(cufftComplex);
    ErrorCode = cudaMemcpy(idata, (cufftComplex *)ginput, sizeComplex*NX*NY, cudaMemcpyHostToDevice);
    if (ErrorCode != cudaSuccess){
        fprintf(stderr, "Cuda error: Failed to cudaMemcpy\n");
        return ErrorCode;
    }
    //--------------------------2-D FFT Z2Z--------------------------//
    result = cufftExecC2C(plan_input, idata, odata, CUFFT_FORWARD);
    if (result != CUFFT_SUCCESS){
        return result;      
    }


    ErrorCode = cudaMemcpy((cufftComplex *)goutput, odata, sizeComplex*NX*NY, cudaMemcpyDeviceToHost);
    if (ErrorCode != cudaSuccess){
        fprintf(stderr, "Cuda error: Failed to cudaMemcpy\n");
        return ErrorCode;
    }
    /////////////////////// destroy and free memonry ///////////////////
    ErrorCode = cudaFree(idata); 
    if( ErrorCode != cudaSuccess)
    {
        return ErrorCode;
    }

    ErrorCode = cudaFree(odata);
    if( ErrorCode != cudaSuccess)
    {
        return ErrorCode;
    }

    cufftDestroy(plan_input);

    //////////////// show result //////////////////
    for(count = 0; count < NUM_element; count++)
        cout << goutput[count].x << endl;

    return 0;
}

0 个答案:

没有答案