使用简单的FFT 1D OpenCL示例会导致错误的结果

时间:2013-11-29 11:59:29

标签: opencl gpu fft amd-processor

我尝试用clAmdFft OpenCL libray做一个简单的FFT 1D。为此,我使用频率等于10 Hz的cosinus信号,采样频率为sizex * frequency_signal,sizex为采样点数。

在我的结果中,我无法在f = 10 Hz时产生dirac冲击。对于第一个频率(k * f_sampling / sizex,其中k = 0到sizex),我有以下输出(第1列:k * f_sampling / size x和第2列:X_k)

0.000000 -5.169492e-06
10.000000 -5.169449e-06
20.000000 2.498745e-02
30.000000 2.499508e-02
40.000000 2.832322e-06
50.000000 9.587315e-07
60.000000 4.648561e-07
70.000000 9.241408e-08
80.000000 1.400218e-07
90.000000 3.699876e-07
100.000000 2.663564e-07
110.000000 2.523218e-07
120.000000 1.631196e-07
130.000000 7.783362e-08
140.000000 7.932793e-08
150.000000 9.296434e-08
160.000000 1.039581e-07
170.000000 7.396823e-08
180.000000 6.584698e-08
190.000000 1.468647e-08
200.000000 1.694581e-08
210.000000 3.367836e-08
220.000000 3.538253e-08
...

正如你所看到的,似乎有一个dirac(我说因为“e-02”值与其他值相比,但我不确定)在f = 20 Hz和f = 30 Hz之间不是预期的10赫兹。

以下是代码:

#include "clAmdFft.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/////////////////////////////////////
// OpenCL FFT 1D function ///////////
/////////////////////////////////////

int FftOpenCL(float *tab, const char* direction, int Ng)
{
 cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem bufX;
    int ret = 0;
    size_t N = Ng/2;

    /* FFT library realted declarations */
    clAmdFftPlanHandle planHandle;
    clAmdFftDim dim = CLFFT_1D;
    size_t clLengths[1] = {N};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );
    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    queue = clCreateCommandQueue( ctx, device, 0, &err );

    /* Setup clFFT. */
    clAmdFftSetupData fftSetup;
    err = clAmdFftInitSetupData(&fftSetup);
    err = clAmdFftSetup(&fftSetup);

    /* Prepare OpenCL memory objects and place data inside them. */
    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(float), NULL, &err );

    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0,
    N * 2 * sizeof(float), tab, 0, NULL, NULL );

    /* Create a default plan for a complex FFT. */
    err = clAmdFftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);

    /* Set plan parameters. */
    err = clAmdFftSetPlanPrecision(planHandle, CLFFT_SINGLE);
    err = clAmdFftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
    err = clAmdFftSetResultLocation(planHandle, CLFFT_INPLACE);

    /* Bake the plan. */
    err = clAmdFftBakePlan(planHandle, 1, &queue, NULL, NULL);


    if(strcmp(direction,"forward") == 0) 
    {
    /* Execute the plan. */
    err = clAmdFftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
    }

    /* Wait for calculations to be finished. */
    err = clFinish(queue);

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof(float), tab, 0, NULL, NULL );

    /* Release OpenCL memory objects. */
    clReleaseMemObject( bufX );

    /* Release the plan. */
    err = clAmdFftDestroyPlan( &planHandle );

    /* Release clFFT library. */
    clAmdFftTeardown( );

    /* Release OpenCL working objects. */
    clReleaseCommandQueue( queue );
    clReleaseContext( ctx );

    return ret;

}


int main(void) {

    int i;

    // temporal array
    float *x_temp;

    // signal array and FFT output array
    float *Array;

    // number of sampling points
    int sizex = 10000;

    float h = 0;

    // signal frequency
    float frequency_signal = 10;

    // size x points between 0 and T_signal
    float frequency_sampling = sizex*frequency_signal;

    // step = T_sampling
    float step = 1.0/frequency_sampling;

    FILE *Fft1D;

    // Allocation of Array

    Array = (float*) malloc(sizex*sizeof(float));
    x_temp = (float*) malloc(sizex*sizeof(float));

    // Initialization of 1D ArrayInput

    for(i=0; i<sizex; i++)
    {
        Array[i] = cos(2*M_PI*frequency_signal*h);
        x_temp[i] = h; 
        h = h + step;
    }

    if (FftOpenCL(Array,"forward", sizex) == 0)
        printf("Fft passed !\n");;

    // Printf ArrayOutput - Multiply FFT Array result by period T_sampling

    Fft1D = fopen("Fft1D.dat","w");

    for(i=0; i<sizex; i++)
        fprintf(Fft1D,"%f %e\n", i*frequency_sampling/sizex, 1.0/frequency_sampling*Array[i]);

    fclose(Fft1D);

    return 0;

}

任何人都可以看到什么问题?

由于

1 个答案:

答案 0 :(得分:0)

看起来真实和复杂数组之间存在类型不匹配。假设COMPLEX_INTERLEAVED表示(re,im)值对,则需要分配数组 2 * sizex ,并初始化:

array[2*i  ] = cos(...);
array[2*i+1] = 0.0f;

或者将输入类型更改为real。