OpenCL内存传输问题(错误代码-6)

时间:2014-07-19 01:21:44

标签: opencl global nvidia allocation

我内心有点崩溃了。我努力了一整天,但无济于事。我在运行一些以前运行良好的代码时遇到了问题,所以我写了一个简短的"玩具"OpenCL程序,试图弄清楚发生了什么,但这个玩具程序让我感到困惑,而且沮丧到难以置信。

我正在使用具有3 GB全局内存的Nvidia GTX 780。它的最大单次分配量约为780 MB。起初,当我故意过度分配时,它并不报错。这个问题已经解决了(是一个笔误,但编译器/分析器没有发现它)。现在,即使尝试分配远低于设备应该能够处理的容量,我在第二个大缓冲区的分配上仍然得到-6(CL_OUT_OF_HOST_MEMORY)的错误代码。

我一直在研究这个错误,我无法跟踪它在这种情况下的应用方式。在我使用的机器上有32 gb的内存,所以那里肯定不缺。我认为那里有一些我不理解的事情。

它将分配第一个缓冲区,但随后会阻塞第二个缓冲区。我基本上只能分配几乎我想要和需要的全局内存量。

非常感谢任何帮助。如果你能帮我解决这个问题,而且你在洛杉矶附近,我会请你出去喝一杯。我就是沮丧到了这个地步。

我的机器的代码和输出如下。

谢谢, 约翰

主程序:

#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "kernels.cl"
#define KERNEL_NAME "test"

#include <CL/cl.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>


#define N_PROJ 4000
#define N_CHANNELS 736
#define N_ROWS 32


/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {

    cl_platform_id platform;
    cl_device_id dev;
    int err;

    /* Identify a platform */
    err = clGetPlatformIDs(1, &platform, NULL);
    if(err < 0) {
        perror("Couldn't identify a platform");
        exit(1);
    }

    /* Access a device */
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
    if(err == CL_DEVICE_NOT_FOUND) {
        perror("Just a heads up: I'm not going to run on the GPU");
        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
    }
    if(err < 0) {
        perror("Couldn't access any devices");
        exit(1);
    }
    cl_ulong16 alloc_size,mem_size;
    char name[40];

    clGetDeviceInfo(dev,CL_DEVICE_MAX_MEM_ALLOC_SIZE,sizeof(cl_ulong16),&alloc_size,NULL);
    clGetDeviceInfo(dev,CL_DEVICE_NAME,sizeof(name),name,NULL);
    clGetDeviceInfo(dev,CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(cl_ulong16),&mem_size,NULL);
    printf("Using device: %s\n",name);
    printf("Global memory size: %lu\n",mem_size);
    printf("Max. allocation: %lu\n",alloc_size);

    return dev;
}

/*
 * Read an OpenCL source file, create a program object from it and build it.
 *
 * ctx      - context the program is created in
 * dev      - device whose build log is printed when the build fails
 * filename - path of the OpenCL source file
 *
 * Returns the built program. On any failure (file access, allocation,
 * program creation, build) a message is printed and the process exits with
 * status 1; on a build failure the build log is printed as well.
 */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {

   cl_program program;
   FILE *program_handle;
   char *program_buffer, *program_log;
   size_t program_size, log_size = 0;
   long file_size;
   cl_int err;

   /* Binary mode keeps the size reported by ftell equal to the bytes fread delivers. */
   program_handle = fopen(filename, "rb");
   if(program_handle == NULL) {
      perror("Couldn't find the program file");
      exit(1);
   }
   if(fseek(program_handle, 0, SEEK_END) != 0 ||
      (file_size = ftell(program_handle)) < 0) {
      perror("Couldn't determine the program file size");
      exit(1);
   }
   rewind(program_handle);

   program_buffer = (char*)malloc((size_t)file_size + 1);
   if(program_buffer == NULL) {
      fprintf(stderr, "Couldn't allocate memory for the program source\n");
      exit(1);
   }

   /* Use the byte count actually read as the source length instead of a guessed size. */
   program_size = fread(program_buffer, sizeof(char), (size_t)file_size, program_handle);
   if(ferror(program_handle)) {
      perror("Couldn't read the program file");
      exit(1);
   }
   program_buffer[program_size] = '\0';
   fclose(program_handle);

   /* Create program from file */
   program = clCreateProgramWithSource(ctx, 1,
      (const char**)&program_buffer, &program_size, &err);
   if(err != CL_SUCCESS) {
      /* OpenCL does not set errno, so report its own status code. */
      fprintf(stderr, "Couldn't create the program (OpenCL error %d)\n", (int)err);
      exit(1);
   }
   free(program_buffer);

   /* Build program */
   err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
   if(err != CL_SUCCESS) {

      fprintf(stderr, "Couldn't build the program (OpenCL error %d)\n", (int)err);

      /* Find size of log and print it */
      if(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
            0, NULL, &log_size) == CL_SUCCESS) {
         program_log = (char*) malloc(log_size + 1);
         if(program_log != NULL) {
            program_log[log_size] = '\0';
            if(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                  log_size + 1, program_log, NULL) == CL_SUCCESS) {
               fprintf(stderr, "%s\n", program_log);
            }
            free(program_log);
         }
      }
      exit(1);
   }

   return program;
}

/* Print a message with the OpenCL status code and exit when a call failed. */
static void check_cl(cl_int status, const char *what)
{
    if (status != CL_SUCCESS) {
        /* OpenCL does not set errno, so perror() would print unrelated text. */
        fprintf(stderr, "%s (OpenCL error %d)\n", what, (int)status);
        exit(1);
    }
}

/*
 * Toy host program: creates two large float buffers and one small int buffer
 * on the device, runs the KERNEL_NAME kernel from PROGRAM_FILE once as a
 * single task, reads all three buffers back and prints a few elements.
 *
 * Every OpenCL call is checked; the first failure prints the call that failed
 * together with its OpenCL status code and exits with status 1.
 */
int main(int argc, const char * argv[])
{
    /* Standard OCL structures */
    cl_device_id device;
    cl_context context;
    cl_program program;
    cl_kernel kernel;
    cl_command_queue queue;
    cl_int err;

    /* Element count and byte size shared by the two large buffers. */
    const size_t n_elems = (size_t)N_PROJ * N_CHANNELS * N_ROWS;
    const size_t big_bytes = n_elems * sizeof(float);

    (void)argc;
    (void)argv;

    device = create_device();
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    check_cl(err, "Couldn't create a context");
    program = build_program(context, device, PROGRAM_FILE);
    queue = clCreateCommandQueue(context, device, 0, &err);
    check_cl(err, "Couldn't create a command queue");
    kernel = clCreateKernel(program, KERNEL_NAME, &err);
    check_cl(err, "Couldn't create the kernel");

    /* Declare and set data */
    int a[] = {1, 2, 3};
    float *rebin = (float*) calloc(n_elems, sizeof(float));
    float *mat   = (float*) calloc(n_elems, sizeof(float));
    if (rebin == NULL || mat == NULL) {
        fprintf(stderr, "Couldn't allocate the host arrays\n");
        exit(1);
    }

    printf("\nAllocation size: %zu\n", big_bytes);
    printf("Total memory to be allocated: %zu\n", 2 * big_bytes + sizeof(a));

    /*
     * Declare and set buffer objects.
     * NOTE(review): CL_OUT_OF_HOST_MEMORY (-6) refers to host-side resources.
     * If this is built as a 32-bit executable, the process address space
     * rather than installed RAM may be the limit - confirm the build target.
     */
    cl_mem a_buff, rebin_buff, mat_buff;

    a_buff = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                            sizeof(a), a, &err);
    check_cl(err, "Couldn't create buffer 0");
    rebin_buff = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                                big_bytes, rebin, &err);
    check_cl(err, "Couldn't create buffer 1");
    mat_buff = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
                              big_bytes, mat, &err);
    check_cl(err, "Couldn't create buffer 2");

    /* Bind the buffers to the kernel parameters (mat, rebin, a) */
    check_cl(clSetKernelArg(kernel, 0, sizeof(cl_mem), &mat_buff),
             "Couldn't set kernel argument 0");
    check_cl(clSetKernelArg(kernel, 1, sizeof(cl_mem), &rebin_buff),
             "Couldn't set kernel argument 1");
    check_cl(clSetKernelArg(kernel, 2, sizeof(cl_mem), &a_buff),
             "Couldn't set kernel argument 2");

    check_cl(clEnqueueTask(queue, kernel, 0, NULL, NULL),
             "Couldn't enqueue the kernel");

    /* Blocking reads: each call returns once the data is in host memory. */
    check_cl(clEnqueueReadBuffer(queue, mat_buff, CL_TRUE, 0, big_bytes, mat, 0, NULL, NULL),
             "Couldn't read back mat");
    check_cl(clEnqueueReadBuffer(queue, rebin_buff, CL_TRUE, 0, big_bytes, rebin, 0, NULL, NULL),
             "Couldn't read back rebin");
    check_cl(clEnqueueReadBuffer(queue, a_buff, CL_TRUE, 0, sizeof(a), a, 0, NULL, NULL),
             "Couldn't read back a");

    printf("%f %f %f\n", mat[1], mat[2], mat[3]);
    printf("%f %f %f\n", rebin[1], rebin[2], rebin[3]);
    printf("%i %i %i", a[0], a[1], a[2]);

    /* Release device buffers and host arrays before the other OpenCL objects. */
    clReleaseMemObject(mat_buff);
    clReleaseMemObject(rebin_buff);
    clReleaseMemObject(a_buff);
    free(mat);
    free(rebin);

    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseProgram(program);
    clReleaseContext(context);

    printf("\n\nProgram apparently executed fully. \n");
    return 0;
}

内核:

    /*
     * Toy kernel: overwrites a[0..2] with 3, 2, 1, rebin[1..3] with 1, 2, 3
     * and mat[1..3] with 3, 2, 1. It does no indexing by work-item id, so
     * every work-item that runs it writes the same values.
     */
    __kernel void test(__global float *mat,
                       __global float *rebin,
                       __global int *a)
    {
        /* Integer buffer */
        a[2] = 1;
        a[1] = 2;
        a[0] = 3;

        /* First float buffer */
        rebin[3] = 3.0f;
        rebin[2] = 2.0f;
        rebin[1] = 1.0f;

        /* Second float buffer */
        mat[3] = 1.0f;
        mat[2] = 2.0f;
        mat[1] = 3.0f;
    }

我的机器的控制台输出:

Using device: GeForce GTX 780
Global memory size: 3221225472
Max. allocation: 805306368

Allocation size: 376832000
Total memory to be allocated: 753664012
Error: -6
Couldn't create buffer 2: No error

Process returned 1 (0x1)   execution time : 0.435 s
Press any key to continue.

0 个答案:

没有答案