OpenCL Long Overflowing

时间:2015-11-04 22:13:02

标签: opencl integer-overflow


/*
 * collatz - count Collatz steps, one starting value per work-item.
 *
 * Each work-item uses its own global id as the starting value n and
 * repeatedly applies n -> n / 2 (n even) or n -> 3n + 1 (n odd) until
 * n <= 1. The number of steps taken is stored in out[id].
 *
 * in:  not read by this kernel; kept so the host-side argument layout
 *      stays unchanged.
 * out: receives one step count per work-item, indexed by global id.
 */
__kernel void collatz(__global int* in, __global int* out)
{
    uint id = get_global_id(0);
    unsigned long n = (unsigned long)id;
    uint count = 0;

    while (n > 1) {
        if (n % 2 == 0) {
            n = n / 2;
        } else {
            if (n == 1572066143) {
                /* Debug probe: print the 3n + 1 step for this one value,
                 * computed on a copy so n itself is not affected by the probe. */
                unsigned long test = n;
                printf("BEFORE - %lu\n", n);
                test = (3 * test) + 1;
                printf("AFTER  - %lu\n", test);
            }

            /* Same update for every odd n, probed or not. */
            n = (3 * n) + 1;
        }

        count = count + 1;
    }

    out[id] = count;
}



BEFORE - 1572066143
AFTER  - 421231134



/* Host-side comparison: apply the same 3n + 1 step to the same starting
 * value outside the kernel and print the value before and after. */
unsigned long test = 1572066143;
printf("BEFORE - %lu\n", test);
test = (3 * test) + 1; 
printf("AFTER  - %lu\n", test);


 BEFORE - 1572066143
 AFTER  - 4716198430


谢谢, 斯蒂芬



int _tmain(int argc, _TCHAR* argv[])
    /*Step1: Getting platforms and choose an available one.*/
    cl_uint numPlatforms;   //the NO. of platforms
    cl_platform_id platform = NULL; //the chosen platform
    cl_int  status = clGetPlatformIDs(0, NULL, &numPlatforms);

    cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms*   sizeof(cl_platform_id));
    status = clGetPlatformIDs(numPlatforms, platforms, NULL);
    platform = platforms[0];

    /*Step 2:Query the platform and choose the first GPU device if has one.*/
    cl_device_id        *devices;
    devices = (cl_device_id*)malloc(1 * sizeof(cl_device_id));
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, devices, NULL);

    /*Step 3: Create context.*/
    cl_context context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL);

    /*Step 4: Creating command queue associate with the context.*/
    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);

    /*Step 5: Create program object */
    const char *filename = "";
    std::string sourceStr;
    status = convertToString(filename, sourceStr);
    const char *source = sourceStr.c_str();
    size_t sourceSize[] = { strlen(source) };
    cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);

    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

    /*Step 7: Initial input,output for the host and create memory objects for the kernel*/
    cl_ulong max = 2000000;
    cl_ulong *numbers = NULL;
    numbers = new cl_ulong[max];
    for (int i = 1; i <= max; i++) {
        numbers[i] = i;

    int *output = (int*)malloc(sizeof(cl_ulong) * max);

    cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, max * sizeof(cl_ulong), (void *)numbers, NULL);
    cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, max * sizeof(cl_ulong), NULL, NULL);

    /*Step 8: Create kernel object */
    cl_kernel kernel = clCreateKernel(program, "collatz", NULL);

    /*Step 9: Sets Kernel arguments.*/
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);

    // Determine the size of the log
    size_t log_size;
    clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

    // Allocate memory for the log
    char *log = (char *)malloc(log_size);

    // Get the log
    clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

    // Print the log
    printf("%s\n", log);

    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer);

    /*Step 10: Running the kernel.*/
    size_t global_work_size[] = { max };
    status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);

   /*Step 11: Read the data put back to host memory.*/
   status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, max * sizeof(cl_ulong), output, 0, NULL, NULL);

return SUCCESS;


2 个答案:

答案 0 :(得分:0)




答案 1 :(得分:0)


