我尝试编写一段 OpenCL 代码来将两个向量相加:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MAX_SOURCE_SIZE (0x100000)
//24/12
//data structure platform, device, context,program, kernel, command queue
void main()
{
/////////////////////////////////////////////////////////////////////
//PLATFORM QUERY:
/////////////////////////////////////////////////////////////////////
//clGetPlatformIDs(num_entries, platforms, &num_platforms);
// two part: platform = NULL
// malloc and get platforms*
cl_uint num_platforms; //must be uint
cl_platform_id *platforms;
clGetPlatformIDs(5, NULL, &num_platforms);
printf("There are %d platforms \n", num_platforms);
platforms = (cl_platform_id*) malloc (num_platforms*sizeof(cl_platform_id));
clGetPlatformIDs(5, platforms, &num_platforms);
for(int i = 0; i < num_platforms; i++)
{
char name[40],vendor[40],version[40], profile[40],extensions[4096];
clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME, sizeof(name), &name, NULL);
clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL);
clGetPlatformInfo(platforms[i],CL_PLATFORM_VERSION, sizeof(vendor), &version, NULL);
clGetPlatformInfo(platforms[i],CL_PLATFORM_PROFILE, sizeof(vendor), &profile, NULL);
//clGetPlatformInfo(platforms[i],CL_PLATFORM_EXTENSIONS, sizeof(vendor), &extensions, NULL);
printf("Platform %d \n", i);
printf("Name %s \n", name);
printf("Vendor %s \n", vendor);
printf("Version %s \n", version);
printf("Profile %s \n", profile);
//printf("Extension %s \n", extensions);
printf("----------------------------------\n");
}
////////////////////////////////////////////////////////////////
//DEVICES QUERYING
////////////////////////////////////////////////////////////////
cl_device_id* devices;
cl_uint num_devices;
cl_device_fp_config flag ;
for(int i= 0; i< num_platforms; i++)
{
printf("Platform %d has:\n",i);
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 3, NULL, &num_devices);
devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
char name[40];
for(int j=0; j < num_devices; j++)
{
int err= clGetDeviceInfo(devices[j],CL_DEVICE_NAME,sizeof(name),name,NULL);
if (err<0)
{
//printf("Error querying devices name\n");
}
else
{
printf("Device name %s \n", name);
}
err= clGetDeviceInfo(devices[j],CL_DEVICE_NAME,sizeof(flag),&flag,NULL);
if (flag & CL_FP_DENORM)
{
printf("This device support denormalized number \n");
}
}
printf("-----------------------------------\n");
}
///////////////////////////////////////////////////////
//CONTEXT QUERYING AND CREATING
////////////////////////////////////////////////////////
//NOTE clCreateContext returns cl_context instead of errors
//REF_COUNT if very important in the future
//create context for GPU
cl_context context;
cl_uint ref_count;
cl_int err;
char name[40];
context= clCreateContext(NULL,1,&devices[0], NULL,NULL,&err);
clGetContextInfo(context,CL_CONTEXT_REFERENCE_COUNT,sizeof(ref_count), &ref_count, NULL);
printf("Original reference count is %d \n",ref_count);
/*clRetainContext(context);
clGetContextInfo(context,CL_CONTEXT_REFERENCE_COUNT,sizeof(ref_count), &ref_count, NULL);
printf("Incremented reference count is %d \n",ref_count);
clReleaseContext(context);
clGetContextInfo(context,CL_CONTEXT_REFERENCE_COUNT,sizeof(ref_count), &ref_count, NULL);
printf("Decremented reference count is %d \n",ref_count);*/
////////////////////////////////////////////////////////
//Create programme
///////////////////////////////////////////////////////
size_t program_size;
err=0;
cl_program program;
char* program_buffer;
FILE* program_handle = fopen("kernel.cl","r");
//More recommendable than source code???
program_buffer = (char*)malloc(MAX_SOURCE_SIZE);
program_size = fread( program_buffer, 1, MAX_SOURCE_SIZE, program_handle);
fclose( program_handle );
program = clCreateProgramWithSource(context,1,(const char**) &program_buffer,
(size_t*)&program_size, &err);
////////////////////////////////////////////////////////
//Build Program
///////////////////////////////////////////////////////
//const char options[] = "-cl-finite-math-only -cl-no-signed-zeros";
char* program_log;
size_t log_size;
err= clBuildProgram(program, 1 , devices, NULL, NULL, NULL);
if(err < 0) //debug , printing log
{
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
program_log = (char*) malloc(log_size+1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program,devices[0],CL_PROGRAM_BUILD_LOG,log_size,
program_log,NULL);
printf("%s\n",program_log);
free(program_log);
//exit(1);
}
///////////////////////////////////////////////////////////////////////////////////
//create kernel
///////////////////////////////////////////////////////////////////////////////////
cl_uint num_kernels;
cl_kernel kernel;
char kernel_name[40];
kernel = clCreateKernel(program,"add",&err);
if (err<0)
{
perror("could not found any kernels\n");
}
//kernels = (cl_kernel*)malloc(num_kernels*sizeof(cl_kernel));
//clCreateKernelsInProgram(program, num_kernels, kernels, NULL);
///FOR REFERNECE
//for(int i=0; i<num_kernels; i++)
//{
clGetKernelInfo(kernel,CL_KERNEL_FUNCTION_NAME,sizeof(kernel_name),kernel_name,NULL);
printf("Kernel function: %s \n",kernel_name);
//}
/////////////////////////////////////////////////////
//Create command queue
/////////////////////////////////////////////////////
cl_command_queue queue = clCreateCommandQueue(context, devices[0],CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,&err);
if (err < 0)
{
printf("Couldn't create command queue \n");
exit(1);
}
clEnqueueTask(queue, kernel, 0, NULL, NULL);//only enqueue
//////////////////////////////////////////
unsigned int n= 1000;
int* h_a;
int* h_b;
int* h_c;
cl_mem d_a;
cl_mem d_b;
cl_mem d_c;
h_a = (int*) malloc(n*sizeof(int));
h_b = (int*) malloc(n*sizeof(int));
h_c = (int*) malloc(n*sizeof(int));
for(int i=0; i< n; i++)
{
h_a[i]= 1;//sinf(i)*sinf(i);
h_b[i]= 1;//cosf(i)*cosf(i);
}
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(h_a),NULL,NULL);
d_b = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(h_a),NULL,NULL);
d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(h_a),NULL,NULL);
err = clEnqueueWriteBuffer(queue,d_a,CL_TRUE,0,sizeof(h_a),h_a,0, NULL, NULL);
err |= clEnqueueWriteBuffer(queue,d_b,CL_TRUE,0,sizeof(h_b),h_a,0, NULL, NULL);
//////set argument
err= clSetKernelArg(kernel,0,sizeof(cl_mem),&d_a);
err= clSetKernelArg(kernel,1,sizeof(cl_mem),&d_b);
err= clSetKernelArg(kernel,2,sizeof(cl_mem),&d_c);
err= clSetKernelArg(kernel,3,sizeof(unsigned int),&n);
///////////////
size_t globalsize, localsize;
localsize=64;
globalsize=ceil(n/(float)localsize)*localsize;
err= clEnqueueNDRangeKernel(queue,kernel,1, NULL,&globalsize,&localsize,0,NULL,NULL);
////////////////////////
clFinish(queue);
err=clEnqueueReadBuffer(queue, d_c,CL_TRUE, 0, sizeof(h_c), h_c, 0 , NULL, NULL);
for(int i = 0; i< n; i++)
{
printf(" h_c[%d] = %d \n", i, h_c[i]);
}
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
clReleaseKernel(kernel);
free(h_a);
free(h_b);
free(h_c);
getchar();
}
这是我的kernel.cl
/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i < n.
 *
 * a, b : input vectors (read only)
 * c    : output vector
 * n    : number of elements to process
 *
 * Each work-item handles the element at its global id in dimension 0;
 * work-items whose id is >= n do nothing.
 */
__kernel void add(__global const int *a, __global const int *b, __global int *c, const unsigned n)
{
    /* get_global_id() returns size_t; keep it unsigned so the bound check
       against n does not mix signed and unsigned values. */
    size_t id = get_global_id(0);
    if (id < n)
        c[id] = a[id] + b[id];
}
运行这段代码后,我只得到垃圾值,例如对所有 i 都有 h_c[i] = -842150451。请帮我看看问题出在哪里。谢谢!
答案 0 :(得分:1)
这个表达式不正确:
sizeof(h_a)
应该是这样的:
n * sizeof(int)
实际上 h_a 只是一个指针,所以 sizeof(h_a) 是指针本身的大小(在 32 位程序中恰好等于 sizeof(int)),因此你只为一个元素分配了空间。