大家好,我的一个 OpenCL 程序出了问题。内核代码基本如下:
// Double precision is an optional feature; on OpenCL 1.0/1.1 devices it must be
// enabled explicitly before `double` may be used in a kernel.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Element-wise vector addition: C[i] = A[i] + B[i].
//   A, B : input vectors in global memory (read only)
//   C    : output vector in global memory
// There is no bounds check, so the host must launch exactly one work-item per
// element (global size == number of elements in A, B and C).
__kernel void add(__global const double* A, __global const double* B, __global double* C) {
    // get_global_id returns size_t; keep that width for indexing.
    const size_t id = get_global_id(0);
    // Debug aid only: OpenCL does not guarantee the order of printf output across
    // work-items, and the printf buffer has an implementation-defined size, so the
    // printed ids may appear out of order or incomplete even when every work-item runs.
    printf("id = %d\n", (int)id);
    C[id] = A[id] + B[id];
}
printf 是在这个非常基本的内核运行失败之后我才加进去的……由于某种原因,下面的主机代码(或上面的内核代码)表现异常:
try {
//Part 4 - memory allocation
//host - input
std::vector<double> A = workingSet; //C++11 allows this type of initialisation
std::vector<double> B = workingSet;
size_t vector_elements = A.size();//number of elements
size_t vector_size = A.size()*sizeof(double);//size in bytes
//host - output
std::vector<double> C(vector_elements);
//device - buffers
cl::Buffer buffer_A(kernel.context, CL_MEM_READ_WRITE, vector_size);
cl::Buffer buffer_B(kernel.context, CL_MEM_READ_WRITE, vector_size);
cl::Buffer buffer_C(kernel.context, CL_MEM_READ_WRITE, vector_size);
//Part 5 - device operations
//5.1 Copy arrays A and B to device memory
kernel.queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, vector_size, &A[0]);
kernel.queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, vector_size, &B[0]);
//5.2 Setup and execute the kernel (i.e. device code)
cl::Kernel kernel_add = cl::Kernel(kernel.program, "add");
kernel_add.setArg(0, buffer_A);
kernel_add.setArg(1, buffer_B);
kernel_add.setArg(2, buffer_C);
kernel.queue.enqueueNDRangeKernel(kernel_add, cl::NullRange, cl::NDRange(vector_elements), cl::NullRange);
//5.3 Copy the result from device to host
kernel.queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, vector_size, &C[0]);
//std::cout << "A = " << A << std::endl;
//std::cout << "B = " << B << std::endl;
std::cout << "C = " << C << std::endl;
}
catch (cl::Error err) {
std::cerr << "ERROR: " << err.what() << ", " << getErrorString(err.err()) << std::endl;
}
打印出的 global_id 似乎是从大约 14K 开始的,有时是 15K;workingSet 是一个 std::vector<double>(大约 18K 个元素)。
内核相关的对象是在别处初始化的,并通过以下代码传递给上面的主代码块:
kernel_set myKern;
//Part 2 - host operations
//2.1 Select computing devices
cl::Context context = GetContext(platform_id, device_id);
//create a queue to which we will push commands for the device
cl::CommandQueue queue(context);
//2.2 Load & build the device code
cl::Program::Sources sources;
AddSources(sources, "kernel.cl");
cl::Program program(context, sources);
program.build();
myKern.error = false;
myKern.context = context;
myKern.queue = queue;
myKern.program = program;
return myKern;
由于得不到可靠的 global_id,我不知道接下来该怎么办。