我使用的平台是 AMD Radeon 200 系列(蓝宝石 250,GPU 为 Oland)。我在主机端创建了一个类,它有一个指针类型的数据成员;内核端也定义了同一个类。现在的问题是:如果内核端的类中包含该指针成员,构建就会失败(错误码 -11);如果把内核端的指针成员去掉(改成 int),代码就能构建成功。但在这种情况下,把该成员的值赋给一个 __global 指针后再解引用,得到的值是零。
我的主要目标是访问设备端类中的指针。
环境:AMD SDK 3.0,OpenCL C++ 版本 1.2。
任何帮助将不胜感激。
// Host-side payload that is uploaded to the device as raw bytes.
// It holds a single non-owning pointer to an int that lives in host memory.
struct A
{
    int *ptr; // non-owning; set by the caller before use
};
int main()
{
const int LIST_SIZE = 1;
int abc=20;
A *obj=new A;
obj->ptr= &abc;
printf("\nx=%d\n",*(obj->ptr));
int *A = new int[LIST_SIZE];
int *B = new int[LIST_SIZE];
cl_int z;
for(int i = 0; i < LIST_SIZE; i++) {
A[i] = i;
B[i] = LIST_SIZE - i;
}
// Get available platforms
vector<Platform> platforms;
Platform::get(&platforms);
// Select the default platform and create a context using this platform and the GPU
cl_context_properties cps[3] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties)(platforms[0])(),
0
};
Context context( CL_DEVICE_TYPE_GPU, cps,NULL,NULL,&z);
// Get a list of devices on this platform
vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
// Create a command queue and use the first device
CommandQueue queue = CommandQueue(context, devices[0],NULL,&z);
//Read source file
std::ifstream sourceFile("kernel.cl");
std::string sourceCode(
std::istreambuf_iterator<char>(sourceFile),
(std::istreambuf_iterator<char>()));
Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1));
// Make program of the source code in the context
Program program = Program(context, source,&z);
// Build program for these specific devices
z=program.build(devices,"-x clc++",NULL,NULL);
if(z!=CL_SUCCESS){
cout<<"build"<<endl;return 1;}
// Make kernel
Kernel kernel(program, "vector_add",&z);
// Create memory buffers
Buffer bufferA = Buffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int),NULL,&z);
Buffer bufferB = Buffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int),NULL,&z);
Buffer bufferC = Buffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(int),NULL,&z);
Buffer bufferD = Buffer(context, CL_MEM_READ_WRITE, sizeof(obj),NULL,&z);
// Copy lists A and B to the memory buffers
z= queue.enqueueWriteBuffer(bufferA, CL_TRUE, 0, LIST_SIZE * sizeof(int), A,NULL,NULL);
if(z!=CL_SUCCESS){
cout<<"enqueue buff A"<<endl;return 1;}
z=queue.enqueueWriteBuffer(bufferB, CL_TRUE, 0, LIST_SIZE * sizeof(int), B,NULL,NULL);
if(z!=CL_SUCCESS){
cout<<"enqueue buffB"<<endl;return 1;}
z=queue.enqueueWriteBuffer(bufferD, CL_TRUE, 0, sizeof(obj), obj,NULL,NULL);
if(z!=CL_SUCCESS){
cout<<"enqueue buffB"<<endl;return 1;}
// Set arguments to kernel
z= kernel.setArg(0, bufferA);
if(z!=CL_SUCCESS){
cout<<"kerarg A"<<endl;return 1;}
z= kernel.setArg(1, bufferB);
if(z!=CL_SUCCESS){
cout<<"kerarg buff B"<<endl;return 1;}
z= kernel.setArg(2, bufferC);
if(z!=CL_SUCCESS){
cout<<"kerarg C"<<endl;return 1;}
z= kernel.setArg(3, bufferD);
if(z!=CL_SUCCESS){
cout<<"kerarg C"<<endl;return 1;}
// Run the kernel on specific ND range
NDRange global(LIST_SIZE);
NDRange local(1);
queue.enqueueNDRangeKernel(kernel, NullRange, global, local,NULL,NULL);
// Read buffer C into a local list
int *C = new int[LIST_SIZE];
queue.enqueueReadBuffer(bufferC, CL_TRUE, 0, LIST_SIZE * sizeof(int), C,NULL,NULL);
queue.enqueueReadBuffer(bufferD, CL_TRUE, 0, sizeof(obj), obj,NULL,NULL);
for(int i = 0; i < LIST_SIZE; i ++)
std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl;
printf("\nx=%d\n",*(obj->ptr));
return 0;
}
内核代码如下:
// Kernel-side declaration of A; vector_add receives it as `__global class A *obj`.
// NOTE(review): this appears intended to mirror a host-side class whose member
// is `int *ptr`. If so, on a host with 64-bit pointers the two layouts differ
// in size (this `int` is 32-bit) - confirm against the host code.
class A
{
public:
// Pointer-typed member, disabled because it made the program build fail:
//int* ptr;
// Integer stand-in used instead of the pointer member.
int ptr;
};
// Element-wise addition kernel: c[id] = d[id] + b[id] for each work-item id.
//
// d, b : input lists in global memory
// c    : output list in global memory
// obj  : object in global memory; its `ptr` member is only printed
//
// The value in obj->ptr is an integer supplied by the host, not the address
// of a device allocation. The kernel therefore reports it but never
// dereferences it: reading through such a pointer is an invalid global-memory
// access. Data that must be read here has to arrive through its own
// __global buffer argument.
__kernel void vector_add(__global int *d,__global int *b,__global int *c,__global class A *obj)
{
    size_t id = get_global_id(0);
    c[id] = d[id] + b[id];

    // Kept for diagnostics only; printed with %p, which expects a void pointer.
    __global int *p = (__global int *)obj->ptr;
    printf("kernel p= %p obj->ptr= %d \n", (__global void *)p, obj->ptr);
}