我需要使用 OpenCL 在 C++ 中将两个多项式相乘。我刚开始使用这个库,我认为我的内核函数写得有问题,因为输出结果与预期不符。有人能帮助我吗?
int main(){
//get all platforms (drivers)
std::vector<cl::Platform> all_platforms;
cl::Platform::get(&all_platforms);
if (all_platforms.size() == 0){
std::cout << " No platforms found. Check OpenCL installation!\n";
exit(1);
}
cl::Platform default_platform = all_platforms[0];
std::cout << "Using platform: " << default_platform.getInfo<CL_PLATFORM_NAME>() << "\n";
//get default device of the default platform
std::vector<cl::Device> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if (all_devices.size() == 0){
std::cout << " No devices found. Check OpenCL installation!\n";
exit(1);
}
cl::Device default_device = all_devices[0];
std::cout << "Using device: " << default_device.getInfo<CL_DEVICE_NAME>() << "\n";
cl::Context context({ default_device });
cl::Program::Sources sources;
// kernel calculates for each element C=A+B
std::string kernel_code =
"int multiply_karatsuba(int x, int y)"
"{"
" int dx = x / 1000;"
" int dy = y / 1000;"
" int mx = x % 1000;"
" int my = y % 1000;"
" int z2 = dx * dy;"
" int z0 = mx * my;"
" int z1 = (dx + mx) * (dy + my) - z2 - z0;"
" int res = z2 * 1000 * 1000 + z1 * 1000 + z0;"
"return res;"
"}"
" void kernel simple_multiply(global const int *A, global const int* B, global int* C, global const int *l){ "
" int i = get_global_id(0);"
" for (int j = 0; j < l[0]; j++){"
" C[i + j] = C[i + j] + multiply_karatsuba(A[i], B[j]);"
"}"
"} ";
/*std::string kernel_code =
" void kernel simple_multiply(global const int *A, global const int* B, global int* C, global const int *l){ "
" int i = get_global_id(0);"
" for (int j = 0; j < l[0]; j++){"
" C[i + j] = C[i + j] + A[i] * B[j];"
"}"
"} ";*/
sources.push_back({ kernel_code.c_str(), kernel_code.length() });
cl::Program program(context, sources);
if (program.build({ default_device }) != CL_SUCCESS){
std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << "\n";
exit(1);
}
// create buffers on the device
cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * 10);
cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * 10);
cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * 20);
cl::Buffer buffer_l(context, CL_MEM_READ_WRITE, sizeof(int) * 10);
int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
int l[1] = { 9 };
//create queue to which we will push commands for the device.
cl::CommandQueue queue(context, default_device);
//write arrays A and B to the device
queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * 10, A);
queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * 10, B);
queue.enqueueWriteBuffer(buffer_l, CL_TRUE, 0, sizeof(int) * 10, l);
//run the kernel
cl::Kernel kernel_add = cl::Kernel(program, "simple_multiply");
kernel_add.setArg(0, buffer_A);
kernel_add.setArg(1, buffer_B);
kernel_add.setArg(2, buffer_C);
kernel_add.setArg(3, buffer_l);
queue.enqueueNDRangeKernel(kernel_add, cl::NullRange, cl::NDRange(10), cl::NullRange);
queue.finish();
int C[20];
//read result C from the device to array C
queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * 20, C);
std::cout << " result: \n";
/*for (int i = 0; i<20; i++){
std::cout << C[i] << " ";
}*/
printPoly(C,20);
std::cout<<std::endl;
return 0;
}