Question

我需要在 C++ 中使用 OpenCL 实现两个多项式的乘法。我刚开始接触这个库，我认为我的内核函数写得有问题，因为输出结果与预期不符。有人能帮我看看吗？

int main(){
    //get all platforms (drivers)
    std::vector<cl::Platform> all_platforms;
    cl::Platform::get(&all_platforms);
    if (all_platforms.size() == 0){
        std::cout << " No platforms found. Check OpenCL installation!\n";
        exit(1);
    }
    cl::Platform default_platform = all_platforms[0];
    std::cout << "Using platform: " << default_platform.getInfo<CL_PLATFORM_NAME>() << "\n";

    //get default device of the default platform
    std::vector<cl::Device> all_devices;
    default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
    if (all_devices.size() == 0){
        std::cout << " No devices found. Check OpenCL installation!\n";
        exit(1);
    }
    cl::Device default_device = all_devices[0];
    std::cout << "Using device: " << default_device.getInfo<CL_DEVICE_NAME>() << "\n";


    cl::Context context({ default_device });

    cl::Program::Sources sources;

    // kernel calculates for each element C=A+B
    std::string kernel_code =
        "int multiply_karatsuba(int x, int y)"
        "{"
        "   int dx = x / 1000;"
        "   int dy = y / 1000;"
        "   int mx = x % 1000;"
        "   int my = y % 1000;"

        "   int z2 = dx * dy;"
        "   int z0 = mx * my;"
        "   int z1 = (dx + mx) * (dy + my) - z2 - z0;"
        "   int res = z2 * 1000 * 1000 + z1 * 1000 + z0;"
        "return res;"
        "}"

        " void kernel simple_multiply(global const int *A, global const int* B, global int* C, global const int *l){       "
        "       int i = get_global_id(0);"
        "       for (int j = 0; j < l[0]; j++){"
        "           C[i + j] = C[i + j] + multiply_karatsuba(A[i], B[j]);"
                "}"
        "} ";

    /*std::string kernel_code =
        " void kernel simple_multiply(global const int *A, global const int* B, global int* C, global const int *l){       "
        "       int i = get_global_id(0);"
        "       for (int j = 0; j < l[0]; j++){"
        "           C[i + j] = C[i + j] + A[i] * B[j];"
        "}"
        "} ";*/

    sources.push_back({ kernel_code.c_str(), kernel_code.length() });

    cl::Program program(context, sources);
    if (program.build({ default_device }) != CL_SUCCESS){
        std::cout << " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << "\n";
        exit(1);
    }


    // create buffers on the device
    cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * 10);
    cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * 10);
    cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * 20);  
    cl::Buffer buffer_l(context, CL_MEM_READ_WRITE, sizeof(int) * 10);

    int A[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    int B[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }; 
    int l[1] = { 9 };


    //create queue to which we will push commands for the device.
    cl::CommandQueue queue(context, default_device);

    //write arrays A and B to the device
    queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * 10, A);
    queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * 10, B);
    queue.enqueueWriteBuffer(buffer_l, CL_TRUE, 0, sizeof(int) * 10, l);


    //run the kernel    

    cl::Kernel kernel_add = cl::Kernel(program, "simple_multiply");
    kernel_add.setArg(0, buffer_A);
    kernel_add.setArg(1, buffer_B);
    kernel_add.setArg(2, buffer_C);     
    kernel_add.setArg(3, buffer_l);
    queue.enqueueNDRangeKernel(kernel_add, cl::NullRange, cl::NDRange(10), cl::NullRange);
    queue.finish();

    int C[20];


    //read result C from the device to array C
    queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * 20, C);

    std::cout << " result: \n";
    /*for (int i = 0; i<20; i++){
        std::cout << C[i] << " ";
    }*/
    printPoly(C,20);
    std::cout<<std::endl;


    return 0;
}

C++ OpenCL 多项式乘法

0 个答案: