我的项目中有以下内核:
/*
 * Writes the value 1 to every element of a 2D grid stored in a flat buffer.
 *
 * Expects a 2D NDRange. The work-item with global id (x, y) writes the
 * element at linear index x + y * get_global_size(0).
 *
 * vh: global buffer with at least get_global_size(0) * get_global_size(1)
 *     floats; it must be writable from the kernel.
 */
__kernel void zero(__global float* vh)
{
    const size_t x = get_global_id(0);
    const size_t y = get_global_id(1);
    const size_t width = get_global_size(0);

    // The index is computed in integer arithmetic. Routing it through a
    // float2 dot product loses precision once the index exceeds 2^24, and the
    // implicit size_t -> float conversions can raise compiler warnings.
    vh[y * width + x] = 1.0f;
}
下面是我在主机端启动(调用)该内核的代码:
std::vector<cl::Platform> platforms;
std::vector<cl::Device> devices;
cl::Platform::get(&platforms);
platforms.at(0).getDevices(CL_DEVICE_TYPE_GPU, &devices);
std::string GPUname;
devices.at(0).getInfo(CL_DEVICE_NAME, &GPUname);
std::cout << "Program runs on GPU: " << GPUname << std::endl;
cl::Context context(devices);
cl::CommandQueue queue(context, devices.at(0));
std::ifstream srcfile("kernels.cl");
std::string src(std::istreambuf_iterator<char>(srcfile), std::istreambuf_iterator<char>(0));
cl::Program program(context, cl::Program::Sources(1, std::make_pair(src.c_str(), src.size())));
program.build(devices, "-Werror");
cl::Kernel kzero = cl::Kernel(program, "zero");
cl::Buffer buffer(context, CL_MEM_READ_ONLY, N * N * sizeof(float));
cl::NDRange gndr(N, N), lndr(8, 8);
kzero.setArg(0, buffer);
queue.enqueueNDRangeKernel(kzero, cl::NullRange, gndr, lndr);
float data[N * N];
queue.enqueueReadBuffer(buffer, CL_TRUE, 0, N * N * sizeof(float), data);
queue.finish();
std::ofstream ofs;
ofs.open("solution.txt", std::ofstream::out);
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
ofs << data[i + N * j] << (j == N - 1 ? '\n' : ' ');
ofs.close();
std::cout << "File written";
return 0;
但是每次运行程序时,输出文件中只有 0,而按理说应该全部是 1。可能是哪里出了问题?我卡在这里了。