我一直在尝试编写一个用值填充OpenCL图像的OpenCL内核。但是,我一直遇到部分纹素没有被写入的问题:只有x和y坐标相等的纹素被写入了,我似乎无法使用write_imagef()函数写入x和y坐标不相同的纹素。
我在这里写了一个精简的示例程序,希望它足够简单、易读:
#include <cassert>
#include <iostream>
#include <vector>
#include <OpenCL/OpenCL.h>
// OpenCL C source of the "set" kernel, stored as one string literal per source line.
// The whole array is passed to clCreateProgramWithSource in main().
// Each work-item reads its 2D global id, builds an all-ones float4 colour, prints the
// coordinates and the colour via printf, and writes the colour with write_imagef.
const char* clSource[] = {
"kernel void set(write_only image2d_t image)\n",
"{\n",
" int x = get_global_id(0);\n",
" int y = get_global_id(1);\n",
// NOTE(review): `float4(...)` is constructor-style syntax; OpenCL C vector literals are
// written `(float4)(...)`. Other platforms reportedly reject this form at compile time.
" float4 result = float4(1.0, 1.0, 1.0, 1.0);\n",
// NOTE(review): the cast binds tighter than the multiply, i.e. ((int)result.x)*255.
// This only prints 255 because every channel is exactly 1.0.
" printf(\"Writing dimensions %d x %d: %d, %d, %d, %d\\n\", x, y,\n",
" (int)result.x*255, (int)result.y*255, (int)result.z*255, (int)result.w*255);\n",
// NOTE(review): `int2(x, y)` has the same syntax problem and should be `(int2)(x, y)`.
// Presumably the Apple compiler evaluates `x, y` as a comma expression and splats y into
// both lanes, which would explain why only texels with x == y are written — TODO confirm.
" write_imagef(image, int2(x, y), result);\n",
"}\n",
};
int main(int argc, const char * argv[])
{
const unsigned int WIDTH = 3;
const unsigned int HEIGHT = 3;
cl_int clError;
cl_platform_id platform;
clError = clGetPlatformIDs(1, &platform, nullptr);
assert(clError == CL_SUCCESS);
cl_device_id device;
clError = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr);
assert(clError == CL_SUCCESS);
cl_context_properties properties[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
0
};
cl_context openCLContext = clCreateContext(properties, 1, &device, nullptr, nullptr, &clError);
assert(clError == CL_SUCCESS);
cl_command_queue commandQueue = clCreateCommandQueue(openCLContext, device, 0, &clError);
assert(clError == CL_SUCCESS);
cl_program program = clCreateProgramWithSource(openCLContext, sizeof(clSource) / sizeof(const char*), clSource, nullptr, &clError);
assert(clError == CL_SUCCESS);
clError = clBuildProgram(program, 1, &device, "", nullptr, nullptr);
assert(clError == CL_SUCCESS);
cl_kernel kernel = clCreateKernel(program, "set", &clError);
assert(clError == CL_SUCCESS);
cl_image_format imageFormat;
imageFormat.image_channel_data_type = CL_UNORM_INT8;
imageFormat.image_channel_order = CL_RGBA;
cl_image_desc imageDesc;
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
imageDesc.image_width = WIDTH;
imageDesc.image_height = HEIGHT;
imageDesc.image_depth = 1;
imageDesc.image_array_size = 1;
imageDesc.image_row_pitch = 0;
imageDesc.image_slice_pitch = 0;
imageDesc.num_mip_levels = 0;
imageDesc.num_samples = 0;
imageDesc.buffer = nullptr;
cl_mem clTexture = clCreateImage(openCLContext, CL_MEM_WRITE_ONLY, &imageFormat, &imageDesc, nullptr, &clError);
assert(clError == CL_SUCCESS);
clError = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clTexture);
assert(clError == CL_SUCCESS);
size_t globalWorkOffset[] = {0, 0, 0};
size_t globalWorkSize[] = {WIDTH, HEIGHT, 0};
size_t localWorkSize[] = {1, 1, 0};
cl_event event1;
clError = clEnqueueNDRangeKernel(commandQueue, kernel, 2, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, &event1);
assert(clError == CL_SUCCESS);
unsigned char* bitmap = new unsigned char[WIDTH * HEIGHT * 4];
size_t origin[] = {0, 0, 0};
size_t region[] = {WIDTH, HEIGHT, 1};
cl_event event2;
clError = clEnqueueReadImage(commandQueue, clTexture, CL_TRUE, origin, region, 0, 0, bitmap, 1, &event1, &event2);
std::cout << "============================================" << std::endl;
clError = clWaitForEvents(1, &event2);
assert(clError == CL_SUCCESS);
for (size_t i = 0; i < HEIGHT; ++i) {
for (size_t j = 0; j < WIDTH; ++j) {
std::cout << "Reading dimensions " << j << " x " << i << ": ";
std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+0]) << ", ";
std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+1]) << ", ";
std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+2]) << ", ";
std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+3]) << std::endl;
}
}
delete[] bitmap;
clError = clReleaseEvent(event1);
assert(clError == CL_SUCCESS);
clError = clReleaseEvent(event2);
assert(clError == CL_SUCCESS);
clError = clReleaseMemObject(clTexture);
assert(clError == CL_SUCCESS);
clError = clReleaseKernel(kernel);
assert(clError == CL_SUCCESS);
clError = clReleaseProgram(program);
assert(clError == CL_SUCCESS);
clError = clReleaseCommandQueue(commandQueue);
assert(clError == CL_SUCCESS);
clError = clReleaseDevice(device);
assert(clError == CL_SUCCESS);
clError = clReleaseContext(openCLContext);
assert(clError == CL_SUCCESS);
return 0;
}
以下是程序在OS X(10.9)上的输出:
Writing dimensions 0 x 0: 255, 255, 255, 255
Writing dimensions 1 x 0: 255, 255, 255, 255
Writing dimensions 2 x 0: 255, 255, 255, 255
Writing dimensions 0 x 1: 255, 255, 255, 255
Writing dimensions 1 x 1: 255, 255, 255, 255
Writing dimensions 2 x 1: 255, 255, 255, 255
Writing dimensions 0 x 2: 255, 255, 255, 255
Writing dimensions 1 x 2: 255, 255, 255, 255
Writing dimensions 2 x 2: 255, 255, 255, 255
============================================
Reading dimensions 0 x 0: 255, 255, 255, 255
Reading dimensions 1 x 0: 0, 0, 0, 0
Reading dimensions 2 x 0: 0, 0, 0, 0
Reading dimensions 0 x 1: 0, 0, 0, 0
Reading dimensions 1 x 1: 255, 255, 255, 255
Reading dimensions 2 x 1: 0, 0, 0, 0
Reading dimensions 0 x 2: 0, 0, 0, 0
Reading dimensions 1 x 2: 0, 0, 0, 0
Reading dimensions 2 x 2: 255, 255, 255, 255
Program ended with exit code: 0
我在ATI Radeon HD 5750上的效果与在NVIDIA GeForce GT 650M上的效果相同。
“OpenCL to OpenGL texture problems”和“opencl image2d_t doesn't write back values”这两个问题似乎与我的情况类似,但它们都没能帮到我。
我做错了吗?或者Mavericks驱动程序根本不支持图像写入?
答案 0 :(得分:1)
问题在于构造向量值的方式。不应写成:
typeN(a, b, ..., k)
而应写成:
(typeN)(a, b, ..., k)
前者实际上会在非Apple平台上导致编译错误,所以我实际上并不确定Apple的编译器是如何解释该代码的。
因此,对于您的内核,需要更改的两个相关行是:
float4 result = float4(1.0, 1.0, 1.0, 1.0);
...
write_imagef(image, int2(x, y), result);
应改为:
float4 result = (float4)(1.0, 1.0, 1.0, 1.0);
...
write_imagef(image, (int2)(x, y), result);
答案 1 :(得分:1)
我能够通过以下内核更改成功编译并运行您的程序:
// OpenCL C source of the corrected "set" kernel, one string literal per source line.
// Each work-item reads its 2D global id, builds an all-ones float4 colour, prints the
// coordinates and the colour scaled to 0..255, and writes the colour to the image at
// its own (x, y) position with write_imagef.
const char* clSource[] = {
"__kernel void set(write_only image2d_t image)\n",
"{\n",
" int x = get_global_id(0);\n",
" int y = get_global_id(1);\n",
// Vector literals use the cast-style form (typeN)(...); float literals carry the
// `f` suffix so no double-precision constants are involved.
" float4 result = (float4)(1.0f, 1.0f, 1.0f, 1.0f);\n",
" printf(\"Writing dimensions %d x %d: %d, %d, %d, %d\\n\", x, y,\n",
// Scale first, then truncate: `(int)result.x*255` would cast before multiplying and
// print 0 for every channel value below 1.0.
" (int)(result.x*255), (int)(result.y*255), (int)(result.z*255), (int)(result.w*255));\n",
" write_imagef(image, (int2)(x, y), result);\n",
"}\n",
};
例如,你不能写成float4(1.0, ...),而必须写成C风格类型转换的形式(float4)(1.0, ...)。我不明白为什么原来的写法在你的驱动程序下竟然能顺利通过编译。
另外,你的输出中还有一点很奇怪:这些输出看起来对应的是第23行处WIDTH和HEIGHT均为3的情况。你贴出的输出确实来自WIDTH和HEIGHT为3的那个版本吗?
不管怎样,做了上述更改之后程序可以正常工作。