Question

我一直在尝试编写一个用值填充OpenCL图像的OpenCL内核。但是，我一直遇到一些没有被写入的纹素的问题。我似乎无法使用write_image（）函数来写入具有不同x和y坐标的纹素。

我在这里写了一个精简的示例程序，希望它足够简单、易读：

#include <iostream>

#include <cassert>

#include <OpenCL/OpenCL.h>

// OpenCL C source for the `set` kernel, stored as one string per source line.
// main() hands this array (and its element count) to clCreateProgramWithSource.
//
// Each work-item reads its 2D global id (x, y), prints the colour it is about
// to write, and writes the constant colour (1, 1, 1, 1) to texel (x, y) of the
// write-only image argument.
//
// NOTE: this is the version being asked about. It builds vector values with
// the constructor-like spelling `float4(...)` / `int2(...)`; the answers in
// this document identify that spelling as the cause of the missing texels and
// replace it with the cast-like form `(float4)(...)` / `(int2)(...)`.
const char* clSource[] = {
    "kernel void set(write_only image2d_t image)\n",
    "{\n",
    "    int x = get_global_id(0);\n",  // column index of this work-item
    "    int y = get_global_id(1);\n",  // row index of this work-item
    "    float4 result = float4(1.0, 1.0, 1.0, 1.0);\n",
    "    printf(\"Writing dimensions %d x %d: %d, %d, %d, %d\\n\", x, y,\n",
    "        (int)result.x*255, (int)result.y*255, (int)result.z*255, (int)result.w*255);\n",
    "    write_imagef(image, int2(x, y), result);\n",
    "}\n",
};

// Reproducer entry point.
//
// Builds the `set` kernel from clSource, runs it over a WIDTH x HEIGHT 2D
// range against a write-only RGBA / UNORM_INT8 image, reads the image back
// into host memory and prints the four channel bytes of every texel.
//
// Every OpenCL status code is checked with assert(); returns 0 when all
// calls succeed.
int main(int argc, const char * argv[])
{
    const unsigned int WIDTH = 3;
    const unsigned int HEIGHT = 3;
    cl_int clError;

    // --- Platform, device, context and command queue -----------------------
    cl_platform_id platform;
    clError = clGetPlatformIDs(1, &platform, nullptr);
    assert(clError == CL_SUCCESS);
    cl_device_id device;
    clError = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr);
    assert(clError == CL_SUCCESS);
    cl_context_properties properties[] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
        0
    };
    cl_context openCLContext = clCreateContext(properties, 1, &device, nullptr, nullptr, &clError);
    assert(clError == CL_SUCCESS);
    cl_command_queue commandQueue = clCreateCommandQueue(openCLContext, device, 0, &clError);
    assert(clError == CL_SUCCESS);

    // --- Program and kernel ------------------------------------------------
    // clSource is an array of source lines; the string count is derived from
    // the array size so it stays in sync with the array contents.
    cl_program program = clCreateProgramWithSource(openCLContext, sizeof(clSource) / sizeof(const char*), clSource, nullptr, &clError);
    assert(clError == CL_SUCCESS);
    clError = clBuildProgram(program, 1, &device, "", nullptr, nullptr);
    assert(clError == CL_SUCCESS);
    cl_kernel kernel = clCreateKernel(program, "set", &clError);
    assert(clError == CL_SUCCESS);

    // --- Destination image: WIDTH x HEIGHT, 4 channels of 8 bits each ------
    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_RGBA;
    cl_image_desc imageDesc;
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = WIDTH;
    imageDesc.image_height = HEIGHT;
    imageDesc.image_depth = 1;
    imageDesc.image_array_size = 1;
    imageDesc.image_row_pitch = 0;
    imageDesc.image_slice_pitch = 0;
    imageDesc.num_mip_levels = 0;
    imageDesc.num_samples = 0;
    imageDesc.buffer = nullptr;
    cl_mem clTexture = clCreateImage(openCLContext, CL_MEM_WRITE_ONLY, &imageFormat, &imageDesc, nullptr, &clError);
    assert(clError == CL_SUCCESS);
    clError = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clTexture);
    assert(clError == CL_SUCCESS);

    // --- Launch: one work-item per texel, work-groups of a single item -----
    // Only a 2D range is enqueued (work_dim = 2 in the call below).
    size_t globalWorkOffset[] = {0, 0, 0};
    size_t globalWorkSize[] = {WIDTH, HEIGHT, 0};
    size_t localWorkSize[] = {1, 1, 0};
    cl_event event1;
    clError = clEnqueueNDRangeKernel(commandQueue, kernel, 2, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, &event1);
    assert(clError == CL_SUCCESS);

    // --- Read the image back: 4 bytes per texel, rows tightly packed -------
    unsigned char* bitmap = new unsigned char[WIDTH * HEIGHT * 4];
    size_t origin[] = {0, 0, 0};
    size_t region[] = {WIDTH, HEIGHT, 1};
    cl_event event2;
    // Blocking read (CL_TRUE) that is ordered after the kernel via event1.
    clError = clEnqueueReadImage(commandQueue, clTexture, CL_TRUE, origin, region, 0, 0, bitmap, 1, &event1, &event2);
    // Check the read status before event2 or bitmap are used: if the read was
    // rejected, neither of them has been given valid contents.
    assert(clError == CL_SUCCESS);

    std::cout << "============================================" << std::endl;
    clError = clWaitForEvents(1, &event2);
    assert(clError == CL_SUCCESS);

    // Print texels row by row; i is the row (y), j is the column (x).
    for (size_t i = 0; i < HEIGHT; ++i) {
        for (size_t j = 0; j < WIDTH; ++j) {
            std::cout << "Reading dimensions " << j << " x " << i << ": ";
            std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+0]) << ", ";
            std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+1]) << ", ";
            std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+2]) << ", ";
            std::cout << static_cast<int>(bitmap[4*(i*WIDTH+j)+3]) << std::endl;
        }
    }
    delete[] bitmap;

    // --- Release OpenCL objects --------------------------------------------
    clError = clReleaseEvent(event1);
    assert(clError == CL_SUCCESS);
    clError = clReleaseEvent(event2);
    assert(clError == CL_SUCCESS);
    clError = clReleaseMemObject(clTexture);
    assert(clError == CL_SUCCESS);
    clError = clReleaseKernel(kernel);
    assert(clError == CL_SUCCESS);
    clError = clReleaseProgram(program);
    assert(clError == CL_SUCCESS);
    clError = clReleaseCommandQueue(commandQueue);
    assert(clError == CL_SUCCESS);
    clError = clReleaseDevice(device);
    assert(clError == CL_SUCCESS);
    clError = clReleaseContext(openCLContext);
    assert(clError == CL_SUCCESS);
    return 0;
}

以上程序在 OS X (10.9) 上的输出如下：

Writing dimensions 0 x 0: 255, 255, 255, 255
Writing dimensions 1 x 0: 255, 255, 255, 255
Writing dimensions 2 x 0: 255, 255, 255, 255
Writing dimensions 0 x 1: 255, 255, 255, 255
Writing dimensions 1 x 1: 255, 255, 255, 255
Writing dimensions 2 x 1: 255, 255, 255, 255
Writing dimensions 0 x 2: 255, 255, 255, 255
Writing dimensions 1 x 2: 255, 255, 255, 255
Writing dimensions 2 x 2: 255, 255, 255, 255
============================================
Reading dimensions 0 x 0: 255, 255, 255, 255
Reading dimensions 1 x 0: 0, 0, 0, 0
Reading dimensions 2 x 0: 0, 0, 0, 0
Reading dimensions 0 x 1: 0, 0, 0, 0
Reading dimensions 1 x 1: 255, 255, 255, 255
Reading dimensions 2 x 1: 0, 0, 0, 0
Reading dimensions 0 x 2: 0, 0, 0, 0
Reading dimensions 1 x 2: 0, 0, 0, 0
Reading dimensions 2 x 2: 255, 255, 255, 255
Program ended with exit code: 0

我在ATI Radeon HD 5750上的效果与在NVIDIA GeForce GT 650M上的效果相同。

“OpenCL to OpenGL texture problems”和“opencl image2d_t doesn't write back values”这两个问题似乎与此类似，但它们都没能帮到我。

我做错了吗？或者Mavericks驱动程序根本不支持图像写入？

Answer 1

问题在于构造向量值的方式。不应该写成：

typeN(a, b, ..., k)

你应该这样做：

(typeN)(a, b, ..., k)

前者实际上会在非Apple平台上导致编译错误，所以我实际上并不确定Apple的编译器是如何解释该代码的。

因此，对于您的内核，需要更改的两个相关行是：

float4 result = float4(1.0, 1.0, 1.0, 1.0);
...
write_imagef(image, int2(x, y), result);

现在应该成为：

float4 result = (float4)(1.0, 1.0, 1.0, 1.0);
...
write_imagef(image, (int2)(x, y), result);

Answer 2

我能够通过以下内核更改成功编译并运行您的程序：

// Corrected OpenCL C source for the `set` kernel, one string per source line.
//
// Differences from the version in the question: the kernel is declared with
// the `__kernel` qualifier, and vector values are built with the cast-like
// form `(float4)(...)` / `(int2)(...)` instead of `float4(...)` / `int2(...)`.
//
// Each work-item reads its 2D global id (x, y), prints the colour it is about
// to write, and writes the constant colour (1, 1, 1, 1) to texel (x, y).
const char* clSource[] = {
    "__kernel void set(write_only image2d_t image)\n",
    "{\n",
    "    int x = get_global_id(0);\n",  // column index of this work-item
    "    int y = get_global_id(1);\n",  // row index of this work-item
    "    float4 result = (float4)(1.0, 1.0, 1.0, 1.0);\n",
    "    printf(\"Writing dimensions %d x %d: %d, %d, %d, %d\\n\", x, y,\n",
    "        (int)result.x*255, (int)result.y*255, (int)result.z*255, (int)result.w*255);\n",
    "    write_imagef(image, (int2)(x, y), result);\n",
    "}\n",
};

举例来说，你不能写 float4(1.0, ...)，而必须把它写成 C 风格类型转换的形式 (float4)(1.0, ...)。我不明白为什么它在你的驱动程序下居然能够顺利编译通过。

另一个很奇怪的地方是：你的输出似乎对应于第23行的WIDTH和HEIGHT为3的情况。这些输出确实来自WIDTH和HEIGHT为3的那个版本吗？

无论如何，做了上述更改之后，程序可以正常工作。

OS X 上的 OpenCL 图像写入

2 个答案: