在macOS上带有CL_MEM_HOST_WRITE_ONLY的clCreateBuffer不能在GPU上分配大于128 MiB的缓冲区

时间:2018-09-30 14:50:36

标签: macos opencl

在macOS上,带有CL_MEM_HOST_WRITE_ONLY标志的clCreateBuffer无法为GPU分配大于128 MiB的缓冲区,即使其大小小于CL_DEVICE_MAX_MEM_ALLOC_SIZE。在这种情况下,将返回-6(CL_OUT_OF_HOST_MEMORY)。

在我的MacBook Pro(15英寸,Retina,2013年末),macOS 10.13.6上,GT 750M(最大分配:512 MiB)和Iris Pro 5200(最大分配:384 MiB)在使用CL_MEM_HOST_WRITE_ONLY分配129 MiB的缓冲区时都会失败。而在我的Ubuntu台式机上,GTX 760(最大分配:512 MiB)可以完成这种分配。更早或更新的macOS版本可能没有此问题。

以下示例程序(test.cpp)可以演示此问题。使用参数-1调用程序将打印所有OpenCL设备及其OpenCL设备编号;使用参数1调用程序将在1号设备上运行该程序:

示例代码(test.cpp)如下;在macOS上的GT 750M上调用该程序的输出列在代码之后:

#include <cstdio>
#include <cstdlib>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

using namespace std;

/*
 * Enumerate every OpenCL device (CL_DEVICE_TYPE_ALL) on every platform and
 * select one by its global index, i.e. its position in the flattened list of
 * all devices across all platforms.
 *
 * idev selects the mode:
 *   -1    print the full device list, then terminate the process via exit(0);
 *   -2    store the total device count in *gpu_count (when gpu_count is not
 *         NULL) and return NULL;
 *   else  return the device with that global index and print its description.
 *
 * Returns NULL for idev == -2 and when no device has the requested index.
 */
cl_device_id OpenCLGetGPU(int idev, int *gpu_count)
{
    cl_device_id dev_id = NULL;
    cl_uint num_plat = 0;

    /* A failed query leaves us with zero platforms instead of garbage. */
    if (clGetPlatformIDs(0, NULL, &num_plat) != CL_SUCCESS)
        num_plat = 0;

    cl_platform_id *plats = new cl_platform_id[num_plat];
    if (num_plat > 0 && clGetPlatformIDs(num_plat, plats, NULL) != CL_SUCCESS)
        num_plat = 0;

    cl_uint idev_gb = 0;   /* global device index across all platforms */
    char plat_name[128] = "", device_name[128] = "", vend_name[128] = "";
    int count = 0;

    if (idev == -1)
        printf("OpenCL devices:\n");

    for (cl_uint i = 0; i < num_plat; i++) {
        cl_uint num_dev = 0;

        /* Reset so that a failed query prints an empty name, not stale data. */
        plat_name[0] = '\0';
        clGetPlatformInfo(plats[i], CL_PLATFORM_NAME,
                sizeof(plat_name), plat_name, NULL);

        if (clGetDeviceIDs(plats[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_dev) != CL_SUCCESS
                || num_dev == 0)
            continue;

        cl_device_id *devices = new cl_device_id[num_dev];
        if (clGetDeviceIDs(plats[i], CL_DEVICE_TYPE_ALL, num_dev, devices, NULL) != CL_SUCCESS) {
            delete[] devices;
            continue;
        }
        count += num_dev;

        for (cl_uint j = 0; j < num_dev; j++, idev_gb++) {
            device_name[0] = '\0';
            vend_name[0] = '\0';
            clGetDeviceInfo(devices[j], CL_DEVICE_NAME,
                    sizeof(device_name), device_name, NULL);
            clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR,
                    sizeof(vend_name), vend_name, NULL);
            if (idev == -1)
                printf("  %u. %s, Vendor: %s, Platform: %s.\n",
                        idev_gb, device_name, vend_name, plat_name);
            if (idev >= 0 && (cl_uint) idev == idev_gb)
                dev_id = devices[j];
        }

        delete[] devices;
    }

    delete[] plats;

    if (idev == -1)
        exit(0);

    if (idev == -2) {
        /* Callers may pass NULL when they do not need the count. */
        if (gpu_count != NULL)
            *gpu_count = count;
        return NULL;
    }

    if (dev_id == NULL) {
        fprintf(stderr, "  No OpenCL device with index %d (%d device(s) found).\n",
                idev, count);
        return NULL;
    }

    /* Re-query through the selected handle to describe the chosen device. */
    cl_platform_id plat = NULL;
    device_name[0] = vend_name[0] = plat_name[0] = '\0';
    clGetDeviceInfo(dev_id, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL);
    clGetDeviceInfo(dev_id, CL_DEVICE_VENDOR, sizeof(vend_name), vend_name, NULL);
    if (clGetDeviceInfo(dev_id, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL) == CL_SUCCESS)
        clGetPlatformInfo(plat, CL_PLATFORM_NAME, sizeof(plat_name), plat_name, NULL);
    printf("  Device: %d. %s, Vendor: %s, Platform/Driver: %s. \n",
            idev, device_name, vend_name, plat_name);

    return dev_id;
}

int main(int argc, char *argv[])
{
    size_t MiB = 1024 * 1024, max_alloc;
    int gid = 0;
    cl_int err;

    if (argc > 1)
        gid = strtol(argv[1], NULL, 10);

    cl_device_id dev_id = OpenCLGetGPU(gid, NULL);
    cl_platform_id plat;
    clGetDeviceInfo(dev_id, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
    clGetDeviceInfo(dev_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc), &max_alloc, NULL);
    printf("CL_DEVICE_MAX_MEM_ALLOC_SIZE: %g MiB\n", 1.0 * max_alloc / MiB);

    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    props[1] = (cl_context_properties) plat;
    cl_context ctx = clCreateContext(props, 1, &dev_id, NULL, NULL, &err);
    cl_command_queue q = clCreateCommandQueue(ctx, dev_id, 0, &err);

    /* Allocate memory on device */
    int sz = 128;   
    cl_mem a = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_HOST_WRITE_ONLY, sz * MiB, NULL, &err);
    printf("clCreateBuffer with CL_MEM_HOST_WRITE_ONLY, \t%d \tMiB, err = %d\n", sz, err);
    sz = 129;
    cl_mem b = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_HOST_WRITE_ONLY, sz * MiB, NULL, &err);
    printf("clCreateBuffer with CL_MEM_HOST_WRITE_ONLY, \t%d \tMiB, err = %d\n", sz, err);
    sz = 129;
    cl_mem c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sz * MiB, NULL, &err);
    printf("clCreateBuffer without CL_MEM_HOST_WRITE_ONLY, \t%d \tMiB, err = %d\n", sz, err);

    return EXIT_SUCCESS;
}

在macOS上的GT 750M上调用程序的输出:

  Device: 2. GeForce GT 750M, Vendor: NVIDIA, Platform/Driver: Apple.
CL_DEVICE_MAX_MEM_ALLOC_SIZE: 512 MiB
clCreateBuffer with CL_MEM_HOST_WRITE_ONLY, 128 MiB, err = 0
clCreateBuffer with CL_MEM_HOST_WRITE_ONLY, 129 MiB, err = -6
clCreateBuffer without CL_MEM_HOST_WRITE_ONLY, 129 MiB, err = 0

此“问题”可能更像是一个行为报告——因为该行为似乎没有违反OpenCL标准(至少针对1.2),并且macOS正在弃用OpenCL。但这是出乎意料的,因为Windows或Linux之类的平台没有此问题。这可能是由于对OpenCL 1.2的不完全支持所致,因为clinfo发出了警告:

NOTE: your OpenCL library only supports OpenCL 1.0, but some installed platforms support OpenCL 1.2. Programs using 1.2 features may crash or behave unexpectedly

另外,也有人报告了另一个OpenCL 1.2 API的类似错误(见原文链接1、2),并且仅在macOS上才出现。

0 个答案:

没有答案