我正在尝试使用clCreateProgramWithBinary获取基本程序。这是我知道如何使用它而不是“真正的”应用程序。
我看到其中一个参数是二进制文件列表。我究竟要如何创建一个二进制文件来测试?我有一些测试代码,它从源代码创建程序,构建和排队。是否在此过程中的某个时刻创建了二进制文件,我可以将其输入clCreateProgramWithBinary?
这是我的一些代码,只是为了了解我的整体流程。为简单起见,我省略了注释和错误检查。
program = clCreateProgramWithSource(clctx, 1, &dumbkernelsource, NULL, &errcode);
errcode = clBuildProgram(program, env->num_devices, env->device, NULL, NULL, NULL);
mykernel = clCreateKernel(program, "flops", &errcode);
errcode = clGetKernelWorkGroupInfo(mykernel, *(env->device), CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
global = num_workgroups * local;
errcode = clEnqueueNDRangeKernel(commands, mykernel, 1, NULL, &global, &local, 0, NULL, NULL);
答案 0 :(得分:3)
编译程序后,可以使用clGetProgramInfo获取其二进制代码,然后将其保存到文件中。
示例代码(未尝试编译,但应该是这些内容):
program = clCreateProgramWithSource(clctx, 1, &dumbkernelsource, NULL, &errcode);
errcode = clBuildProgram(program, env->num_devices, env->device, NULL, NULL, NULL);
int number_of_binaries;
char **binary;
int *binary_sizes;
errcode = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, NULL, 0, &number_of_binaries);
binary_sizes = new int[number_of_binaries];
binary = new char*[number_of_binaries];
errcode = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, binary_sizes, number_of_binaries*sizeof(int), &number_of_binaries);
for (int i = 0; i < number_of_binaries; ++i) binary[i] = new char[binary_sizes[i]];
errcode = clGetProgramInfo(program, CL_PROGRAM_BINARIES, binary, number_of_binaries*sizeof(char*), &number_of_binaries);
答案 1 :(得分:1)
最小可运行示例
从CL C源编译嵌入式矢量增量着色器,将二进制文件保存到a.bin
,加载二进制着色器并运行它:
./a.out
断言在程序结束时完成。
忽略CL C着色器,从a.bin
加载二进制文件并运行它:
./a.out 0
编译并运行:
gcc -ggdb3 -std=c99 -Wall -Wextra a.c -lOpenCL && ./a.out
在Ubuntu 16.10中测试,NVIDIA NVS5400,驱动程序375.39。
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <CL/cl.h>
const char *source =
"__kernel void kmain(__global int *out) {\n"
" out[get_global_id(0)]++;\n"
"}\n"
;
#define BIN_PATH "a.bin"
char* common_read_file(const char *path, long *length_out) {
char *buffer;
FILE *f;
long length;
f = fopen(path, "r");
assert(NULL != f);
fseek(f, 0, SEEK_END);
length = ftell(f);
fseek(f, 0, SEEK_SET);
buffer = malloc(length);
if (fread(buffer, 1, length, f) < (size_t)length) {
return NULL;
}
fclose(f);
if (NULL != length_out) {
*length_out = length;
}
return buffer;
}
int main(int argc, char **argv) {
FILE *f;
char *binary;
cl_command_queue command_queue;
cl_context context;
cl_device_id device;
cl_int input[] = {1, 2}, errcode_ret, binary_status;
cl_kernel kernel, binary_kernel;
cl_mem buffer;
cl_platform_id platform;
cl_program program, binary_program;
const size_t global_work_size = sizeof(input) / sizeof(input[0]);
int use_cache;
long lenght;
size_t binary_size;
if (argc > 1) {
use_cache = !strcmp(argv[1], "0");
} else {
use_cache = 0;
}
/* Get the binary, and create a kernel with it. */
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
command_queue = clCreateCommandQueue(context, device, 0, NULL);
if (use_cache) {
binary = common_read_file(BIN_PATH, &lenght);
binary_size = lenght;
} else {
program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
clBuildProgram(program, 1, &device, "", NULL, NULL);
kernel = clCreateKernel(program, "kmain", NULL);
clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_size, NULL);
binary = malloc(binary_size);
clGetProgramInfo(program, CL_PROGRAM_BINARIES, binary_size, &binary, NULL);
f = fopen(BIN_PATH, "w");
fwrite(binary, binary_size, 1, f);
fclose(f);
}
binary_program = clCreateProgramWithBinary(
context, 1, &device, &binary_size,
(const unsigned char **)&binary, &binary_status, &errcode_ret
);
free(binary);
clBuildProgram(binary_program, 1, &device, NULL, NULL, NULL);
binary_kernel = clCreateKernel(binary_program, "kmain", &errcode_ret);
/* Run the kernel created from the binary. */
buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
clSetKernelArg(binary_kernel, 0, sizeof(buffer), &buffer);
clEnqueueNDRangeKernel(command_queue, binary_kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
clFlush(command_queue);
clFinish(command_queue);
clEnqueueReadBuffer(command_queue, buffer, CL_TRUE, 0, sizeof(input), input, 0, NULL, NULL);
/* Assertions. */
assert(input[0] == 2);
assert(input[1] == 3);
/* Cleanup. */
clReleaseMemObject(buffer);
clReleaseKernel(kernel);
clReleaseKernel(binary_kernel);
clReleaseProgram(program);
clReleaseProgram(binary_program);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
return EXIT_SUCCESS;
}
我强烈推荐cat a.bin
,其中包含用于此实现的人类可读(和可编辑)PTX程序集。
答案 2 :(得分:0)
正式的OpenCL编程指南书有一个很好的例子。还有一个Google代码项目opencl-book-samples,其中包含了本书中的代码。您要查找的示例是here。