我正在尝试在OpenCL中运行我的第一个代码,并且正在使用OpenCL in Action一书。这是本书样本的代码。此代码不运行,显然在设置内核参数时失败。当我运行代码时,打印输出是:“无法将缓冲区设置为内核参数”。该代码在AMD上运行良好,但是当我在NVIDIA平台上运行它时它无效。有什么想法为什么代码不能正常运行?
#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "blank.cl"
#define KERNEL_FUNC "blank"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {
cl_platform_id platform;
cl_device_id dev;
int err;
/* Identify a platform */
err = clGetPlatformIDs(1, &platform, NULL);
if (err < 0) {
perror("Couldn't identify a platform");
exit(1);
}
/* Access a device */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
if (err == CL_DEVICE_NOT_FOUND) {
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
}
if (err < 0) {
perror("Couldn't access any devices");
exit(1);
}
return dev;
}
/*
 * Create program from a file and compile it.
 *
 * ctx      - context the program is created in
 * dev      - device whose build log is printed if compilation fails
 * filename - path of the OpenCL C source file
 *
 * Returns the built program. On any failure a diagnostic is printed and the
 * process exits with status 1.
 */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
   cl_program program;
   FILE *program_handle;
   char *program_buffer, *program_log;
   size_t program_size, log_size = 0;
   long file_size;
   cl_int err;

   /* Read program file and place content into buffer.
      Binary mode keeps the size reported by ftell() consistent with what
      fread() delivers; in text mode on Windows, CRLF translation makes
      fread() return fewer bytes than ftell() reported. */
   program_handle = fopen(filename, "rb");
   if (program_handle == NULL) {
      perror("Couldn't find the program file");
      exit(1);
   }
   if (fseek(program_handle, 0, SEEK_END) != 0 ||
         (file_size = ftell(program_handle)) < 0) {
      perror("Couldn't determine the size of the program file");
      fclose(program_handle);
      exit(1);
   }
   rewind(program_handle);

   program_buffer = (char*)malloc((size_t)file_size + 1);
   if (program_buffer == NULL) {
      perror("Couldn't allocate memory for the program source");
      fclose(program_handle);
      exit(1);
   }

   /* Use the number of bytes actually read as the source length so that no
      uninitialised bytes are ever handed to the compiler. */
   program_size = fread(program_buffer, sizeof(char), (size_t)file_size,
         program_handle);
   if (ferror(program_handle)) {
      perror("Couldn't read the program file");
      free(program_buffer);
      fclose(program_handle);
      exit(1);
   }
   program_buffer[program_size] = '\0';
   fclose(program_handle);

   /* Create program from file */
   program = clCreateProgramWithSource(ctx, 1,
         (const char**)&program_buffer, &program_size, &err);
   if (err != CL_SUCCESS) {
      /* OpenCL does not set errno; report its own status code. */
      fprintf(stderr, "Couldn't create the program (OpenCL error %d)\n",
            (int)err);
      free(program_buffer);
      exit(1);
   }
   free(program_buffer);

   /* Build program */
   err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
   if (err != CL_SUCCESS) {
      fprintf(stderr, "Couldn't build the program (OpenCL error %d)\n",
            (int)err);

      /* Find size of log and print to std output */
      if (clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
            0, NULL, &log_size) == CL_SUCCESS) {
         program_log = (char*)malloc(log_size + 1);
         if (program_log != NULL) {
            program_log[log_size] = '\0';
            if (clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                  log_size, program_log, NULL) == CL_SUCCESS) {
               printf("%s\n", program_log);
            }
            free(program_log);
         }
      }
      exit(1);
   }

   return program;
}
int main() {
/* OpenCL data structures */
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kernel;
cl_int i, j, err;
/* Data and buffers */
float data_one[100], data_two[100], result_array[100];
cl_mem buffer_one, buffer_two;
void* mapped_memory;
/* Initialize arrays */
for (i = 0; i<100; i++) {
data_one[i] = 1.0f*i;
data_two[i] = -1.0f*i;
result_array[i] = 0.0f;
}
/* Create a device and context */
device = create_device();
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err < 0) {
perror("Couldn't create a context");
exit(1);
}
/* Build the program and create the kernel */
program = build_program(context, device, PROGRAM_FILE);
kernel = clCreateKernel(program, KERNEL_FUNC, &err);
if (err < 0) {
perror("Couldn't create a kernel");
exit(1);
};
/* Create buffers */
buffer_one = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(data_one), data_one, &err);
if (err < 0) {
perror("Couldn't create a buffer object");
exit(1);
}
buffer_two = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(data_two), data_two, NULL);
/* Set buffers as arguments to the kernel */
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer_one);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer_two);
if (err < 0) {
perror("Couldn't set the buffer as the kernel argument");
exit(1);
}
/* Create a command queue */
queue = clCreateCommandQueue(context, device, 0, &err);
if (err < 0) {
perror("Couldn't create a command queue");
exit(1);
};
/* Enqueue kernel */
err = clEnqueueTask(queue, kernel, 0, NULL, NULL);
if (err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
/* Enqueue command to copy buffer one to buffer two */
err = clEnqueueCopyBuffer(queue, buffer_one, buffer_two, 0, 0,
sizeof(data_one), 0, NULL, NULL);
if (err < 0) {
perror("Couldn't perform the buffer copy");
exit(1);
}
/* Enqueue command to map buffer two to host memory */
mapped_memory = clEnqueueMapBuffer(queue, buffer_two, CL_TRUE,
CL_MAP_READ, 0, sizeof(data_two), 0, NULL, NULL, &err);
if (err < 0) {
perror("Couldn't map the buffer to host memory");
exit(1);
}
/* Transfer memory and unmap the buffer */
memcpy(result_array, mapped_memory, sizeof(data_two));
err = clEnqueueUnmapMemObject(queue, buffer_two, mapped_memory,
0, NULL, NULL);
if (err < 0) {
perror("Couldn't unmap the buffer");
exit(1);
}
/* Display updated buffer */
for (i = 0; i<10; i++) {
for (j = 0; j<10; j++) {
printf("%6.1f", result_array[j + i * 10]);
}
printf("\n");
}
/* Deallocate resources */
clReleaseMemObject(buffer_one);
clReleaseMemObject(buffer_two);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
这里是一个单独的blank.cl文件中的内核:
/* Empty kernel: accepts two __global float buffers and performs no work on them. */
__kernel void blank(__global float *a, __global float *b) {
}
答案 0 :(得分:1)
我怀疑你是在Windows上运行它。这段代码没有考虑Windows的行尾(EOL),它由2个字符组成:'\r\n'。从文件中读取内核后,最后一个字符是'\r',而OpenCL编译器无法处理它。
有2个选项:您可以将blank.cl文件转换为unix格式,或者在 program_buffer[program_size] = '\0'; 之后添加 program_buffer[program_size-1] = '\0';。
答案 1 :(得分:0)
我刚才意识到问题是因为单独的* .cl文件中的内核声明错误。代码工作正常。我刚才有错误的内核!