我正在学习 OpenCL,并尝试测试各个维度(dimension),但结果不符合我的预期。这是内核代码:
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
/*
 * Record that tKernel fills in for one work-group along one dimension.
 *
 * In OpenCL C, `unsigned long` is a 64-bit unsigned integer.
 * NOTE(review): the host program declares its own copy of this struct; the
 * two layouts must agree field-for-field -- confirm the host field types are
 * also 64 bits wide and that the array length matches the local work size.
 */
typedef struct _data {
unsigned long wId; // group_id of the work-group that wrote this record
unsigned long iId[4]; // global_item_id of each work-item, indexed by its local id
} DATA;
/*
 * For each of the three NDRange dimensions, record which work-group and
 * which work-item executed.
 *
 * x, y and z receive the results for dimension 0, 1 and 2 respectively.
 * Within each buffer the record at index get_group_id(dim) gets:
 *   - wId            = the group id along that dimension
 *   - iId[local id]  = the global id along that dimension
 *
 * The local id is used directly as an index into iId, so the local work
 * size along each dimension must not exceed the length of that array.
 */
__kernel void tKernel(__global DATA *x, __global DATA *y, __global DATA *z) {
    /* Output buffer for dimension 0, 1 and 2, in that order. */
    __global DATA *outputs[3] = {x, y, z};

    for (uint dim = 0; dim < 3; ++dim) {
        const int groupId = get_group_id(dim);
        const int localId = get_local_id(dim);
        const int globalId = get_global_id(dim);

        /* One record per work-group along this dimension. */
        __global DATA *record = outputs[dim] + groupId;
        record->wId = groupId;
        record->iId[localId] = globalId;
    }
}
这是我的主机(host)端代码:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define GLOBAL_ITEM_SIZE (16)
#define LOCAL_ITEM_SIZE (4)
#define MAX_SOURCE_SIZE (0x100000)
/*
 * Host-side mirror of the record the kernel writes; arrays of DATA are read
 * back from the device buffers and printed by main().
 *
 * NOTE(review): the kernel source declares the same struct with
 * `unsigned long` fields, which are 64-bit in OpenCL C. On hosts where
 * `unsigned long` is 32-bit the two layouts would differ -- confirm the
 * target ABI, or consider cl_ulong together with matching printf formats.
 */
typedef struct _data {
unsigned long wId; /* printed as group_id */
unsigned long iId[LOCAL_ITEM_SIZE]; /* printed as work_item_ids, one slot per local id */
} DATA;
int main()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem xMobj = NULL;
cl_mem yMobj = NULL;
cl_mem zMobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t group_size = GLOBAL_ITEM_SIZE / LOCAL_ITEM_SIZE;
DATA x[group_size];
DATA y[group_size];
DATA z[group_size];
FILE *fp;
const char fileName[] = "./kernel.cl";
size_t source_size;
char *source_str;
/* Load kernel source file */
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(EXIT_FAILURE);
}
source_str = (char *)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
/* Get Platform/Device Information */
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
assert(ret == CL_SUCCESS);
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
assert(ret == CL_SUCCESS);
/* Create OpenCL Context */
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
/* Create command queue */
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
/* Create Buffer Objects */
xMobj = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(x), NULL, &ret);
yMobj = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(y), NULL, &ret);
zMobj = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(z), NULL, &ret);
/* Create kernel program from source file */
program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
assert(ret == CL_SUCCESS);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (ret != CL_SUCCESS) {
printf("\nFail to build the program\n");
char buffer[10240];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
printf("%s\n", buffer);
exit(EXIT_FAILURE);
}
/* Create data parallel OpenCL kernel */
kernel = clCreateKernel(program, "tKernel", &ret);
assert(ret == CL_SUCCESS);
/* Set OpenCL kernel arguments */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&xMobj);
assert(ret == CL_SUCCESS);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&yMobj);
assert(ret == CL_SUCCESS);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&zMobj);
assert(ret == CL_SUCCESS);
size_t global_item_size[3] = {GLOBAL_ITEM_SIZE, GLOBAL_ITEM_SIZE, 1};
size_t local_item_size[3] = {LOCAL_ITEM_SIZE, LOCAL_ITEM_SIZE, 1};
/* Execute OpenCL kernel as data parallel */
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
global_item_size, local_item_size, 0, NULL, NULL);
if (ret == CL_INVALID_WORK_GROUP_SIZE) {
printf("Invalid work group size: error when compute group size: %d/%d", GLOBAL_ITEM_SIZE, LOCAL_ITEM_SIZE);
exit(EXIT_FAILURE);
}
/* Transfer result to host */
ret = clEnqueueReadBuffer(command_queue, xMobj, CL_TRUE, 0, sizeof(x), x, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
ret = clEnqueueReadBuffer(command_queue, yMobj, CL_TRUE, 0, sizeof(y), y, 0, NULL, NULL);
printf("%d\n", ret);
assert(ret == CL_SUCCESS);
ret = clEnqueueReadBuffer(command_queue, zMobj, CL_TRUE, 0, sizeof(z), z, 0, NULL, NULL);
assert(ret == CL_SUCCESS);
/* Display Results */
int i;
int j;
printf("X dimension:\n");
for (i = 0; i < group_size; i++) {
printf("%d: -> group_id %lu ~> work_item_ids: ", i, x[i].wId);
for (j = 0; j < LOCAL_ITEM_SIZE; j++)
printf("%2lu, ", x[i].iId[j]);
printf("\n");
}
printf("\n");
printf("Y dimension:\n");
for (i = 0; i < group_size; i++) {
printf("%d: -> group_id %lu ~> work_item_ids: ", i, y[i].wId);
for (j = 0; j < LOCAL_ITEM_SIZE; j++)
printf("%2lu, ", y[i].iId[j]);
printf("\n");
}
printf("\n");
printf("Z dimension:\n");
for (i = 0; i < group_size; i++) {
printf("%d: -> group_id %lu ~> work_item_ids: ", i, z[i].wId);
for (j = 0; j < LOCAL_ITEM_SIZE; j++)
printf("%2lu, ", z[i].iId[j]);
printf("\n");
}
/* Finalization */
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(xMobj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(source_str);
return 0;
}
我在Mac OS X 10.6上构建代码。输出是:
X dimension:
0: -> group_id 0 ~> work_item_ids: 0, 1, 2, 3,
1: -> group_id 1 ~> work_item_ids: 4, 5, 6, 7,
2: -> group_id 2 ~> work_item_ids: 8, 9, 10, 11,
3: -> group_id 3 ~> work_item_ids: 12, 13, 14, 15,
Y dimension:
0: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
1: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
2: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
3: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
Z dimension:
0: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
1: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
2: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
3: -> group_id 0 ~> work_item_ids: 0, 0, 0, 0,
如您所见,Y 维度的结果为空(Z 维度理应为空,因为我没有设置它)。我尝试在内核中把 x 的值写入参数 y,得到的结果与 X 维度类似,因此内存拷贝没有问题。有人能帮帮我吗?
答案 0 :(得分:1)
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
global_item_size, local_item_size, 0, NULL, NULL);
此调用的第三个参数是NDRange空间的维数,您将其设置为1,即一维。将其设置为2。