我正在使用 OpenCL 并行化 LBM(格子玻尔兹曼方法),遇到了一个问题:当内核参数使用自定义数据类型时,自动生成的内核头文件不正确。我按要求在内核文件(rebound.cl)中定义了数据类型(typedef struct {...} t_speed;),但头文件中生成的 t_speed 定义(数组成员被写成了 double [5] speeds;)在语法上明显不正确,导致随后的构建失败。虽然这与其说是一个大问题,不如说只是个烦人的小麻烦,但如果能修复它,将节省很多时间!
内核文件: rebound.cl
/*
 * Double-precision support is mandatory for this kernel. Enable whichever
 * fp64 extension macro is defined (cl_khr_fp64 is checked first, then
 * cl_amd_fp64) and abort compilation when neither is available.
 */
#if defined(cl_khr_fp64)
#  pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
#  pragma OPENCL EXTENSION cl_amd_fp64 : enable
#else
#  error "Double precision floating point not supported by OpenCL implementation."
#endif
/* Number of entries in each cell's speeds array. */
#define NSPEEDS 5
/*
 * Per-cell record holding NSPEEDS double-precision values.
 * The rebound kernel in this file reads and writes entries 1..4 and never
 * touches entry 0.
 */
typedef struct {
double speeds[NSPEEDS];
} t_speed;
/*
 * rebound: one work-item per cell. For a cell flagged in `obstacles`, copy
 * speeds 1..4 from tmp_cells into cells with each opposite pair swapped
 * (1 <-> 3, 2 <-> 4). Entry 0 and all non-obstacle cells are left untouched.
 *
 *   cells      destination cell array (written)
 *   tmp_cells  source cell array (read)
 *   obstacles  per-cell flag; non-zero selects the cell for rebound
 *   count      number of cells; work-items with index >= count do nothing
 */
__kernel void rebound (__global t_speed* cells,
                       __global t_speed* tmp_cells,
                       __global const unsigned char* obstacles,
                       const unsigned short int count)
{
    const int gid = get_global_id(0);

    /* Out-of-range work-items bail out before obstacles[] is ever read. */
    if (gid >= count || !obstacles[gid]) {
        return;
    }

    __global t_speed* dst = &cells[gid];
    __global const t_speed* src = &tmp_cells[gid];

    dst->speeds[1] = src->speeds[3]; /* East -> West */
    dst->speeds[3] = src->speeds[1]; /* West -> East */
    dst->speeds[2] = src->speeds[4]; /* North -> South */
    dst->speeds[4] = src->speeds[2]; /* South -> North */
}
内核头文件: rebound.cl.h
/***** GCL Generated File *********************/
/* Automatically generated file, do not edit! */
/**********************************************/
#include <OpenCL/opencl.h>
/*
 * Generated struct that presumably mirrors the kernel-side t_speed
 * (an array of 5 doubles) -- confirm against rebound.cl.
 */
typedef struct {
/*
 * NOTE(review): `double [5] speeds;` is not a valid C member declaration;
 * the array bound must follow the member name, i.e. `double speeds[5];`.
 * This line is what makes the build fail. The banner marks this file as
 * generated ("do not edit"), so a lasting fix has to come from the header
 * generator or its input rather than from a hand edit here.
 */
double [5] speeds;
} _t_speed_unalign;
/* t_speed is the struct above with an 8-byte alignment attribute applied. */
typedef _t_speed_unalign __attribute__ ((aligned(8))) t_speed;
/*
 * Declares rebound_kernel, a block pointer (the `^` blocks syntax) that is
 * defined elsewhere. It takes an ndrange followed by four arguments named
 * after the rebound kernel's parameters: cells, tmp_cells, obstacles, count.
 */
extern void (^rebound_kernel)(const cl_ndrange *ndrange, t_speed* cells, t_speed* tmp_cells, cl_uchar* obstacles, cl_ushort count);