我是OpenCl的新手。我正在尝试在OpenCl中模拟向量。在这方面,我有以下结构:
#define VECTOR_INIT_CAPACITY 126
typedef struct {
int capacity;
int total;
__local void* items[VECTOR_INIT_CAPACITY];
} vector;
void vector_init( vector *v){
v->capacity = VECTOR_INIT_CAPACITY;
v->total = 0;
}
int vector_total(vector *v) {
return v->total;
}
void vector_add( vector *v, __local void* item) {
v->items[v->total] = item;
v->total++;
}
__local void* vector_get( vector *v, int index) {
if (index >= 0 && index < v->total)
return (v->items[index]);
return NULL;
}
关于结构对齐和OpenCl限制,是否可以为vector提供这样的结构。我问这个问题,因为当我将一些项插入到矢量对象中时,我的代码崩溃了。
例如,代码崩溃了以下内核:
__kernel void packet_routing(__global byte* heap_, __global uint* next, __global byte* headers,__global int* packet_begining_index,
__global int* packet_length,__global uint* test_result){
int gid = get_global_id(0);
__local byte local_heap[50000];
__local Heap heap ;
heap.ptr = local_heap;
heap.next = 0;//gid*MAX_HEAP_SIZE_FOR_EACH_KERNEL;
vector layers;
vector_init(&layers);
for ( int i = 0 ; i < 120; i ++){
__local int *x = malloc_(&heap,sizeof(int));
*x = i;
vector_add(&layers,(__local void*)x);
}
__local int *z = (__local int*) vector_get(&layers,0);
test_result[gid] = *z;
}
我还需要说,由于我正在研究的项目,我必须按照以下方式模拟OpenCl中的malloc函数,它就像每个GPU内核的本地堆一样:
typedef struct /*__attribute__((__packed__))*/{
__local
byte* ptr;
int next;
}Heap;
__local void* malloc_(__local Heap *heap, int size) {
uint old = heap->next;
heap->next = heap->next + size;
return heap->ptr + old;
}