在OpenCl中具有大型数组的结构

时间:2016-03-26 00:38:33

标签: c opencl c99

我是OpenCl的新手。我正在尝试在OpenCl中模拟向量。在这方面,我有以下结构:

#define VECTOR_INIT_CAPACITY 126
typedef struct {
    int capacity;
    int total;
    __local void* items[VECTOR_INIT_CAPACITY];
} vector;

void vector_init( vector *v){
    v->capacity = VECTOR_INIT_CAPACITY;
    v->total = 0;
}

int vector_total(vector *v) {
    return v->total; 
}

void vector_add( vector *v, __local  void* item) {
   v->items[v->total] = item;
   v->total++;
}

__local void* vector_get(  vector *v, int index) {
    if (index >= 0 && index < v->total)
        return (v->items[index]);
    return NULL;
}

关于结构对齐和OpenCl限制,是否可以为vector提供这样的结构。我问这个问题,因为当我将一些项插入到矢量对象中时,我的代码崩溃了。

例如,代码崩溃了以下内核:

__kernel void packet_routing(__global byte* heap_, __global uint* next,  __global byte* headers,__global int* packet_begining_index,
                         __global int* packet_length,__global uint* test_result){

    int gid = get_global_id(0);

    __local byte local_heap[50000];

    __local Heap heap ;
    heap.ptr = local_heap;
    heap.next = 0;//gid*MAX_HEAP_SIZE_FOR_EACH_KERNEL;

    vector layers;

    vector_init(&layers);
    for ( int i = 0 ; i < 120; i ++){
        __local int *x = malloc_(&heap,sizeof(int));
        *x = i;
        vector_add(&layers,(__local void*)x);
    }

    __local int *z = (__local int*) vector_get(&layers,0);
    test_result[gid] = *z;
} 

我还需要说,由于我正在研究的项目,我必须按照以下方式模拟OpenCl中的malloc函数,它就像每个GPU内核的本地堆一样:

typedef struct /*__attribute__((__packed__))*/{
    __local
    byte* ptr;
    int next;
}Heap;

__local void* malloc_(__local Heap *heap, int size) {

    uint old = heap->next;
    heap->next = heap->next + size;
    return heap->ptr + old;
 }

0 个答案:

没有答案