这个并行算法有什么问题?

时间:2014-12-29 13:31:07

标签: c++ c algorithm opencl

我试图在openCL中为L系统毕达哥拉斯树编写并行算法:

var:A,B; 
const: (,);
axiom:A;
rules:(B->BB),(A->B(A)A)

但我不能超过第9次迭代。第10次迭代返回无序字符串。这是我的内核:

#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_amd_printf : enable
/* Empty kernel: accepts the string length and performs no work. */
__kernel void l_system(int string_lenght)
{
}

/*
 * One rewriting step of the L-system  A -> B(A)A,  B -> BB  applied in place
 * to `sentence`. Every other character is copied unchanged.
 *
 * Parameters:
 *   string_lenght   number of valid characters in `sentence`; work-items whose
 *                   global id is >= this value only take part in the barriers.
 *   sentence        global buffer holding the input string; it is overwritten
 *                   with the expanded string.
 *   string          local scratch buffer; each work-item uses 5 consecutive
 *                   chars, so it must hold at least 5 * local_size chars.
 *   local_char_num  global int buffer; element [group] receives the number of
 *                   output characters produced by that work-group. Element
 *                   [99 + group] is also written (a copy of that count), so
 *                   the buffer must be large enough for that index.
 *
 * NOTE(review): barrier() synchronizes only the work-items of ONE work-group.
 * Two steps below rely on ordering BETWEEN work-groups, which OpenCL does not
 * provide inside a single kernel launch:
 *   1. the output offset of a group is the sum of the counters of all
 *      preceding groups, which may not be final (or even reset) yet;
 *   2. the expanded string is written back into `sentence` while other groups
 *      may not have read their input characters from it yet.
 * The result is therefore only reliable while the whole string fits into a
 * single work-group. A robust version needs a separate output buffer and a
 * count / prefix-sum / scatter sequence spread over several kernel launches;
 * that requires a different kernel interface and is not done here.
 */
__kernel void l_system_interation(int string_lenght, __global char *sentence, __local char *string, __global int *local_char_num)
{
    /* Width of one work-item's slot in `string`: the longest production,
       A -> B(A)A, yields 5 characters. */
    const int slot_width = 5;

    int local_x = (int)get_local_id(0);
    int local_size = (int)get_local_size(0);
    int x = (int)get_global_id(0);
    int group = (int)get_group_id(0);
    /* Always initialized, even for work-items past the end of the string. */
    int local_mem_index = local_x * slot_width;

    /* Phase 1: stage this work-item's input character in local memory and
       let the first work-item of the group reset the group's counter. */
    if (x < string_lenght) {
        string[local_mem_index] = sentence[x];

        if (local_x == 0) {
            atomic_xchg(&local_char_num[group], 0);
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

    /* Phase 2: expand the character inside its slot and add the number of
       produced characters to the group's counter. Slot position 2 doubles as
       a marker: 'A' there means "this slot holds the 5-char A expansion". */
    if (x < string_lenght) {
        if (string[local_mem_index] == 'A') {
            atomic_add(&local_char_num[group], 5);
            string[local_mem_index] = 'B';
            string[local_mem_index + 1] = '(';
            string[local_mem_index + 2] = 'A';
            string[local_mem_index + 3] = ')';
            string[local_mem_index + 4] = 'A';
        } else if (string[local_mem_index] == 'B') {
            atomic_add(&local_char_num[group], 2);
            string[local_mem_index + 1] = 'B';
            /* clear the marker position */
            string[local_mem_index + 2] = '0';
        } else {
            atomic_add(&local_char_num[group], 1);
            /* clear the marker position */
            string[local_mem_index + 2] = '0';
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

    /* Phase 3: the first work-item of each group writes the group's whole
       expanded substring to `sentence`. */
    if (x < string_lenght && local_x == 0) {
        int j = 0;

        /* Output offset = sum of the counters of all preceding groups. */
        if (x != 0) {
            for (int l = 1; l <= group; l++) {
                /* atomic_add(.., 0) is an atomic read. The previous
                   atomic_xchg(p, *p) loaded *p non-atomically and stored it
                   back, which could erase a concurrent atomic_add issued by
                   the group that owns the counter. */
                j += atomic_add(&local_char_num[group - l], 0);
            }
            /* Copy of this group's count at a fixed offset of 99.
               NOTE(review): looks like a debugging aid; confirm the buffer
               holds at least 99 + number_of_groups ints. */
            atomic_xchg(&local_char_num[99 + group], local_char_num[group]);
        }

        for (int i = 0; i < local_size; i++) {
            /* skip slots that lie past the end of the input string */
            if (string_lenght > (x + i)) {
                int slot = i * slot_width;

                if (string[slot + 2] != 'A') {
                    /* B rule (two chars) or a constant copied as is */
                    sentence[j++] = string[slot];
                    if (string[slot] == 'B') {
                        sentence[j++] = string[slot + 1];
                    }
                } else {
                    /* A rule: all five characters of the slot */
                    for (int k = 0; k < slot_width; k++) {
                        sentence[j++] = string[slot + k];
                    }
                }
            }
        }
    }
    barrier(CLK_GLOBAL_MEM_FENCE);
}

我认为是某处发生了溢出,但一直找不到……也可能是我在 main 函数里的主机端代码有问题:

// Host-side driver: launches kernel_iteration once per L-system iteration and
// then reads the results back from the device.
// letter_count is the input length of the current step, next_letter_count the
// expected output length of that step (taken from STRING_LENGTH_PAR).
cl_int letter_count = 0;
cl_int next_letter_count = 1;
for (int i = 0; i < iter_count; i++)
{
    //printf("%s\n", sentence_init);
    // The output length of the previous step is the input length of this one.
    letter_count = next_letter_count;
    next_letter_count = STRING_LENGTH_PAR((i + 1));

    printf("in count: %d out count: %d\n", letter_count, next_letter_count);
    // Kernel argument 0: number of input characters for this step.
    CheckOpenCLError(clSetKernelArg(kernel_iteration, 0, sizeof(cl_int), &letter_count), "clSetKernelArg: letter_count");

    // Kernel argument 2: a size with a NULL pointer, i.e. a local-memory
    // argument of (local * RULE_SIZE + 1) chars.
    // NOTE(review): arguments 1 and 3 are not set in this fragment;
    // presumably they are set once before the loop - confirm.
    CheckOpenCLError(clSetKernelArg(kernel_iteration, 2, sizeof(cl_char)* (local * RULE_SIZE + 1), NULL), "clSetKernelArg: tmp_string");


    // 1-dimensional launch with the global/local sizes defined elsewhere.
    CheckOpenCLError(clEnqueueNDRangeKernel(queue, kernel_iteration, 1, NULL, &global, &local, 0, NULL, &kernel_iteration_event), "clEnqueueNDRangeKernel: kernel_iteration");

    // Wait for the step to finish before timing it and starting the next one.
    CheckOpenCLError(clFinish(queue), "clFinish");


    // NOTE(review): kernel_iteration_event is overwritten on every iteration
    // and no clReleaseEvent is visible here - check that events are not leaked.
    kernel_computing_time += getEventTime(kernel_iteration_event);

}

// Blocking read (CL_TRUE) of the final string: next_letter_count chars.
CheckOpenCLError(clEnqueueReadBuffer(queue, sentence_dev, CL_TRUE, 0, sizeof(cl_char)* (next_letter_count), sentence_result, 0, NULL, &result_iteration_event), "clEnqueueReadBuffer: result_iteration_event");
// NOTE(review): the malloc result is not checked, and no free(p) is visible
// in this fragment.
cl_int *p = (cl_int*)malloc(sizeof(cl_int)*(STRING_LENGTH_PAR(iter_count)));
// Blocking read of STRING_LENGTH_PAR(iter_count) ints from p_dev.
// NOTE(review): this reuses result_iteration_event, overwriting the handle
// returned by the previous read; also confirm p_dev is at least this large.
CheckOpenCLError(clEnqueueReadBuffer(queue, p_dev, CL_TRUE, 0, sizeof(cl_int)* (STRING_LENGTH_PAR(iter_count)), p, 0, NULL, &result_iteration_event), "clEnqueueReadBuffer: result_iteration_event");

0 个答案:

没有答案