Question

我试图用 C 语言实现二维（2D）卷积。我有两个实现：第一个是顺序（串行）实现，第二个是使用 OpenMP 的并行实现。问题是并行代码的运行时间总是比顺序代码的运行时间更长。原始算法是在 C++ 中使用 std::vector 实现的，并且工作正常；现在我尝试在 C 中使用指针和动态内存分配重新实现它。

以下是算法中采取的步骤：

1-从单独的文件中读取input_matrix和内核。

2-计算卷积

3-将结果写入文件。

我为 input_matrix、kernel 和 output_matrix 使用动态内存分配。矩阵边界按环绕方式处理：首行与末行、首列与末列被视为相邻；并假设内核的行数和列数均为奇数。

这是顺序卷积：

/* Sequential 2D convolution with wrap-around borders, timed with clock(). */
int index1, index2, a, b;

    /* Clear the output first: the kernel loops accumulate into it in place. */
    for (int z = 0; z < rows * columns; ++z)
        output_matrix[z] = 0;

    clock_t begin = clock();

    for (int x = 0; x < rows; ++x) {
        for (int y = 0; y < columns; ++y) {
            /* Flat index of the output element currently being accumulated. */
            const int out_pos = x * columns + y;

            for (int i = 0; i < krows; ++i) {
                for (int j = 0; j < kcolumns; ++j) {
                    /* Input coordinates under kernel cell (i, j) when the
                     * kernel is centred on (x, y). */
                    a = x + i - krows / 2;
                    b = y + j - kcolumns / 2;

                    /* Fold coordinates that left the matrix back in from the
                     * opposite edge. Only a single fold is applied, so this
                     * relies on the kernel half-size not exceeding the
                     * corresponding matrix dimension. */
                    index1 = (a < 0) ? (rows + a)
                           : (a >= rows) ? (a - rows)
                           : a;
                    index2 = (b < 0) ? (columns + b)
                           : (b >= columns) ? (b - columns)
                           : b;

                    output_matrix[out_pos] += input_matrix[index1 * columns + index2]
                                            * kernel_matrix[i * kcolumns + j];
                }
            }
        }
    }

    clock_t end = clock();

    /* clock() counts processor time; convert ticks to seconds for display. */
    printf("Sequential runtime = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);

以下是卷积的并行代码：

int index1, index2, a, b,x,y;

for(int z = 0; z<rows*columns;++z)
    *(output_matrix + z) = 0;

clock_t begin = clock();
#pragma omp parallel
# pragma omp for private(x,y,a, b, index1, index2) 
for(int z=0;z<rows*columns;++z){
    x=z/columns;
    y=z%columns;
    for(int i=0;i<krows;++i){
        for(int j=0;j<kcolumns;++j){
            a=x+i-krows/2;  
            b=y+j-kcolumns/2;                   
            if(a<0)
                index1=rows+a;
            else if(a>rows-1)
                index1=a-rows;
            else 
                index1=a;

            if(b<0)
                index2=columns+b;
            else if(b>columns-1)
                index2=b-columns;
            else 
                index2=b;

            output_matrix[x*columns+y]+=input_matrix[index1*columns+index2]*kernel_matrix[i*kcolumns+j];

        }
    }
}

clock_t end = clock();
printf("Parallel runtime using OMP= %f\n", (double)(end - begin) / CLOCKS_PER_SEC);

使用openMP并行2D卷积

0 个答案: