使用C ++中的openmp循环平铺矩阵乘法

时间:2015-06-22 12:19:23

标签: c++ parallel-processing openmp matrix-multiplication

我刚开始使用OpenMP并熟悉基础知识。 循环平铺功能在串行执行时工作得更快,但是当我尝试使用OpenMP时,它会变得很慢。

循环平铺是我从循环平铺的维基百科页面以及MIT-OCW上的视频中研究过的。

我想知道如何正确实现这一点,以及为什么我的代码无效。

#include <iostream>
#include <stdio.h>
#include <omp.h>
#include <time.h>

using namespace std;

#define SIZE 10000
#define N 100
#define S 25

int n = N;
int s = S;

double a[SIZE],b[SIZE],c[SIZE];

// Initializing the matrices 

void mat_init(double *a, double *b, int n)
{
    for(int i=0; i<n; i++)
        for(int j=0; j<n; j++)
            a[i*n + j] = 1;

    for(int i=0; i<n; i++)
        for(int j=0; j<n; j++)
            b[i*n + j] = 2;

}


void mat_multi(double *a, double *b, double *c, int n)
{
    //double start_t = omp_get_wtime();
    clock_t start=clock();

    int i,j,k;

    #pragma omp num_threads(5) for private(i,j,k)
    for( i=0; i<n; i++)
        for( j=0; j<n; j++)
            for( k=0; k<n; k++)
                c[i*n+j]+=a[i*n+k]*b[k*n+j];

    start = clock() - start;

    double ms = ((double)(start)*1000)/CLOCKS_PER_SEC;
    //double stop_t = omp_get_wtime();
    cout<<"Naive multiplication requires "<<ms<<"ms"<<endl;
}

void mat_print(double *a, int n)
{
    cout<<endl<<endl<<endl<<"************************************************************"<<endl;

    for (int i = 0; i < n; ++i)
    {
        cout<<endl;
        for (int j = 0; j < n; ++j)
        {
            /* code */
            cout<<a[i*n+j]<<" ";
        }
    }

    cout<<endl<<endl<<endl<<"************************************************************"<<endl;
}

void mat_empty(double *a, int n)
{
    for (int i = 0; i < n; ++i)
    {
        /* code */
        for (int j = 0; j < n; ++j)
        {
            /* code */
            c[i*n+j]=0;
        }
    }
}

void tiled_mat_multiply(double *a, double *b, double *c, int n)
{
    int i,j,k,i1,j1,k1,tid;

    clock_t start = clock();

    double start_t,stop_t;

    omp_set_nested(1);
    #pragma omp parallel shared(a,b,c) private(i1,j1,k1,i,j,k,tid) num_threads(omp_get_num_procs()) 
    {

        /*
        tid = omp_get_thread_num();

        if(tid == 0)
        {
            cout<<"Master thread encountered "<<endl<<endl;
            start_t = omp_get_wtime();
        }
        */  

        #pragma omp for 
        for ( i1 = 0; i1 < n; i1+=s)
            for ( j1 = 0; j1 < n; j1+=s)
                for ( k1 = 0; k1 < n; k1+=s)
                    for( i=i1; i <i1+s && i<n; i++)
                        for ( j=j1; j< j1+s && j<n; ++j)
                            for( k=k1; k< k1+s && k<n; ++k)
                                c[i*n+j]+=a[i*n+k]*b[k*n+j];
    }


    /*if(tid==0)
    {
        stop_t = omp_get_wtime();
    }*/

    start = clock() - start;
    double ms = ((double)(start)*1000)/CLOCKS_PER_SEC;

    cout<<"Tiled matrix multiplication requires "<<ms<<"ms"<<endl;

}

int main()
{
    mat_init(a,b,n);
    mat_multi(a,b,c,n);
    mat_print(c,n);
    mat_empty(c,n);
    tiled_mat_multiply(a,b,c,n);
    mat_print(c,n);
    return 0;
}

0 个答案:

没有答案