Question

我正在使用MKL库实现稀疏矩阵乘法，并在各种Intel处理器（如Intel Xeon E5和Intel Xeon Phi）上进行基准测试。虽然我能够在Xeon E5上以令人满意的结果进行基准测试，但每次在Xeon Phi上运行相同的代码时，我都会在被调用"mkl_dcsrmultcsr"函数的3次迭代后获得分段错误。我无法弄清楚原因，请让我知道原因。以下是代码

    #include "stdio.h"
    #include "stdlib.h"
    #include "time.h"
    #include "omp.h"
    #include "mkl.h"
    #include "mkl_spblas.h"

    double timerval ()
    {
        struct timeval st;
        gettimeofday(&st, NULL);
        return (st.tv_sec+st.tv_usec*1e-6);
    }

    int main(){

    double *nz, *nzc;
    int *ia,*ja, *ic,*jc,*pos;
    int info=1;

    int i, j, k;
    FILE *fp1,*fp2,*fp3,*fp4;

    double avg_time = 0, s_time, e_time;
    //open file to write results
    //FILE *fp1;

    char trans = 'N';   
    int sort = 1;

    int m=4;
    int iterations;
    int request = 0;

    /* iterate the loop for input size from 2exp3 to 2exp10 */
    for (iterations=0; iterations<8; iterations++)
    {
        m *= 2; // increase the dimension of Matrix with every iteration
        int n = m; // Assuming a square matrix.
        int nzmax =m*n ;
        double dense_const = 0.05;
        int temp5, temp6,temp3,temp4;
        int density=(m*n)*(dense_const);

        //memory allocation for matrix A and B
        nz = calloc((m*n),sizeof(double));
        ia = calloc((m*n),sizeof(int));
        ja = calloc((m*n),sizeof(int));

        //memory allocation for product matrix C
        nzc =calloc((m*n),sizeof(double));
        ic = calloc((m*n),sizeof(int));
        jc = calloc((m*n),sizeof(int));
        //Configuration parameters
        k=0;
        //density of the sparse matrix to be created. Assume 5% density.
        //position array for random initialisation of positions in input matrix
        pos= calloc((m*n), sizeof(int));
        int temp,temp1;

        //  printf("the density is %d\n",density);
        //  printf("check 1:\n");

        //randomly initialise positions
        for(i=0;i<density;i++)
        {
            temp1=rand()%(m*n);
            pos[i]=temp1;   
        }
        //  printf("check 2:\n");

        //sort the 'pos' array
        for (i = 0 ; i < density; i++) 
        {
            int d = i;
            int t;

            while ( d > 0 && pos[d] < pos[d-1]) 
            {
                t = pos[d];
                pos[d]   = pos[d-1];
                pos[d-1] = t;
                d--;
            }
        }
        //printf("check 3:\n");

        // initialise with non zero elements and extract column and row ptr vector
        j=1;
        ja[0]=1;

        int p=0;
        for(i = 0; i < density; i++)
        {
            temp=pos[i];
            nz[k] = rand();
            // nz[k] = 1;

            ia[k] = temp%m;
            k++;
            p++;
            temp5= pos[i];
            temp6=pos[i+1];

            temp3=temp5-(temp5%m);
            temp4=temp6-(temp6%m);

            if(!(temp3== temp4))
            {   
                if((temp3+m==temp6))
                            {}

                else    
                {   
                    ja[j]=p+1;
                    j++;
                }
            }       
        }
        printf("check1\n");

        request = 0;
        s_time = timerval();
        for(i=0; i<1000;i++)
        {       
            #pragma omp parallel
            {
             mkl_dcsrmultcsr(&trans, &request, &sort, &n, &n, &n, nz, ia, ja, nz, ia, ja, nzc, jc, ic, &nzmax, &info);
            }
        }
        e_time = timerval();
        avg_time = (e_time - s_time);

        /* write the timing information in "output.txt"*/
        avg_time = avg_time / 1000;

        printf("check 5:\n");
        if((fp2 = fopen("output.txt","a"))==NULL)
        {
            printf("error opening file\n");
        }
        //fseek(fp1,1000,SEEK_END);
        fprintf (fp2, "\n Input size: %d x %d ,Time: %lf and density is %d and info is %d \n", m,n, avg_time, density,info); 
        fclose(fp2);

        //mkl_free_buffers();

        free(ja);
        free(ia);
        free(nz);

        free(pos);  
        free(jc);
        free(ic);
        free(nzc);
    }
    return 0;
}

Answer 1

您可以在调用乘法函数之前使用mkl_（thread_）free_buffers（）。这对我来说很有用！

使用MKL进行稀疏矩阵乘法

1 个答案: