我正在使用MKL
库实现稀疏矩阵乘法,并在各种Intel处理器(如Intel Xeon E5和Intel Xeon Phi)上进行基准测试。
虽然我能够在Xeon E5上以令人满意的结果进行基准测试,但每次在Xeon Phi上运行相同的代码时,我都会在被调用"mkl_dcsrmultcsr"
函数的3次迭代后获得分段错误。我无法弄清楚原因,请让我知道原因。
以下是代码
#include "stdio.h"
#include "stdlib.h"
#include "time.h"
#include "omp.h"
#include "mkl.h"
#include "mkl_spblas.h"
double timerval ()
{
struct timeval st;
gettimeofday(&st, NULL);
return (st.tv_sec+st.tv_usec*1e-6);
}
int main(){
double *nz, *nzc;
int *ia,*ja, *ic,*jc,*pos;
int info=1;
int i, j, k;
FILE *fp1,*fp2,*fp3,*fp4;
double avg_time = 0, s_time, e_time;
//open file to write results
//FILE *fp1;
char trans = 'N';
int sort = 1;
int m=4;
int iterations;
int request = 0;
/* iterate the loop for input size from 2exp3 to 2exp10 */
for (iterations=0; iterations<8; iterations++)
{
m *= 2; // increase the dimension of Matrix with every iteration
int n = m; // Assuming a square matrix.
int nzmax =m*n ;
double dense_const = 0.05;
int temp5, temp6,temp3,temp4;
int density=(m*n)*(dense_const);
//memory allocation for matrix A and B
nz = calloc((m*n),sizeof(double));
ia = calloc((m*n),sizeof(int));
ja = calloc((m*n),sizeof(int));
//memory allocation for product matrix C
nzc =calloc((m*n),sizeof(double));
ic = calloc((m*n),sizeof(int));
jc = calloc((m*n),sizeof(int));
//Configuration parameters
k=0;
//density of the sparse matrix to be created. Assume 5% density.
//position array for random initialisation of positions in input matrix
pos= calloc((m*n), sizeof(int));
int temp,temp1;
// printf("the density is %d\n",density);
// printf("check 1:\n");
//randomly initialise positions
for(i=0;i<density;i++)
{
temp1=rand()%(m*n);
pos[i]=temp1;
}
// printf("check 2:\n");
//sort the 'pos' array
for (i = 0 ; i < density; i++)
{
int d = i;
int t;
while ( d > 0 && pos[d] < pos[d-1])
{
t = pos[d];
pos[d] = pos[d-1];
pos[d-1] = t;
d--;
}
}
//printf("check 3:\n");
// initialise with non zero elements and extract column and row ptr vector
j=1;
ja[0]=1;
int p=0;
for(i = 0; i < density; i++)
{
temp=pos[i];
nz[k] = rand();
// nz[k] = 1;
ia[k] = temp%m;
k++;
p++;
temp5= pos[i];
temp6=pos[i+1];
temp3=temp5-(temp5%m);
temp4=temp6-(temp6%m);
if(!(temp3== temp4))
{
if((temp3+m==temp6))
{}
else
{
ja[j]=p+1;
j++;
}
}
}
printf("check1\n");
request = 0;
s_time = timerval();
for(i=0; i<1000;i++)
{
#pragma omp parallel
{
mkl_dcsrmultcsr(&trans, &request, &sort, &n, &n, &n, nz, ia, ja, nz, ia, ja, nzc, jc, ic, &nzmax, &info);
}
}
e_time = timerval();
avg_time = (e_time - s_time);
/* write the timing information in "output.txt"*/
avg_time = avg_time / 1000;
printf("check 5:\n");
if((fp2 = fopen("output.txt","a"))==NULL)
{
printf("error opening file\n");
}
//fseek(fp1,1000,SEEK_END);
fprintf (fp2, "\n Input size: %d x %d ,Time: %lf and density is %d and info is %d \n", m,n, avg_time, density,info);
fclose(fp2);
//mkl_free_buffers();
free(ja);
free(ia);
free(nz);
free(pos);
free(jc);
free(ic);
free(nzc);
}
return 0;
}
答案 0 :(得分:0)
您可以在调用乘法函数之前使用mkl_(thread_)free_buffers()。这对我来说很有用!