我有一个Matlab mex函数,它重复调用一个名为calculate().
的C函数我创建了两个版本的函数:
版本A:每次mex()
调用calculate()
时,它只传递输入参数,calculate()
所需的所有内存都在calculate()
内分配和释放 - 每个时间!
版本B:calculate()
所需的内存在mex()
的开头分配,指针传递给calculate()
。内存仅在mex()
结束时释放。换句话说,分配/释放只对整个企业进行一次。
根据内存分配需要时间的概念,我根据天真的假设创建了版本B,这应该可以提高速度。但实际上它的执行时间增加了大约5倍!那里发生了什么?
以下是版本B的实际代码 - 您可以从我最初为版本A分配内存的注释中看到。
/* MEX business*/
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){
/* mexPrintf("Inside the mex function of CalcMz2.\n");
mexEvalString("drawnow;");*/
/*declare local variables*/
struct InputParameters voxelPars;
InputParameters *point2pars;
struct Zcontrast contrast;
struct Zcontrast* ptr2contrast;
ptr2contrast = &contrast;
mxArray *in, *out;
mxArray *temp;
double *output;
int nFields;
int i,j,k;
in = mxDuplicateArray(prhs[0]); /*'in' now is a copy of the input struct*/
nFields = mxGetNumberOfFields(prhs[0]);
if (nFields != 39) mexPrintf("Error: the number of fields in the input struct is incorrect.\n");
//associate outputs
out = plhs[0] = mxCreateDoubleScalar(0.0);
output = mxGetPr(out);
/*Passed variables memory allocation*/
double **pntrA;
pntrA = (double **)mxCalloc(18, sizeof(double*));
for (i=0; i <18; i++){
pntrA[i] = (double *) mxCalloc(18, sizeof(double));
}
double *pntrAinvB;
pntrAinvB = (double *) mxCalloc(18, sizeof(double));
/*used by MatrInv()*/
double **CopyOfMatrix;
CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*));
for (i=0; i<18; i++){
CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double));
}
double *vector;
double *col;
int *indx;
vector = (double *)mxCalloc(18, sizeof(double));
indx = (int *)mxCalloc(18, sizeof(int));
col = (double *)mxCalloc(18, sizeof(double));
/*used by CalculateY()*/
double *Aat;
Aat = (double*)mxCalloc(18*18, sizeof(double));
double *Aate;
Aate = (double*)mxCalloc(18*18, sizeof(double));
double *sum;
sum = (double*)mxCalloc(18, sizeof(double));
double *product;
product = (double*)mxCalloc(18, sizeof(double));
/*Reassign values passed from input struct*/
temp = mxGetFieldByNumber(prhs[0],0,0);
voxelPars.ampl = mxGetPr(temp);
temp = mxGetFieldByNumber(prhs[0],0,1);
voxelPars.phi = mxGetPr(temp);
temp = mxGetFieldByNumber(prhs[0],0,2);
voxelPars.count1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,3);
voxelPars.pw1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,4);
voxelPars.b1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,5);
voxelPars.puloffsetppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,6);
voxelPars.pw1dc = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,7);
voxelPars.cf = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,8);
voxelPars.M0w = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,9);
voxelPars.T1a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,10);
voxelPars.T2a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,11);
voxelPars.offsetappm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,12);
voxelPars.bwfraction = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,13);
voxelPars.M0a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,14);
voxelPars.M0bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,15);
voxelPars.T1bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,16);
voxelPars.T2bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,17);
voxelPars.offsetbwppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,18);
voxelPars.exratebw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,19);
voxelPars.M0b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,20);
voxelPars.T1b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,21);
voxelPars.T2b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,22);
voxelPars.offsetbppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,23);
voxelPars.exrateb = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,24);
voxelPars.M0c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,25);
voxelPars.T1c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,26);
voxelPars.T2c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,27);
voxelPars.offsetcppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,28);
voxelPars.exratec = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,29);
voxelPars.M0d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,30);
voxelPars.T1d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,31);
voxelPars.T2d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,32);
voxelPars.offsetdppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,33);
voxelPars.exrated = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,34);
voxelPars.M0e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,35);
voxelPars.T1e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,36);
voxelPars.T2e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,37);
voxelPars.offseteppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,38);
voxelPars.exratee = mxGetScalar(temp);
point2pars = &voxelPars;
/*Initialize contrast's values to zero*/
ptr2contrast->SimMza = ptr2contrast->SimMzb = ptr2contrast->contrastz = 1.0;
/* call calculate()*/
calculate(point2pars, ptr2contrast, pntrA, pntrAinvB, CopyOfMatrix,vector, col, indx, Aat, Aate, sum, product);
*output = ptr2contrast->contrastz;
//mexPrintf("The contrast has been calculated as :%f.\n", ptr2contrast->contrastz);
/*Free memory
mxFree(pntrA); mxFree(pntrAinvB);*/
mxFree(CopyOfMatrix); mxFree(Aat); mxFree(Aate);
mxFree(vector);mxFree(indx);mxFree(col);
mxFree(sum); mxFree(product);
}//end mex function
void calculate (struct InputParameters *voxelPars1, struct Zcontrast *contrast, double **pntrA,double *pntrAinvB, double **CopyOfMatrix, double *vector, double *col,int *indx, double *Aat, double *Aate, double *sum, double *product){
/*Local variables*/
int i, j, k, n1, m;
double pwms, pw1delay;
int npul;
double timeStepSize;
double W;
double Wa, Wbw, Wb, Wc, Wd, We;
double Cbw, Cb, Cc, Cd, Ce;
double M0a, M0bw, M0b, M0c, M0d, M0e; //declaring these as locals just to make life easier
double pa, pbw, pb, pc, pd, pe;
double Cabw, Cab, Cac, Cad, Cae;
double k1a, k1bw, k1b, k1c, k1d, k1e;
double k2a, k2bw, k2b, k2c, k2d, k2e;
// double **pntrA;
// pntrA = (double **)mxCalloc(18, sizeof(double*));
// for (i=0; i <18; i++){
// pntrA[i] = (double *) mxCalloc(18, sizeof(double));
// }
//
double AdinvB[18];
double AinvB[18];
for(i=0; i<18; i++){
AdinvB[i] = 0.0;
AinvB[i] = 0.0;
}
//
// /*Passed variables memory allocation*/
// double **CopyOfMatrix; /*used by MatriInv()*/
// CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*));
// for (i=0; i<18; i++){
// CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double));
// }
//
// double *vector;
// double *col;
// int *indx;
// vector = (double *)mxCalloc(18, sizeof(double));
// indx = (int *)mxCalloc(18, sizeof(int));
// col = (double *)mxCalloc(18, sizeof(double));
//
// /*used by CalculateY()*/
// double *Aat;
// Aat = (double*)mxCalloc(18*18, sizeof(double));
// /* for (i=0; i<18; i++){
// Aat[i] = (double*)mxCalloc(18, sizeof(double));
// }*/
//
// double *Aate;
// Aate = (double*)mxCalloc(18*18, sizeof(double));
// /* for (i=0; i<18; i++){
// Aate[i] = (double*)mxCalloc(18, sizeof(double));
// } */
//
// double *sum;
// sum = (double*)mxCalloc(18, sizeof(double));
// double *product;
// product = (double*)mxCalloc(18, sizeof(double));
double W1[voxelPars1->count1]; //Now THIS might require a pointer, since count1 is only passed in the argument struct. We'll see.
double W1x[voxelPars1->count1];
double W1y[voxelPars1->count1];
/* mexPrintf("Declared all local variables in calculate().\n");
mexEvalString("drawnow;");*/
/*Fill local variables with values from input struct voxelPars*/
....计算继续追踪
答案 0 :(得分:0)
我最近遇到过类似的问题。虽然我对matlab内存管理设施没有任何了解,但我可以说,对于我来说,切换到C-native malloc / free给了大概x5的加速。
似乎在你的情况下,也没有必要在Matlab堆上管理内存(使用mxCalloc和类似的东西) - 因为内存分配仅供mex-function内部使用。建议你尝试一下:只需将mxCalloc替换为malloc,mxFree替换为free。