我正在尝试使用omp来加速我的mexfunction。但是,它每次都会崩溃Matlab。我很抱歉长代码。但是,我不知道哪里出错,所以我必须发布所有这些错误。代码运行良好,不使用omp。
#include <math.h>
#include <vector>
#include <algorithm>
#include "mex.h"
#include <omp.h>
using namespace std;
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
/* Macros for the ouput and input arguments */
#define T1_OUT plhs[0]
#define T2_OUT plhs[1]
#define point_IN prhs[0]
#define PlaneLocation_IN prhs[1]
#define SearchRadius_IN prhs[2]
#define NeighborIndex_IN prhs[3]
#define D_IN prhs[4]
#define pi 3.14159265358979
double *point,*PL,R,*NI,*T1,*T2,*tempPr,*DP2P;
int i,j,k;
int NumOfPoints;
int NumOfNeighbors;
double vector2[3];
double vector1[3];
double cross[3];
double V;
vector<int> idIN;
vector<double> GPStime;
vector<double> LineInBallLength;
// define input pointers.
point = mxGetPr(point_IN);
PL = mxGetPr(PlaneLocation_IN);
R = mxGetScalar(SearchRadius_IN);
NI = mxGetPr(NeighborIndex_IN);
DP2P = mxGetPr(D_IN);
V = 4.0/3.0*pi*R*R*R;
NumOfPoints = mxGetM(point_IN);
NumOfNeighbors = mxGetN(NeighborIndex_IN);
T1_OUT = mxCreateDoubleMatrix(NumOfPoints,1,mxREAL);
T2_OUT = mxCreateDoubleMatrix(NumOfPoints,1,mxREAL);
T1 = mxGetPr(T1_OUT);
T2 = mxGetPr(T2_OUT);
double Dist;
double Coef;
double Noise;
#pragma omp parallel
{
#pragma omp for //if I comment this line and the two lines above, the code works OK.
for(i=0;i<NumOfPoints;i++){
double pointX = point[i+ NumOfPoints];
double pointY = point[i + NumOfPoints *2];
double pointZ = point[i + NumOfPoints * 3];
idIN.clear();
GPStime.clear();
for(j=0;j<NumOfPoints;j++){
vector2[0] = pointX - point[j+ NumOfPoints];
vector2[1] = pointY - point[j + NumOfPoints * 2];
vector2[2] = pointZ - point[j + NumOfPoints * 3];
vector1[0] = pointX - PL[j];
vector1[1] = pointY - PL[j + NumOfPoints];
vector1[2] = pointZ - PL[j + NumOfPoints * 2];
cross[0] = vector2[1]*vector1[2] - vector2[2]*vector1[1];
cross[1] = vector2[2]*vector1[0] - vector2[0]*vector1[2];
cross[2] = vector2[0]*vector1[1] - vector2[1]*vector1[0];
Dist = sqrt(cross[0]*cross[0] + cross[1]*cross[1] + cross[2]*cross[2]) / DP2P[j];
if (Dist <=R){
idIN.push_back(j);
GPStime.push_back(point[j]);
LineInBallLength.push_back(2*sqrt(R*R - Dist*Dist));
}
}
// Calculate Coef
sort(GPStime.begin(),GPStime.end());
GPStime.erase(unique(GPStime.begin(),GPStime.end()),GPStime.end());
Coef = double(GPStime.size()) * 100 / double(idIN.size());
Noise = 0;
for(k=0;k<idIN.size();k++){
Noise = Noise + LineInBallLength[k]*point[4*NumOfPoints+idIN[k]] / V * Coef;
}
T1[i] = Noise;
}
}
return;
}
此外,我尝试使用omp的另一个简短示例,它的工作原理。由于我的计算机有四个核心,计算时间大约是原始时间的四分之一。任何人都可以帮助我,谢谢。 顺便说一句,我是编写C代码的新手,所以关于C代码的任何其他建议都值得赞赏。
更新: 我尝试修改omp部分如下
#pragma omp parallel for shared(point, DP2P, R, PL) private(pointX,pointY,pointZ,idIN,GPStime,vector1,vector2,cross,Dist, LineInBallLength,Noise)
现在,matlab不会崩溃并且可以获得结果。但是,这个结果与没有omp的结果不同。对于大多数结果值,它们接近正确的结果(没有omp的结果),其中一些与正确的结果非常不同。