我正在尝试加速一段总共运行150,000,000次的代码。
我使用“Very Sleepy”对其进行了性能分析,结果表明耗时最多的是以下3个区域(如图所示):
代码如下:
double nonLocalAtPixel(int ymax, int xmax, int y, int x , vector<nodeStructure> &nodeMST, int squareDimension, Mat &inputImage) {
vector<double> nodeWeights(8,0);
vector<double> nodeIntensities(8,0);
bool allZeroWeights = true;
int numberEitherside = (squareDimension - 1) / 2;
int index = 0;
for (int j = y - numberEitherside; j < y + numberEitherside + 1; j++) {
for (int i = x - numberEitherside; i < x + numberEitherside + 1; i++) {
// out of range or the centre pixel
if (j<0 || i<0 || j>ymax || i>xmax || (j == y && i == x)) {
index++;
continue;
}
else {
int centreNodeIndex = y*(xmax+1) + x;
int thisNodeIndex = j*(xmax+1) + i;
// add to intensity list
Scalar pixelIntensityScalar = inputImage.at<uchar>(j, i);
nodeIntensities[index] = ((double)*pixelIntensityScalar.val);
// find weight from p to q
float weight = findWeight(nodeMST, thisNodeIndex, centreNodeIndex);
if (weight!=0 && allZeroWeights) {
allZeroWeights = false;
}
nodeWeights[index] = (weight);
index++;
}
}
}
// find min b
int minb = -1;
int bCost = -1;
if (allZeroWeights) {
return 0;
}
else {
// iteratate all b values
for (int i = 0; i < nodeWeights.size(); i++) {
if (nodeWeights[i]==0) {
continue;
}
double thisbCost = nonLocalWithb(nodeIntensities[i], nodeIntensities, nodeWeights);
if (bCost<0 || thisbCost<bCost) {
bCost = thisbCost;
minb = nodeIntensities[i];
}
}
}
return minb;
}
首先,Very Sleepy 显示的耗时是否意味着大部分时间都花在了 vector 的分配和释放上?
其次,有什么建议可以加快这段代码的运行速度吗?
谢谢
答案 0 :(得分:5)