我为灰色图像模糊写了一个盒子模糊代码,但结果比我预期的要慢得多。如何改善其表现?
void boxfilter(int* const srcImg, int* const desImg, const int mask, const int nrows, const int ncols)
{
LL *buffer, *rowImg, *rowPtr;
int *srcPtr, *desPtr;
LL pre, sum;
int row, col, i, len, r;
const int MAX_ROW_COL = nrows > ncols ? nrows : ncols;
const int MASK_SIZE = mask*mask;
LL *headL, *tailL;
int *head, *tail;
r = mask / 2;
rowImg = (LL *)malloc(sizeof(LL)*nrows*ncols);
buffer = (LL *)malloc(sizeof(LL)*(MAX_ROW_COL + mask));
len = nrows + 2 * r;
srcPtr = srcImg;
rowPtr = rowImg;
for (col = 0; col < ncols; ++col)
{
srcPtr = srcImg + col;
head = srcPtr + r*ncols;
tail = srcPtr + (nrows - 2 - r)*ncols;
for (i = 0; i < r; ++i)
{
buffer[i] = *head;
buffer[len - 1 - i] = *tail;
head += ncols;
tail -= ncols;
}
for (i = r; i < len - r; ++i)
{
buffer[i] = *srcPtr;
srcPtr += ncols;
}
sum = buffer[0];
for (i = 1; i < mask; ++i)
{
sum += buffer[i];
}
rowPtr = rowImg + col;
*rowPtr = sum;
pre = sum;
headL = buffer;
tailL = buffer + mask;
for (i = mask; i < len; ++i, ++headL, ++tailL)
{
*rowPtr = pre;
rowPtr += ncols;
pre = pre - *headL + *tailL;
}
}
len = ncols + 2 * r;
rowPtr = rowImg;
desPtr = desImg;
for (row = 0; row < nrows; ++row)
{
headL = rowPtr + r;
tailL = rowPtr + (ncols - 2 - r);
for (i = 0; i < r; ++i)
{
buffer[i] = *headL;
buffer[len - 1 - i] = *tailL;
++headL;
--tailL;
}
for (i = r; i < len - r; ++i)
{
buffer[i] = *rowPtr;
++rowPtr;
}
sum = buffer[0];
for (i = 1; i < mask; ++i)
{
sum += buffer[i];
}
*desPtr = sum/MASK_SIZE;
++desPtr;
pre = sum;
headL = buffer;
tailL = buffer + mask;
for (i = mask; i < len; ++i, ++headL, ++tailL)
{
*desPtr = pre/MASK_SIZE;
++desPtr;
pre = pre - *headL + *tailL;
}
}
free(rowImg);
free(buffer);
return;
}