我正在尝试优化一个平滑函数:给定一幅图像,它把图像中的每个像素替换为该像素及其周围像素的平均值,从而平滑/模糊边缘(图像以 dim × dim 的像素矩阵形式存储)。要优化的代码如下:
/*
 * Baseline smoothing pass over a dim x dim image: for every position (i, j),
 * accumulate the src pixels in the neighbourhood i-1..i+1 by j-1..j+1 (the
 * bounds are limited to 0..dim-1 through max/min) and store the per-channel
 * quotient ps.<channel> / ps.num in dst.
 * NOTE(review): pixel_sum, initialize_pixel_sum, accumulate_sum, RIDX, max
 * and min are defined outside this excerpt; what they do is assumed from how
 * they are used here - confirm against their definitions.
 */
int i, j, ii, jj;
pixel_sum ps;
/* NOTE(review): j is the outer loop and i the inner one, so consecutive
 * iterations vary the first RIDX argument. If RIDX(i,j,dim) is i*dim+j
 * (TODO confirm), the inner loop walks dst with a stride of dim elements. */
for (j = 0; j < dim; j++){
for (i = 0; i < dim; i++){
/* presumably resets ps before accumulating this output pixel - verify */
initialize_pixel_sum(&ps);
/* Visit the neighbourhood of (i, j); positions on the image border get a
 * smaller window because the loop bounds are limited to 0..dim-1. */
for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++){
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++){
accumulate_sum(&ps, src[RIDX(ii,jj,dim)]);
}
}
/* presumably ps.num is the number of pixels accumulated above, making each
 * quotient a per-channel average - verify against accumulate_sum */
dst[RIDX(i,j,dim)].red = ps.red/ps.num;
dst[RIDX(i,j,dim)].green = ps.green/ps.num;
dst[RIDX(i,j,dim)].blue = ps.blue/ps.num;
}
}
我找到了一个优化版本,如下所示:
/*
 * Hand-specialised smoothing pass: instead of one generic loop with bounds
 * checks, the image is split into corners (4 contributing pixels), border
 * rows/columns without corners (6 pixels) and the interior (9 pixels), and
 * each case is written out with fixed flat indices.
 * NOTE(review): the comments below assume src and dst are flat arrays of
 * dim*dim pixels with index = row*dim + col - TODO confirm against RIDX.
 * NOTE(review): src[1], src[dim] and src[dim+1] are read unconditionally,
 * so this appears to require dim >= 2 - confirm against the caller.
 * i and j are reused as scratch indices throughout; statement order matters.
 */
int i, j, myJ;
// corners
// Each corner sums 4 pixels; >>2 is used instead of /4 (equal for
// non-negative sums - channel types are not visible here, TODO confirm).
// Corner at flat index 0: itself, right neighbour, and the two below.
dst[0].red = (src[0].red+src[1].red+src[dim].red+src[dim+1].red)>>2;
dst[0].blue = (src[0].blue+src[1].blue+src[dim].blue+src[dim+1].blue)>>2;
dst[0].green = (src[0].green+src[1].green+src[dim].green+src[dim+1].green)>>2;
// Corner at flat index dim-1; i = 2*dim-1 is the pixel dim positions later.
i = dim*2-1;
dst[dim-1].red = (src[dim-2].red+src[dim-1].red+src[i-1].red+src[i].red)>>2;
dst[dim-1].blue = (src[dim-2].blue+src[dim-1].blue+src[i-1].blue+src[i].blue)>>2;
dst[dim-1].green = (src[dim-2].green+src[dim-1].green+src[i-1].green+src[i].green)>>2;
// Corner at flat index dim*(dim-1); i is the pixel dim positions earlier.
j = dim*(dim-1);
i = dim*(dim-2);
dst[j].red = (src[j].red+src[j + 1].red+src[i].red+src[i + 1].red)>>2;
dst[j].blue = (src[j].blue+src[j + 1].blue+src[i].blue+src[i + 1].blue)>>2;
dst[j].green = (src[j].green+src[j + 1].green+src[i].green+src[i + 1].green)>>2;
// Corner at the last flat index dim*dim-1; i is the pixel dim positions earlier.
j = dim*dim-1;
i = dim*(dim-1)-1;
dst[j].red = (src[j - 1].red+src[j].red+src[i - 1].red+src[i].red)>>2;
dst[j].blue = (src[j - 1].blue+src[j].blue+src[i - 1].blue+src[i].blue)>>2;
dst[j].green = (src[j - 1].green+src[j].green+src[i - 1].green+src[i].green)>>2;
//sides
// Indices 1 .. dim-2 (first row without its corners): the pixel, its left and
// right neighbours, and the three pixels dim positions later.
i = dim - 1;
for (j = 1; j < i; j++)
{
dst[j].red = (src[j].red+src[j-1].red+src[j+1].red+src[j+dim].red+src[j+1+dim].red+src[j-1+dim].red)/6;
dst[j].green = (src[j].green+src[j-1].green+src[j+1].green+src[j+dim].green+src[j+1+dim].green+src[j-1+dim].green)/6;
dst[j].blue = (src[j].blue+src[j-1].blue+src[j+1].blue+src[j+dim].blue+src[j+1+dim].blue+src[j-1+dim].blue)/6;
}
// Indices dim*(dim-1)+1 .. dim*dim-2 (last row without its corners): same
// pattern, but the extra three pixels are dim positions earlier.
i = dim*dim-1;
for (j = i - dim + 2; j < i; j++)
{
dst[j].red = (src[j].red+src[j-1].red+src[j+1].red+src[j-dim].red+src[j+1-dim].red+src[j-1-dim].red)/6;
dst[j].green = (src[j].green+src[j-1].green+src[j+1].green+src[j-dim].green+src[j+1-dim].green+src[j-1-dim].green)/6;
dst[j].blue = (src[j].blue+src[j-1].blue+src[j+1].blue+src[j-dim].blue+src[j+1-dim].blue+src[j-1-dim].blue)/6;
}
// Indices 2*dim-1, 3*dim-1, ... below dim*dim-1 (last column without its
// corners): the pixel, the ones dim before/after it, and their left neighbours.
for (j = dim+dim-1; j < dim*dim-1; j+=dim)
{
dst[j].red = (src[j].red+src[j-1].red+src[j-dim].red+src[j+dim].red+src[j-dim-1].red+src[j-1+dim].red)/6;
dst[j].green = (src[j].green+src[j-1].green+src[j-dim].green+src[j+dim].green+src[j-dim-1].green+src[j-1+dim].green)/6;
dst[j].blue = (src[j].blue+src[j-1].blue+src[j-dim].blue+src[j+dim].blue+src[j-dim-1].blue+src[j-1+dim].blue)/6;
}
// i still holds dim*dim-1 from above, so this sets i = dim*(dim-1), the
// exclusive upper bound for the first-column loop below.
i = i - (dim - 1);
// Indices dim, 2*dim, ... below dim*(dim-1) (first column without its
// corners): the pixel, the ones dim before/after it, and their right neighbours.
for (j = dim; j < i; j+=dim)
{
dst[j].red = (src[j].red+src[j-dim].red+src[j+1].red+src[j+dim].red+src[j+1+dim].red+src[j-dim+1].red)/6;
dst[j].green = (src[j].green+src[j-dim].green+src[j+1].green+src[j+dim].green+src[j+1+dim].green+src[j-dim+1].green)/6;
dst[j].blue = (src[j].blue+src[j-dim].blue+src[j+1].blue+src[j+dim].blue+src[j+1+dim].blue+src[j-dim+1].blue)/6;
}
// Interior: myJ is a running flat index. It starts at dim and is incremented
// before use, so the first pixel written is dim+1.
myJ = dim;
for (i = 1; i < dim-1; i++)
{
for (j = 1; j < dim-1; j++)
{
myJ ++;
// Sum of the pixel and its 8 neighbours (offsets -1, 0, +1 combined with
// -dim, 0, +dim), divided by 9.
dst[myJ].red = (src[myJ-1].red+src[myJ].red+src[myJ+1].red+src[myJ-dim-1].red+src[myJ-dim].red+src[myJ-dim+1].red+src[myJ+dim-1].red+src[myJ+dim].red+src[myJ+dim+1].red)/9;
dst[myJ].green = (src[myJ-1].green+src[myJ].green+src[myJ+1].green+src[myJ-dim-1].green+src[myJ-dim].green+src[myJ-dim+1].green+src[myJ+dim-1].green+src[myJ+dim].green+src[myJ+dim+1].green)/9;
dst[myJ].blue = (src[myJ-1].blue+src[myJ].blue+src[myJ+1].blue+src[myJ-dim-1].blue+src[myJ-dim].blue+src[myJ-dim+1].blue+src[myJ+dim-1].blue+src[myJ+dim].blue+src[myJ+dim+1].blue)/9;
}
// The inner loop advanced myJ by dim-2; adding 2 makes the total advance per
// outer iteration exactly dim, skipping the two border pixels.
myJ += 2;
}
有人可以解释这种优化是如何运作的吗?
答案 0 :(得分:1)
由于您没有给出许多内容的定义(accumulate_sum、pixel_sum、RIDX 等),因此很难确定。看起来,第一个版本的代码按行和列遍历数据,而第二个版本先处理四个角,再处理四条边,最后处理其余数据。每个像素都是根据它周围的像素来处理的。角和边上的像素相邻像素较少,因此处理起来更简单一些。把这些特殊情况单独拆出来可以简化它们的代码。随后,第二个版本在处理其余数据时把最内层的两个循环展开(unroll)了,而这之所以可行,正是因为所有特殊情况(边和角)都已经被单独处理掉了。
这一改动是否真的算得上"优化",就留给读者作为练习了。您需要对两个版本都运行性能测试才能确定。即使第二个版本效率更高,第一个版本的可读性也要好得多。