我正在完成一项优化 C 代码的作业;教授暗示,代码移动(code motion)应该是解决这个问题的主要手段。
这是最初未经优化的代码:
/*
 * naive_smooth - Baseline smoothing pass.
 *
 * Visits every (row, col) position with 0 <= row, col < dim and stores the
 * value returned by avg(dim, row, col, src) into dst at RIDX(row, col, dim).
 */
void naive_smooth(int dim, pixel *src, pixel *dst) {
    for (int row = 0; row < dim; row++) {
        for (int col = 0; col < dim; col++) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
    }
}
/* pixel - One image element holding three unsigned short colour channels. */
typedef struct {
    unsigned short red, green, blue;
} pixel;
/* RIDX - Flatten a (row, col) pair into an index for a row length of `width`. */
#define RIDX(row, col, width) ((row) * (width) + (col))
/*
 * avg - Average the pixels of src in the window centred on (i, j).
 *
 * The window covers rows max(i-1, 0)..min(i+1, dim-1) and columns
 * max(j-1, 0)..min(j+1, dim-1), i.e. it is clipped where it would leave
 * the 0..dim-1 index range. Each pixel in the window is folded in with
 * accumulate_sum and the result is produced by assign_sum_to_pixel.
 */
static pixel avg(int dim, int i, int j, pixel *src) {
    /*
     * The window bounds are invariant for the whole scan, so they are
     * evaluated once here instead of on every loop-condition check.
     * NOTE(review): assumes min/max have no side effects.
     */
    const int row_lo = max(i - 1, 0);
    const int row_hi = min(i + 1, dim - 1);
    const int col_lo = max(j - 1, 0);
    const int col_hi = min(j + 1, dim - 1);
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    initialize_pixel_sum(&sum);
    for (ii = row_lo; ii <= row_hi; ii++) {
        for (jj = col_lo; jj <= col_hi; jj++) {
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);
        }
    }
    /* Pass the address of the result pixel so the helper can fill it in. */
    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/* initialize_pixel_sum - Set the three channel totals and the count of *sum to 0. */
static void initialize_pixel_sum(pixel_sum *sum) {
    sum->num = 0;
    sum->blue = 0;
    sum->green = 0;
    sum->red = 0;
}
/*
 * accumulate_sum - Fold one pixel into a running total.
 *
 * Adds each channel of p (converted to int) to the matching field of *sum
 * and increases the count of accumulated pixels by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p) {
    sum->num += 1;
    sum->blue = sum->blue + (int)p.blue;
    sum->green = sum->green + (int)p.green;
    sum->red = sum->red + (int)p.red;
}
/*
 * assign_sum_to_pixel - Write the per-channel quotient total / count from
 * sum into *current_pixel, each narrowed to unsigned short.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum) {
    const unsigned short mean_red = (unsigned short)(sum.red / sum.num);
    const unsigned short mean_green = (unsigned short)(sum.green / sum.num);
    const unsigned short mean_blue = (unsigned short)(sum.blue / sum.num);

    current_pixel->red = mean_red;
    current_pixel->green = mean_green;
    current_pixel->blue = mean_blue;
}
这是我到目前为止所获得的优化版本:
/*
 * smooth - Smoothing pass with the averaging logic written out inline.
 *
 * For every (row, col) with 0 <= row, col < dim, sums the channels of the
 * src pixels in rows max(row-1, 0)..min(row+1, dim-1) and columns
 * max(col-1, 0)..min(col+1, dim-1), divides each total by the number of
 * pixels summed, and stores the result at dst[dim * row + col].
 */
void smooth(int dim, pixel *src, pixel *dst) {
    const int n = dim;

    for (int row = 0; row < n; row++) {
        /* Start of the output row; per-row work is done once here. */
        pixel *out_row = dst + n * row;
        const int row_hi = min(row + 1, n - 1);
        const int row_lo = max(row - 1, 0);

        for (int col = 0; col < n; col++) {
            const int col_hi = min(col + 1, n - 1);
            const int col_lo = max(col - 1, 0);
            pixel_sum total;
            pixel mean;

            total.num = 0;
            total.blue = 0;
            total.green = 0;
            total.red = 0;

            for (int r = row_lo; r <= row_hi; r++) {
                /* Start of the input row currently being scanned. */
                const pixel *in_row = src + n * r;

                for (int c = col_lo; c <= col_hi; c++) {
                    const pixel *px = &in_row[c];

                    total.red += (int)px->red;
                    total.green += (int)px->green;
                    total.blue += (int)px->blue;
                    total.num++;
                }
            }

            mean.red = (unsigned short)(total.red / total.num);
            mean.green = (unsigned short)(total.green / total.num);
            mean.blue = (unsigned short)(total.blue / total.num);
            out_row[col] = mean;
        }
    }
}
到目前为止,我所做的修改涵盖了我认为属于代码移动(code motion)优化原则的全部内容:我把函数调用(avg、initialize_pixel_sum 等)展开为内联的本地代码,为全局变量创建了局部副本(dim 到 localDim),并把循环条件中的函数调用(min 和 max)以及与 dim/localDim 的乘法移到了相应循环的外部。然而,虽然作业附带的测试套件表明它产生的结果与原始代码相同,但 CPE 完全没有变化……也就是说,我重写了代码,却没有带来任何优化效果。
我真的很困惑,为什么这些修改没有产生任何效果;如果有人能指出什么样的代码移动才能让这组函数的 CPE 发生变化,我将非常感激。
谢谢!