我要做的是采用这段代码:
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";

/*
 * naive_smooth - Baseline smoothing pass.
 *
 * Walks every (row, col) position of a dim x dim grid and stores the
 * value returned by avg() for that position into the matching slot of dst.
 */
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row = 0;

    while (row < dim) {
        int col;

        for (col = 0; col < dim; col++) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
        row++;
    }
}
并将函数调用avg(dim, i, j, src);
替换为页面底部的实际代码。然后,获取该代码并将该代码中的所有函数调用替换为实际代码等。
如果你问为什么要这样做,原因很简单:去掉函数调用后,程序运行得更快。我想通过消除所有函数调用并将其替换为实际代码,使上面的代码运行时每个元素所需的周期数(cycles per element)尽可能少。
但我在实际动手时遇到了很多麻烦。我是否只取花括号之间的代码,然后复制粘贴?我是否要去掉花括号?我是否要包含函数的开头,例如 static pixel avg(int dim, int i, int j, pixel *src),然后是花括号,然后是函数体,用它们来替换函数调用?
我将在这里粘贴所有代码:
/*
 * A struct used to compute averaged pixel value.
 *
 * The three colour fields hold running totals; num is the number of
 * pixels that have been added and is later used as the divisor.
 */
typedef struct {
int red;   /* running total of the red components */
int green; /* running total of the green components */
int blue;  /* running total of the blue components */
int num;   /* how many pixels have been accumulated */
} pixel_sum;
/* Helpers that pick the smaller / larger of two integers. */

/* min - returns a when a is strictly less than b, otherwise b. */
static int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}

/* max - returns a when a is strictly greater than b, otherwise b. */
static int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
/*
* initialize_ pixel_ sum - Initializes all fields of sum to 0
*/
static void initialize_ pixel_ sum (pixel_sum *sum)
{
sum->red = sum->green = sum->blue = 0;
sum->num = 0;
return;
}
/*
* accumulate_sum - Accumulates field values of p in corresponding
* fields of sum
*/
static void accumulate_ sum (pixel_sum *sum, pixel p)
{
sum->red += (int) p.red;
sum->green += (int) p.green;
sum->blue += (int) p.blue;
sum->num++;
return;
}
/*
* assign_ sum_ to_ pixel - Computes averaged pixel value in current_pixel
*/
static void assign_ sum_ to_ pixel (pixel *current_ pixel, pixel_ sum sum)
{
current_pixel->red = (unsigned short) (sum.red/sum.num);
current_pixel->green = (unsigned short) (sum.green/sum.num);
current_pixel->blue = (unsigned short) (sum.blue/sum.num);
return;
}
/*
* avg - Returns averaged pixel value at (i,j)
*/
这是我想用来替换函数调用 avg(dim, i, j, src); 的代码:
/*
 * avg - Returns averaged pixel value at (i,j)
 *
 * dim: side length used for clamping and for RIDX indexing.
 * i,j: row and column of the pixel being averaged.
 * src: source pixel array, indexed through RIDX(row, col, dim).
 *
 * Sums the pixels in rows i-1..i+1 and columns j-1..j+1, with both
 * ranges clamped to [0, dim-1], then divides by the number of pixels
 * actually visited.
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    initialize_pixel_sum(&sum);
    for (ii = max(i - 1, 0); ii <= min(i + 1, dim - 1); ii++) {
        for (jj = max(j - 1, 0); jj <= min(j + 1, dim - 1); jj++) {
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);
        }
    }
    /* Pass the address of the local so the helper can fill it in. */
    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/*
 * mysmooth - my smooth
 *
 * Same double loop as naive_smooth, but with the body of avg() pasted
 * in place of the call: the locals of avg() become locals of this
 * function, and avg()'s "return current_pixel" becomes the store
 * into dst.
 *
 * dim: side length of the image.
 * src: source pixel array, indexed through RIDX(row, col, dim).
 * dst: destination pixel array, same indexing.
 */
char mysmooth_descr[] = "my smooth: My smooth";
void mysmooth(int dim, pixel *src, pixel *dst)
{
    int i, j;
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    for (i = 0; i < dim; i++) {
        for (j = 0; j < dim; j++) {
            /* ---- begin inlined body of avg(dim, i, j, src) ---- */
            initialize_pixel_sum(&sum);
            for (ii = max(i - 1, 0); ii <= min(i + 1, dim - 1); ii++) {
                for (jj = max(j - 1, 0); jj <= min(j + 1, dim - 1); jj++) {
                    accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);
                }
            }
            assign_sum_to_pixel(&current_pixel, sum);
            /* ---- end inlined body; the returned value is stored here ---- */
            dst[RIDX(i, j, dim)] = current_pixel;
        }
    }
}
把 avg() 的代码取出来替换函数调用之后,我的代码应该是这个样子吗?
答案 0 :(得分:8)
如果你的代码库很小,包括类似10-12个函数,你可能想尝试在每个函数前面加上关键字inline
。
第二个选项,使用内联所有函数调用的编译器选项,不要手动执行(这就是编译器存在的原因)。你用的是什么编译器?您可以在线查看其内联所有函数调用的选项(如果有的话)。
第三,如果您使用GCC编译代码,则可以为函数指定always_inline
属性。以下是如何使用它:
/*
 * Forward declaration of avg() carrying GCC's always_inline attribute.
 * The function is also declared `inline`: GCC expects always_inline
 * functions to be inline and otherwise warns that the function
 * "might not be inlinable".
 */
static inline pixel avg(int dim, int i, int j, pixel *src) __attribute__((always_inline));
答案 1 :(得分:4)
可以使用 inline 关键字。但是,inline 只是给编译器的提示:只有当编译器认为内联更高效时,调用才会被替换为实际代码。如果要确保替换一定发生,avg() 必须写成一个宏,这样就能保证函数“调用”被替换为实际代码。
答案 2 :(得分:2)
答案 3 :(得分:2)
我不得不说我同意确保你使用编译器优化和内联的方法......但是如果你仍然想要回答你的具体问题,我认为你得到的是:
/* Inner loop of the smoothing pass once avg() has been inlined by hand. */
for (j = 0; j < dim; j++)
{
/*
 * Placeholder: the body of avg() goes here, minus its return statement;
 * it leaves the result in current_pixel.
 */
dst[RIDX(i, j, dim)] = current_pixel;
}
答案 4 :(得分:0)
我展开了循环的开始和结束,以消除代码中的min()和max():
/*
 * smooth_B - 3x3 box smoothing with the border cases peeled out of the loops.
 *
 * Each destination pixel is the per-channel integer average of the source
 * pixel and its in-bounds neighbours: 4 pixels at a corner, 6 along an
 * edge, 9 in the interior.  Handling corners and edges separately removes
 * the min()/max() clamping from the hot loop.
 *
 * dim: side length of both images.
 * src: source image, dim x dim.
 * dst: destination image, dim x dim.
 *
 * Every loop over edge or interior positions runs from 1 to dim-2
 * inclusive; the positions 0 and dim-1 are handled by the explicit
 * corner/edge statements, and going further would read column dim,
 * which is outside the array.
 */
void smooth_B(int dim, struct pixel src[dim][dim], struct pixel dst[dim][dim]){
    /* The peeled corner code needs at least a 2x2 image. */
    if (dim < 2) {
        if (dim == 1) {
            /* A single pixel has no neighbours: its average is itself. */
            dst[0][0].red   = src[0][0].red;
            dst[0][0].green = src[0][0].green;
            dst[0][0].blue  = src[0][0].blue;
        }
        return;
    }

    /* Top-left corner. */
    dst[0][0].red  =(src[0][0].red  +src[1][0].red  +src[0][1].red  +src[1][1].red  )/4;
    dst[0][0].green=(src[0][0].green+src[1][0].green+src[0][1].green+src[1][1].green)/4;
    dst[0][0].blue =(src[0][0].blue +src[1][0].blue +src[0][1].blue +src[1][1].blue )/4;

    /* Top edge. */
    for( int j=1; j<dim-1; j++){
        dst[0][j].red  =(src[0][j-1].red  +src[1][j-1].red  +src[0][j].red  +src[1][j].red  +src[0][j+1].red  +src[1][j+1].red  )/6;
        dst[0][j].green=(src[0][j-1].green+src[1][j-1].green+src[0][j].green+src[1][j].green+src[0][j+1].green+src[1][j+1].green)/6;
        dst[0][j].blue =(src[0][j-1].blue +src[1][j-1].blue +src[0][j].blue +src[1][j].blue +src[0][j+1].blue +src[1][j+1].blue )/6;
    }

    /* Top-right corner. */
    dst[0][dim-1].red  =(src[0][dim-2].red  +src[1][dim-2].red  +src[0][dim-1].red  +src[1][dim-1].red  )/4;
    dst[0][dim-1].green=(src[0][dim-2].green+src[1][dim-2].green+src[0][dim-1].green+src[1][dim-1].green)/4;
    dst[0][dim-1].blue =(src[0][dim-2].blue +src[1][dim-2].blue +src[0][dim-1].blue +src[1][dim-1].blue )/4;

    for( int i=1; i<dim-1; i++){
        /* Left edge of row i. */
        dst[i][0].red  =(src[i-1][0].red  +src[i-1][1].red  +src[i][0].red  +src[i][1].red  +src[i+1][0].red  +src[i+1][1].red  )/6;
        dst[i][0].green=(src[i-1][0].green+src[i-1][1].green+src[i][0].green+src[i][1].green+src[i+1][0].green+src[i+1][1].green)/6;
        dst[i][0].blue =(src[i-1][0].blue +src[i-1][1].blue +src[i][0].blue +src[i][1].blue +src[i+1][0].blue +src[i+1][1].blue )/6;

        /* Interior of row i: stop before the last column, which has no j+1 neighbour. */
        for( int j=1; j<dim-1; j++){
            dst[i][j].red  =(src[i-1][j-1].red  +src[i][j-1].red  +src[i+1][j-1].red  +src[i-1][j].red  +src[i][j].red  +src[i+1][j].red  +src[i-1][j+1].red  +src[i][j+1].red  +src[i+1][j+1].red  )/9;
            dst[i][j].green=(src[i-1][j-1].green+src[i][j-1].green+src[i+1][j-1].green+src[i-1][j].green+src[i][j].green+src[i+1][j].green+src[i-1][j+1].green+src[i][j+1].green+src[i+1][j+1].green)/9;
            dst[i][j].blue =(src[i-1][j-1].blue +src[i][j-1].blue +src[i+1][j-1].blue +src[i-1][j].blue +src[i][j].blue +src[i+1][j].blue +src[i-1][j+1].blue +src[i][j+1].blue +src[i+1][j+1].blue )/9;
        }

        /* Right edge of row i. */
        dst[i][dim-1].red  =(src[i-1][dim-2].red  +src[i][dim-2].red  +src[i+1][dim-2].red  +src[i-1][dim-1].red  +src[i][dim-1].red  +src[i+1][dim-1].red  )/6;
        dst[i][dim-1].green=(src[i-1][dim-2].green+src[i][dim-2].green+src[i+1][dim-2].green+src[i-1][dim-1].green+src[i][dim-1].green+src[i+1][dim-1].green)/6;
        dst[i][dim-1].blue =(src[i-1][dim-2].blue +src[i][dim-2].blue +src[i+1][dim-2].blue +src[i-1][dim-1].blue +src[i][dim-1].blue +src[i+1][dim-1].blue )/6;
    }

    /* Bottom-left corner. */
    dst[dim-1][0].red  =(src[dim-2][0].red  +src[dim-2][1].red  +src[dim-1][0].red  +src[dim-1][1].red  )/4;
    dst[dim-1][0].green=(src[dim-2][0].green+src[dim-2][1].green+src[dim-1][0].green+src[dim-1][1].green)/4;
    dst[dim-1][0].blue =(src[dim-2][0].blue +src[dim-2][1].blue +src[dim-1][0].blue +src[dim-1][1].blue )/4;

    /* Bottom edge: stop before the last column, which is the corner below. */
    for( int j=1; j<dim-1; j++){
        dst[dim-1][j].red  =(src[dim-2][j-1].red  +src[dim-1][j-1].red  +src[dim-2][j].red  +src[dim-1][j].red  +src[dim-2][j+1].red  +src[dim-1][j+1].red  )/6;
        dst[dim-1][j].green=(src[dim-2][j-1].green+src[dim-1][j-1].green+src[dim-2][j].green+src[dim-1][j].green+src[dim-2][j+1].green+src[dim-1][j+1].green)/6;
        dst[dim-1][j].blue =(src[dim-2][j-1].blue +src[dim-1][j-1].blue +src[dim-2][j].blue +src[dim-1][j].blue +src[dim-2][j+1].blue +src[dim-1][j+1].blue )/6;
    }

    /* Bottom-right corner. */
    dst[dim-1][dim-1].red  =(src[dim-2][dim-2].red  +src[dim-1][dim-2].red  +src[dim-2][dim-1].red  +src[dim-1][dim-1].red  )/4;
    dst[dim-1][dim-1].green=(src[dim-2][dim-2].green+src[dim-1][dim-2].green+src[dim-2][dim-1].green+src[dim-1][dim-1].green)/4;
    dst[dim-1][dim-1].blue =(src[dim-2][dim-2].blue +src[dim-1][dim-2].blue +src[dim-2][dim-1].blue +src[dim-1][dim-1].blue )/4;
}
据我测量,它比原始代码快50%左右。下一步是消除重复计算。