我正在尝试对一个C程序做一些优化,并希望GCC尽可能对它进行自动向量化。为了检查编译器实际做了哪些处理,我使用“-S”选项生成程序的汇编文件;但只要优化级别大于0,GCC输出的.s文件就几乎是空的。我用的是gcc-7,换用更早的版本也是同样的结果。
C代码:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* Number of rows processed by integral_image(); bounds its outer row loop. */
#define HEIGHT 2000
/* Number of columns per row; bounds the column loops in integral_image()
 * and is the row stride it passes to idx(). */
#define WIDTH 1000
/*
 * Map a 2-D coordinate to its offset in a flat row-major buffer.
 *
 * x      - column within the row
 * y      - row number
 * stride - number of elements between the start of one row and the next
 *
 * Returns y * stride + x, evaluated in unsigned int arithmetic (so the
 * result wraps instead of overflowing).
 */
static unsigned int idx(unsigned int x, unsigned int y, unsigned int stride){
    const unsigned int row_start = stride * y;
    return x + row_start;
}
/*
 * Compute the integral image (summed-area table) of `in` into `out`.
 *
 * Both buffers are indexed as row-major arrays of HEIGHT rows by WIDTH
 * columns. On return, out[idx(x, y, WIDTH)] holds the sum of every
 * in[idx(i, j, WIDTH)] with i <= x and j <= y, accumulated in unsigned int
 * arithmetic.
 *
 * in  - source samples, WIDTH * HEIGHT elements. Must be 16-byte aligned,
 *       because the pointer is passed through __builtin_assume_aligned.
 * out - destination sums, WIDTH * HEIGHT elements, same alignment
 *       requirement. Must not overlap `in` (both are restrict-qualified).
 *
 * NOTE(review): this function is `static`. If nothing in the translation
 * unit calls it, GCC is free to discard it (and idx) at any optimization
 * level above -O0, which yields an assembly file containing no code. To
 * inspect the generated code with -S, call it from a function with external
 * linkage or give it external linkage.
 */
static void integral_image(const unsigned char * restrict in, unsigned int * restrict out){
    /* Keep the element types of the parameters: `newin` stays const, and
     * `newout` must be unsigned int * so that full-width sums are stored at
     * the right offsets instead of being truncated to single bytes. */
    const unsigned char *newin = __builtin_assume_aligned(in, 16);
    unsigned int *newout = __builtin_assume_aligned(out, 16);
    unsigned int row_sum = 0;

    /* First row: there is no row above, so each output is just the running
     * sum of the inputs seen so far. */
    for(unsigned int x = 0; x < WIDTH; ++x){
        row_sum += newin[x];
        newout[x] = row_sum;
    }

    /* Remaining rows: running sum of the current row plus the already
     * computed integral value directly above. */
    for(unsigned int y = 1; y < HEIGHT; ++y){
        row_sum = 0;
        /* One column per iteration. The previous 4-way manual unroll used
         * the wrong "above" index for its second lane and silently required
         * WIDTH to be a multiple of 4; unrolling is left to the compiler. */
        for(unsigned int x = 0; x < WIDTH; ++x){
            const unsigned int cur = idx(x, y, WIDTH);
            const unsigned int above = idx(x, y - 1, WIDTH);
            row_sum += newin[cur];
            newout[cur] = row_sum + newout[above];
        }
    }
}
下面是GCC生成的.s文件:
.file "thrash.c"
.ident "GCC: (Ubuntu 7.1.0-10ubuntu1~16.04.york0) 7.1.0"
.section .note.GNU-stack,"",@progbits
谢谢!