Question

当我传递-O3选项使用gcc编译C代码时，与不做优化的编译相比，运行时间通常会缩短10~30％左右。今天我发现，我的一个程序在使用-O3选项后运行时间显著减少，只有原来的约1/10。不做优化时，大约需要7秒钟才能完成；而使用-O3选项后只需0.7秒！我从未见过如此惊人的时间缩减。

所以我想知道，什么类型的程序模式更有可能从gcc优化选项中受益，或者在编程时有没有什么方法可以让编译器的优化更容易生效。

下面是运行时间缩短到约1/10的那段代码。这是一个简单的程序，使用轮式分解（wheel factorization）算法计算所有小于宏常数MAXX的质数之和。

#include <stdio.h>
#include <math.h>
#include <inttypes.h>
#include <time.h>

#define MAXX 5000000
#define PBLEN 92160
#define PBMAX 510510

/*
 * Sums the primes up to MAXX using a wheel built from the first seven primes
 * (2..17), then prints the sum followed by the elapsed processor time in
 * seconds (as measured by clock()).
 *
 * The work is done in three phases:
 *   1. Build pBase: 1 plus every number in [19, PBMAX] that is not divisible
 *      by any of 2..17.  PBMAX is 2*3*5*7*11*13*17 = 510510.
 *   2. Trial-divide each pBase entry (except the leading 1) and add the
 *      primes among them to the sum.
 *   3. For every further multiple i of PBMAX, trial-divide i + pBase[j] and
 *      add the primes; print and return as soon as a candidate exceeds MAXX.
 *
 * The only exit from the function is the return inside phase 3.
 */
int main(){
    clock_t startT, endT;
    startT = clock();
    /* The wheel primes; their product is PBMAX. */
    int pArr[7] = {2, 3, 5, 7, 11, 13, 17};
    /* Automatic array of PBLEN ints holding the wheel residues in ascending order. */
    int pBase[PBLEN];
    pBase[0] = 1;
    int i, j, k, index = 1;
    /*
     * Phase 1: collect every i in [19, PBMAX] coprime to all wheel primes.
     * index is not bounds-checked; PBLEN must equal the number of residues,
     * which is 1*2*4*6*10*12*16 = 92160 for this wheel.
     */
    for (i = 19; i <= PBMAX; ++i){
        for (j = 0; j < 7; ++j){
            if (i % pArr[j] == 0){
                goto next1;
            }
        }
        pBase[index] = i;
        ++index;
        next1:;
    }
    /* The wheel primes themselves are never candidates below, so seed the sum with them. */
    uint64_t sum = 2 + 3 + 5 + 7 + 11 + 13 + 17;
    /* Phase 2: primality-test the first turn of the wheel (skipping pBase[0] == 1). */
    for (i = 1; i < PBLEN; ++i){
        /*
         * Try the wheel primes up to floor(sqrt(n)) + 1.  Reaching the else
         * branch means the next divisor is past that bound, so n is prime.
         * The divisibility test itself cannot succeed here because every
         * pBase entry was built to be coprime to pArr.
         * Note: sqrt() is re-evaluated on every iteration although pBase[i]
         * does not change inside the inner loops.
         */
        for (j = 0; j < 7; ++j){
            if (pArr[j] <= (int)sqrt((double)pBase[i]) + 1){
                if (pBase[i] % pArr[j] == 0){
                    goto next2;
                }
            }
            else{
                sum += pBase[i];
                goto next2;
            }
        }
        /*
         * Continue with the wheel residues as trial divisors.  j starts at 1
         * because pBase[0] is 1, which divides everything.  pBase also holds
         * composites (e.g. 19*19), so some of these divisions are redundant.
         */
        for (j = 1; j < PBLEN; ++j){
            if (pBase[j] <= (int)sqrt((double)pBase[i]) + 1){
                if (pBase[i] % pBase[j] == 0){
                    goto next2;
                }
            }
            else{
                sum += pBase[i];
                goto next2;
            }
        }
        next2:;
    }
    /* temp2 is declared but never used. */
    int temp, temp2;
    /* Phase 3: later turns of the wheel; i is always a multiple of PBMAX. */
    for (i = PBMAX; ; i += PBMAX){
        for (j = 0; j < PBLEN; ++j){
            temp = i + pBase[j];
            /* Candidates are generated in ascending order, so the first one above MAXX ends the run. */
            if (temp > MAXX){
                endT = clock();
                printf("%"PRIu64"\n\n", sum);
                printf("%.3f\n", (double)(endT - startT) / (double)CLOCKS_PER_SEC);
                return 0;
            }
            /*
             * This check cannot succeed: i is a multiple of every wheel prime
             * and pBase[j] is coprime to all of them, so temp is too.
             */
            for (k = 0; k < 7; ++k){
                if (temp % pArr[k] == 0){
                    goto next3;
                }
            }
            /*
             * Trial division by the wheel residues up to floor(sqrt(temp)) + 1;
             * sqrt() is again recomputed on every iteration for an unchanged temp.
             */
            for (k = 1; k < PBLEN; ++k){
                if (pBase[k] <= (int)sqrt((double)temp) + 1){
                    if (temp % pBase[k] == 0){
                        goto next3;
                    }
                }
                else{
                    /* No divisor found up to the bound: temp is prime. */
                    sum += temp;
                    break;
                }
            }
            next3:;
        }
    }
}

Answer 1

仅从代码来看，我只能做个猜测。我猜到目前为止最耗时的部分是sqrt()。你有一个紧密的循环，在其中对一个不变的值反复调用sqrt()。gcc很可能决定只调用一次，保存返回值并重复使用。结果相同，但对sqrt()的调用次数大大减少，因此运行时间明显缩短。如果你手动把sqrt()的调用移出紧密循环、减少调用次数，很可能会看到同样的效果。不过，还是请运行一下性能分析器（profiler）来确认。

所以，简短的回答是：有时候gcc可以修复那些你自己本来也能解决的重大问题，但前提是它们的范围很小。通常你需要借助性能分析器（profiler），看看到底是什么真正占用了时间，以及如何解决这个问题。

什么时候gcc优化最有效？

1 个答案: