我的编译器有多聪明?

时间:2012-03-12 13:12:41

标签: c++ compiler-construction

我正在尝试测量计算机执行不同操作所需的大致周期数,因此我把同一个操作执行100K次并计算平均值。 为了更准确一点,我使用了循环展开(loop unwinding):在每次迭代中执行10次基本操作,并将索引增加10,从而减少循环本身带来的开销。

对于我的问题,这一切都不重要:编译器是否有任何方法可以理解我正在进行多次相同的操作而只执行一次? 这是我的循环:

// Manually unrolled timing loop: the same store to `result` is written out
// ten times per iteration while `i` advances by LOOP_FACTOR, so the loop's
// own compare/increment work is spread over several operations.
// NOTE(review): `i`, `iterations`, `LOOP_FACTOR` and `result` are declared
// outside this snippet; LOOP_FACTOR is presumably 10 to match the ten
// statements below -- confirm.
for (i=0; i<iterations; i+=LOOP_FACTOR)
{
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
}

另外,我不知道这是否重要 - 我正在使用Eclipse。我认为这可能很重要,因为那里有不同的编译器。

4 个答案:

答案 0 :(得分:16)

在没有优化的GCC中,它按原样编译:

(gdb) disas main
Dump of assembler code for function main:
   0x00000000004004e4 <+0>: push   rbp
   0x00000000004004e5 <+1>: mov    rbp,rsp
   0x00000000004004e8 <+4>: mov    DWORD PTR [rip+0x200482],0x0        # 0x600974 <i>
   0x00000000004004f2 <+14>:    jmp    0x400567 <main+131>
   0x00000000004004f4 <+16>:    mov    DWORD PTR [rip+0x200472],0xffffffff        # 0x600970 <result>
   0x00000000004004fe <+26>:    mov    DWORD PTR [rip+0x200468],0xffffffff        # 0x600970 <result>
   0x0000000000400508 <+36>:    mov    DWORD PTR [rip+0x20045e],0xffffffff        # 0x600970 <result>
   0x0000000000400512 <+46>:    mov    DWORD PTR [rip+0x200454],0xffffffff        # 0x600970 <result>
   0x000000000040051c <+56>:    mov    DWORD PTR [rip+0x20044a],0xffffffff        # 0x600970 <result>
   0x0000000000400526 <+66>:    mov    DWORD PTR [rip+0x200440],0xffffffff        # 0x600970 <result>
   0x0000000000400530 <+76>:    mov    DWORD PTR [rip+0x200436],0xffffffff        # 0x600970 <result>
   0x000000000040053a <+86>:    mov    DWORD PTR [rip+0x20042c],0xffffffff        # 0x600970 <result>
   0x0000000000400544 <+96>:    mov    DWORD PTR [rip+0x200422],0xffffffff        # 0x600970 <result>
   0x000000000040054e <+106>:   mov    DWORD PTR [rip+0x200418],0xffffffff        # 0x600970 <result>
   0x0000000000400558 <+116>:   mov    eax,DWORD PTR [rip+0x200416]        # 0x600974 <i>
   0x000000000040055e <+122>:   add    eax,0x1
   0x0000000000400561 <+125>:   mov    DWORD PTR [rip+0x20040d],eax        # 0x600974 <i>
   0x0000000000400567 <+131>:   mov    eax,DWORD PTR [rip+0x200407]        # 0x600974 <i>
   0x000000000040056d <+137>:   cmp    eax,0x3e7
   0x0000000000400572 <+142>:   jle    0x4004f4 <main+16>
   0x0000000000400574 <+144>:   mov    eax,DWORD PTR [rip+0x2003f6]        # 0x600970 <result>
   0x000000000040057a <+150>:   mov    esi,eax
   0x000000000040057c <+152>:   mov    edi,0x40067c
   0x0000000000400581 <+157>:   mov    eax,0x0
   0x0000000000400586 <+162>:   call   0x4003e0 <printf@plt>
   0x000000000040058b <+167>:   pop    rbp
   0x000000000040058c <+168>:   ret

但如果你使用基本优化(gcc -O)编译,那么这10次赋值会被缩减为一次写入(注意:从下面的输出可以看到,空的计数循环本身仍然被保留):

Dump of assembler code for function main:
   0x00000000004004e4 <+0>: sub    rsp,0x8
   0x00000000004004e8 <+4>: mov    eax,0x3e8
   0x00000000004004ed <+9>: sub    eax,0x1
   0x00000000004004f0 <+12>:    jne    0x4004ed <main+9>
   0x00000000004004f2 <+14>:    mov    DWORD PTR [rip+0x2003fc],0xffffffff        # 0x6008f8 <result>
   0x00000000004004fc <+24>:    mov    DWORD PTR [rip+0x2003f6],0x3e8        # 0x6008fc <i>
   0x0000000000400506 <+34>:    mov    esi,0xffffffff
   0x000000000040050b <+39>:    mov    edi,0x40060c
   0x0000000000400510 <+44>:    mov    eax,0x0
   0x0000000000400515 <+49>:    call   0x4003e0 <printf@plt>
   0x000000000040051a <+54>:    add    rsp,0x8
   0x000000000040051e <+58>:    ret  

我的测试代码是:

#include <stdio.h>  /* declares printf; required for the snippet to compile as C++ */

/* Number of loop iterations; each iteration performs ten identical stores. */
#define TIMES 1000

/* File-scope variables: the disassembly listings refer to them by the
   symbols <result> and <i>. */
int result, i;

/*
 * Test program: stores -1 into `result` ten times per iteration for TIMES
 * iterations, then prints the final value so that `result` is actually used.
 */
int main() {
    for (i=0; i<TIMES; i++)
    {
        /* Ten deliberately redundant stores; `result` is not volatile. */
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
    }
    printf("%d", result);
}

答案 1 :(得分:6)

编译器很可能会把这段代码优化掉。如果要测量 result = -1 这条语句的开销,那么应该使用 -O0 编译。您可能还应该生成一些不带循环的代码,以便分析单条指令。

答案 2 :(得分:5)

在没有优化的情况下分析代码没有太大意义。

相反,我建议将result声明为volatile

按照现在的写法,您的代码很可能会被优化为:

result = -1;

结果:

下面是在完全优化下编译的两个版本(第一个将result声明为volatile,第二个没有):

volatile int result = 10000;

00401000  mov         ecx,3E8h 
00401005  or          eax,0FFFFFFFFh 
00401008  jmp         wmain+10h (401010h) 
0040100A  lea         ebx,[ebx] 
00401010  sub         ecx,1 
    {
        result = -1;
00401013  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
00401018  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
0040101D  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
00401022  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
00401027  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
0040102C  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
00401031  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
00401036  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
0040103B  mov         dword ptr [result (40301Ch)],eax 
        result = -1;
00401040  mov         dword ptr [result (40301Ch)],eax 
00401045  jne         wmain+10h (401010h) 
    }
    cout << result;
00401047  mov         eax,dword ptr [result (40301Ch)] 
0040104C  mov         ecx,dword ptr [__imp_std::cout (402038h)] 
00401052  push        eax  
00401053  call        dword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (40203Ch)] 

int result = 10000;

    for (int i=0; i< 1000 ; i += 1)
    {
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
        result = -1;
    }
    cout << result;
00401000  mov         ecx,dword ptr [__imp_std::cout (402038h)] 
00401006  push        0FFFFFFFFh 
00401008  mov         dword ptr [result (40301Ch)],0FFFFFFFFh 
00401012  call        dword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (40203Ch)] 

答案 3 :(得分:2)

这取决于编译器的优化级别。因此,它可以通过以下几种方式进行优化:

  • 重复的语句将被折叠成1个表达式(通过消除死赋值/冗余赋值)
  • 循环本身也会被折叠:因为执行1次迭代与执行1000次迭代的结果没有任何区别,所以整个循环可以折叠为内部表达式的单个实例。

如果将result声明为volatile,情况会变得有点棘手:这会“阻止”编译器假设它的值不会在循环或表达式序列之外发生变化,甚至可能足以防止内部的10条语句被合并为1条。

测试此方法的最佳方法是使用objdump或调试器等方法检查编译器的输出。