我正在尝试估算计算机执行不同操作所需的周期数,因此我把同一个操作执行100K次,然后计算平均值。 为了更准确一些,我使用了循环展开(loop unrolling):每次迭代中执行10次基本操作,并将索引增加10,从而减少循环本身的开销。
对于我的问题,这一切都不重要:编译器是否有任何方法可以理解我正在进行多次相同的操作而只执行一次? 这是我的循环:
// Manually unrolled timing loop: each pass executes the measured statement
// ten times and then advances i by LOOP_FACTOR, so the loop bookkeeping
// (increment, compare, branch) is paid once per ten statements.
// NOTE(review): LOOP_FACTOR is presumably 10 to match the ten statements
// below; its definition is not shown here — confirm.
for (i=0; i<iterations; i+=LOOP_FACTOR)
{
// Ten identical stores of the same constant to the same variable.
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
}
另外,我不知道这是否重要 - 我正在使用Eclipse。我认为这可能很重要,因为那里有不同的编译器。
答案 0 :(得分:16)
在没有优化的GCC中,它按原样编译:
(gdb) disas main
Dump of assembler code for function main:
0x00000000004004e4 <+0>: push rbp
0x00000000004004e5 <+1>: mov rbp,rsp
0x00000000004004e8 <+4>: mov DWORD PTR [rip+0x200482],0x0 # 0x600974 <i>
0x00000000004004f2 <+14>: jmp 0x400567 <main+131>
0x00000000004004f4 <+16>: mov DWORD PTR [rip+0x200472],0xffffffff # 0x600970 <result>
0x00000000004004fe <+26>: mov DWORD PTR [rip+0x200468],0xffffffff # 0x600970 <result>
0x0000000000400508 <+36>: mov DWORD PTR [rip+0x20045e],0xffffffff # 0x600970 <result>
0x0000000000400512 <+46>: mov DWORD PTR [rip+0x200454],0xffffffff # 0x600970 <result>
0x000000000040051c <+56>: mov DWORD PTR [rip+0x20044a],0xffffffff # 0x600970 <result>
0x0000000000400526 <+66>: mov DWORD PTR [rip+0x200440],0xffffffff # 0x600970 <result>
0x0000000000400530 <+76>: mov DWORD PTR [rip+0x200436],0xffffffff # 0x600970 <result>
0x000000000040053a <+86>: mov DWORD PTR [rip+0x20042c],0xffffffff # 0x600970 <result>
0x0000000000400544 <+96>: mov DWORD PTR [rip+0x200422],0xffffffff # 0x600970 <result>
0x000000000040054e <+106>: mov DWORD PTR [rip+0x200418],0xffffffff # 0x600970 <result>
0x0000000000400558 <+116>: mov eax,DWORD PTR [rip+0x200416] # 0x600974 <i>
0x000000000040055e <+122>: add eax,0x1
0x0000000000400561 <+125>: mov DWORD PTR [rip+0x20040d],eax # 0x600974 <i>
0x0000000000400567 <+131>: mov eax,DWORD PTR [rip+0x200407] # 0x600974 <i>
0x000000000040056d <+137>: cmp eax,0x3e7
0x0000000000400572 <+142>: jle 0x4004f4 <main+16>
0x0000000000400574 <+144>: mov eax,DWORD PTR [rip+0x2003f6] # 0x600970 <result>
0x000000000040057a <+150>: mov esi,eax
0x000000000040057c <+152>: mov edi,0x40067c
0x0000000000400581 <+157>: mov eax,0x0
0x0000000000400586 <+162>: call 0x4003e0 <printf@plt>
0x000000000040058b <+167>: pop rbp
0x000000000040058c <+168>: ret
但如果你使用基本优化(gcc -O)编译,那么它会缩短为一次写入:
Dump of assembler code for function main:
0x00000000004004e4 <+0>: sub rsp,0x8
0x00000000004004e8 <+4>: mov eax,0x3e8
0x00000000004004ed <+9>: sub eax,0x1
0x00000000004004f0 <+12>: jne 0x4004ed <main+9>
0x00000000004004f2 <+14>: mov DWORD PTR [rip+0x2003fc],0xffffffff # 0x6008f8 <result>
0x00000000004004fc <+24>: mov DWORD PTR [rip+0x2003f6],0x3e8 # 0x6008fc <i>
0x0000000000400506 <+34>: mov esi,0xffffffff
0x000000000040050b <+39>: mov edi,0x40060c
0x0000000000400510 <+44>: mov eax,0x0
0x0000000000400515 <+49>: call 0x4003e0 <printf@plt>
0x000000000040051a <+54>: add rsp,0x8
0x000000000040051e <+58>: ret
我的测试代码是:
// Number of loop iterations executed by main.
#define TIMES 1000
// File-scope variables: the store target and the loop counter.
// Neither is declared volatile.
int result, i;
// Minimal test program: stores -1 into result ten times per iteration for
// TIMES iterations, then prints the final value of result.
// NOTE(review): printf is called but no #include <stdio.h> is visible in
// this snippet.
int main() {
for (i=0; i<TIMES; i++)
{
// Ten identical stores of the same constant; because result is a plain
// (non-volatile) object, an optimizing compiler is allowed to merge them.
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
}
// Reading result here keeps its final value observable.
printf("%d", result);
}
答案 1 :(得分:6)
它很可能会把这段代码优化掉。如果要分析 result = -1 这一操作,那么应该使用 -O0 编译。您可能还应该生成一些不含循环的代码,以便分析单条指令。
答案 2 :(得分:5)
在没有优化的情况下分析代码没有太大意义。
相反,我建议将 result 声明为 volatile。
按目前的写法,您的代码很可能会被优化为:
result = -1;
结果
两个版本的代码在完全优化下编译的结果:
00401000 mov ecx,3E8h
00401005 or eax,0FFFFFFFFh
00401008 jmp wmain+10h (401010h)
0040100A lea ebx,[ebx]
00401010 sub ecx,1
{
result = -1;
00401013 mov dword ptr [result (40301Ch)],eax
result = -1;
00401018 mov dword ptr [result (40301Ch)],eax
result = -1;
0040101D mov dword ptr [result (40301Ch)],eax
result = -1;
00401022 mov dword ptr [result (40301Ch)],eax
result = -1;
00401027 mov dword ptr [result (40301Ch)],eax
result = -1;
0040102C mov dword ptr [result (40301Ch)],eax
result = -1;
00401031 mov dword ptr [result (40301Ch)],eax
result = -1;
00401036 mov dword ptr [result (40301Ch)],eax
result = -1;
0040103B mov dword ptr [result (40301Ch)],eax
result = -1;
00401040 mov dword ptr [result (40301Ch)],eax
00401045 jne wmain+10h (401010h)
}
cout << result;
00401047 mov eax,dword ptr [result (40301Ch)]
0040104C mov ecx,dword ptr [__imp_std::cout (402038h)]
00401052 push eax
00401053 call dword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (40203Ch)]
for (int i=0; i< 1000 ; i += 1)
{
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
result = -1;
}
cout << result;
00401000 mov ecx,dword ptr [__imp_std::cout (402038h)]
00401006 push 0FFFFFFFFh
00401008 mov dword ptr [result (40301Ch)],0FFFFFFFFh
00401012 call dword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (40203Ch)]
答案 3 :(得分:2)
这取决于编译器的优化级别。因此,它可以通过以下几种方式进行优化:
如果将 result 声明为 volatile,情况会变得有点棘手:这会“阻止”编译器假设它的值不会在循环或表达式序列之外发生变化,甚至可能足以防止循环内的10条语句被合并为1条。
验证这一点的最佳方法是使用 objdump 或调试器等工具检查编译器的输出。