使用 ocount 对这段代码进行性能分析时,定义 PENALTY_ON 的版本消耗的周期数(cycles)更多,未定义 PENALTY_ON 的版本消耗的周期数较少。我想弄清楚:为什么开启 PENALTY_ON 标志后会消耗更多的周期?
#include <stdint.h>
#include <string.h>

/* Number of arr elements the loops walk over, two per iteration. */
enum { ELEMS_WRITTEN = 1000 };

uint16_t arr[1010];
uint32_t r[500];

/*
 * Read four bytes starting at a 16-bit element and return them as a
 * uint32_t in the machine's native byte order.
 *
 * The source address is only guaranteed to be aligned for uint16_t, so
 * dereferencing it through a uint32_t pointer would be both a misaligned
 * access and a strict-aliasing violation (undefined behaviour). memcpy
 * expresses the same 4-byte load without either problem.
 */
static inline uint32_t load_u32(const uint16_t *src)
{
    uint32_t value;

    memcpy(&value, src, sizeof value);
    return value;
}

/*
 * Fill arr[0..999] in pairs (arr[i] = i, arr[i+1] = i + 10 for even i) and
 * store into r[i/2] the 32-bit value formed by the four bytes beginning at
 * arr[i+1], i.e. the bytes of arr[i+1] followed by those of arr[i+2].
 *
 * With PENALTY_ON defined, the 32-bit load happens inside the filling loop,
 * immediately after the 16-bit store to arr[i+1] and before arr[i+2] has been
 * written by this loop.
 * Without PENALTY_ON, arr is filled completely first and the 32-bit loads run
 * in a second, separate loop.
 */
void func(void)
{
    for (uint32_t i = 0; i < ELEMS_WRITTEN; i += 2) {
        arr[i] = (uint16_t)i;
        arr[i + 1] = (uint16_t)(i + 10);
#ifdef PENALTY_ON
        /* 32-bit load overlapping the 16-bit store made just above. */
        r[i / 2] = load_u32(&arr[i + 1]);
#endif
    }
#ifndef PENALTY_ON
    for (uint32_t i = 0; i < ELEMS_WRITTEN; i += 2) {
        r[i / 2] = load_u32(&arr[i + 1]);
    }
#endif
}
答案 0 :(得分:3)
在 32 位机器上使用 gcc 以 -O3 编译。
定义 PENALTY_ON 时的反汇编:
00000000 <func>:
0: 31 c0 xor %eax,%eax
2: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
8: 8d 50 0a lea 0xa(%eax),%edx
b: 66 89 94 00 02 00 00 mov %dx,0x2(%eax,%eax,1)
12: 00
13: 8b 8c 00 02 00 00 00 mov 0x2(%eax,%eax,1),%ecx
1a: 89 c2 mov %eax,%edx
1c: 66 89 84 00 00 00 00 mov %ax,0x0(%eax,%eax,1)
23: 00
24: 83 c0 02 add $0x2,%eax
27: d1 ea shr %edx
29: 3d e8 03 00 00 cmp $0x3e8,%eax
2e: 89 0c 95 00 00 00 00 mov %ecx,0x0(,%edx,4)
35: 75 d1 jne 8 <func+0x8>
37: f3 c3 repz ret
未定义 PENALTY_ON 时的反汇编:
00000000 <func>:
0: 31 c0 xor %eax,%eax
2: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
8: 8d 50 0a lea 0xa(%eax),%edx
b: 66 89 84 00 00 00 00 mov %ax,0x0(%eax,%eax,1)
12: 00
13: 66 89 94 00 02 00 00 mov %dx,0x2(%eax,%eax,1)
1a: 00
1b: 83 c0 02 add $0x2,%eax
1e: 3d e8 03 00 00 cmp $0x3e8,%eax
23: 75 e3 jne 8 <func+0x8>
25: 66 31 c0 xor %ax,%ax
28: 8b 8c 00 02 00 00 00 mov 0x2(%eax,%eax,1),%ecx
2f: 89 c2 mov %eax,%edx
31: 83 c0 02 add $0x2,%eax
34: d1 ea shr %edx
36: 3d e8 03 00 00 cmp $0x3e8,%eax
3b: 89 0c 95 00 00 00 00 mov %ecx,0x0(,%edx,4)
42: 75 e4 jne 28 <func+0x28>
44: f3 c3 repz ret
我认为原因在于定义 PENALTY_ON 时生成的下面这两条指令:
b: 66 89 94 00 02 00 00 mov %dx,0x2(%eax,%eax,1)
12: 00
13: 8b 8c 00 02 00 00 00 mov 0x2(%eax,%eax,1),%ecx