以下是代码:
unsigned int factorial(unsigned int n) {
if (n == 0) return 1;
return n * factorial(n - 1);
}
int main() {
return factorial(0);
}
我使用
生成gcc和clang程序集g++ -g -O2 -c factorial.cpp (resp clang++)
objdump -d -M intel -S factorial.o
这是我获得的gcc
factorial.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z9factorialj>:
unsigned int factorial(unsigned int n)
{
if (n == 0)
0: 85 ff test edi,edi
2: b8 01 00 00 00 mov eax,0x1
7: 74 11 je 1a <_Z9factorialj+0x1a>
9: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
10: 0f af c7 imul eax,edi
13: 83 ef 01 sub edi,0x1
16: 75 f8 jne 10 <_Z9factorialj+0x10>
18: f3 c3 repz ret
{
return 1;
}
return n * factorial(n - 1);
}
1a: f3 c3 repz ret
Disassembly of section .text.startup:
0000000000000000 <main>:
if (n == 0)
0: b8 01 00 00 00 mov eax,0x1
5: c3 ret
和clang
factorial.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z9factorialj>:
unsigned int factorial(unsigned int n)
{
0: b8 01 00 00 00 mov eax,0x1
if (n == 0)
5: 85 ff test edi,edi
7: 0f 84 ba 01 00 00 je 1c7 <_Z9factorialj+0x1c7>
d: b8 01 00 00 00 mov eax,0x1
{
return 1;
}
return n * factorial(n - 1);
12: 83 ff 08 cmp edi,0x8
15: 0f 82 a5 01 00 00 jb 1c0 <_Z9factorialj+0x1c0>
1b: 41 89 f8 mov r8d,edi
1e: 41 83 e0 f8 and r8d,0xfffffff8
22: 89 fa mov edx,edi
24: 83 e2 f8 and edx,0xfffffff8
27: 0f 84 93 01 00 00 je 1c0 <_Z9factorialj+0x1c0>
2d: 8d 4f f8 lea ecx,[rdi-0x8]
30: 89 c8 mov eax,ecx
32: c1 e8 03 shr eax,0x3
35: 0f ba e1 03 bt ecx,0x3
39: 72 24 jb 5f <_Z9factorialj+0x5f>
3b: 66 0f 6e c7 movd xmm0,edi
3f: 66 0f 70 c8 00 pshufd xmm1,xmm0,0x0
44: 66 0f 6f 05 00 00 00 movdqa xmm0,XMMWORD PTR [rip+0x0] # 4c <_Z9factorialj+0x4c>
4b: 00
4c: 66 0f fe c1 paddd xmm0,xmm1
50: 66 0f fe 0d 00 00 00 paddd xmm1,XMMWORD PTR [rip+0x0] # 58 <_Z9factorialj+0x58>
57: 00
58: b9 08 00 00 00 mov ecx,0x8
5d: eb 0e jmp 6d <_Z9factorialj+0x6d>
5f: 66 0f 6f 05 00 00 00 movdqa xmm0,XMMWORD PTR [rip+0x0] # 67 <_Z9factorialj+0x67>
66: 00
67: 31 c9 xor ecx,ecx
69: 66 0f 6f c8 movdqa xmm1,xmm0
6d: 85 c0 test eax,eax
6f: 0f 84 d4 00 00 00 je 149 <_Z9factorialj+0x149>
75: 89 d0 mov eax,edx
77: 29 c8 sub eax,ecx
79: 89 fe mov esi,edi
7b: 29 ce sub esi,ecx
7d: 66 0f 6f 15 00 00 00 movdqa xmm2,XMMWORD PTR [rip+0x0] # 85 <_Z9factorialj+0x85>
84: 00
85: 66 0f 6f 1d 00 00 00 movdqa xmm3,XMMWORD PTR [rip+0x0] # 8d <_Z9factorialj+0x8d>
8c: 00
8d: 0f 1f 00 nop DWORD PTR [rax]
90: 66 0f 6e e6 movd xmm4,esi
94: 66 0f 70 e4 00 pshufd xmm4,xmm4,0x0
99: 66 0f 6f ec movdqa xmm5,xmm4
9d: 66 0f fe ea paddd xmm5,xmm2
a1: 66 0f fe e3 paddd xmm4,xmm3
a5: 66 0f 70 f5 f5 pshufd xmm6,xmm5,0xf5
aa: 66 0f f4 e8 pmuludq xmm5,xmm0
ae: 66 0f 70 ed e8 pshufd xmm5,xmm5,0xe8
b3: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
b8: 66 0f f4 c6 pmuludq xmm0,xmm6
bc: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
c1: 66 0f 62 e8 punpckldq xmm5,xmm0
c5: 66 0f 70 c4 f5 pshufd xmm0,xmm4,0xf5
ca: 66 0f f4 e1 pmuludq xmm4,xmm1
ce: 66 0f 70 e4 e8 pshufd xmm4,xmm4,0xe8
d3: 66 0f 70 c9 f5 pshufd xmm1,xmm1,0xf5
d8: 66 0f f4 c8 pmuludq xmm1,xmm0
dc: 66 0f 70 c1 e8 pshufd xmm0,xmm1,0xe8
e1: 66 0f 62 e0 punpckldq xmm4,xmm0
e5: 8d 4e f8 lea ecx,[rsi-0x8]
e8: 66 0f 6e c1 movd xmm0,ecx
ec: 66 0f 70 c8 00 pshufd xmm1,xmm0,0x0
f1: 66 0f 6f c1 movdqa xmm0,xmm1
f5: 66 0f fe c2 paddd xmm0,xmm2
f9: 66 0f fe cb paddd xmm1,xmm3
fd: 66 0f 70 f0 f5 pshufd xmm6,xmm0,0xf5
102: 66 0f f4 c5 pmuludq xmm0,xmm5
106: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
10b: 66 0f 70 ed f5 pshufd xmm5,xmm5,0xf5
110: 66 0f f4 ee pmuludq xmm5,xmm6
114: 66 0f 70 ed e8 pshufd xmm5,xmm5,0xe8
119: 66 0f 62 c5 punpckldq xmm0,xmm5
11d: 66 0f 70 e9 f5 pshufd xmm5,xmm1,0xf5
122: 66 0f f4 cc pmuludq xmm1,xmm4
126: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
12b: 66 0f 70 e4 f5 pshufd xmm4,xmm4,0xf5
130: 66 0f f4 e5 pmuludq xmm4,xmm5
134: 66 0f 70 e4 e8 pshufd xmm4,xmm4,0xe8
139: 66 0f 62 cc punpckldq xmm1,xmm4
13d: 83 c6 f0 add esi,0xfffffff0
140: 83 c0 f0 add eax,0xfffffff0
143: 0f 85 47 ff ff ff jne 90 <_Z9factorialj+0x90>
149: 66 0f 70 d1 f5 pshufd xmm2,xmm1,0xf5
14e: 66 0f f4 c8 pmuludq xmm1,xmm0
152: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
157: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
15c: 66 0f f4 c2 pmuludq xmm0,xmm2
160: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
165: 66 0f 62 c8 punpckldq xmm1,xmm0
169: 66 0f 70 c1 4e pshufd xmm0,xmm1,0x4e
16e: 66 0f 70 d1 f5 pshufd xmm2,xmm1,0xf5
173: 66 0f f4 c8 pmuludq xmm1,xmm0
177: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
17c: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
181: 66 0f f4 c2 pmuludq xmm0,xmm2
185: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
18a: 66 0f 62 c8 punpckldq xmm1,xmm0
18e: 66 0f 70 c1 e5 pshufd xmm0,xmm1,0xe5
193: 66 0f 70 d1 f5 pshufd xmm2,xmm1,0xf5
198: 66 0f f4 c8 pmuludq xmm1,xmm0
19c: 66 0f 70 c9 e8 pshufd xmm1,xmm1,0xe8
1a1: 66 0f 70 c0 f5 pshufd xmm0,xmm0,0xf5
1a6: 66 0f f4 c2 pmuludq xmm0,xmm2
1aa: 66 0f 70 c0 e8 pshufd xmm0,xmm0,0xe8
1af: 66 0f 62 c8 punpckldq xmm1,xmm0
1b3: 66 0f 7e c8 movd eax,xmm1
1b7: 39 fa cmp edx,edi
1b9: 74 0c je 1c7 <_Z9factorialj+0x1c7>
1bb: 44 29 c7 sub edi,r8d
1be: 66 90 xchg ax,ax
1c0: 0f af c7 imul eax,edi
if (n == 0)
1c3: ff cf dec edi
1c5: 75 f9 jne 1c0 <_Z9factorialj+0x1c0>
}
1c7: c3 ret
1c8: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
1cf: 00
00000000000001d0 <main>:
int main()
{
return factorial(0);
1d0: b8 01 00 00 00 mov eax,0x1
1d5: c3 ret
我知道他们都注意到他们可以展开循环并预先计算价值,但我不知道为什么clang生成所有这些代码(我不知道这可以做什么以及为什么有sse的东西。 ..)
奖金问题:当gcc预先计算到factorial(7)的值时,clang会为任何值计算它。
最多factorial(31)
值看起来很好,对于factorial(32)
和factorial(33)
,它会返回0x80000000
,对于更大的值,它会返回xor eax, eax
。这个黑魔法是什么?