clang为简单的阶乘函数生成代码

时间:2016-10-27 16:23:21

标签: c++ gcc assembly clang

以下是代码:

unsigned int factorial(unsigned int n) {
    if (n == 0) return 1;
    return n * factorial(n - 1);
}

int main() {
    return factorial(0);
}

我使用

生成gcc和clang程序集
g++ -g -O2 -c factorial.cpp (resp clang++)
objdump -d -M intel -S factorial.o

这是我获得的gcc

factorial.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <_Z9factorialj>:
unsigned int factorial(unsigned int n)
{
    if (n == 0)
   0:   85 ff                   test   edi,edi
   2:   b8 01 00 00 00          mov    eax,0x1
   7:   74 11                   je     1a <_Z9factorialj+0x1a>
   9:   0f 1f 80 00 00 00 00    nop    DWORD PTR [rax+0x0]
  10:   0f af c7                imul   eax,edi
  13:   83 ef 01                sub    edi,0x1
  16:   75 f8                   jne    10 <_Z9factorialj+0x10>
  18:   f3 c3                   repz ret 
    {
        return 1;
    }

    return n * factorial(n - 1);
}
  1a:   f3 c3                   repz ret 

Disassembly of section .text.startup:

0000000000000000 <main>:
    if (n == 0)
   0:   b8 01 00 00 00          mov    eax,0x1
   5:   c3                      ret    

和clang

factorial.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <_Z9factorialj>:
unsigned int factorial(unsigned int n)
{
   0:   b8 01 00 00 00          mov    eax,0x1
    if (n == 0)
   5:   85 ff                   test   edi,edi
   7:   0f 84 ba 01 00 00       je     1c7 <_Z9factorialj+0x1c7>
   d:   b8 01 00 00 00          mov    eax,0x1
    {
        return 1;
    }

    return n * factorial(n - 1);
  12:   83 ff 08                cmp    edi,0x8
  15:   0f 82 a5 01 00 00       jb     1c0 <_Z9factorialj+0x1c0>
  1b:   41 89 f8                mov    r8d,edi
  1e:   41 83 e0 f8             and    r8d,0xfffffff8
  22:   89 fa                   mov    edx,edi
  24:   83 e2 f8                and    edx,0xfffffff8
  27:   0f 84 93 01 00 00       je     1c0 <_Z9factorialj+0x1c0>
  2d:   8d 4f f8                lea    ecx,[rdi-0x8]
  30:   89 c8                   mov    eax,ecx
  32:   c1 e8 03                shr    eax,0x3
  35:   0f ba e1 03             bt     ecx,0x3
  39:   72 24                   jb     5f <_Z9factorialj+0x5f>
  3b:   66 0f 6e c7             movd   xmm0,edi
  3f:   66 0f 70 c8 00          pshufd xmm1,xmm0,0x0
  44:   66 0f 6f 05 00 00 00    movdqa xmm0,XMMWORD PTR [rip+0x0]        # 4c <_Z9factorialj+0x4c>
  4b:   00 
  4c:   66 0f fe c1             paddd  xmm0,xmm1
  50:   66 0f fe 0d 00 00 00    paddd  xmm1,XMMWORD PTR [rip+0x0]        # 58 <_Z9factorialj+0x58>
  57:   00 
  58:   b9 08 00 00 00          mov    ecx,0x8
  5d:   eb 0e                   jmp    6d <_Z9factorialj+0x6d>
  5f:   66 0f 6f 05 00 00 00    movdqa xmm0,XMMWORD PTR [rip+0x0]        # 67 <_Z9factorialj+0x67>
  66:   00 
  67:   31 c9                   xor    ecx,ecx
  69:   66 0f 6f c8             movdqa xmm1,xmm0
  6d:   85 c0                   test   eax,eax
  6f:   0f 84 d4 00 00 00       je     149 <_Z9factorialj+0x149>
  75:   89 d0                   mov    eax,edx
  77:   29 c8                   sub    eax,ecx
  79:   89 fe                   mov    esi,edi
  7b:   29 ce                   sub    esi,ecx
  7d:   66 0f 6f 15 00 00 00    movdqa xmm2,XMMWORD PTR [rip+0x0]        # 85 <_Z9factorialj+0x85>
  84:   00 
  85:   66 0f 6f 1d 00 00 00    movdqa xmm3,XMMWORD PTR [rip+0x0]        # 8d <_Z9factorialj+0x8d>
  8c:   00 
  8d:   0f 1f 00                nop    DWORD PTR [rax]
  90:   66 0f 6e e6             movd   xmm4,esi
  94:   66 0f 70 e4 00          pshufd xmm4,xmm4,0x0
  99:   66 0f 6f ec             movdqa xmm5,xmm4
  9d:   66 0f fe ea             paddd  xmm5,xmm2
  a1:   66 0f fe e3             paddd  xmm4,xmm3
  a5:   66 0f 70 f5 f5          pshufd xmm6,xmm5,0xf5
  aa:   66 0f f4 e8             pmuludq xmm5,xmm0
  ae:   66 0f 70 ed e8          pshufd xmm5,xmm5,0xe8
  b3:   66 0f 70 c0 f5          pshufd xmm0,xmm0,0xf5
  b8:   66 0f f4 c6             pmuludq xmm0,xmm6
  bc:   66 0f 70 c0 e8          pshufd xmm0,xmm0,0xe8
  c1:   66 0f 62 e8             punpckldq xmm5,xmm0
  c5:   66 0f 70 c4 f5          pshufd xmm0,xmm4,0xf5
  ca:   66 0f f4 e1             pmuludq xmm4,xmm1
  ce:   66 0f 70 e4 e8          pshufd xmm4,xmm4,0xe8
  d3:   66 0f 70 c9 f5          pshufd xmm1,xmm1,0xf5
  d8:   66 0f f4 c8             pmuludq xmm1,xmm0
  dc:   66 0f 70 c1 e8          pshufd xmm0,xmm1,0xe8
  e1:   66 0f 62 e0             punpckldq xmm4,xmm0
  e5:   8d 4e f8                lea    ecx,[rsi-0x8]
  e8:   66 0f 6e c1             movd   xmm0,ecx
  ec:   66 0f 70 c8 00          pshufd xmm1,xmm0,0x0
  f1:   66 0f 6f c1             movdqa xmm0,xmm1
  f5:   66 0f fe c2             paddd  xmm0,xmm2
  f9:   66 0f fe cb             paddd  xmm1,xmm3
  fd:   66 0f 70 f0 f5          pshufd xmm6,xmm0,0xf5
 102:   66 0f f4 c5             pmuludq xmm0,xmm5
 106:   66 0f 70 c0 e8          pshufd xmm0,xmm0,0xe8
 10b:   66 0f 70 ed f5          pshufd xmm5,xmm5,0xf5
 110:   66 0f f4 ee             pmuludq xmm5,xmm6
 114:   66 0f 70 ed e8          pshufd xmm5,xmm5,0xe8
 119:   66 0f 62 c5             punpckldq xmm0,xmm5
 11d:   66 0f 70 e9 f5          pshufd xmm5,xmm1,0xf5
 122:   66 0f f4 cc             pmuludq xmm1,xmm4
 126:   66 0f 70 c9 e8          pshufd xmm1,xmm1,0xe8
 12b:   66 0f 70 e4 f5          pshufd xmm4,xmm4,0xf5
 130:   66 0f f4 e5             pmuludq xmm4,xmm5
 134:   66 0f 70 e4 e8          pshufd xmm4,xmm4,0xe8
 139:   66 0f 62 cc             punpckldq xmm1,xmm4
 13d:   83 c6 f0                add    esi,0xfffffff0
 140:   83 c0 f0                add    eax,0xfffffff0
 143:   0f 85 47 ff ff ff       jne    90 <_Z9factorialj+0x90>
 149:   66 0f 70 d1 f5          pshufd xmm2,xmm1,0xf5
 14e:   66 0f f4 c8             pmuludq xmm1,xmm0
 152:   66 0f 70 c9 e8          pshufd xmm1,xmm1,0xe8
 157:   66 0f 70 c0 f5          pshufd xmm0,xmm0,0xf5
 15c:   66 0f f4 c2             pmuludq xmm0,xmm2
 160:   66 0f 70 c0 e8          pshufd xmm0,xmm0,0xe8
 165:   66 0f 62 c8             punpckldq xmm1,xmm0
 169:   66 0f 70 c1 4e          pshufd xmm0,xmm1,0x4e
 16e:   66 0f 70 d1 f5          pshufd xmm2,xmm1,0xf5
 173:   66 0f f4 c8             pmuludq xmm1,xmm0
 177:   66 0f 70 c9 e8          pshufd xmm1,xmm1,0xe8
 17c:   66 0f 70 c0 f5          pshufd xmm0,xmm0,0xf5
 181:   66 0f f4 c2             pmuludq xmm0,xmm2
 185:   66 0f 70 c0 e8          pshufd xmm0,xmm0,0xe8
 18a:   66 0f 62 c8             punpckldq xmm1,xmm0
 18e:   66 0f 70 c1 e5          pshufd xmm0,xmm1,0xe5
 193:   66 0f 70 d1 f5          pshufd xmm2,xmm1,0xf5
 198:   66 0f f4 c8             pmuludq xmm1,xmm0
 19c:   66 0f 70 c9 e8          pshufd xmm1,xmm1,0xe8
 1a1:   66 0f 70 c0 f5          pshufd xmm0,xmm0,0xf5
 1a6:   66 0f f4 c2             pmuludq xmm0,xmm2
 1aa:   66 0f 70 c0 e8          pshufd xmm0,xmm0,0xe8
 1af:   66 0f 62 c8             punpckldq xmm1,xmm0
 1b3:   66 0f 7e c8             movd   eax,xmm1
 1b7:   39 fa                   cmp    edx,edi
 1b9:   74 0c                   je     1c7 <_Z9factorialj+0x1c7>
 1bb:   44 29 c7                sub    edi,r8d
 1be:   66 90                   xchg   ax,ax
 1c0:   0f af c7                imul   eax,edi
    if (n == 0)
 1c3:   ff cf                   dec    edi
 1c5:   75 f9                   jne    1c0 <_Z9factorialj+0x1c0>
}
 1c7:   c3                      ret    
 1c8:   0f 1f 84 00 00 00 00    nop    DWORD PTR [rax+rax*1+0x0]
 1cf:   00 

00000000000001d0 <main>:

int main()
{
    return factorial(0);
 1d0:   b8 01 00 00 00          mov    eax,0x1
 1d5:   c3                      ret    

我知道他们都注意到他们可以展开循环并预先计算价值,但我不知道为什么clang生成所有这些代码(我不知道这可以做什么以及为什么有sse的东西。 ..)

奖金问题:当gcc预先计算到factorial(7)的值时,clang会为任何值计算它。 最多factorial(31)值看起来很好,对于factorial(32)factorial(33),它会返回0x80000000,对于更大的值,它会返回xor eax, eax。这个黑魔法是什么?

0 个答案:

没有答案