在汇编中,调用带有大量参数的函数是如何工作的

时间:2018-07-21 01:29:51

标签: c linux function assembly x86-64

这是一组示例函数,第一个有20个参数,第二个有2个参数:

/* Returns the product of all twenty int arguments n1 .. n20. */
int a(int n1, int n2, int n3, int n4, int n5, int n6, int n7, int n8, int n9, int n10, int n11, int n12, int n13, int n14, int n15, int n16, int n17, int n18, int n19, int n20) {
    return n1 * n2 * n3 * n4 * n5 * n6 * n7 * n8 * n9 * n10 * n11 * n12 * n13 * n14 * n15 * n16 * n17 * n18 * n19 * n20;
}

/*
 * Calls a() three times, each time with the same 20-argument list built
 * from n1 and n2, and returns the sum of the three results.
 */
int b(int n1, int n2) {
    return a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1)
      + a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1)
      + a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1);
}

它被编译成如下汇编代码:

; a(n1 .. n20): leaves the product of all twenty int arguments in eax.
; n1..n6 are read from edi, esi, edx, ecx, r8d, r9d; n7..n20 are read from
; the stack at [rbp+16] .. [rbp+120], one 8-byte slot per argument.
a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int):
  push    rbp                           ; save the caller's frame pointer
  mov     rbp, rsp                      ; rbp = base of this frame
  mov     DWORD PTR [rbp-4], edi        ; spill n1 (stored below rsp; rsp is never adjusted)
  mov     DWORD PTR [rbp-8], esi        ; spill n2
  mov     DWORD PTR [rbp-12], edx       ; spill n3
  mov     DWORD PTR [rbp-16], ecx       ; spill n4
  mov     DWORD PTR [rbp-20], r8d       ; spill n5
  mov     DWORD PTR [rbp-24], r9d       ; spill n6
  mov     eax, DWORD PTR [rbp-4]        ; eax = n1
  imul    eax, DWORD PTR [rbp-8]        ; eax *= n2
  imul    eax, DWORD PTR [rbp-12]       ; eax *= n3
  imul    eax, DWORD PTR [rbp-16]       ; eax *= n4
  imul    eax, DWORD PTR [rbp-20]       ; eax *= n5
  imul    eax, DWORD PTR [rbp-24]       ; eax *= n6
  imul    eax, DWORD PTR [rbp+16]       ; eax *= n7  (first stack argument)
  imul    eax, DWORD PTR [rbp+24]       ; eax *= n8
  imul    eax, DWORD PTR [rbp+32]       ; eax *= n9
  imul    eax, DWORD PTR [rbp+40]       ; eax *= n10
  imul    eax, DWORD PTR [rbp+48]       ; eax *= n11
  imul    eax, DWORD PTR [rbp+56]       ; eax *= n12
  imul    eax, DWORD PTR [rbp+64]       ; eax *= n13
  imul    eax, DWORD PTR [rbp+72]       ; eax *= n14
  imul    eax, DWORD PTR [rbp+80]       ; eax *= n15
  imul    eax, DWORD PTR [rbp+88]       ; eax *= n16
  imul    eax, DWORD PTR [rbp+96]       ; eax *= n17
  imul    eax, DWORD PTR [rbp+104]      ; eax *= n18
  imul    eax, DWORD PTR [rbp+112]      ; eax *= n19
  imul    eax, DWORD PTR [rbp+120]      ; eax *= n20 (last stack argument)
  pop     rbp                           ; restore the caller's frame pointer
  ret                                   ; result is returned in eax
; b(n1, n2): calls a() three times with the same 20 arguments and returns
; the sum of the three results in eax.
; Locals: [rbp-12] = n1, [rbp-16] = n2, [rbp-8] = saved rbx.
; ebx accumulates the partial sum across the calls.
b(int, int):
  push    rbp                           ; save the caller's frame pointer
  mov     rbp, rsp                      ; rbp = base of this frame
  push    rbx                           ; rbx is used as accumulator; saved at [rbp-8]
  sub     rsp, 8                        ; room for the two int locals
  mov     DWORD PTR [rbp-12], edi       ; spill n1
  mov     DWORD PTR [rbp-16], esi       ; spill n2
  ; ---- first call: register arguments (6th .. 2nd) ----
  mov     r9d, DWORD PTR [rbp-12]       ; 6th arg = n1
  mov     r8d, DWORD PTR [rbp-12]       ; 5th arg = n1
  mov     ecx, DWORD PTR [rbp-16]       ; 4th arg = n2
  mov     edx, DWORD PTR [rbp-12]       ; 3rd arg = n1
  mov     esi, DWORD PTR [rbp-16]       ; 2nd arg = n2
  mov     eax, DWORD PTR [rbp-12]       ; 1st arg = n1, parked in eax while edi is used as scratch
  ; ---- stack arguments, pushed last-to-first (20th down to 7th) ----
  mov     edi, DWORD PTR [rbp-12]       ; 20th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-16]       ; 19th arg = n2
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 18th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-16]       ; 17th arg = n2
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 16th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 15th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-16]       ; 14th arg = n2
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 13th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-16]       ; 12th arg = n2
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 11th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 10th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-16]       ; 9th arg = n2
  push    rdi
  mov     edi, DWORD PTR [rbp-12]       ; 8th arg = n1
  push    rdi
  mov     edi, DWORD PTR [rbp-16]       ; 7th arg = n2
  push    rdi
  mov     edi, eax                      ; 1st arg = n1, now that edi is free
  call    a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
  add     rsp, 112                      ; drop the 14 pushed slots (14 * 8 bytes)
  mov     ebx, eax                      ; ebx = result of first call
  ; ---- second call: same argument setup as the first ----
  mov     r9d, DWORD PTR [rbp-12]
  mov     r8d, DWORD PTR [rbp-12]
  mov     ecx, DWORD PTR [rbp-16]
  mov     edx, DWORD PTR [rbp-12]
  mov     esi, DWORD PTR [rbp-16]
  mov     eax, DWORD PTR [rbp-12]
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, eax
  call    a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
  add     rsp, 112                      ; drop the 14 pushed slots
  add     ebx, eax                      ; ebx = first + second
  ; ---- third call: same argument setup again ----
  mov     r9d, DWORD PTR [rbp-12]
  mov     r8d, DWORD PTR [rbp-12]
  mov     ecx, DWORD PTR [rbp-16]
  mov     edx, DWORD PTR [rbp-12]
  mov     esi, DWORD PTR [rbp-16]
  mov     eax, DWORD PTR [rbp-12]
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, DWORD PTR [rbp-12]
  push    rdi
  mov     edi, DWORD PTR [rbp-16]
  push    rdi
  mov     edi, eax
  call    a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
  add     rsp, 112                      ; drop the 14 pushed slots
  add     eax, ebx                      ; eax = third + (first + second): the return value
  mov     rbx, QWORD PTR [rbp-8]        ; restore the caller's rbx
  leave                                 ; tear down the frame (restores rsp and rbp)
  ret

我对此有一些疑问。首先,我注意到随着参数数量的增加,它处理参数的方式似乎发生了变化:

push    rbp
mov     rbp, rsp
mov     DWORD PTR [rbp-4], edi
...
imul    eax, DWORD PTR [rbp-8]
...

我想知道这里发生了什么,以及为什么要这样做。看起来它用push处理第一个参数,接着用mov处理后面的8个左右,其余的参数则直接通过imul与eax寄存器相乘来使用。另外我也想知道参数的个数是否有上限。

我想知道的第二件事是:假设函数b()调用的不是a(),而是某个“系统函数”或其他外部库函数,那么被调用的一方如何知道该怎样取出这些参数?不过我想这可能不是问题,我猜C编译器会把所有链接进来的外部库都编译成汇编/机器码,所以这一点大概可以忽略。

最后一个问题是:系统调用是否都有固定的参数个数上限(类似于x86指令最多只有3个操作数),还是说系统调用可以带任意数量的参数?看起来出于性能考虑,他们会希望限制参数个数,这样就只需要使用push、mov这类前面出现的指令,而不需要imul。

感谢您的帮助。我只是想弄清楚在汇编中调用函数时(尤其是参数很多的情况下)参数是如何处理的。

1 个答案:

答案 0 :(得分:5)

您需要了解有关堆栈框架和应用程序二进制接口(ABI或调用约定)的更多信息。 ABI定义了参数由调用方传递给被调用方的方式,哪些寄存器是易失的,以及如何清除堆栈。

存在许多ABI,因为只要调用方和被调用方达成一致,任何人都可以设计自己的ABI。不过,被广泛使用的ABI只有少数几种。在Windows上,大多数32位程序使用stdcall、cdecl、Microsoft的fastcall或Borland的fastcall,而64位程序大多使用Microsoft x64调用约定。在类Unix系统上,64位程序几乎总是使用System V AMD64 ABI,这也是您的编译器所使用的调用约定。

让我们看一下带有注释的代码:

push    rbp                     ; save the caller's frame pointer
mov     rbp, rsp                ; establish new stack frame
mov     DWORD PTR [rbp-4], edi  ; spill the six register arguments: n1
mov     DWORD PTR [rbp-8], esi  ; n2
mov     DWORD PTR [rbp-12], edx ; n3
mov     DWORD PTR [rbp-16], ecx ; n4
mov     DWORD PTR [rbp-20], r8d ; n5
mov     DWORD PTR [rbp-24], r9d ; n6 (all six stores land below rsp; rsp is never adjusted)
mov     eax, DWORD PTR [rbp-4]  ; eax = n1
imul    eax, DWORD PTR [rbp-8]  ; eax = eax * n2
imul    eax, DWORD PTR [rbp-12] ; eax = eax * n3
imul    eax, DWORD PTR [rbp-16] ; eax = eax * n4
imul    eax, DWORD PTR [rbp-20] ; eax = eax * n5
imul    eax, DWORD PTR [rbp-24] ; eax = eax * n6
imul    eax, DWORD PTR [rbp+16] ; eax = eax * n7  (first argument passed on the stack)
imul    eax, DWORD PTR [rbp+24] ; eax = eax * n8
imul    eax, DWORD PTR [rbp+32] ; eax = eax * n9
imul    eax, DWORD PTR [rbp+40] ; eax = eax * n10
imul    eax, DWORD PTR [rbp+48] ; eax = eax * n11
imul    eax, DWORD PTR [rbp+56] ; eax = eax * n12
imul    eax, DWORD PTR [rbp+64] ; eax = eax * n13
imul    eax, DWORD PTR [rbp+72] ; eax = eax * n14
imul    eax, DWORD PTR [rbp+80] ; eax = eax * n15
imul    eax, DWORD PTR [rbp+88] ; eax = eax * n16
imul    eax, DWORD PTR [rbp+96] ; eax = eax * n17
imul    eax, DWORD PTR [rbp+104] ; eax = eax * n18
imul    eax, DWORD PTR [rbp+112] ; eax = eax * n19
imul    eax, DWORD PTR [rbp+120] ; eax = eax * n20 (last argument passed on the stack)
pop     rbp                      ; restore the caller's frame pointer
ret                              ; return; the product is in eax

注意:前两行与参数无关;它们用于建立堆栈框架,以便您可以方便地访问局部变量和参数。如果没有堆栈框架,您仍然可以使用[rsp + *]访问它们,但偏移量需要根据您执行过的PUSH和POP进行调整。

接下来的几条指令把参数保存到局部变量中。寄存器的内容会频繁变动,因此需要把通过寄存器传入的参数保存起来,以备后面使用。不过在本例中这并没有必要,因此优化后的代码可以是:

; Hand-simplified body of a(): multiplies the six register arguments directly
; instead of spilling them to the stack first, then multiplies in the fourteen
; stack arguments. The product is left in eax.
push    rbp                     ; save the caller's frame pointer
mov     rbp, rsp                ; establish new stack frame
mov     eax, edi                ; eax = n1
imul    eax, esi                ; eax = eax * n2
imul    eax, edx                ; eax = eax * n3
imul    eax, ecx                ; eax = eax * n4
imul    eax, r8d                ; eax = eax * n5 (r8d is the 32-bit view of r8)
imul    eax, r9d                ; eax = eax * n6 (r9d is the 32-bit view of r9)
imul    eax, DWORD PTR [rbp+16] ; eax = eax * n7  (first argument passed on the stack)
imul    eax, DWORD PTR [rbp+24] ; eax = eax * n8
imul    eax, DWORD PTR [rbp+32] ; eax = eax * n9
imul    eax, DWORD PTR [rbp+40] ; eax = eax * n10
imul    eax, DWORD PTR [rbp+48] ; eax = eax * n11
imul    eax, DWORD PTR [rbp+56] ; eax = eax * n12
imul    eax, DWORD PTR [rbp+64] ; eax = eax * n13
imul    eax, DWORD PTR [rbp+72] ; eax = eax * n14
imul    eax, DWORD PTR [rbp+80] ; eax = eax * n15
imul    eax, DWORD PTR [rbp+88] ; eax = eax * n16
imul    eax, DWORD PTR [rbp+96] ; eax = eax * n17
imul    eax, DWORD PTR [rbp+104] ; eax = eax * n18
imul    eax, DWORD PTR [rbp+112] ; eax = eax * n19
imul    eax, DWORD PTR [rbp+120] ; eax = eax * n20 (last argument passed on the stack)
pop     rbp                      ; restore the caller's frame pointer
ret                              ; return; the product is in eax

从上面的示例中,您应该能够看出:第一个参数通过edi传递(或rdi、di、dil,取决于参数大小),第二个通过esi,随后依次是edx、ecx、r8d、r9d(仅限整数参数;浮点数通过向量寄存器传递)。当参数多于6个时,其余参数会被压入堆栈,并可以通过[rbp + 16]、[rbp + 24]、……来访问([rbp]是旧的rbp;[rbp + 8]是返回地址)。

对于调用方:

mov     r9d, DWORD PTR [rbp-12]  ; r9d = n6
mov     r8d, DWORD PTR [rbp-12]  ; r8d = n5
mov     ecx, DWORD PTR [rbp-16]  ; ecx = n4
mov     edx, DWORD PTR [rbp-12]  ; edx = n3
mov     esi, DWORD PTR [rbp-16]  ; esi = n2
mov     eax, DWORD PTR [rbp-12]  ; eax = n1, parked here because edi is used as scratch below
mov     edi, DWORD PTR [rbp-12]  ; push n20
push    rdi
mov     edi, DWORD PTR [rbp-16]  ; push n19
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n18
push    rdi
mov     edi, DWORD PTR [rbp-16]  ; push n17
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n16
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n15
push    rdi
mov     edi, DWORD PTR [rbp-16]  ; push n14
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n13
push    rdi
mov     edi, DWORD PTR [rbp-16]  ; push n12
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n11
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n10
push    rdi
mov     edi, DWORD PTR [rbp-16]  ; push n9
push    rdi
mov     edi, DWORD PTR [rbp-12]  ; push n8
push    rdi
mov     edi, DWORD PTR [rbp-16]  ; push n7
push    rdi
mov     edi, eax                 ; edi = n1, now that edi is no longer needed as scratch
call    a()                      ; call the function
add     rsp, 112                 ; clean up the stack, 14 * 8 = 112 bytes
mov     ebx, eax                 ; keep a()'s return value (eax) in ebx

一个更简单的版本是

; Simplified call sequence for
;   a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1)
; with n1 stored at [rbp-12] and n2 at [rbp-16].
; The stack arguments are pushed straight from memory, last argument first.
; Each push stores a full 8-byte slot; a() only reads the low 4 bytes of each
; slot, so the extra upper bytes picked up by the QWORD load are ignored.
mov     r9d, DWORD PTR [rbp-12]  ; r9d = n6
mov     r8d, DWORD PTR [rbp-12]  ; r8d = n5
mov     ecx, DWORD PTR [rbp-16]  ; ecx = n4
mov     edx, DWORD PTR [rbp-12]  ; edx = n3
mov     esi, DWORD PTR [rbp-16]  ; esi = n2
mov     edi, DWORD PTR [rbp-12]  ; edi = n1
push    QWORD PTR [rbp-12]       ; push n20
push    QWORD PTR [rbp-16]       ; push n19
push    QWORD PTR [rbp-12]       ; push n18
push    QWORD PTR [rbp-16]       ; push n17
push    QWORD PTR [rbp-12]       ; push n16
push    QWORD PTR [rbp-12]       ; push n15
push    QWORD PTR [rbp-16]       ; push n14
push    QWORD PTR [rbp-12]       ; push n13
push    QWORD PTR [rbp-16]       ; push n12
push    QWORD PTR [rbp-12]       ; push n11
push    QWORD PTR [rbp-12]       ; push n10
push    QWORD PTR [rbp-16]       ; push n9
push    QWORD PTR [rbp-12]       ; push n8
push    QWORD PTR [rbp-16]       ; push n7
call    a()                      ; call the function
add     rsp, 112                 ; clean up the stack, 14 * 8 = 112 bytes
mov     ebx, eax                 ; keep a()'s return value (eax) in ebx

请注意,参数以相反的顺序推送。

由于在调用函数之前可以向堆栈压入任意多个值(直到堆栈溢出为止),因此参数的数量没有限制。