这是一组示例函数,第一个包含20个参数,第二个包含2个参数:
// Example function taking twenty int parameters.
// Returns the product n1 * n2 * ... * n20.
int a(int n1, int n2, int n3, int n4, int n5, int n6, int n7, int n8, int n9, int n10, int n11, int n12, int n13, int n14, int n15, int n16, int n17, int n18, int n19, int n20) {
return n1 * n2 * n3 * n4 * n5 * n6 * n7 * n8 * n9 * n10 * n11 * n12 * n13 * n14 * n15 * n16 * n17 * n18 * n19 * n20;
}
// Calls a() three times and returns the sum of the three results.
// Every call passes the same twenty arguments: the five-value pattern
// (n1, n2, n1, n2, n1) repeated four times.
int b(int n1, int n2) {
return a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1)
+ a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1)
+ a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1);
}
它被编译成如下汇编代码:
; a(n1, ..., n20): multiplies its twenty int arguments together and returns
; the product in eax.
;   n1..n6  arrive in edi, esi, edx, ecx, r8d, r9d and are copied to
;           [rbp-4] .. [rbp-24] before being used.
;   n7..n20 are read from [rbp+16], [rbp+24], ..., [rbp+120], one 8-byte
;           slot per argument.
; rsp is never lowered, so the six copies sit below the stack pointer
; (presumably relying on the System V red zone, since this function makes
; no calls).
a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int):
push rbp ; save the caller's frame pointer
mov rbp, rsp ; rbp now anchors this function's frame
mov DWORD PTR [rbp-4], edi ; copy n1 to the frame
mov DWORD PTR [rbp-8], esi ; copy n2
mov DWORD PTR [rbp-12], edx ; copy n3
mov DWORD PTR [rbp-16], ecx ; copy n4
mov DWORD PTR [rbp-20], r8d ; copy n5
mov DWORD PTR [rbp-24], r9d ; copy n6
mov eax, DWORD PTR [rbp-4] ; eax = n1
imul eax, DWORD PTR [rbp-8] ; eax *= n2
imul eax, DWORD PTR [rbp-12] ; eax *= n3
imul eax, DWORD PTR [rbp-16] ; eax *= n4
imul eax, DWORD PTR [rbp-20] ; eax *= n5
imul eax, DWORD PTR [rbp-24] ; eax *= n6
imul eax, DWORD PTR [rbp+16] ; eax *= n7  (first argument taken from the stack)
imul eax, DWORD PTR [rbp+24] ; eax *= n8
imul eax, DWORD PTR [rbp+32] ; eax *= n9
imul eax, DWORD PTR [rbp+40] ; eax *= n10
imul eax, DWORD PTR [rbp+48] ; eax *= n11
imul eax, DWORD PTR [rbp+56] ; eax *= n12
imul eax, DWORD PTR [rbp+64] ; eax *= n13
imul eax, DWORD PTR [rbp+72] ; eax *= n14
imul eax, DWORD PTR [rbp+80] ; eax *= n15
imul eax, DWORD PTR [rbp+88] ; eax *= n16
imul eax, DWORD PTR [rbp+96] ; eax *= n17
imul eax, DWORD PTR [rbp+104] ; eax *= n18
imul eax, DWORD PTR [rbp+112] ; eax *= n19
imul eax, DWORD PTR [rbp+120] ; eax *= n20; eax is the return value
pop rbp ; restore the caller's frame pointer
ret
; b(n1, n2): calls a() three times with the same twenty arguments and
; returns the sum of the three results in eax.
; Frame layout: [rbp-8] = saved rbx, [rbp-12] = n1, [rbp-16] = n2.
; ebx carries the running sum across the calls.
; Each call passes arguments 1..6 in edi, esi, edx, ecx, r8d, r9d and
; arguments 7..20 on the stack, pushed from argument 20 down to argument 7.
b(int, int):
push rbp ; save the caller's frame pointer
mov rbp, rsp ; rbp now anchors this function's frame
push rbx ; save rbx (used below for the running sum); lands at [rbp-8]
sub rsp, 8 ; reserve the 8 bytes that hold n1 and n2
mov DWORD PTR [rbp-12], edi ; store n1
mov DWORD PTR [rbp-16], esi ; store n2
; ---- first call to a() ----
mov r9d, DWORD PTR [rbp-12] ; arg 6 = n1
mov r8d, DWORD PTR [rbp-12] ; arg 5 = n1
mov ecx, DWORD PTR [rbp-16] ; arg 4 = n2
mov edx, DWORD PTR [rbp-12] ; arg 3 = n1
mov esi, DWORD PTR [rbp-16] ; arg 2 = n2
mov eax, DWORD PTR [rbp-12] ; arg 1 = n1, parked in eax while edi serves as push scratch
mov edi, DWORD PTR [rbp-12] ; arg 20 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 19 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 18 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 17 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 16 = n1
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 15 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 14 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 13 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 12 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 11 = n1
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 10 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 9 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 8 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 7 = n2
push rdi
mov edi, eax ; arg 1 = n1 moves into its register
call a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
add rsp, 112 ; discard the 14 pushed arguments (14 * 8 bytes)
mov ebx, eax ; running sum = first result
; ---- second call to a(), same argument setup ----
mov r9d, DWORD PTR [rbp-12] ; arg 6 = n1
mov r8d, DWORD PTR [rbp-12] ; arg 5 = n1
mov ecx, DWORD PTR [rbp-16] ; arg 4 = n2
mov edx, DWORD PTR [rbp-12] ; arg 3 = n1
mov esi, DWORD PTR [rbp-16] ; arg 2 = n2
mov eax, DWORD PTR [rbp-12] ; arg 1 = n1, parked in eax
mov edi, DWORD PTR [rbp-12] ; arg 20 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 19 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 18 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 17 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 16 = n1
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 15 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 14 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 13 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 12 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 11 = n1
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 10 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 9 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 8 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 7 = n2
push rdi
mov edi, eax ; arg 1 = n1 moves into its register
call a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
add rsp, 112 ; discard the 14 pushed arguments
add ebx, eax ; running sum += second result
; ---- third call to a(), same argument setup ----
mov r9d, DWORD PTR [rbp-12] ; arg 6 = n1
mov r8d, DWORD PTR [rbp-12] ; arg 5 = n1
mov ecx, DWORD PTR [rbp-16] ; arg 4 = n2
mov edx, DWORD PTR [rbp-12] ; arg 3 = n1
mov esi, DWORD PTR [rbp-16] ; arg 2 = n2
mov eax, DWORD PTR [rbp-12] ; arg 1 = n1, parked in eax
mov edi, DWORD PTR [rbp-12] ; arg 20 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 19 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 18 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 17 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 16 = n1
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 15 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 14 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 13 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 12 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 11 = n1
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 10 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 9 = n2
push rdi
mov edi, DWORD PTR [rbp-12] ; arg 8 = n1
push rdi
mov edi, DWORD PTR [rbp-16] ; arg 7 = n2
push rdi
mov edi, eax ; arg 1 = n1 moves into its register
call a(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
add rsp, 112 ; discard the 14 pushed arguments
add eax, ebx ; return value = third result + running sum
mov rbx, QWORD PTR [rbp-8] ; restore the caller's rbx from its save slot
leave ; rsp = rbp, then pop rbp
ret
我对此有一些疑问。首先,我注意到随着数量的增加,它似乎在切换处理args的方式:
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-4], edi
...
imul eax, DWORD PTR [rbp-8]
...
想知道那里发生了什么,为什么会那样做。似乎用push
处理第一个参数,然后用mov
处理接下来的8个左右,其余的则只用相对于eax
寄存器的imul
来处理。不知道参数的数量是否有上限。
我想知道的第二件事是:假设不是调用a()
,而是函数b()
调用了某个“系统函数”或其他外部库函数,想知道那时参数是如何被取出的。不过我想这也没什么区别,我猜C编译器同样会把所有链接进来的外部库编译成汇编/机器代码,所以这里应该没有什么特别之处。
最后一个问题是,系统调用是否都有固定的参数个数上限,类似于x86指令最多只有3个操作数那样;还是说系统调用可以有任意数量的参数。看起来出于性能考虑应该会加以限制,这样就只需要用到push
和mov
之类靠前的指令,而用不到imul
。
感谢您的帮助,只是希望在汇编程序中调用函数时,特别是在有大量参数的情况下,寻找有关如何处理参数的说明。
答案 0 :(得分:5)
您需要了解有关栈帧和应用程序二进制接口(ABI,即调用约定)的更多信息。ABI定义了调用方向被调用方传递参数的方式、哪些寄存器是易失的,以及如何清理堆栈。
存在许多ABI,因为只要调用方和被调用方达成一致,任何人都可以设计自己的ABI。但是,被广泛使用的ABI只有少数几种。在Windows上,大多数32位程序使用stdcall、cdecl、Microsoft的fastcall或Borland的fastcall,而64位程序大多使用Microsoft x64调用约定。64位Unix程序则始终使用System V AMD64 ABI,这也是您的编译器在这里所使用的ABI。
让我们看一下带有注释的代码:
; Body of a() as emitted by the compiler, annotated line by line.
; n1..n6 arrive in edi, esi, edx, ecx, r8d, r9d; n7..n20 are read from the
; stack at [rbp+16], [rbp+24], ..., [rbp+120].
push rbp ; save the old stack frame
mov rbp, rsp ; establish new stack frame
mov DWORD PTR [rbp-4], edi ; save the first six arguments: n1
mov DWORD PTR [rbp-8], esi ; n2
mov DWORD PTR [rbp-12], edx ; n3
mov DWORD PTR [rbp-16], ecx ; n4
mov DWORD PTR [rbp-20], r8d ; n5
mov DWORD PTR [rbp-24], r9d ; n6
mov eax, DWORD PTR [rbp-4] ; load n1
imul eax, DWORD PTR [rbp-8] ; eax = eax * n2
imul eax, DWORD PTR [rbp-12] ; eax = eax * n3
imul eax, DWORD PTR [rbp-16] ; eax = eax * n4
imul eax, DWORD PTR [rbp-20] ; eax = eax * n5
imul eax, DWORD PTR [rbp-24] ; eax = eax * n6
imul eax, DWORD PTR [rbp+16] ; eax = eax * n7
imul eax, DWORD PTR [rbp+24] ; eax = eax * n8
imul eax, DWORD PTR [rbp+32] ; eax = eax * n9
imul eax, DWORD PTR [rbp+40] ; eax = eax * n10
imul eax, DWORD PTR [rbp+48] ; eax = eax * n11
imul eax, DWORD PTR [rbp+56] ; eax = eax * n12
imul eax, DWORD PTR [rbp+64] ; eax = eax * n13
imul eax, DWORD PTR [rbp+72] ; eax = eax * n14
imul eax, DWORD PTR [rbp+80] ; eax = eax * n15
imul eax, DWORD PTR [rbp+88] ; eax = eax * n16
imul eax, DWORD PTR [rbp+96] ; eax = eax * n17
imul eax, DWORD PTR [rbp+104] ; eax = eax * n18
imul eax, DWORD PTR [rbp+112] ; eax = eax * n19
imul eax, DWORD PTR [rbp+120] ; eax = eax * n20; eax is the return value
pop rbp ; restore old stack frame
ret ; exit
注意:前两行与参数无关;他们将创建一个堆栈框架,以便您可以轻松访问局部变量和参数。如果没有堆栈框架,您仍然可以使用[rsp + *]访问它们,但是偏移量需要根据您使用的任何PUSH
和POP
进行调整。
接下来的几条指令把参数存储到局部变量中。寄存器的值经常被改写,所以需要把通过寄存器传入的参数保存起来,以备后面使用。但是在本例中没有这个必要,因此优化后的代码可以是
; Optimized body of a(): multiplies the six register arguments in place
; instead of first copying them to the stack.
; In:  edi, esi, edx, ecx, r8d, r9d = n1..n6
;      [rbp+16], [rbp+24], ..., [rbp+120] = n7..n20 (one 8-byte slot each)
; Out: eax = n1 * n2 * ... * n20
push rbp ; save the old stack frame
mov rbp, rsp ; establish new stack frame
mov eax, edi ; eax = n1
imul eax, esi ; eax = eax * n2
imul eax, edx ; eax = eax * n3
imul eax, ecx ; eax = eax * n4
imul eax, r8d ; eax = eax * n5 (r8d = low 32 bits of r8)
imul eax, r9d ; eax = eax * n6 (r9d = low 32 bits of r9)
imul eax, DWORD PTR [rbp+16] ; eax = eax * n7
imul eax, DWORD PTR [rbp+24] ; eax = eax * n8
imul eax, DWORD PTR [rbp+32] ; eax = eax * n9
imul eax, DWORD PTR [rbp+40] ; eax = eax * n10
imul eax, DWORD PTR [rbp+48] ; eax = eax * n11
imul eax, DWORD PTR [rbp+56] ; eax = eax * n12
imul eax, DWORD PTR [rbp+64] ; eax = eax * n13
imul eax, DWORD PTR [rbp+72] ; eax = eax * n14
imul eax, DWORD PTR [rbp+80] ; eax = eax * n15
imul eax, DWORD PTR [rbp+88] ; eax = eax * n16
imul eax, DWORD PTR [rbp+96] ; eax = eax * n17
imul eax, DWORD PTR [rbp+104] ; eax = eax * n18
imul eax, DWORD PTR [rbp+112] ; eax = eax * n19
imul eax, DWORD PTR [rbp+120] ; eax = eax * n20
pop rbp ; restore old stack frame
ret ; exit
从上面的示例中,您应该能够猜到第一个参数是在edi
中传递的(或rdi
,di
,dil
取决于大小) ,第二个在esi
中,然后在edx
,ecx
,r8d
和r9d
中(仅限整数;浮点数在向量寄存器中传递)。当参数多于6个时,其余的参数将被压入堆栈,并且可以使用[rbp + 16],[rbp + 24],...来访问。([rbp]是保存的旧rbp;[rbp + 8]是返回地址。)
对于调用方
; Call sequence for a() as emitted by the compiler, seen from the caller b().
; [rbp-12] holds b's n1 and [rbp-16] holds b's n2; the nK names in the
; comments below are a()'s parameters.
mov r9d, DWORD PTR [rbp-12] ; r9d = n6
mov r8d, DWORD PTR [rbp-12] ; r8d = n5
mov ecx, DWORD PTR [rbp-16] ; ecx = n4
mov edx, DWORD PTR [rbp-12] ; edx = n3
mov esi, DWORD PTR [rbp-16] ; esi = n2
mov eax, DWORD PTR [rbp-12] ; eax = n1 ; will assign to edi
mov edi, DWORD PTR [rbp-12] ; push n20 (edi is only a scratch register here)
push rdi
mov edi, DWORD PTR [rbp-16] ; push n19
push rdi
mov edi, DWORD PTR [rbp-12] ; push n18
push rdi
mov edi, DWORD PTR [rbp-16] ; push n17
push rdi
mov edi, DWORD PTR [rbp-12] ; push n16
push rdi
mov edi, DWORD PTR [rbp-12] ; push n15
push rdi
mov edi, DWORD PTR [rbp-16] ; push n14
push rdi
mov edi, DWORD PTR [rbp-12] ; push n13
push rdi
mov edi, DWORD PTR [rbp-16] ; push n12
push rdi
mov edi, DWORD PTR [rbp-12] ; push n11
push rdi
mov edi, DWORD PTR [rbp-12] ; push n10
push rdi
mov edi, DWORD PTR [rbp-16] ; push n9
push rdi
mov edi, DWORD PTR [rbp-12] ; push n8
push rdi
mov edi, DWORD PTR [rbp-16] ; push n7
push rdi
mov edi, eax ; edi = n1
call a() ; call the function
add rsp, 112 ; clean up the stack, 14 * 8 = 112 bytes
mov ebx, eax ; result is in eax
一个更简单的版本是
; Simpler call sequence for
;   a(n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1, n1, n2, n1, n2, n1)
; seen from the caller b(): [rbp-12] holds b's n1 and [rbp-16] holds b's n2.
; The nK names in the comments below are a()'s parameters.
; Arguments 1..6 go in registers; arguments 20 down to 7 are pushed straight
; from memory. A push in 64-bit mode always writes an 8-byte slot, so the
; operand is spelled QWORD PTR; a() reads only the low DWORD of each slot,
; so the extra 4 bytes loaded from the neighbouring frame bytes are ignored.
mov r9d, DWORD PTR [rbp-12] ; r9d = n6
mov r8d, DWORD PTR [rbp-12] ; r8d = n5
mov ecx, DWORD PTR [rbp-16] ; ecx = n4
mov edx, DWORD PTR [rbp-12] ; edx = n3
mov esi, DWORD PTR [rbp-16] ; esi = n2
mov edi, DWORD PTR [rbp-12] ; edi = n1
push QWORD PTR [rbp-12] ; push n20 (b's n1)
push QWORD PTR [rbp-16] ; push n19 (b's n2)
push QWORD PTR [rbp-12] ; push n18 (b's n1)
push QWORD PTR [rbp-16] ; push n17 (b's n2)
push QWORD PTR [rbp-12] ; push n16 (b's n1)
push QWORD PTR [rbp-12] ; push n15 (b's n1)
push QWORD PTR [rbp-16] ; push n14 (b's n2)
push QWORD PTR [rbp-12] ; push n13 (b's n1)
push QWORD PTR [rbp-16] ; push n12 (b's n2)
push QWORD PTR [rbp-12] ; push n11 (b's n1)
push QWORD PTR [rbp-12] ; push n10 (b's n1)
push QWORD PTR [rbp-16] ; push n9 (b's n2)
push QWORD PTR [rbp-12] ; push n8 (b's n1)
push QWORD PTR [rbp-16] ; push n7 (b's n2)
call a() ; call the function
add rsp, 112 ; clean up the stack, 14 * 8 = 112 bytes
mov ebx, eax ; result is in eax
请注意,参数以相反的顺序推送。
由于在调用函数之前可以将任意数量的值压入堆栈(直到栈溢出为止),因此对参数的数量没有限制。