我正在尝试学习如何使用SSE
,我决定使用一个由n^d
程序调用的函数来实现一个计算C
的简单代码
这是我的NASM
代码:
section .data
resmsg: db '%d^%d = %d', 0
section .bss
section .text
extern printf
; ------------------------------------------------------------
; Function called from a c program, I only use n and d parameters but I left the others
; ------------------------------------------------------------
global main
T equ 8
n equ 12
d equ 16
m equ 20
Sid equ 24
Sn equ 28
main:
; ------------------------------------------------------------
; Function enter sequence
; ------------------------------------------------------------
push ebp ; save Base Pointer
mov ebp, esp ; Move Base Point to current frame
sub esp, 8 ; reserve space for two local vars
push ebx ; save some registries (don't know if needed)
push esi
push edi
; ------------------------------------------------------------
; copy function's parameters to registries from stack
; ------------------------------------------------------------
mov eax, [ebp+T] ; T
mov ebx, [ebp+n] ; n
mov ecx, [ebp+d] ; d
mov edx, [ebp+m] ; m
mov esi, [ebp+Sid] ; Sid
mov edi, [ebp+Sn] ; Sn
mov [ebp-8], ecx ; copy ecx into one of the local vars
;
; pow is computed by doing n*n d times
;
movss xmm0, [ebp+n] ; base
movss xmm1, [ebp+n] ; another copy of the base because xmm0 will be overwritten by the result
loop: mulss xmm0, xmm1 ; scalar mult from sse
dec ecx ; counter--
cmp ecx,0 ; check if counter is 0 to end loop
jnz loop ;
;
; let's store the result in eax by moving it to the stack and then copying to the registry (we use the other local var as support)
;
movss [ebp-4], xmm0
mov eax, [ebp-4]
;
; Print using C's printf
;
push eax ; result
mov ecx, [ebp-8] ; copy the original d back since we used it as loop's counter
push ecx ; exponent
push ebx ; base
push resmsg ; string format
call printf ; printf call
add esp, 24 ; clean the stack from both our local and printf's vars
; ------------------------------------------------------------
; Function exit sequence
; ------------------------------------------------------------
pop edi ; restore the registries
pop esi
pop ebx
mov esp, ebp ; restore the Stack Pointer
pop ebp ; restore the Base Pointer
ret ; get back to C program
现在,我期待的是打印
4 ^ 2 = 16
但相反,我得到了
4 ^ 2 = 0
我整个下午都花在这上面,我找不到解决方案,你有任何提示吗?
编辑:
由于它似乎是格式问题,我尝试使用
转换数据movss [ebp-4], xmm0
fld dword [ebp-4]
mov eax, dword [ebp-4]
而不是
movss [ebp-4], xmm0
mov eax, [ebp-4]
但我得到了相同的结果。
答案 0 :(得分:2)
MOVSS
移动单精度浮点数(32位)。我假设n
是一个整数,因此您无法将其加载到MOVSS
的XMM寄存器中。请改用CVTSI2SS
。 printf
无法处理单精度浮点数,它会被编译器转换为双精度浮点数。此时使用CVTSS2SI
很方便。因此代码应如下所示:
...
;
; pow is computed by doing n*n d times
;
cvtsi2ss xmm0, [ebp+n] ; load integer
sub ecx, 1 ; first step (n^1) is done
cvtsi2ss xmm1, [ebp+n] ; load integer
loop:
mulss xmm0, xmm1 ; scalar mult from sse
sub ecx, 1
jnz loop
cvtss2si eax, xmm0 ; result as integer
;
; Print using C's printf
;
push eax ; result
mov ecx, [ebp-8] ; copy the original d back since we used it as loop's counter
push ecx ; exponent
push ebx ; base
push resmsg ; string format
call printf ; printf call
add esp, 16 ; clean the stack only from printf's vars
...