Question

我在 Visual Studio 2005 下使用汇编语言编程（我是新手），我想写一个程序，按如下递推规则计算算术级数：$A_n = 2A_{n-1} + A_{n-2}$。但我真的不知道该如何使用寄存器，只需要一个示例来帮助我继续练习。

这是我的代码：

; Question's program (32-bit MASM, flat model, stdcall).
; Intent, per the accompanying text: fill setarray with the sequence
; A(n) = 2*A(n-1) + A(n-2), seeded from firstvar and secondvar, then exit
; through ExitProcess.
; The code is kept exactly as posted; the NOTE(review) comments below flag
; the places the answers (and a reviewer) would question.
.386
.MODEL flat,stdcall

.STACK 4096

extern ExitProcess@4:Near

.data                               
arraysize DWORD 10

; NOTE(review): MASM's repeat form is `count DUP (initializer)`; the operands
; here look reversed, and arraysize is a memory variable rather than an
; assemble-time constant -- confirm this reserves the intended 10 dwords.
setarray  DWORD 0 DUP(arraysize)
firstvar  DWORD 1
secondvar DWORD 2

.code                               
_main:                              
; Seed the first two elements from firstvar and secondvar.
mov eax,[firstvar]
mov [setarray+0],eax        
mov eax,[secondvar]
mov [setarray+4],eax

; NOTE(review): ecx is loaded with the full arraysize although two elements
; are already stored above -- the loop count looks too large; confirm.
mov ecx, arraysize              ;loop definition
; ax is meant as the byte offset of the element being computed (8 = third dword).
mov ax, 8

Lp:
; NOTE(review): ax is the low half of eax, so loading data into eax
; overwrites the offset held in ax; [setarray+ax-4] also asks for a 16-bit
; addressing mode in 32-bit code.
mov eax,[setarray+ax-4]
; Adding the same element a second time doubles it.
add eax,[setarray+ax-4]
add eax,[setarray+ax-8]
mov [setarray+ax],eax

; Step the offset by 4 bytes (one dword).
add ax,4;
loop Lp

; NOTE(review): ax is not read again after this point -- this add appears
; to have no effect on the result.
add ax,4;

    push    0                   ;Black box. Always terminate
    call    ExitProcess@4       ;program with this sequence

    end   _main              ;End of program. Label is the entry point.

Answer 1

您不能同时把 ax 用作索引寄存器、把 eax 用作数据寄存器（ax 就是 eax 的低 16 位）。在 32 位代码中，请坚持使用 32 位寄存器，除非您清楚自己在做什么。您无意中使用了 16 位寻址模式，这很可能不是您想要的。

; Fill setarray[2 .. arraysize-1] with A(n) = 2*A(n-1) + A(n-2).
; Expects: setarray[0] and setarray[1] already stored, and arraysize
;          (a DWORD variable in memory) greater than 2.
; Registers: ebx = byte offset of the element being computed
;            ecx = number of elements still to compute
;            eax = scratch for the new element
mov ecx, [arraysize]              ;loop definition: load the element count from memory
sub ecx, 2                        ; the first two elements are already in place
mov ebx, 8                        ; byte offset of setarray[2]

Lp:
mov eax,[setarray+ebx-4]          ; eax = A(n-1)
add eax,eax                       ; eax = 2*A(n-1)
add eax,[setarray+ebx-8]          ; eax = 2*A(n-1) + A(n-2)
mov [setarray+ebx],eax            ; store A(n)

add ebx,4                         ; advance to the next dword
dec ecx
jnz Lp                            ; dec does not modify CF, so loop on ZF (jnz), not on CF (jnc)

永远不要使用 loop 指令，即使某些现代处理器可以快速执行它（大多数不能）。

Answer 2

我也是汇编语言的初学者，但我的算法有点不同：


    A   dword   1026 dup (0)          ; declare this in the data segm.

;       ...
;
; Fill A with A(k) = 2*A(k-1) + A(k-2), seeded with A(0) = 1 and A(1) = 2.
; n (defined elsewhere) is the number of elements to compute after the two
; seeds. The loop body runs at least once, and n must not exceed 1024 or
; the stores run past the 1026-dword array.
; Registers: esi = address of the element being computed
;            ecx = number of elements computed so far
;            eax = scratch for the new element

    mov     esi, offset A         ; point at the results array
    mov     dword ptr [esi], 1     ; initialize A(0); explicit size needed for a memory/immediate move
    mov     dword ptr [esi + 4], 2 ;  and A(1)
    add     esi, 8                 ; esi -> A(2), the first element to compute
    xor  ecx, ecx


lp:
        mov eax, [esi - 4]          ; get A(n-1)
        add eax, eax                ; double it
        add eax, [esi - 8]          ; computes A(n)
        mov [esi], eax              ; and save it
        add esi, 4                  ; advance exactly one dword per stored element
        inc ecx                     ; next n
        cmp ecx, n                  ; be sure n is a dword, use
;       cmp ecx, dword ptr n        ; if it isn't
        jb      lp                     ; loop while ecx < n
;
;       now you should have the results in the A array with
;       esi pointing just past the last element written

我没有编译它以查看它是否运行良好但它应该......

Answer 3

$A_n = 2A_{n-1} + A_{n-2}$ 与 Fibonacci 数列的公式几乎相同，所以通过搜索可以找到很多有用的资料（例如这个问答）。不过，我们需要的是 2a + b 而不是简单的 a + b，而 x86 可以用一条 LEA 指令完成这个计算。

您永远不需要将循环变量存储在内存中，这正是寄存器的用途。因此，不必在每次迭代时都把数据从内存中重新读回来（往返延迟约为 5 个周期），只用寄存器即可（0 周期额外延迟）。

您的数组可以使用.bss而不是.data，因此您不会将这些零存储在目标文件中。

arraysize equ 10     ; not DWORD: this is an assemble-time constant, not a value stored in memory

.data?                             ; MASM's uninitialized-data segment (the .bss of other toolchains)
seq  DWORD arraysize DUP(?)        ; MASM form is `count DUP (init)`; ? leaves the storage uninitialized
; NASM equivalent:  seq RESD arraysize

.code
; Fill seq[0 .. arraysize-1] with A(n) = 2*A(n-1) + A(n-2), A(0)=1, A(1)=2.
; Registers: edx, eax = the two most recent terms (they swap roles each half
;                       of the unrolled loop body)
;            ecx      = byte offset of the next element to store
; Only eax, ecx and edx are written, so no call-preserved register needs saving.
_main:

    mov  edx, 1         ; A[0]
    mov  [seq], edx
    mov  eax, 2         ; A[1]
    mov  [seq+4], eax

    mov  ecx, 8         ; first 8 bytes stored
    ; assume the arraysize is > 2 and even, so no checks here
seqloop:
    lea  edx, [eax*2 + edx]  ; in: eax=A[n-1], edx=A[n-2]  ->  edx=A[n]
    mov  [seq + ecx], edx
    lea  eax, [edx*2 + eax]  ; in: edx=A[n],   eax=A[n-1]  ->  eax=A[n+1]
    mov  [seq + ecx + 4], eax
    ; unrolled by two, so we don't need any MOV or XCHG instructions between registers, or any reloading from memory.

    add  ecx, 8             ; +8 bytes
    cmp  ecx, arraysize*4   ; (the *4 happens at assemble time)
    jb   seqloop            ; unsigned compare: ecx is a byte offset

    ret

把数组索引同时用作循环条件，意味着我们总共只使用了 3 个寄存器，并且仍然不需要保存/恢复任何通常由被调用者保存的寄存器 ESI、EDI、EBX 或 EBP。（当然，调用者的 ESP 也会被恢复。）

如果您关心性能，那么该循环在 Intel SnB 系列 CPU 上仅为 6 个 uop（融合域）。对于更大的数组大小，它可以每个时钟周期产生一个结果（每 2 个时钟周期完成一次迭代）。

我的第一个汇编程序

3 个答案: