展开y86循环

时间:2016-10-26 01:37:25

标签: assembly loop-unrolling y86

我正在尝试在y86代码中展开循环,但是当我尝试运行测试程序时,我得到了2个不同的值。注册代码是:

    xorq %rax,%rax      # count = 0;
    andq %rdx,%rdx      # len <= 0?
    jle Done        # if so, goto Done:

Loop:   
    mrmovq (%rdi), %r10 # read val from src...
    rmmovq %r10, (%rsi) # ...and store it to dst
    andq %r10, %r10     # val <= 0?
    jle Npos        # if so, goto Npos:
    #irmovq $1, %r10
    #addq %r10, %rax        
    iaddq $1, %rax      # count++
Npos:   
    irmovq $1, %r10
    subq %r10, %rdx     # len--
    #irmovq $8, %r10
    #addq %r10, %rdi        
    #addq %r10, %rsi        
    iaddq $8, %rdi      # src++
    iaddq $8, %rsi      # dst++
    andq %rdx,%rdx      # len > 0?
    jg Loop         # if so, goto Loop:
Done:
    ret

我制作的展开版本是:

xorq %rax,%rax      # count = 0;
    andq %rdx,%rdx      # len <= 0?
    jle Done        # if so, goto Done:

Loop:   
    mrmovq (%rdi), %r10     # read val from src…
    mrmovq 8(%rdi), %r11    # <- from class get second value
    rmmovq %r10, (%rsi)     # ...and store it to dst
    rmmovq %r11, 8(%rsi)         # store second val to dst
    andq %r10, %r10     # val <= 0?
    jle Npos            # if so, goto Npos:
    iaddq $1, %rax

Npos:   
    andq %r11, %r11 # check if src[1] <= 0
    jle Npos2       # if it is, don’t increase count
    iaddq $1, %rax

Npos2: 
    irmvoq %2, %r10
    iaddq $16, %rdi     # increase stack or base pointer to get next 2 vals
    iaddq $16, %rsi     # increase stack or base pointer to store next 2 vals
    subq %r10, %rdx     # decrease length by 2
    jge Loop            # go back into loop if length >= 2

len_cleanup:
    iaddq $2, %rdx

cleanup:
    irmovq $1, %r10
    subq %r10, %rdx
    jl Done             # if length < 0, jmp to Done, no cleanup needed
    mrmovq (%rdi), %r10     # get next val
    rmmovq %r10, (%rsi)     # move val onto stack
    andq %r10, %r10     # check if val <= 0
    jle Done            # skip count if val < 0
    iaddq $1, %rax      # same as iaddq $1, %rax

Done: 
    ret

我应该得到的结果是2但是从展开的那个返回的结果是返回3.我知道有一个额外的iaddq被执行但我不知道在哪里。我将循环展开两次,以便我检查2个值。

1 个答案:

答案 0 :(得分:1)

我刚修好了。我想在开始循环之前减少%rdx以正确展开函数。