将内联汇编移至独立文件

时间:2015-06-22 02:18:24

标签: assembly visual-studio-2013 inline-assembly

我一直在尝试将一些使用内联汇编的函数移植到独立的汇编文件中。除了一个函数之外,其余都成功了;下面先给出内联版本,然后是我目前的独立版本。独立版本一旦用到 onesByte,就无法返回与内联版本相同的结果。我在这里提问,是希望有人了解汇编器处理独立 asm 文件的方式与编译器处理内联汇编的方式有何不同。

内联版本:

// Sixteen bytes of 0x01, declared 16-byte aligned. The inline-assembly routine below uses it
// directly as the memory operand of psubusb to subtract 1 from every byte with unsigned saturation.
__declspec(align(16)) const __int64 onesByte[2] = { 0x0101010101010101, 0x0101010101010101 };

// Builds a per-byte 0xFF/0x00 mask in dstp from five input byte rows, 16 bytes per step (SSE2).
//
// For every byte position two groups are formed: {p2p, s1p, n2p} and {p1p, n1p}.
// The output byte is 0xFF when
//   - (max - min) of each group is at most the low byte of (thresh - 1), AND
//   - one group lies entirely on one side of the other, i.e. the max of one group is
//     <= (min of the other group - 1), where the "- 1" saturates at 0,
// and 0x00 otherwise.
//
// Parameters:
//   p2p, p1p, s1p, n1p, n2p - input rows, read with movdqa, so every row address must be
//                             16-byte aligned (presumably the lines from two before to two
//                             after the current one - TODO confirm against the caller)
//   dstp   - output mask row, written with movdqa (must be 16-byte aligned)
//   stride - byte distance added to all six pointers after each row
//   width  - bytes per row; handled in 16-byte steps while the offset is < width (signed
//            compare), so at least 16 bytes of every row are always processed
//   height - number of rows; the row loop is bottom-tested, so it assumes height >= 1
//   thresh - threshold; only the low byte of (thresh - 1) takes part in the comparison
//
// The by-value parameters thresh, p2p, dstp and height are modified in place as scratch storage.
void TComb::checkOscillation5_SSE2(const uint8_t *p2p, const uint8_t *p1p, const uint8_t *s1p,
    const uint8_t *n1p, const uint8_t *n2p, uint8_t *dstp, int stride, int width, int height, int thresh)
{
    __asm
    {
        // Row pointers: eax = p2p, ebx = p1p, edx = s1p, edi = n1p, esi = n2p.
        mov eax, p2p
        mov ebx, p1p
        mov edx, s1p
        mov edi, n1p
        mov esi, n2p
        // xmm6 = all zero, used as the comparand for the "result is zero" tests below.
        pxor xmm6, xmm6
        // xmm7 = low byte of (thresh - 1) replicated into all 16 byte lanes.
        dec thresh
        movd xmm7, thresh
        punpcklbw xmm7, xmm7
        punpcklwd xmm7, xmm7
        punpckldq xmm7, xmm7
        punpcklqdq xmm7, xmm7
    yloop :
        // ecx = byte offset inside the current row.
        xor ecx, ecx
        align 16
    xloop :
        // xmm0/xmm1 = min/max of {p2p, s1p, n2p}; xmm2/xmm3 = min/max of {p1p, n1p}.
        movdqa xmm0, [eax + ecx]
        movdqa xmm2, [ebx + ecx]
        movdqa xmm1, xmm0
        movdqa xmm3, xmm2
        pminub xmm0, [edx + ecx]
        pmaxub xmm1, [edx + ecx]
        pminub xmm2, [edi + ecx]
        pmaxub xmm3, [edi + ecx]
        pminub xmm0, [esi + ecx]
        pmaxub xmm1, [esi + ecx]
        // xmm4 / xmm5 = (max - min) of each group, then minus (thresh - 1), all saturating:
        // a lane ends up zero exactly when that group's range is <= (thresh - 1).
        movdqa xmm4, xmm3
        movdqa xmm5, xmm1
        psubusb xmm4, xmm2
        psubusb xmm5, xmm0
        psubusb xmm4, xmm7
        psubusb xmm5, xmm7
        // Lower both minima by one (saturating at 0) ...
        psubusb xmm2, onesByte
        psubusb xmm0, onesByte
        // ... so that a zero lane in xmm1 / xmm3 means one group's max is <= the other
        // group's (min - 1).
        psubusb xmm1, xmm2
        psubusb xmm3, xmm0
        // Turn the four "is zero" conditions into 0xFF / 0x00 byte masks.
        pcmpeqb xmm1, xmm6
        pcmpeqb xmm3, xmm6
        pcmpeqb xmm4, xmm6
        pcmpeqb xmm5, xmm6
        // eax is borrowed for the destination pointer; it is reloaded with p2p after the store.
        mov eax, dstp
        // result = (separated one way OR the other) AND (both ranges small enough)
        por xmm1, xmm3
        pand xmm4, xmm5
        pand xmm1, xmm4
        movdqa[eax + ecx], xmm1
        add ecx, 16
        mov eax, p2p
        cmp ecx, width
        jl xloop
        // Advance all six row pointers by stride; p2p and dstp live in their parameter slots.
        mov eax, stride
        add ebx, stride
        add p2p, eax
        add edx, stride
        add edi, stride
        add dstp, eax
        add esi, stride
        mov eax, p2p
        dec height
        jnz yloop
    }
}

独立版:

; Stand-alone 32-bit MASM source.
; .xmm enables the SSE/SSE2 (XMM register) instruction set for the assembler.
.xmm
; Flat memory model with the C language type (C naming and calling convention for PROCs).
.model flat,c

.data

; The constant below is used as a 128-bit memory operand, so it is placed on a
; 16-byte boundary.
align 16

; Two qwords of 0101010101010101h = sixteen bytes of 01h. Used with psubusb to
; subtract 1 from every byte lane with unsigned saturation.
onesByte qword 2 dup(0101010101010101h)

; Everything from here on is executable code and must be emitted into the code
; segment. Without this directive the procedure would be assembled into the
; segment that is currently open; code placed in a data segment may sit on a
; non-executable page, and ALIGN padding there is not guaranteed to be NOPs
; (zero bytes decode as "add [eax],al", which would write through the p2p
; pointer held in eax).
.code

;-----------------------------------------------------------------------
; void checkOscillation5_SSE2(const uint8_t *p2p, const uint8_t *p1p,
;                             const uint8_t *s1p, const uint8_t *n1p,
;                             const uint8_t *n2p, uint8_t *dstp,
;                             int stride, int width, int height, int thresh)
;
; ABI:   32-bit, C convention (from ".model flat,c"); arguments on the stack.
;        A C++ caller presumably needs an extern "C" declaration - confirm.
;
; Builds a per-byte 0FFh/00h mask in dstp, 16 bytes per step (SSE2).
; For every byte position two groups are formed: {p2p, s1p, n2p} and
; {p1p, n1p}. The output byte is 0FFh when
;   - (max - min) of each group is <= the low byte of (thresh - 1), AND
;   - the max of one group is <= (min of the other group - 1), where the
;     "- 1" saturates at 0,
; and 00h otherwise.
;
; In:    p2p, p1p, s1p, n1p, n2p  input rows, read with movdqa (16-byte aligned)
;        dstp     output row, written with movdqa (16-byte aligned)
;        stride   byte distance added to all six pointers after each row
;        width_   bytes per row; stepped by 16 while offset < width_ (signed),
;                 so at least 16 bytes per row are always processed
;        height   number of rows; the row loop is bottom-tested (assumes >= 1)
;        thresh   threshold; only the low byte of (thresh - 1) is used
; Saved: ebx, esi, edi (via USES)
; Clobb: eax, ecx, edx, xmm0-xmm7, flags; the stack copies of thresh, p2p,
;        dstp and height are modified as scratch storage.
;-----------------------------------------------------------------------
checkOscillation5_SSE2 proc uses ebx esi edi p2p:dword,p1p:dword,s1p:dword,n1p:dword,n2p:dword,dstp:dword,stride:dword,width_:dword,height:dword,thresh:dword

    public checkOscillation5_SSE2

    ; Row pointers: eax = p2p, ebx = p1p, edx = s1p, edi = n1p, esi = n2p.
    mov eax,p2p
    mov ebx,p1p
    mov edx,s1p
    mov edi,n1p
    mov esi,n2p
    ; xmm6 = all zero, comparand for the "result is zero" tests below.
    pxor xmm6,xmm6
    ; xmm7 = low byte of (thresh - 1) replicated into all 16 byte lanes.
    dec thresh
    movd xmm7,thresh
    punpcklbw xmm7,xmm7
    punpcklwd xmm7,xmm7
    punpckldq xmm7,xmm7
    punpcklqdq xmm7,xmm7
yloop:
    ; ecx = byte offset inside the current row.
    xor ecx,ecx
    ; Padding inserted here is executed once per row, so it must be NOPs;
    ; that is why this procedure has to live in the code segment.
    align 16
xloop:
    ; xmm0/xmm1 = min/max of {p2p, s1p, n2p}; xmm2/xmm3 = min/max of {p1p, n1p}.
    movdqa xmm0,[eax+ecx]
    movdqa xmm2,[ebx+ecx]
    movdqa xmm1,xmm0
    movdqa xmm3,xmm2
    pminub xmm0,[edx+ecx]
    pmaxub xmm1,[edx+ecx]
    pminub xmm2,[edi+ecx]
    pmaxub xmm3,[edi+ecx]
    pminub xmm0,[esi+ecx]
    pmaxub xmm1,[esi+ecx]
    ; xmm4 / xmm5 = (max - min) of each group minus (thresh - 1), saturating:
    ; a lane is zero exactly when that group's range is <= (thresh - 1).
    movdqa xmm4,xmm3
    movdqa xmm5,xmm1
    psubusb xmm4,xmm2
    psubusb xmm5,xmm0
    psubusb xmm4,xmm7
    psubusb xmm5,xmm7
    ; Lower both minima by one (saturating at 0) ...
    psubusb xmm2,oword ptr onesByte
    psubusb xmm0,oword ptr onesByte
    ; ... so a zero lane in xmm1 / xmm3 means one group's max is <= the other
    ; group's (min - 1).
    psubusb xmm1,xmm2
    psubusb xmm3,xmm0
    ; Turn the four "is zero" conditions into 0FFh / 00h byte masks.
    pcmpeqb xmm1,xmm6
    pcmpeqb xmm3,xmm6
    pcmpeqb xmm4,xmm6
    pcmpeqb xmm5,xmm6
    ; eax is borrowed for the destination pointer and reloaded with p2p below.
    mov eax,dstp
    ; result = (separated one way OR the other) AND (both ranges small enough)
    por xmm1,xmm3
    pand xmm4,xmm5
    pand xmm1,xmm4
    movdqa [eax+ecx],xmm1
    add ecx,16
    mov eax,p2p
    cmp ecx,width_
    jl xloop
    ; Advance all six row pointers by stride; p2p and dstp live in their
    ; argument slots on the stack.
    mov eax,stride
    add ebx,stride
    add p2p,eax
    add edx,stride
    add edi,stride
    add dstp,eax
    add esi,stride
    mov eax,p2p
    dec height
    jnz yloop

    ; RET inside a PROC with USES/arguments expands to the generated epilogue.
    ret

checkOscillation5_SSE2 endp

非常感谢任何有关此事的帮助或见解。

0 个答案:

没有答案