下面的代码使用 SSE 将数组 arr1 与 arr2 相乘,并将结果存入 arr3。数组有 count 个元素。arr1、arr2 和 arr3 的类型均为 float*。问题是编译器不支持 Intel 语法。这段代码用 AT&T 语法应该怎么写?
编译器是GCC 4.4.7。
/*
 * Multiplies packed single-precision floats from arr1 and arr2 and stores the
 * products to arr3, 16 bytes (four floats) per iteration, in Intel syntax.
 *
 * Register roles (fixed by the input constraints below):
 *   eax = arr1, ebx = arr2, ecx = arr3, edx = count
 * edx is used directly as the byte offset into all three arrays and is
 * stepped down by 16 after each store; the loop repeats until it reaches 0.
 *
 * NOTE(review): no `.att_syntax` directive follows the block, so the
 *   assembler is left in Intel mode afterwards; this is presumably why the
 *   rest of the compiler output is rejected -- confirm.
 * NOTE(review): edx is declared as an input-only operand but is modified.
 * NOTE(review): the first iteration addresses offset edx == count, and
 *   offset 0 is never accessed because the loop exits once edx becomes 0.
 * NOTE(review): the block stores through ecx but lists no "memory" clobber.
 * NOTE(review): 32-bit registers hold the pointers, which presumes a 32-bit
 *   target -- confirm.
 */
__asm__ volatile (
".intel_syntax noprefix \n\t"      /* switch the assembler to Intel operand order */
"loop: \n\t"                       /* named (non-local) label */
"movups xmm0, [eax+edx] \n\t"      /* xmm0 = 16 bytes at arr1 + edx (unaligned load) */
"movups xmm1, [ebx+edx] \n\t"      /* xmm1 = 16 bytes at arr2 + edx (unaligned load) */
"mulps xmm0, xmm1 \n\t"            /* xmm0 *= xmm1, four floats at once */
"movups [ecx+edx], xmm0 \n\t"      /* store the four products at arr3 + edx */
"sub edx, 16 \n\t"                 /* step the byte offset down; sets ZF for the branch */
"jnz loop \n\t"                    /* repeat while the offset is non-zero */
:                                  /* no output operands */
: "a"(arr1), "b"(arr2), "c"(arr3), "d"(count)
: "xmm0", "xmm1"
);
答案 0 :(得分:0)
__asm__ volatile (
"loop: \n\t"
"subq $0x10, %%rdx \n\t"
"movups (%%rax,%%rdx), %%xmm0 \n\t"
"movups (%%rbx,%%rdx), %%xmm1 \n\t"
"mulps %%xmm1, %%xmm0 \n\t"
"movups %%xmm0, (%%rcx,%%rdx) \n\t"
"jnz loop \n\t"
:
: "a"(arr1), "b"(arr2), "c"(arr3), "d"(count)
: "xmm0", "xmm1"
);
arr1、arr2 和 arr3 是 8 字节指针,count 是 8 字节整数,因此使用 64 位寄存器 r?x(即 rax、rbx、rcx、rdx)。