[编辑]
使用gdb调试器运行代码后,movapd
确实不是问题所在(感谢所有指出这一点的人)。逐行进行,故障在于xmm7
和xmm0
寄存器之间的第二次比较。流控制如下所示:
检查半径较大的一切都很好;
comisd xmm1, xmm3
进行相应比较
(p $xmm1.v2_double $1 = {10, 0} p $xmm3.v2_double $2 = {2, 0}
)
认为xmm1更大,并跳转到first
标签
如果comisd
以某种方式推断出xmm7
大于xmm1
({p $xmm7.v2_double $3 = {1.4142135623730951, 1}
p $xmm1.v2_double $4 = {10, 0}
)
(另外,我非常确定gdb中print命令大括号中的值是相反的顺序。我的意思是在xmm1
中10实际上在较低的四字中,而在较高的四字中为零。 ,所以我无法理解1.41如何大于10)
这是汇编代码
#extern int circles(int n, double* cr);
.intel_syntax noprefix
.data
three: .int 3
.text
.global circles
circles:
enter 0, 0
mov rax, 1
cpuid
test rdx, 0x2000000
jz notSupported
mov rbx, rsp
and rsp, 0xfffffffffffffff0
sub rsp, 512
fxsave [rsp]
xor r10, r10 #pair counter
xor r8, r8 #outter loop counter
xor r9, r9 #inner loop counter
mov rax, rdi
mul dword ptr three
sub rax, 3
#rax value 3n-3 since our step is length of 3
pivotCircle:
cmp r8, rax
je done
#this is the line where it goes wrong
movupd xmm0, [rsi + 8*r8] #first circle center
movsd xmm1, [rsi + 8*r8 + 16] #radius first circle
mov r9, r8
nextCircle:
add r9, 3
cmp r9, rax
jg nextPivot
movupd xmm2, [rsi + 8*r9] #second circle center
movsd xmm3, [rsi + 8*r9 + 16] #second circle radius
#calculating distance between the centers
movapd xmm7, xmm0
subpd xmm7, xmm2
mulpd xmm7, xmm7
movapd xmm2, xmm7
shufpd xmm2, xmm2, 0b11
addsd xmm7, xmm2
sqrtsd xmm7, xmm7 # |c1 - c2| in xmm7
#checking which radius is bigger
comisd xmm1, xmm3
jge first
jmp second
first: #first one greater
comisd xmm7, xmm1
jge nextCircle
movsd xmm6, xmm1
subsd xmm6, xmm7
comisd xmm3, xmm6
jg nextCircle
jmp found
second: #second one greater
comisd xmm7, xmm3
jge nextCircle
movsd xmm6, xmm3
subsd xmm6, xmm7
comisd xmm1, xmm6
jg nextCircle
found:
inc r10
jmp nextCircle
nextPivot:
add r8, 3
jmp pivotCircle
done:
fxrstor [rsp]
mov rsp, rbx
mov rax, r10
leave
ret
notSupported:
mov rdi, 1
call exit
这是main.c文件。数组中的元素以三个(a,b,半径)为一组。
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
extern int circles(int n, double* cr);
int main(int argc, char const *argv[])
{
int n;
double* cr;
scanf("%d", &n);
assert(n > 0);
cr = malloc(n * sizeof(double) * 3);
assert(cr != NULL);
for (int i = 0; i < n; i++)
{
scanf("%lf%lf%lf", cr+i*3, cr+i*3+1, cr+i*3+2);
assert(*(cr+3*i+2) > 0);
}
printf("%d\n", circles(n, cr));
return 0;
}