Question

我正在使用这个组件：http://sourceforge.net/projects/tponguard/ ，现在需要把它编译为 64 位。我卡在了下面这段汇编代码上。

就是这样：

  // 32-bit x86 body: XORs the bytes at Mem2 into the bytes at Mem1.
  // On entry: eax = address of Mem1, edx = address of Mem2, ecx = byte count.
  // Whole dwords are processed first, then the remaining 0..3 bytes.
  push esi              //preserve esi/edi; both are restored at @Done
  push edi

  mov  esi, eax         //esi = Mem1
  mov  edi, edx         //edi = Mem2

  push ecx              //save byte count
  shr  ecx, 2           //convert to dwords
  jz   @Continue        //fewer than 4 bytes: skip the dword loop

  cld                   //clear the direction flag (no string instruction is used below)
@Loop1:                 //xor dwords at a time
  mov  eax, [edi]       //load 4 bytes from Mem2
  xor  [esi], eax       //xor them into Mem1
  add  esi, 4           //advance both pointers by one dword
  add  edi, 4
  dec  ecx
  jnz  @Loop1

@Continue:              //handle remaining bytes (3 or less)
  pop  ecx              //restore the original byte count
  and  ecx, 3           //keep only the count modulo 4
  jz   @Done            //no tail bytes left

@Loop2:                 //xor remaining bytes
  mov  al, [edi]        //load 1 byte from Mem2
  xor  [esi], al        //xor it into Mem1
  inc  esi
  inc  edi
  dec  ecx
  jnz  @Loop2

@Done:
  pop  edi              //restore the saved registers in reverse push order
  pop  esi

我把它改成了这样：

  // Attempted x64 port of the 32-bit routine: every register was widened to
  // 64 bits, but the entry-register usage and the 4-byte stride were kept.
  // NOTE(review): the 64-bit Windows calling convention passes the first
  // integer arguments in rcx, rdx, r8 (not eax, edx, ecx), so for
  // XorMem(Mem1, Mem2, Count) rax presumably does not hold Mem1 on entry and
  // rcx does not hold the count -- confirm against the compiler documentation.
  push rsi
  push rdi

  mov  rsi, rax         //rsi = Mem1 (assumed by this port; see NOTE above)
  mov  rdi, rdx         //rdi = Mem2

  push rcx              //save byte count (assumed to be in rcx; see NOTE above)
  shr  rcx, 2           //convert to dwords
  jz   @Continue

  cld
@Loop1:                 //xor dwords at a time
  mov  rax, [rdi]       //loads 8 bytes, although the pointers only advance by 4
  xor  [rsi], rax       //8-byte xor: reaches 4 bytes beyond the dword being processed
  add  rsi, 4
  add  rdi, 4
  dec  rcx
  jnz  @Loop1

@Continue:              //handle remaining bytes (3 or less)
  pop  rcx
  and  rcx, 3
  jz   @Done

@Loop2:                 //xor remaining bytes
  mov  al, [rdi]
  xor  [rsi], al
  inc  rsi
  inc  rdi
  dec  rcx
  jnz  @Loop2

@Done:
  pop  rdi
  pop  rsi

但现在我在 xor [rsi], rax 这一行遇到了访问冲突。

Answer 1

您所看的这个函数是

procedure XorMem(var Mem1; const Mem2; Count : Cardinal); register;

它来自 ogutil 单元。

就个人而言，我不会尝试把它转换成 x64 汇编。要把这件事做对，有不少棘手的细节需要处理。在我看来，更合理的做法是把它移植到 Pascal，让编译器去处理这些细节。最简单、最朴素的翻译如下：

{ Byte-by-byte XOR: folds the first Count bytes of Mem2 into Mem1, i.e.
  Mem1[i] := Mem1[i] xor Mem2[i] for every i in 0..Count-1.
  Nothing is touched when Count is 0. }
procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
var
  Dest, Src: PByte;
  Remaining: Cardinal;
begin
  Dest := PByte(@Mem1);
  Src := PByte(@Mem2);
  // Count down the bytes still to be processed; the loop body runs Count times.
  for Remaining := Count downto 1 do
  begin
    Dest^ := Dest^ xor Src^;
    Inc(Dest);
    Inc(Src);
  end;
end;

如果这段代码对性能至关重要，那么你需要把循环稍微展开，以便一次处理更大的操作数，例如在 x86 上使用 32 位操作数，在 x64 上使用 64 位操作数。

对32位操作数进行操作的版本可能如下所示：

{ XORs the first Count bytes of Mem2 into Mem1. Whole 32-bit words are
  processed first (Count div 4 of them), then the remaining 0..3 bytes are
  handled one at a time. }
procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
var
  Dest, Src: PByte;
  DWords, Tail: Cardinal;
begin
  Dest := PByte(@Mem1);
  Src := PByte(@Mem2);
  DWords := Count shr 2;  // number of complete 32-bit words
  Tail := Count and 3;    // bytes left over after the complete words

  while DWords <> 0 do
  begin
    PCardinal(Dest)^ := PCardinal(Dest)^ xor PCardinal(Src)^;
    Inc(Dest, SizeOf(Cardinal));
    Inc(Src, SizeOf(Cardinal));
    Dec(DWords);
  end;

  while Tail <> 0 do
  begin
    Dest^ := Dest^ xor Src^;
    Inc(Dest);
    Inc(Src);
    Dec(Tail);
  end;
end;

实际上，您可以轻松编写一个版本，让它根据编译目标自动使用 32 位或 64 位操作数。诀窍是使用与机器字长相同的 NativeUInt 类型。

{ XORs the first Count bytes of Mem2 into Mem1 using machine-word sized
  operands: NativeUInt-sized chunks are processed first, then the bytes that
  do not fill a whole chunk are handled one at a time. }
procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
const
  WordSize = SizeOf(NativeUInt);
var
  Dest, Src: PByte;
  Words, Tail: Cardinal;
begin
  Dest := PByte(@Mem1);
  Src := PByte(@Mem2);
  Words := Count div WordSize;  // number of complete NativeUInt chunks
  Tail := Count mod WordSize;   // bytes left over after the complete chunks

  while Words <> 0 do
  begin
    PNativeUInt(Dest)^ := PNativeUInt(Dest)^ xor PNativeUInt(Src)^;
    Inc(Dest, WordSize);
    Inc(Src, WordSize);
    Dec(Words);
  end;

  while Tail <> 0 do
  begin
    Dest^ := Dest^ xor Src^;
    Inc(Dest);
    Inc(Src);
    Dec(Tail);
  end;
end;

在启用优化的情况下编译时，这个最终版本的效率相当高。我认为没有必要做得比这个最终的 Pascal 版本更进一步。

将此汇编代码移植到x64时，为什么会出现访问冲突？

1 个答案: