为什么Delphi在不知名的地方插入nop?

时间:2013-09-27 01:41:28

标签: delphi assembly delphi-xe2

以下代码:

while Assigned(p) do begin
  np:= p^.next;
  h:= leaf_hash(p^.data);   <<-- inline routine
  h:= h mod nhashprime;
  p^.next:= nhashtab[h]; 
  nhashtab[h]:= p;
  p:= np;
end; { while }

生成以下程序集:

hlife.pas.605: h:= leaf_hash(p^.data);
00000000005D4602 498B4018         mov rax,[r8+$18]
00000000005D4606 48C1E830         shr rax,$30
00000000005D460A 498B5018         mov rdx,[r8+$18]
00000000005D460E 48C1EA20         shr rdx,$20
00000000005D4612 81E2FFFF0000     and edx,$0000ffff
00000000005D4618 4D8B5818         mov r11,[r8+$18]
00000000005D461C 49C1EB10         shr r11,$10
00000000005D4620 4181E3FFFF0000   and r11d,$0000ffff
00000000005D4627 418B7018         mov esi,[r8+$18]
00000000005D462B 81E6FFFF0000     and esi,$0000ffff
00000000005D4631 488D34F6         lea rsi,[rsi+rsi*8]
00000000005D4635 4403DE           add r11d,esi
00000000005D4638 4F8D1CDB         lea r11,[r11+r11*8]
00000000005D463C 4103D3           add edx,r11d
00000000005D463F 488D14D2         lea rdx,[rdx+rdx*8]
00000000005D4643 03C2             add eax,edx
hlife.pas.606: h:= h mod nhashprime;
00000000005D4645 8BC0             mov eax,eax   <<--- Why is there a NOP here?
00000000005D4647 4C63DB           movsxd r11,rbx
00000000005D464A 4899             cwd
00000000005D464C 49F7FB           idiv r11
00000000005D464F 488BC2           mov rax,rdx
hlife.pas.607: p^.next:= nhashtab[h];
00000000005D4652 488B5538         mov rdx,[rbp+$38]

Delphi在nhashtab[h]:= p;行之前插入NOP。 如果leaf_hash函数是正常函数,则它是有意义的 (不是真的因为RET仍会返回[5D4645]执行nop)
但现在这不是跳跃目标。

所以我(只是)很好奇,为什么会这样做?

[编辑]:SSCCE
好的,我已经有了一个SSCCE {它不是很短,但它必须这样做。

请注意编译器设置(Debug + Win64)

enter image description here

unit Unit16;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls;

type
  pnode = ^node;
  tflavour = (tnode, tleaf, tleaf64);

  node = record
    case flavour: tflavour of
      tnode: (next: pnode; (* hash link *)
          nw, ne, sw, se: pnode; (* constant; nw not= 0 means nonleaf *)
          res: pnode); (* cache *)
      tleaf: (next1: pnode; (* hash link *)
          isnode: pnode; (* must always be zero for leaves *)
          nw1, ne1, sw1, se1: word; (* constant *)
          res1, res2: word; (* constant *)
        );
      tleaf64: (next2: pnode; (* hash link *)
          isnode1: pnode; (* must always be zero for leaves *)
          data: Uint64; (* constant *)
          res1a, res2a: word; (* constant *)
        )
  end;

  ppnode = array of pnode;

  THashBox = class(TPersistent)
  strict private
    leafhashpop: integer;
    leafhashlimit: integer;
    leafhashprime: integer;
    leafhashtab: ppnode;
    nodehashpop: integer;
    nodehashlimit: integer;
    nodehashprime: integer;
    nodehashtab: ppnode;
  private
    TotalTime, Occurrences: Uint64;
    StartTime, EndTime: Uint64;
    procedure resize_leaves();
  public
    constructor Create;
  end;

  TForm16 = class(TForm)
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    HashBox: THashBox;
  public
  end;

var
  Form16: TForm16;

implementation

{$R *.dfm}

const
  maxmem = 2000*1000*1000;   {2GB}

var
  alloced: cardinal;

function rdtsc: int64; assembler;
asm
  { xor eax,eax;
  push rbx
  cpuid
  pop rbx }
  rdtsc
end;

function node_hash(n: pnode): cardinal; { inline; } assembler; overload;
// var
// a: pnativeint;
  // begin
  // Result:= nativeint(n^.se) + 3 * (nativeint(n^.sw) + 3 * (nativeint(n^.ne) + 3 * nativeint(n^.nw) + 3));
asm
  mov eax,[rcx+node.nw]
  lea eax,[eax+eax*2+3]
  add eax,[rcx+node.ne]
  lea eax,[eax+eax*2]
  add eax,[rcx+node.sw]
  lea eax,[eax+eax*2]
  add eax,[rcx+node.se]
end;

function leaf_hash(a, b, c, d: cardinal): cardinal; inline; overload;
begin
  Result:= (d + 9 * (c + 9 * (b + 9 * a)))
end;

function leaf_hash(data: Uint64): cardinal; inline; overload;
begin
  // Result:= d + 9 * (c + 9 * (b + 9 * a));
  Result:= ((data shr 48) + 9 * (((data shr 32) and $FFFF) + 9 * (((data shr 16) and $FFFF) + 9 * (data and $FFFF))));
  Inc(Result);
end;

procedure TForm16.Button1Click(Sender: TObject);
begin
  HashBox:= THashBox.Create;
  Hashbox.resize_leaves;
end;

function nextprime(old: integer): integer;
begin
  Result:= 1009;
end;

constructor THashBox.Create;
begin
  leafhashprime:= 7;
  SetLength(leafhashtab, leafhashprime);
end;

procedure THashBox.resize_leaves();
var
  i, i1, i2: integer;
  nhashprime: Cardinal;
  p: pnode;
  nhashtab: ppnode;
  np: pnode;
  h: Integer;
  n, n2: integer;
  diff1, diff2: integer;
begin
  nhashprime:= nextprime(4 * leafhashprime);
  if (nhashprime * sizeof(pnode) > maxmem - alloced) then begin
    leafhashlimit:= 2000 * 1000 * 1000;
    exit;
  end;
  (*
    *   Don't let the hash table buckets take more than 4% of the
    *   memory.  If we're starting to strain memory, let the buckets
    *   fill up a bit more.
  *)
  if (nhashprime > maxmem div 100) then begin
    nhashprime:= nextprime(maxmem div 100);
    if (nhashprime = leafhashprime) then begin
      leafhashlimit:= 2000 * 1000 * 1000;
      exit;
    end;
  end;
  SetLength(nhashtab, nhashprime); //make a new table, do not resize the existing one.
  alloced:= alloced + sizeof(pnode) * (nhashprime - leafhashprime);

  diff1:= maxint;
  for i1:= 0 to 100 do begin
    n:= 0;
    StartTime:= rdtsc;
    for i:= 0 to leafhashprime - 1 do begin
      p:= leafhashtab[i];
      if Assigned(p) then begin
        h:= node_hash(p);
        h:= h mod nhashprime;
        inc(n, h);
      end;
    end;
    EndTime:= rdtsc;
    if ((EndTime - StartTime) < diff1) then diff1:= (EndTime - StartTime);

  end;

  diff2:= maxint;
  for i1:= 0 to 100 do begin
    n2:= 0;
    StartTime:= rdtsc;
    for i:= 0 to leafhashprime - 1 do begin
      p:= leafhashtab[i];
      if Assigned(p) then begin
        inc(n2);
      end;
    end;
    EndTime:= rdtsc;
    if (endtime - starttime) < diff2 then diff2:= endtime - starttime;
  end;

  TotalTime:= diff1 - diff2;
  if n <> n2 then Occurrences:= nhashprime;

  for i:= 0 to leafhashprime - 1 do begin
    // for (p=hashtab[i]; p;) {
    p:= leafhashtab[i];
    while Assigned(p) do begin    <<--- put a breakpoint here
      np:= p^.next;
      h:= leaf_hash(p^.data);
      h:= h mod nhashprime;
      p^.next:= nhashtab[h];
      nhashtab[h]:= p;
      p:= np;
    end; { while }
  end; { for i }
  // free(hashtab);
  leafhashtab:= nhashtab;
  leafhashprime:= nhashprime;
  leafhashlimit:= leafhashprime;
end;

end.

你会看到这个反汇编:

Unit16.pas.196: h:= h mod nhashprime;
000000000059CE4B 4863C0           movsxd rax,rax
000000000059CE4E 448B5528         mov r10d,[rbp+$28]
000000000059CE52 458BD2           mov r10d,r10d     <<--- weird NOP here 
000000000059CE55 4899             cwd
000000000059CE57 49F7FA           idiv r10
000000000059CE5A 488BC2           mov rax,rdx
Unit16.pas.197: p^.next:= nhashtab[h];
000000000059CE5D 488B5538         mov rdx,[rbp+$38]

3 个答案:

答案 0 :(得分:5)

答案是

MOV EAX,EAX

不是无操作

在x64上操作64位寄存器的低32位将使高32位归零 因此,上述说明应该理解为:

MOVZX RAX,EAX 

根据AMD

  

32位操作的结果隐式零扩展为64位值。

答案 1 :(得分:4)

恕我直言,这不是nop的对象,但听起来就像未经优化的生成代码,以及错误的自己变量签名。

h:= h mod nhashprime;

可分为:

mov eax,eax       new h = eax, old h = eax  // which does not mean anything
movsxd r11,rbx    convert with sign nhashprime stored in rbx into temp registry r11
cwd               signed rax into rdx:rax
idiv r11          signed divide rdx:rax by r11 -> rax=quotient, rdx=remainder
mov rax,rdx       store remainder rdx into rax

您是否尝试启用代码生成优化?我想它会修复mov eax,eax的内容。

但您的原始代码也进行了次优化。 在你的情况下你应该使用无符号算术。

并且,你应该更好地使用两个nhashprime的幂,计算一个简单的and二进制运算而不是慢速除法:

var h, nhashprimeMask: cardinal; // use UNSIGNED arithmetic here!

// here nhashprime is a POWER OF TWO (128,256,512,1024,2048...)
nhashprimeMask := nhashprime-1; // compute the mask

while Assigned(p) do begin
  np:= p^.next;
  h:= leaf_hash(p^.data) and nhashprimeMask;
  p^.next:= nhashtab[h]; 
  nhashtab[h]:= p;
  p:= np;
end; { while }

这段代码会更快,并且应该更好地编译。

答案 2 :(得分:0)

这是一种优化,用于对齐代码,尤其是在循环中,以避免缓存行停滞等。