GUID的高效数据结构

时间:2011-03-14 09:55:21

标签: delphi data-structures delphi-2010 guid unique

我正在寻找一种数据结构,使我能够快速(优先O(1) - 快速地)确定给定的GUID是否是GUID集合的成员。

我目前的做法是使用带有0作为值的TDictionary。

虽然这很快就可以了,但是使用Hashmap重新定义GUID似乎是一种浪费,GUID被认为是唯一的定义,并且让Dictionary处理不需要的值。

必须有一个更好的解决方案,但我找不到一个。你能吗?

3 个答案:

答案 0 :(得分:13)

很少有数据结构提供O(1)访问。一个是数组,另一个是哈希地图(大卫的答案),我只知道另一个:Trie。下面是一个逐位Trie的简单实现:有一些有趣的属性:

  • 由于没有重新分配,因此免于内存碎片化。
  • O(1)添加和存在测试。当然,O(1)中涉及的常数相当大。

代码:

program Project23;

{$APPTYPE CONSOLE}

uses
  SysUtils, Generics.Collections;

type

  PGuidTrieNode=^TGuidTrieNode;
  TGuidTrieNode = record
    Sub:array[Boolean] of PGuidTrieNode;
  end;
  TGuidByteArray = array[0..15] of Byte;

  TGuidTrie = class
  protected
    Root: PGuidTrieNode;
  public
    constructor Create;
    destructor Destroy;override;

    procedure Add(G: TGUID);
    function Exists(G: TGUID): Boolean;
  end;

{ TGuidTrie }

procedure TGuidTrie.Add(G: TGUID);
var GBA: TGuidByteArray absolute G;
    Node: PGuidTrieNode;
    i: Integer;
    Bit: Integer;
    IsBitSet: Boolean;
const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i:=0 to High(GBA) do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if (i = High(GBA)) and (Bit = 7) then
        begin
          // Payload
          Node.Sub[IsBitSet] := Pointer(1);
        end
      else
        begin
          if not Assigned(Node.Sub[IsBitSet]) then
            Node.Sub[IsBitSet] := GetMemory(SizeOf(TGuidTrieNode));
          Node := Node.Sub[IsBitSet];
        end;
    end;
  end;
end;

constructor TGuidTrie.Create;
begin
  Root := GetMemory(SizeOf(TGuidTrieNode))
end;

destructor TGuidTrie.Destroy;

  procedure KillNode(Node: PGuidTrieNode);
  var i:Integer;
  begin
    if Assigned(Node.Sub[True]) then
        if Node.Sub[True] <> Pointer(1) then
        begin
          KillNode(Node.Sub[True]);
        end;
    FreeMemory(Node);
  end;

begin
  KillNode(Root);
  inherited;
end;

function TGuidTrie.Exists(G: TGUID): Boolean;
var GBA: TGuidByteArray absolute G;
    Node: PGuidTrieNode;
    i: Integer;
    Bit: Integer;
    IsBitSet: Boolean;
const BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i:=0 to 15 do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if not Assigned(Node.Sub[IsBitSet]) then
      begin
        Result := False;
        Exit;
      end;
      Node := Node.Sub[IsBitSet];
    end;
  end;
  Result := True; // Node now contains the Payload
end;

const G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
      G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';

var T: TGuidTrie;

begin
  try

    T := TGuidTrie.Create;
    try
      if T.Exists(G1) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
      T.Add(G1);
      if T.Exists(G1) then WriteLn('Exists')
                      else WriteLn('NOT Exists');

      if T.Exists(G2) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
      T.Add(G2);
      if T.Exists(G2) then WriteLn('Exists')
                      else WriteLn('NOT Exists');
    finally T.Free;
    end;

  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.

答案 1 :(得分:7)

我认为你在99%的路上。

哈希听起来像是正确的解决方案。利用GUID的特殊性质的显而易见的方法是提供自己的散列函数,该函数将构成GUID的4个32位整数组合成单个32位整数。我只是对4个整数进行异或。

我认为您使用的是Generics.Collections.TDictionary。您可以通过将自定义比较器传递给构造函数来提供自己的哈希函数。我不担心存储备用值,我认为它不会以可辨别的方式影响性能。

我相信您将GUID存储为128位整数而不是字符串。

最后,我发现GU​​ID的默认比较器可能确实已经以这种方式生成哈希码。在进行任何更改之前,有必要检查一下。

修改

默认哈希码使用应用于二进制数据的Bob Jenkins哈希。 XOR会更快,但默认的哈希码似乎不会成为性能瓶颈。

换句话说,我认为TDictionary<TGUID,Integer>可以完全满足您的需求。

答案 2 :(得分:1)

type
    PGuidDictionaryItem = ^TGuidDictionaryItem;

    TGuidDictionaryItem = record
        Key: TGuid;
        Value: Pointer;
        Next: PGuidDictionaryItem;
    end;

    TGuidDictionary = class
    private
    const
        HashSize = 2048;
    var
        Size: integer;
        FTable: array [0..HashSize-1] of PGuidDictionaryItem;

        function GetHashCode(Guid: TGUID): integer;
    public
        constructor Create;
        destructor Destroy; override;

        procedure Add(Key: TGUID; Value: TObject);
        function TryFind(Key: TGUID; out Value: TObject): boolean;
        function Contains(Key: TGUID): Boolean;
        procedure Remove(Key: TGuid);
    end;

{ TGuidDictionary }

procedure TGuidDictionary.Add(Key: TGUID; Value: TObject);
var
    Hc: integer;
    PHi: PGuidDictionaryItem;
begin
    Hc := GetHashCode(Key);

    if FTable[Hc] <> nil then
    begin
        PHi := FTable[Hc];
        repeat
            if TGuidEx.EqualGuids(PHi.Key, Key) then
                Break;

            PHi := Phi.Next;
        until PHi = nil;
    end
    else
        Phi := nil;

    if PHi <> nil then
        PHi.Value := Value
    else
    begin
        New(PHi);
        PHi.Value := Value;
        PHi.Key := Key;
        PHi.Next := FTable[Hc];
        FTable[Hc] := PHi;
    end;
end;

function TGuidDictionary.Contains(Key: TGUID): Boolean;
var
    O: TObject;
begin
    Result := TryFind(Key, O);
end;

constructor TGuidDictionary.Create;
var
    i: integer;
begin
    inherited;

    for i := Low(FTable) to High(FTable) do
        FTable[i] := nil;
end;

destructor TGuidDictionary.Destroy;
var
    i: integer;
    Phi, PhiNext: PGuidDictionaryItem;
begin
    for i := Low(FTable) to High(FTable) do
    begin
        Phi := FTable[i];
        while Phi <> nil do
        begin
            PhiNext := Phi.Next;
            Dispose(Phi);
            Phi := PhiNext;
        end;
    end;

    inherited;
end;

function TGuidDictionary.GetHashCode(Guid: TGUID): integer;
var
    N: array [0..3] of integer absolute Guid;
begin
    Result := Abs(N[0] xor N[1] xor N[2] xor N[3]) mod HashSize;
end;

procedure TGuidDictionary.Remove(Key: TGuid);
var
    Hc: Integer;
    Phi, BeforPhi: PGuidDictionaryItem;

begin
    Hc := GetHashCode(Key);

    BeforPhi := nil;
    Phi := FTable[Hc];
    while (Phi <> nil) and not TGuidEx.EqualGuids(Phi.Key, Key) do
    begin
        BeforPhi := Phi;
        Phi := Phi.Next;
    end;

    if Phi = nil then
        Exit;

    if BeforPhi <> nil then
        BeforPhi.Next := Phi.Next
    else
        FTable[Hc] := Phi.Next;

    Dispose(Phi);
end;

function TGuidDictionary.TryFind(Key: TGUID; out Value: TObject): boolean;
var
    Hc: Integer;
    Phi: PGuidDictionaryItem;
begin
    Hc := GetHashCode(Key);
    Phi := FTable[Hc];
    while (Phi <> nil) and not TGuidEx.EqualGuids(Phi.Key, Key) do
        Phi := Phi.Next;

    if Phi <> nil then
        Value := TObject(Phi.Value)
    else
        Value := nil;

    Result := Phi <> nil;
end;

procedure TestDictMisc.TestGuidDictionary;
const
    G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
    G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';
var
    T: TGuidDictionary;
    Obj1, Obj2, O: TObject;
begin
    T := TGuidDictionary.Create;
    Obj1 := TObject.Create();
    Obj2 := TObject.Create();
    try
        CheckFalse(T.Contains(G1));

        T.Add(G1, Obj1);
        CheckTrue(T.Contains(G1));

        T.Add(G2, Obj2);
        CheckTrue(T.Contains(G2));

        T.Add(G2, Obj2);
        CheckTrue(T.Contains(G2));

        CheckTrue(T.TryFind(G1, {out} O));
        CheckSame(Obj1, O);

        CheckTrue(T.TryFind(G2, {out} O));
        CheckSame(Obj2, O);

        T.Remove(G1);
        CheckFalse(T.Contains(G1));
        CheckFalse(T.TryFind(G1, {out} O));

        T.Add(G1, Obj1);
        CheckTrue(T.TryFind(G1, {out} O));
        CheckSame(Obj1, O);

    finally
        Obj1.Free();
        Obj2.Free();

        T.Free;
    end;
end;