我正在寻找一种数据结构,能让我快速(最好是 O(1))判断给定的 GUID 是否属于某个 GUID 集合。
我目前的做法是使用带有0作为值的TDictionary。
虽然这种做法速度足够快,但用哈希表对 GUID 再做一次哈希似乎是一种浪费——GUID 按定义本来就是唯一的——而且还让 Dictionary 去处理根本用不到的值。
一定有更好的解决方案,但我没能找到。你能找到吗?
答案 0 :(得分:13)
能提供 O(1) 访问的数据结构很少。一种是数组,另一种是哈希表(即大卫的答案),我所知道的只剩下一种:Trie(字典树)。下面是一个按位(bitwise)Trie 的简单实现,它有一些有趣的特性:
代码:
program Project23;
{$APPTYPE CONSOLE}
uses
SysUtils, Generics.Collections;
type
// Pointer to a trie node; declared first so the record can refer to itself.
PGuidTrieNode=^TGuidTrieNode;
// One node of the bitwise trie: a child pointer for each possible bit value
// (Sub[False] for a cleared bit, Sub[True] for a set bit).
TGuidTrieNode = record
Sub:array[Boolean] of PGuidTrieNode;
end;
// Byte-wise view of a GUID; the methods overlay it on a TGUID via "absolute".
TGuidByteArray = array[0..15] of Byte;
// Set of GUIDs stored as a bitwise trie with one level per GUID bit.
TGuidTrie = class
protected
// Top node of the trie; allocated in Create and released in Destroy.
Root: PGuidTrieNode;
public
constructor Create;
destructor Destroy;override;
// Inserts G into the set.
procedure Add(G: TGUID);
// Reports whether the path for G is present in the trie.
function Exists(G: TGUID): Boolean;
end;
{ TGuidTrie }
// Inserts G into the trie.
//
// The GUID is walked bit by bit (byte 0 first, least significant bit of each
// byte first). Every bit selects one of the two children of the current
// node; missing interior nodes are created on the way. The very last bit
// does not get a node of its own: its child slot is set to the sentinel
// Pointer(1), which marks "this GUID is present".
procedure TGuidTrie.Add(G: TGUID);
var
  GBA: TGuidByteArray absolute G;
  Node: PGuidTrieNode;
  i: Integer;
  Bit: Integer;
  IsBitSet: Boolean;
const
  BitMask: array[0..7] of Byte = (1, 2, 4, 8, 16, 32, 64, 128);
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Node := Root;
  for i := 0 to High(GBA) do
  begin
    for Bit := 0 to 7 do
    begin
      IsBitSet := (GBA[i] and BitMask[Bit]) <> 0;
      if (i = High(GBA)) and (Bit = 7) then
      begin
        // Payload: sentinel marking the end of a stored GUID.
        Node.Sub[IsBitSet] := Pointer(1);
      end
      else
      begin
        if not Assigned(Node.Sub[IsBitSet]) then
        begin
          Node.Sub[IsBitSet] := GetMemory(SizeOf(TGuidTrieNode));
          // GetMemory does not clear the block. Both child pointers of the
          // new node must start out nil, otherwise later Assigned() checks
          // would follow garbage pointers.
          FillChar(Node.Sub[IsBitSet]^, SizeOf(TGuidTrieNode), 0);
        end;
        Node := Node.Sub[IsBitSet];
      end;
    end;
  end;
end;
// Creates an empty trie consisting of a single root node with no children.
constructor TGuidTrie.Create;
begin
  inherited Create;
  Root := GetMemory(SizeOf(TGuidTrieNode));
  // GetMemory returns uninitialised memory; clear it so that both child
  // pointers of the root read as nil until something is added.
  FillChar(Root^, SizeOf(TGuidTrieNode), 0);
end;
// Releases every node of the trie.
destructor TGuidTrie.Destroy;

  // Frees Node and, recursively, everything reachable from it.
  // Both branches are visited; a child slot holding the sentinel Pointer(1)
  // marks a stored GUID rather than a real node and must not be followed
  // or freed. Assumes unused child slots are nil.
  procedure KillNode(Node: PGuidTrieNode);
  var
    Branch: Boolean;
    Child: PGuidTrieNode;
  begin
    for Branch := False to True do
    begin
      Child := Node.Sub[Branch];
      if Assigned(Child) and (Child <> Pointer(1)) then
        KillNode(Child);
    end;
    FreeMemory(Node);
  end;

begin
  // Root is nil if the object is destroyed before Create allocated it.
  if Assigned(Root) then
    KillNode(Root);
  Root := nil;
  inherited;
end;
// Returns True when the full 128-bit path for G is present in the trie.
//
// Bits are visited in the same order as in Add: byte 0 first, and within
// each byte from the least significant bit upwards. The walk stops with
// False as soon as a child slot on the path is unassigned.
function TGuidTrie.Exists(G: TGUID): Boolean;
var
  Bytes: TGuidByteArray absolute G;
  Cursor: PGuidTrieNode;
  BitIndex: Integer;
  Branch: Boolean;
begin
  Assert(SizeOf(G) = SizeOf(TGuidByteArray));
  Result := False;
  Cursor := Root;
  for BitIndex := 0 to 127 do
  begin
    // BitIndex shr 3 selects the byte, BitIndex and 7 the bit inside it.
    Branch := (Bytes[BitIndex shr 3] and (1 shl (BitIndex and 7))) <> 0;
    Cursor := Cursor.Sub[Branch];
    if not Assigned(Cursor) then
      Exit;
  end;
  // All 128 slots were assigned; the last one holds the payload marker.
  Result := True;
end;
const
  G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
  G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';

// Prints one line saying whether Guid is currently stored in Trie.
procedure Report(Trie: TGuidTrie; const Guid: TGUID);
begin
  if Trie.Exists(Guid) then
    WriteLn('Exists')
  else
    WriteLn('NOT Exists');
end;

var
  T: TGuidTrie;

// Demo: query each GUID before and after adding it to the trie.
begin
  try
    T := TGuidTrie.Create;
    try
      Report(T, G1);
      T.Add(G1);
      Report(T, G1);

      Report(T, G2);
      T.Add(G2);
      Report(T, G2);
    finally
      T.Free;
    end;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.
答案 1 :(得分:7)
我认为你在99%的路上。
哈希听起来像是正确的解决方案。利用GUID的特殊性质的显而易见的方法是提供自己的散列函数,该函数将构成GUID的4个32位整数组合成单个32位整数。我只是对4个整数进行异或。
我认为您使用的是Generics.Collections.TDictionary。您可以通过将自定义比较器传递给构造函数来提供自己的哈希函数。我不担心存储备用值,我认为它不会以可辨别的方式影响性能。
我相信您将GUID存储为128位整数而不是字符串。
最后,我发现GUID的默认比较器可能确实已经以这种方式生成哈希码。在进行任何更改之前,有必要检查一下。
修改:
默认哈希码使用应用于二进制数据的Bob Jenkins哈希。 XOR会更快,但默认的哈希码似乎不会成为性能瓶颈。
换句话说,我认为TDictionary<TGUID,Integer>
可以完全满足您的需求。
答案 2 :(得分:1)
type
PGuidDictionaryItem = ^TGuidDictionaryItem;
// One entry of a bucket chain: the GUID key, the stored value and a link to
// the next entry in the same bucket (nil at the end of the chain).
TGuidDictionaryItem = record
Key: TGuid;
// Holds the TObject reference passed to Add, stored as an untyped pointer.
Value: Pointer;
Next: PGuidDictionaryItem;
end;
// Hash table keyed by GUID with a fixed number of buckets and singly linked
// chains for collisions.
TGuidDictionary = class
private
const
// Number of buckets in FTable.
HashSize = 2048;
var
// NOTE(review): not referenced by any method visible here - confirm whether
// it is still needed.
Size: integer;
// Bucket heads; nil means the bucket is empty.
FTable: array [0..HashSize-1] of PGuidDictionaryItem;
// Maps a GUID to a bucket index in FTable.
function GetHashCode(Guid: TGUID): integer;
public
constructor Create;
destructor Destroy; override;
// Stores Value under Key, replacing the value of an existing entry.
procedure Add(Key: TGUID; Value: TObject);
// Looks Key up; returns True and the stored value when found.
function TryFind(Key: TGUID; out Value: TObject): boolean;
// Returns True when an entry for Key exists.
function Contains(Key: TGUID): Boolean;
// Deletes the entry for Key, if there is one.
procedure Remove(Key: TGuid);
end;
{ TGuidDictionary }
// Associates Value with Key.
// If the key is already present its value is overwritten; otherwise a new
// entry is allocated and linked in at the head of the key's bucket chain.
procedure TGuidDictionary.Add(Key: TGUID; Value: TObject);
var
  Bucket: integer;
  Item: PGuidDictionaryItem;
begin
  Bucket := GetHashCode(Key);

  // Scan the chain for an entry that already carries this key.
  Item := FTable[Bucket];
  while (Item <> nil) and not TGuidEx.EqualGuids(Item.Key, Key) do
    Item := Item.Next;

  if Item = nil then
  begin
    // Not found: create a fresh entry and make it the new chain head.
    New(Item);
    Item.Key := Key;
    Item.Next := FTable[Bucket];
    FTable[Bucket] := Item;
  end;

  Item.Value := Value;
end;
// Returns True when an entry for Key exists.
// Delegates to TryFind and discards the value it hands back.
function TGuidDictionary.Contains(Key: TGUID): Boolean;
var
  Ignored: TObject;
begin
  Result := TryFind(Key, Ignored);
end;
// Creates an empty dictionary: every bucket head starts out as nil.
constructor TGuidDictionary.Create;
begin
  inherited;
  // Clearing the whole array in one go sets each bucket pointer to nil.
  FillChar(FTable, SizeOf(FTable), 0);
end;
// Disposes every entry of every bucket chain. The stored values themselves
// are not freed here.
destructor TGuidDictionary.Destroy;
var
  Bucket: integer;
  Item, Doomed: PGuidDictionaryItem;
begin
  for Bucket := Low(FTable) to High(FTable) do
  begin
    Item := FTable[Bucket];
    while Item <> nil do
    begin
      // Step to the successor before releasing the current entry.
      Doomed := Item;
      Item := Item.Next;
      Dispose(Doomed);
    end;
  end;
  inherited;
end;
// Maps Guid to a bucket index in the range 0 .. HashSize-1.
//
// The four 32-bit words of the GUID are xor-ed together and reduced modulo
// HashSize. The words are treated as unsigned: with signed arithmetic the
// xor can be Low(Integer), whose absolute value is not representable as a
// positive Integer, so an Abs(...) mod HashSize form only stays in range
// while HashSize happens to divide 2^31.
function TGuidDictionary.GetHashCode(Guid: TGUID): integer;
var
  N: array [0..3] of Cardinal absolute Guid;
begin
  Result := Integer((N[0] xor N[1] xor N[2] xor N[3]) mod Cardinal(HashSize));
end;
// Deletes the entry stored under Key. Does nothing when the key is absent.
procedure TGuidDictionary.Remove(Key: TGuid);
var
  Bucket: Integer;
  Current, Previous: PGuidDictionaryItem;
begin
  Bucket := GetHashCode(Key);
  Previous := nil;
  Current := FTable[Bucket];
  while Current <> nil do
  begin
    if TGuidEx.EqualGuids(Current.Key, Key) then
    begin
      // Unlink: either the bucket head or the predecessor skips the entry.
      if Previous = nil then
        FTable[Bucket] := Current.Next
      else
        Previous.Next := Current.Next;
      Dispose(Current);
      Exit;
    end;
    Previous := Current;
    Current := Current.Next;
  end;
end;
// Looks Key up in its bucket chain.
// Returns True and sets Value to the stored object when the key is found;
// otherwise returns False and sets Value to nil.
function TGuidDictionary.TryFind(Key: TGUID; out Value: TObject): boolean;
var
  Item: PGuidDictionaryItem;
begin
  Value := nil;
  Result := False;
  Item := FTable[GetHashCode(Key)];
  while Item <> nil do
  begin
    if TGuidEx.EqualGuids(Item.Key, Key) then
    begin
      Value := TObject(Item.Value);
      Result := True;
      Exit;
    end;
    Item := Item.Next;
  end;
end;
// Exercises TGuidDictionary: insert, overwrite, lookup, remove and re-insert.
procedure TestDictMisc.TestGuidDictionary;
const
  G1: TGUID = '{68D09F12-3E0D-4963-B32C-4EE3BD90F69C}';
  G2: TGUID = '{BEED37F6-9757-41DC-8463-AF094392652B}';
var
  Dict: TGuidDictionary;
  First, Second, Found: TObject;
begin
  Dict := TGuidDictionary.Create;
  First := TObject.Create();
  Second := TObject.Create();
  try
    // Insertion makes keys visible.
    CheckFalse(Dict.Contains(G1));
    Dict.Add(G1, First);
    CheckTrue(Dict.Contains(G1));
    Dict.Add(G2, Second);
    CheckTrue(Dict.Contains(G2));

    // Adding an existing key again keeps it present.
    Dict.Add(G2, Second);
    CheckTrue(Dict.Contains(G2));

    // Lookups hand back the objects that were stored.
    CheckTrue(Dict.TryFind(G1, {out} Found));
    CheckSame(First, Found);
    CheckTrue(Dict.TryFind(G2, {out} Found));
    CheckSame(Second, Found);

    // Removal hides the key from both query methods.
    Dict.Remove(G1);
    CheckFalse(Dict.Contains(G1));
    CheckFalse(Dict.TryFind(G1, {out} Found));

    // A removed key can be inserted again.
    Dict.Add(G1, First);
    CheckTrue(Dict.TryFind(G1, {out} Found));
    CheckSame(First, Found);
  finally
    First.Free();
    Second.Free();
    Dict.Free;
  end;
end;