为了找到N个数组的交集,我有这个实现,这是非常低效的。我知道必须有一个算法来加速这个。
注意:myarray是包含我要为其找到交集的所有其他数组的数组。
var
i, j, k: integer;
myarray: Array of Array of integer;
intersection: array of integer;
for I := 0 to length(myarray)-1 do
begin
for J := 0 to length(myarray)-1 do
begin
if i = j then
continue;
for k := 0 to length(myarray[i])-1 do
begin
if myarray[i][j] = myarray[j][k] then
begin
setLength(intersection, length(intersection)+1);
intersection[length(intersection)-1] := myarray[j][k];
end;
end;
end;
end;
我可以应用哪些优化来加快速度?有没有更快的方法呢?
编辑:数组中的数据未排序。
答案 0 :(得分:10)
有一种更快的方法:列表比较算法。它允许您以线性时间而不是二次时间比较两个列表。这是基本的想法:
这可以通过一些努力扩展到处理2个以上的列表。
答案 1 :(得分:5)
不幸的是,您还没有更新您的问题,所以仍然不清楚您在问什么。例如。你谈到一个交集(它应该搜索每个单独数组中存在的值),但是从(不工作)代码看,你似乎只是在任何数组中搜索重复项。
尽管Mason's answer指出了这种算法的明显通用解决方案,但我认为这种多维数组有些不同。我制定了两个例程来确定(1)交集以及(2)重复。两者都假定数组中长度不等的无序内容。
首先,我决定介绍一些新类型:
type
PChain = ^TChain;
TChain = array of Integer;
TChains = array of TChain;
其次,这两个例程都需要一些排序机制。通过使用/滥用TList
:
function CompareInteger(Item1, Item2: Pointer): Integer;
begin
Result := Integer(Item1) - Integer(Item2);
end;
procedure SortChain(var Chain: TChain);
var
List: TList;
begin
List := TList.Create;
try
List.Count := Length(Chain);
Move(Chain[0], List.List[0], List.Count * SizeOf(Integer));
List.Sort(CompareInteger);
Move(List.List[0], Chain[0], List.Count * SizeOf(Integer));
finally
List.Free;
end;
end;
但是通过调整Classes.QuickSort
的RTL代码可以获得更好的实现,这与上面的代码完全相同,而不复制数组(两次):
procedure SortChain(Chain: PChain; L, R: Integer);
var
I: Integer;
J: Integer;
Value: Integer;
Temp: Integer;
begin
repeat
I := L;
J := R;
Value := Chain^[(L + R) shr 1];
repeat
while Chain^[I] < Value do
Inc(I);
while Chain^[J] > Value do
Dec(J);
if I <= J then
begin
Temp := Chain^[I];
Chain^[I] := Chain^[J];
Chain^[J] := Temp;
Inc(I);
Dec(J);
end;
until I > J;
if L < J then
SortChain(Chain, L, J);
L := I;
until I >= R;
end;
要获得所有数组的交集,将最短数组中的所有值与所有其他数组中的值进行比较就足够了。因为最短的数组可能包含重复值,所以对该小数组进行排序,以便能够忽略重复项。从那时起,这只是在其他一个数组中找到(或者更确切地说找不到)相同值的问题。不需要对所有其他数组进行排序,因为在排序数组中找到值的机会是50%。
function GetChainsIntersection(const Chains: TChains): TChain;
var
IShortest: Integer;
I: Integer;
J: Integer;
K: Integer;
Value: Integer;
Found: Boolean;
FindCount: Integer;
begin
// Determine which of the chains is the shortest
IShortest := 0;
for I := 1 to Length(Chains) - 1 do
if Length(Chains[I]) < Length(Chains[IShortest]) then
IShortest := I;
// The length of result will at maximum be the length of the shortest chain
SetLength(Result, Length(Chains[IShortest]));
Value := 0;
FindCount := 0;
// Find for every value in the shortest chain...
SortChain(@Chains[IShortest], 0, Length(Chains[IShortest]) - 1);
for K := 0 to Length(Chains[IShortest]) - 1 do
begin
if (K > 0) and (Chains[IShortest, K] = Value) then
Continue;
Value := Chains[IShortest, K];
Found := False;
for I := 0 to Length(Chains) - 1 do
if I <> IShortest then
begin
Found := False;
for J := 0 to Length(Chains[I]) - 1 do
// ... the same value in other chains
if Chains[I, J] = Value then
begin
Found := True;
Break;
end;
if not Found then
Break;
end;
// Add a found value to the result
if Found then
begin
Result[FindCount] := Value;
Inc(FindCount);
end;
end;
// Truncate the length of result to the actual number of found values
SetLength(Result, FindCount);
end;
这也不需要单独对所有数组进行排序。所有值都复制到一维临时数组中。在对数组进行排序后,很容易找到重复数据。
function GetDuplicateShackles(const Chains: TChains): TChain;
var
Count: Integer;
I: Integer;
Temp: TChain;
PrevValue: Integer;
begin
// Foresee no result
SetLength(Result, 0);
// Count the total number of values
Count := 0;
for I := 0 to Length(Chains) - 1 do
Inc(Count, Length(Chains[I]));
if Count > 0 then
begin
// Copy all values to a temporary chain...
SetLength(Temp, Count);
Count := 0;
for I := 0 to Length(Chains) - 1 do
begin
Move(Chains[I][0], Temp[Count], Length(Chains[I]) * SizeOf(Integer));
Inc(Count, Length(Chains[I]));
end;
// Sort the temporary chain
SortChain(@Temp, 0, Count - 1);
// Find all duplicate values in the temporary chain
SetLength(Result, Count);
Count := 0;
PrevValue := Temp[0];
for I := 1 to Length(Temp) - 1 do
begin
if (Temp[I] = PrevValue) and
((Count = 0) or (Temp[I] <> Result[Count - 1])) then
begin
Result[Count] := PrevValue;
Inc(Count);
end;
PrevValue := Temp[I];
end;
SetLength(Result, Count);
end;
end;
因为我喜欢测试我的所有代码,所以只需要很少的工作就可以使它具有一定的代表性。
unit Unit1;
interface
uses
SysUtils, Classes, Controls, Forms, StdCtrls, Grids;
type
PChain = ^TChain;
TChain = array of Integer;
TChains = array of TChain;
TForm1 = class(TForm)
Grid: TStringGrid;
IntersectionFullButton: TButton;
IntersectionPartialButton: TButton;
DuplicatesFullButton: TButton;
DuplicatesPartialButton: TButton;
Memo: TMemo;
procedure FormCreate(Sender: TObject);
procedure IntersectionButtonClick(Sender: TObject);
procedure DuplicatesButtonClick(Sender: TObject);
private
procedure ClearGrid;
procedure ShowChains(const Chains: TChains);
procedure ShowChain(const Chain: TChain; const Title: String);
end;
var
Form1: TForm1;
implementation
{$R *.dfm}
const
MaxDepth = 20;
procedure FillChains(var Chains: TChains; FillUp: Boolean; MaxValue: Integer);
var
X: Integer;
Y: Integer;
Depth: Integer;
begin
SetLength(Chains, MaxDepth);
for X := 0 to MaxDepth - 1 do
begin
if FillUp then
Depth := MaxDepth
else
Depth := Random(MaxDepth - 2) + 3; // Minimum depth = 3
SetLength(Chains[X], Depth);
for Y := 0 to Depth - 1 do
Chains[X, Y] := Random(MaxValue);
end;
end;
procedure SortChain(Chain: PChain; L, R: Integer);
var
I: Integer;
J: Integer;
Value: Integer;
Temp: Integer;
begin
repeat
I := L;
J := R;
Value := Chain^[(L + R) shr 1];
repeat
while Chain^[I] < Value do
Inc(I);
while Chain^[J] > Value do
Dec(J);
if I <= J then
begin
Temp := Chain^[I];
Chain^[I] := Chain^[J];
Chain^[J] := Temp;
Inc(I);
Dec(J);
end;
until I > J;
if L < J then
SortChain(Chain, L, J);
L := I;
until I >= R;
end;
function GetChainsIntersection(const Chains: TChains): TChain;
var
IShortest: Integer;
I: Integer;
J: Integer;
K: Integer;
Value: Integer;
Found: Boolean;
FindCount: Integer;
begin
IShortest := 0;
for I := 1 to Length(Chains) - 1 do
if Length(Chains[I]) < Length(Chains[IShortest]) then
IShortest := I;
SetLength(Result, Length(Chains[IShortest]));
Value := 0;
FindCount := 0;
SortChain(@Chains[IShortest], 0, Length(Chains[IShortest]) - 1);
for K := 0 to Length(Chains[IShortest]) - 1 do
begin
if (K > 0) and (Chains[IShortest, K] = Value) then
Continue;
Value := Chains[IShortest, K];
Found := False;
for I := 0 to Length(Chains) - 1 do
if I <> IShortest then
begin
Found := False;
for J := 0 to Length(Chains[I]) - 1 do
if Chains[I, J] = Value then
begin
Found := True;
Break;
end;
if not Found then
Break;
end;
if Found then
begin
Result[FindCount] := Value;
Inc(FindCount);
end;
end;
SetLength(Result, FindCount);
end;
function GetDuplicateShackles(const Chains: TChains): TChain;
var
Count: Integer;
I: Integer;
Temp: TChain;
PrevValue: Integer;
begin
SetLength(Result, 0);
Count := 0;
for I := 0 to Length(Chains) - 1 do
Inc(Count, Length(Chains[I]));
if Count > 0 then
begin
SetLength(Temp, Count);
Count := 0;
for I := 0 to Length(Chains) - 1 do
begin
Move(Chains[I][0], Temp[Count], Length(Chains[I]) * SizeOf(Integer));
Inc(Count, Length(Chains[I]));
end;
SortChain(@Temp, 0, Count - 1);
SetLength(Result, Count);
Count := 0;
PrevValue := Temp[0];
for I := 1 to Length(Temp) - 1 do
begin
if (Temp[I] = PrevValue) and
((Count = 0) or (Temp[I] <> Result[Count - 1])) then
begin
Result[Count] := PrevValue;
Inc(Count);
end;
PrevValue := Temp[I];
end;
SetLength(Result, Count);
end;
end;
{ TForm1 }
procedure TForm1.FormCreate(Sender: TObject);
begin
Grid.ColCount := MaxDepth;
Grid.RowCount := MaxDepth;
end;
procedure TForm1.ClearGrid;
var
I: Integer;
begin
for I := 0 to Grid.ColCount - 1 do
Grid.Cols[I].Text := '';
end;
procedure TForm1.ShowChains(const Chains: TChains);
var
I: Integer;
J: Integer;
begin
for I := 0 to Length(Chains) - 1 do
for J := 0 to Length(Chains[I]) - 1 do
Grid.Cells[I, J] := IntToStr(Chains[I, J]);
end;
procedure TForm1.ShowChain(const Chain: TChain; const Title: String);
var
I: Integer;
begin
if Length(Chain) = 0 then
Memo.Lines.Add('No ' + Title)
else
begin
Memo.Lines.Add(Title + ':');
for I := 0 to Length(Chain) - 1 do
Memo.Lines.Add(IntToStr(Chain[I]));
end;
end;
procedure TForm1.IntersectionButtonClick(Sender: TObject);
var
FillUp: Boolean;
Chains: TChains;
Chain: TChain;
begin
ClearGrid;
Memo.Clear;
FillUp := Sender = IntersectionFullButton;
if FillUp then
FillChains(Chains, True, 8)
else
FillChains(Chains, False, 4);
ShowChains(Chains);
Chain := GetChainsIntersection(Chains);
ShowChain(Chain, 'Intersection');
end;
procedure TForm1.DuplicatesButtonClick(Sender: TObject);
var
Chains: TChains;
Chain: TChain;
begin
ClearGrid;
Memo.Clear;
FillChains(Chains, Sender = DuplicatesFullButton, 900);
ShowChains(Chains);
Chain := GetDuplicateShackles(Chains);
ShowChain(Chain, 'Duplicates');
end;
initialization
Randomize;
end.
Unit1.DFM:
object Form1: TForm1
Left = 343
Top = 429
Width = 822
Height = 459
Caption = 'Form1'
Color = clBtnFace
Font.Charset = DEFAULT_CHARSET
Font.Color = clWindowText
Font.Height = -11
Font.Name = 'MS Sans Serif'
Font.Style = []
OldCreateOrder = False
OnCreate = FormCreate
DesignSize = (
806
423)
PixelsPerInch = 96
TextHeight = 13
object Memo: TMemo
Left = 511
Top = 63
Width = 295
Height = 360
Anchors = [akLeft, akTop, akRight, akBottom]
ScrollBars = ssVertical
TabOrder = 5
end
object IntersectionFullButton: TButton
Left = 511
Top = 7
Width = 141
Height = 25
Caption = 'Intersection (full chains)'
TabOrder = 1
OnClick = IntersectionButtonClick
end
object Grid: TStringGrid
Left = 0
Top = 0
Width = 503
Height = 423
Align = alLeft
ColCount = 20
DefaultColWidth = 24
DefaultRowHeight = 20
FixedCols = 0
RowCount = 20
FixedRows = 0
TabOrder = 0
end
object DuplicatesFullButton: TButton
Left = 658
Top = 7
Width = 141
Height = 25
Caption = 'Duplicates (full chains)'
TabOrder = 3
OnClick = DuplicatesButtonClick
end
object IntersectionPartialButton: TButton
Left = 511
Top = 35
Width = 141
Height = 25
Caption = 'Intersection (partial chains)'
TabOrder = 2
OnClick = IntersectionButtonClick
end
object DuplicatesPartialButton: TButton
Left = 658
Top = 35
Width = 141
Height = 25
Caption = 'Duplicates (partial chains)'
TabOrder = 4
OnClick = DuplicatesButtonClick
end
end
答案 2 :(得分:1)
if myarray[i][j] = myarray[j][k] then
不应该是
if myarray[i][k] = myarray[j][k] then
无论如何,您可以对此代码进行的最明显,最简单的优化是更改此
for I := 0 to length(myarray)-1 do
begin
for J := 0 to length(myarray)-1 do
begin
if i = j then
continue;
进入这个
for I := 0 to length(myarray)-1 do
begin
for J := I+1 to length(myarray)-1 do
begin
我的下一步是摆脱内部循环中的外部索引表达式:
if myarray[i][j] = myarray[j][k] then
在I和J循环中,创建指向两个整数数组的指针,然后执行
for I := 0 to length(myarray)-1 do
begin
pia := @myarray[i];
for J := I+1 to length(myarray)-1 do
begin
pja := @myarray[j];
然后在内循环中你可以做
if pia^[j] = pja^[k] then