我刚把一个 Trie 从 OCaml 移植到 F#。不幸的是,就 tryFind 而言,它比标准的 Map 运行得更慢。我不明白为什么——Trie 似乎应该更快才对。F# 的核心库是否以某种特殊方式构建,使其比用户通常编写和部署的代码更快?
这是代码 -
/// Persistent (immutable) trie keyed by lists of comparable elements.
[<RequireQualifiedAccess>]
module Trie

/// One trie node.
/// TrieMap holds the child nodes indexed by the next key element;
/// TrieKvp holds the (full key, value) pair that ends at this node, if any.
type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k list * 'v) option }

    /// True when the node stores no value and has no children.
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

/// Builds a node from its child map and optional key/value pair.
let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

/// Creates a node with no children and no value.
let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

/// True when the node stores no value and has no children.
let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

/// Looks up the value stored under `key`, walking one child per key element.
/// Returns None when the path does not exist or no value ends at that node.
let rec tryFind (key : 'k list) (node : Node<'k, 'v>) : 'v option =
    match key with
    | [] -> Option.map snd node.TrieKvp
    | head :: tail ->
        match Map.tryFind head node.TrieMap with
        | Some subNode -> tryFind tail subNode
        | None -> None

/// True when a value is stored under `key`.
let inline containsKey key node =
    (tryFind key node).IsSome

/// Inserts `value` at the node reached by following `rest`, recording
/// `fullKey` (not the exhausted remainder) in the terminal node so that
/// `map` later sees the real key.
let rec addWithFullKey (fullKey : 'k list) (rest : 'k list) (value : 'v) (node : Node<'k, 'v>) : Node<'k, 'v> =
    match rest with
    | [] -> make node.TrieMap (Some (fullKey, value))
    | head :: tail ->
        let child =
            match Map.tryFind head node.TrieMap with
            | Some existing -> existing
            | None -> makeEmpty ()
        let updatedChild = addWithFullKey fullKey tail value child
        make (Map.add head updatedChild node.TrieMap) node.TrieKvp

/// Returns a new trie in which `key` is bound to `value`; an existing
/// binding for the same key is replaced. The input node is not modified.
let addInternal (key : 'k list) value node =
    addWithFullKey key key value node

/// Returns a new trie in which `key` is bound to `value`.
let inline add key value node =
    addInternal key value node

/// Adds every (key, value) pair of `kvps` to `node`, left to right.
/// Uses a single Seq.fold pass instead of repeatedly peeling the sequence
/// with Seq.head / Seq.skip, which re-enumerates nested lazy wrappers.
let addMany (kvps : seq<'k list * 'v>) (node : Node<'k, 'v>) : Node<'k, 'v> =
    Seq.fold (fun trie (key, value) -> add key value trie) node kvps

/// Builds a trie from a collection of (key, value) pairs.
let inline ofList kvps =
    addMany kvps (makeEmpty ())

/// Builds a trie from a list, using `by` to turn each item into a
/// (key, value) pair.
let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

/// Fold worker. `rev` is the path from the root to `node` in reverse order.
/// The children are folded first (in Map.fold order); the node's own value,
/// if present, is folded last with the path restored to root-first order.
let rec foldInternal folder rev (node : Node<'k, 'v>) state =
    let childrenState =
        Map.fold (fun acc key child -> foldInternal folder (key :: rev) child acc) state node.TrieMap
    match node.TrieKvp with
    | Some (_, value) -> folder childrenState (List.rev rev) value
    | None -> childrenState

/// Folds `folder state key value` over every binding in the trie.
let inline fold folder state node =
    foldInternal folder [] node state

/// Applies `mapper key value` to every binding, keeping the trie shape and
/// the stored keys.
let rec map (mapper : 'k list -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    let children = Map.map (fun _ child -> map mapper child) node.TrieMap
    let kvp = Option.map (fun (key, value) -> (key, mapper key value)) node.TrieKvp
    make children kvp

/// Collects all stored values into a list.
let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

/// Builds a trie containing exactly one binding.
let inline singleton (key, value) =
    add key value (makeEmpty ())
这是Jon Harrop提供的性能测试,我发现它足以衡量改进 -
// Benchmark: build and search a Trie and a standard Map holding the same
// one million single-element list keys.
let xs = Array.init 1000000 (fun i -> [i])

// --- Trie ---
let trieTimer = System.Diagnostics.Stopwatch.StartNew()
let mutable trie = Trie.makeEmpty()
for i = 0 to xs.Length - 1 do
    trie <- Trie.add xs.[i] xs.[i] trie
printfn "Trie took %fs to build" trieTimer.Elapsed.TotalSeconds
trieTimer.Restart()
for _ in 1..100 do
    for i = 0 to xs.Length - 1 do
        // Pass the trie as well: without it the call is only a partial
        // application and no lookup is performed.
        ignore (Trie.tryFind xs.[i] trie)
printfn "Trie took %fs to search" trieTimer.Elapsed.TotalSeconds

// --- Map ---
// Distinct names are used so the top-level bindings are not declared twice.
let mapTimer = System.Diagnostics.Stopwatch.StartNew()
let mutable stdMap = Map.empty
for i = 0 to xs.Length - 1 do
    stdMap <- Map.add xs.[i] xs.[i] stdMap
printfn "Map took %fs to build" mapTimer.Elapsed.TotalSeconds
mapTimer.Restart()
for _ in 1..100 do
    for i = 0 to xs.Length - 1 do
        // Same fix as above: supply the map so the lookup actually runs.
        ignore (Map.tryFind xs.[i] stdMap)
printfn "Map took %fs to search" mapTimer.Elapsed.TotalSeconds
注意:如果您想推荐查找速度更快的数据结构,请记住我需要的是持久化(persistent)的数据结构。
答案 0 :(得分:4)
> 不幸的是,就 tryFind 而言,它的运行速度比标准 Map 慢。我不明白这一点——Trie 似乎应该更快。
下面这个快速基准测试表明,至少在简单的情况下,你的 Trie 已经比 Map 快了:
// Benchmark: build and search a Trie and a standard Map holding the same
// one million single-element list keys. Wrapped in `do` so all bindings
// are local to the block.
do
    let xs = Array.init 1000000 (fun i -> [i])

    // --- Trie ---
    let trieTimer = System.Diagnostics.Stopwatch.StartNew()
    let mutable trie = Trie.makeEmpty()
    for i = 0 to xs.Length - 1 do
        trie <- Trie.add xs.[i] xs.[i] trie
    printfn "Trie took %fs to build" trieTimer.Elapsed.TotalSeconds
    trieTimer.Restart()
    for _ in 1..100 do
        for i = 0 to xs.Length - 1 do
            // Pass the trie as well: without it the call is only a partial
            // application and no lookup is performed.
            ignore (Trie.tryFind xs.[i] trie)
    printfn "Trie took %fs to search" trieTimer.Elapsed.TotalSeconds

    // --- Map ---
    let mapTimer = System.Diagnostics.Stopwatch.StartNew()
    let mutable stdMap = Map.empty
    for i = 0 to xs.Length - 1 do
        stdMap <- Map.add xs.[i] xs.[i] stdMap
    printfn "Map took %fs to build" mapTimer.Elapsed.TotalSeconds
    mapTimer.Restart()
    for _ in 1..100 do
        for i = 0 to xs.Length - 1 do
            // Same fix as above: supply the map so the lookup actually runs.
            ignore (Map.tryFind xs.[i] stdMap)
    printfn "Map took %fs to search" mapTimer.Elapsed.TotalSeconds
我得到的结果是:构建你的 Trie 用了 4 秒,构建 Map 用了 8.7 秒,两种情况下搜索都用了 0.7 秒。
但是,您的实现还有很大的改进空间。我最近写了一篇文章,介绍 F# 中一个经过优化的通用持久化哈希 trie 实现,文章发布在此处(原文为超链接)。
您之后的评论表明,您只想用它来处理以字符串为键的映射。如果是这样,针对字符串键对 Trie 进行专门化会高效得多。
修改:
KVB 建议我详细说明"改进的空间",所以这里有一些反馈:
- 谨慎使用 inline 作为优化手段,并且只在有令人信服的性能测量结果时才使用。
- 让 empty 成为一个值,而不是函数。
- 尽量避免使用 List.head 和 List.tail,改为使用模式匹配。

答案 1 :(得分:4)
好吧,再多想了一下之后,我推测性能上的真正差别在于键用的是列表而不是字符串。字符串(和数组)具有好得多的缓存局部性。所以,我把键的类型从 'k list 改成了 string,瞧!现在在我的应用程序中,它的性能确实比 Map 更好了!
这是代码 -
/// Persistent (immutable) trie specialised to string keys; each edge is
/// one character of the key.
[<RequireQualifiedAccess>]
module StringTrie

/// One trie node.
/// TrieMap holds the child nodes indexed by the next character;
/// TrieKvp holds the (full key, value) pair that ends at this node, if any.
type Node<'v> =
    { TrieMap : Map<char, Node<'v>>
      TrieKvp : (string * 'v) option }

    /// True when the node stores no value and has no children.
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

/// Builds a node from its child map and optional key/value pair.
let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

/// Creates a node with no children and no value.
let inline makeEmpty () : Node<'v> = make Map.empty None

/// True when the node stores no value and has no children.
let inline isEmpty (node : Node<'v>) = node.IsEmpty

/// Lookup worker: `index` is the position in `key` of the next character
/// to follow. The search ends when `index` equals the key length.
let rec tryFindInternal (key : string) index (node : Node<'v>) : 'v option =
    if key.Length = index then
        Option.map snd node.TrieKvp
    else
        match Map.tryFind key.[index] node.TrieMap with
        | Some subNode -> tryFindInternal key (index + 1) subNode
        | None -> None

/// Looks up the value stored under `key`, or None when there is none.
let inline tryFind (key : string) node =
    tryFindInternal key 0 node

/// True when a value is stored under `key`.
let inline containsKey key node =
    (tryFind key node).IsSome

/// Insert worker: `index` is the position in `key` of the next character
/// to follow. The full key is stored alongside the value at the terminal
/// node. Returns a new node; the input node is not modified.
let rec addInternal (key : string) index value (node : Node<'v>) : Node<'v> =
    if key.Length = index then
        make node.TrieMap (Some (key, value))
    else
        let ch = key.[index]
        let child =
            match Map.tryFind ch node.TrieMap with
            | Some existing -> existing
            | None -> makeEmpty ()
        let updatedChild = addInternal key (index + 1) value child
        make (Map.add ch updatedChild node.TrieMap) node.TrieKvp

/// Returns a new trie in which `key` is bound to `value`.
let inline add key value node =
    addInternal key 0 value node

/// Adds every (key, value) pair of `kvps` to `node`, left to right.
/// Uses a single Seq.fold pass instead of repeatedly peeling the sequence
/// with Seq.head / Seq.skip, which re-enumerates nested lazy wrappers.
let addMany (kvps : seq<string * 'v>) (node : Node<'v>) : Node<'v> =
    Seq.fold (fun trie (key, value) -> add key value trie) node kvps

/// Builds a trie from a collection of (key, value) pairs.
let inline ofList kvps =
    addMany kvps (makeEmpty ())

/// Builds a trie from a list, using `by` to turn each item into a
/// (key, value) pair.
let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

/// Fold worker. `rev` is the list of characters from the root to `node`
/// in reverse order. The children are folded first (in Map.fold order);
/// the node's own value, if present, is folded last.
/// Note: the folder receives the key as a char list rebuilt from the path,
/// not the stored string.
let rec foldInternal folder rev (node : Node<'v>) state =
    let childrenState =
        Map.fold (fun acc key child -> foldInternal folder (key :: rev) child acc) state node.TrieMap
    match node.TrieKvp with
    | Some (_, value) -> folder childrenState (List.rev rev) value
    | None -> childrenState

/// Folds `folder state key value` over every binding in the trie, where
/// `key` is the list of characters leading to the value.
let inline fold folder state node =
    foldInternal folder [] node state

/// Applies `mapper key value` to every binding, keeping the trie shape and
/// the stored string keys.
let rec map (mapper : string -> 'v -> 'a) (node : Node<'v>) : Node<'a> =
    let children = Map.map (fun _ child -> map mapper child) node.TrieMap
    let kvp = Option.map (fun (key, value) -> (key, mapper key value)) node.TrieKvp
    make children kvp

/// Collects all stored values into a list.
let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

/// Builds a trie containing exactly one binding.
let inline singleton (key, value) =
    add key value (makeEmpty ())
我还构建了一个适用于数组的版本,并且速度很快 -
/// Persistent (immutable) trie keyed by arrays of comparable elements.
[<RequireQualifiedAccess>]
module ArrayTrie

/// One trie node.
/// TrieMap holds the child nodes indexed by the next key element;
/// TrieKvp holds the (full key, value) pair that ends at this node, if any.
type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k array * 'v) option }

    /// True when the node stores no value and has no children.
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

/// Builds a node from its child map and optional key/value pair.
let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

/// Creates a node with no children and no value.
let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

/// True when the node stores no value and has no children.
let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

/// Lookup worker: `index` is the position in `key` of the next element
/// to follow. The search ends when `index` equals the key length.
let rec tryFindInternal (key : 'k array) index (node : Node<'k, 'v>) : 'v option =
    if key.Length = index then
        Option.map snd node.TrieKvp
    else
        match Map.tryFind key.[index] node.TrieMap with
        | Some subNode -> tryFindInternal key (index + 1) subNode
        | None -> None

/// Looks up the value stored under `key`, or None when there is none.
let inline tryFind (key : 'k array) node =
    tryFindInternal key 0 node

/// True when a value is stored under `key`.
let inline containsKey key node =
    (tryFind key node).IsSome

/// Insert worker: `index` is the position in `key` of the next element
/// to follow. The full key array is stored alongside the value at the
/// terminal node. Returns a new node; the input node is not modified.
let rec addInternal (key : 'k array) index value (node : Node<'k, 'v>) : Node<'k, 'v> =
    if key.Length = index then
        make node.TrieMap (Some (key, value))
    else
        let element = key.[index]
        let child =
            match Map.tryFind element node.TrieMap with
            | Some existing -> existing
            | None -> makeEmpty ()
        let updatedChild = addInternal key (index + 1) value child
        make (Map.add element updatedChild node.TrieMap) node.TrieKvp

/// Returns a new trie in which `key` is bound to `value`.
let inline add key value node =
    addInternal key 0 value node

/// Adds every (key, value) pair of `kvps` to `node`, left to right.
/// Uses a single Seq.fold pass instead of repeatedly peeling the sequence
/// with Seq.head / Seq.skip, which re-enumerates nested lazy wrappers.
let addMany (kvps : seq<'k array * 'v>) (node : Node<'k, 'v>) : Node<'k, 'v> =
    Seq.fold (fun trie (key, value) -> add key value trie) node kvps

/// Builds a trie from a collection of (key, value) pairs.
let inline ofList kvps =
    addMany kvps (makeEmpty ())

/// Builds a trie from a list, using `by` to turn each item into a
/// (key, value) pair.
let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

/// Fold worker. `rev` is the list of key elements from the root to `node`
/// in reverse order. The children are folded first (in Map.fold order);
/// the node's own value, if present, is folded last.
/// Note: the folder receives the key as a list rebuilt from the path,
/// not the stored array.
let rec foldInternal folder rev (node : Node<'k, 'v>) state =
    let childrenState =
        Map.fold (fun acc key child -> foldInternal folder (key :: rev) child acc) state node.TrieMap
    match node.TrieKvp with
    | Some (_, value) -> folder childrenState (List.rev rev) value
    | None -> childrenState

/// Folds `folder state key value` over every binding in the trie, where
/// `key` is the list of elements leading to the value.
let inline fold folder state node =
    foldInternal folder [] node state

/// Applies `mapper key value` to every binding, keeping the trie shape and
/// the stored array keys.
let rec map (mapper : 'k array -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    let children = Map.map (fun _ child -> map mapper child) node.TrieMap
    let kvp = Option.map (fun (key, value) -> (key, mapper key value)) node.TrieKvp
    make children kvp

/// Collects all stored values into a list.
let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

/// Builds a trie containing exactly one binding.
let inline singleton (key, value) =
    add key value (makeEmpty ())
看起来唯一还能进一步提高性能的办法,是取得一个指向字符串内部的指针并递增它,而不是一遍又一遍地按下标索引。这在 F# 中似乎并不容易,但在 C# 中至少对数组来说似乎是可行的。
答案 2 :(得分:2)
为什么不会呢?在 OCaml 中情况如何,那里会更快吗?由于您的 Trie 是基于 Map 实现的,因此我预计至少对某些输入它会比 Map 慢。在某些情况下,它仍然可能优于 Map,例如当数据规模非常大时。
另外,如果您主要关心的是查找性能,为什么不把您的 Trie"冻结"起来,改用基于 Dictionary 的节点呢?