我在这里发现了一篇很酷的博客文章,关于使用Tries修复拼写错误,但它是用旧版本的F#编写的。
http://blog.lab49.com/archives/2841
我已经解决了大部分问题,除非在我认为出现问题的所有大写评论的最后。基本上在一些地方预期Set和Map<>给出,反之亦然。我几个小时都在盯着这段代码,似乎无法弄清楚哪些东西不匹配。
open System
open System.Linq
///'k represents the base key type, and 'a represents the element type.
type trie<'k,'a when 'k : comparison> = TNode of ('a option * Map<'k,trie<'k,'a>>)
///'ks represents a type that stores a sequence of the key elements, and 'k represents the type of the key elements.
type iter_module<'ks,'k> =
{
eos : ('ks -> bool) //whether or not a key-sequence is at its end.
head : ('ks -> 'k) //gets the current key-value in a key sequence
tail : ('ks -> 'ks) // increments the key-sequence iterator.
}
let siterm : iter_module<string * int, char> =
{
eos = fun (s,i) -> i >= String.length s
head = fun (s,i) -> s.[i]
tail = fun (s,i) -> (s, i+1)
}
///Extracts the option value associated with a node.
let node_value = function
| TNode (ov,_) -> ov
///Extracts the map representing connections from the current trie node to sub-trees.
let node_map = function
| TNode (_, m) -> m
///Determines whether a trie is empty
let is_empty tn = Map.isEmpty (node_map tn)
///Empty trie.
let empty_trie = TNode (None, Map.empty)
/// <summary>
/// Find a sub-trie.
/// </summary>
/// <param name="tn">The current trie node.</param>
/// <param name="k">The key.</param>
let find_subtrie tn k =
try
Map.find k (node_map tn)
with
not_found -> empty_trie
/// <summary>
/// Update the trie.
/// </summary>
/// <param name="m">The iteration module.</param>
/// <param name="tn">The trie node.</param>
/// <param name="ks">a sequence of the key elements</param>
/// <param name="v">Option containing the element</param>
let add m tn ks v =
let rec upd tn' ks' =
if(m.eos ks') then
TNode(Some v, (node_map tn'))
else
let k = (m.head ks') in
let ov = node_value tn' in
let tn'' = upd(find_subtrie tn' k)(m.tail ks') in
TNode(ov, Map.add k tn'' (node_map tn'))
in
upd tn ks
/// <summary>
/// Lookup the option value associated with a string
/// </summary>
/// <param name="m">The iteration module.</param>
/// <param name="tn">The trie node.</param>
/// <param name="ks">a sequence of the key elements</param>
let lookup m tn ks =
let rec lv tn' ks' =
if(m.eos ks') then
node_value tn'
else
let k = (m.head ks') in
lv(find_subtrie tn' k)(m.tail ks')
in
lv tn ks
/// <summary>
/// Determine whether or not a specific work is valid in the trie by a simple use of the lookup function.
/// </summary>
/// <param name="m">The iteration module.</param>
/// <param name="tn">The trie node.</param>
/// <param name="ks">a sequence of the key elements</param>
let mem m tn ks =
match(lookup m tn ks) with
| Some _ -> true
| None -> false
/// <summary>
/// Determines the suffix of a string
/// </summary>
/// <param name="s">The string to find the suffix of.</param>
/// <param name="i">The starting point of the suffix.</param>
let suffix(s:String,i) = s.Substring(i,(String.length s - i))
/// <summary>
/// Computes the set of words defined by a trie node if we follow any valid character from that node, and then try to follow a specific char sequences.
/// </summary>
/// <param name="pfx">The prefix.</param>
/// <param name="trie">The trie node.</param>
/// <param name="wi">Word index.</param>
/// <param name="words">The words.</param>
let strings_with_suffix pfx trie wi words =
let accumulate_paths c trie' ws =
if (mem siterm trie' wi) then
Set.add (pfx + (string c) + (suffix wi)) ws
else
ws
in
//THIS IS WHERE THINGS APPEAR TO BE GOING WRONG
Map.fold accumulate_paths (node_map trie) words
/// <summary>
/// Spelling correction suggestions.
/// </summary>
/// <param name="trie">The trie.</param>
/// <param name="word">The mispelled word.</param>
let suggestions trie word =
let rec paths_around_char pfx trie wi words =
if (siterm.eos wi) then
strings_with_suffix pfx trie ("", 0) words
else if (is_empty trie) then
words
else
let c = siterm.head wi in
let wi' = siterm.tail wi in
let trie' = find_subtrie trie c in
let ins_ws = strings_with_suffix pfx trie wi words in
let rep_ws = strings_with_suffix pfx trie wi' ins_ws in
let del_ws = if (mem siterm trie wi') then
Set.add (pfx + (suffix wi')) rep_ws
else
rep_ws
in
paths_around_char (pfx + (string c)) trie' wi' del_ws
in
paths_around_char "" trie (word, 0) Set.empty
提前致谢,
鲍勃
答案 0 :(得分:2)
似乎问题在于 strings_with_suffix
Map.fold (旧版本的F#标准库具有不同的参数顺序)的不同签名折叠的签名:('状态 - &gt;'键 - &gt;'值 - &gt;'状态) - &gt; '状态 - &gt;地图&lt;'Key,'价值&gt; - &GT; '州
当前的呼叫网站:
let strings_with_suffix pfx trie wi words =
let accumulate_paths c trie' ws (* 2 *)=
if (mem siterm trie' wi) then
Set.add (pfx + (string c) + (suffix wi)) ws
else
ws
in
//THIS IS WHERE THINGS APPEAR TO BE GOING WRONG
Map.fold accumulate_paths (node_map trie) (* 1 *) words
正确版本如下:
let strings_with_suffix pfx trie wi words =
let accumulate_paths ws c trie' =
if (mem siterm trie' wi) then
Set.add (pfx + (string c) + (suffix wi)) ws
else
ws
in
Map.fold accumulate_paths words (node_map trie)