我正在尝试使用trie数据结构实现拼写检查。我目前有一个Node
的大纲:
class Node:
def __init__(self):
self.next = {}
self.word_marker = False
def add_item(self, string):
#if the length of the string is 0 then return. When the end of the word
#comes set the word_marker to true
if len(string) == 0:
self.word_marker = True
return
#set the key to the first letter of the string and reformat the string to reflect the first letter taken out
#ultimately going to store the letters for each node as the key
key = string[0]
string = string[1:]
#if there is a key in the dictionary, then recursively call add_item with new string
if key in self.next:
self.next[key].add_item(string)
else:
node = Node()
self.next[key] = node
node.add_item(string)
我要做的下一件事是编写函数来搜索string
,然后返回建议的拼写。 (def correct(self, string)
)。我如何通过这个特里来实现搜索?假设我已经通过定义根节点root
向trie添加了单词列表,然后对列表中的每个单词使用add_item
。
答案 0 :(得分:4)
如果您还没有,可能需要查看Norvig的“How to Write a Spelling Corrector”
答案 1 :(得分:2)
这里有一个与您的问题相关的答案:https://stackoverflow.com/a/11016430/793956。
还要考虑使用诸如的库 https://github.com/kmike/marisa-trie#readme或 https://github.com/kmike/datrie#readme
答案 2 :(得分:0)
虽然不是直接的答案,但您可能希望从更加发达的Trie实现开始:
class Trie:
def __init__(self):
self.__final = False
self.__nodes = {}
def __repr__(self):
return 'Trie<len={}, final={}>'.format(len(self), self.__final)
def __getstate__(self):
return self.__final, self.__nodes
def __setstate__(self, state):
self.__final, self.__nodes = state
def __len__(self):
return len(self.__nodes)
def __bool__(self):
return self.__final
def __contains__(self, array):
try:
return self[array]
except KeyError:
return False
def __iter__(self):
yield self
for node in self.__nodes.values():
yield from node
def __getitem__(self, array):
return self.__get(array, False)
def create(self, array):
self.__get(array, True).__final = True
def read(self):
yield from self.__read([])
def update(self, array):
self[array].__final = True
def delete(self, array):
self[array].__final = False
def prune(self):
for key, value in tuple(self.__nodes.items()):
if not value.prune():
del self.__nodes[key]
if not len(self):
self.delete([])
return self
def __get(self, array, create):
if array:
head, *tail = array
if create and head not in self.__nodes:
self.__nodes[head] = Trie()
return self.__nodes[head].__get(tail, create)
return self
def __read(self, name):
if self.__final:
yield name
for key, value in self.__nodes.items():
yield from value.__read(name + [key])