除了有趣之外,我今天实施了Trie。目前它支持add()和search(),还应该实现remove(),但我认为这是相当直接的。
它功能齐全,但为数据填充Trie对我来说有点太多了。我正在使用此列表作为数据源:http://www.isc.ro/lists/twl06.zip(在SO上的其他位置找到)。加载需要大约11秒。我最初的实现需要大约15秒,所以我已经给它一个不错的性能提升,但我仍然不满意:))
我的问题是:还有什么可以给我一个(实质性的)性能提升?我不受这种设计的约束,完全彻底的改造是可以接受的。
class Trie
{
private $trie;
public function __construct(TrieNode $trie = null)
{
if($trie !== null) $this->trie = $trie;
else $this->trie = new TrieNode();
$this->counter = 0;
}
public function add($value, $val = null)
{
$str = '';
$trie_ref = $this->trie;
foreach(str_split($value) as $char)
{
$str .= $char;
$trie_ref = $trie_ref->addNode($str);
}
$trie_ref->value = $val;
return true;
}
public function search($value, $only_words = false)
{
if($value === '') return $this->trie;
$trie_ref = $this->trie;
$str = '';
foreach(str_split($value) as $char)
{
$str .= $char;
if($trie_ref = $trie_ref->getNode($str))
{
if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref));
continue;
}
return false;
}
return false;
}
public function extractWords(TrieNode $trie)
{
$res = array();
foreach($trie->getChildren() as $child)
{
if($child->value !== null) $res[] = $child->value;
if($child->hasChildren()) $res = array_merge($res, $this->extractWords($child));
}
return $res;
}
}
class TrieNode
{
public $value;
protected $children = array();
public function addNode($index)
{
if(isset($this->children[$index])) return $this->children[$index];
return $this->children[$index] = new self();
}
public function getNode($index)
{
return (isset($this->children[$index]) ? $this->children[$index] : false);
}
public function getChildren()
{
return $this->children;
}
public function hasChildren()
{
return count($this->children)>0;
}
}
答案 0 :(得分:3)
不知道php,但
采用以下方法:
public function add($value, $val = null)
{
$str = '';
$trie_ref = $this->trie;
foreach(str_split($value) as $char)
{
$str .= $char;
$trie_ref = $trie_ref->addNode($str);
}
$trie_ref->value = $val;
return true;
}
public function search($value, $only_words = false)
{
if($value === '') return $this->trie;
$trie_ref = $this->trie;
$str = '';
foreach(str_split($value) as $char)
{
$str .= $char;
if($trie_ref = $trie_ref->getNode($str))
{
if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref));
continue;
}
return false;
}
return false;
}
为什么你甚至需要$str .= $char
(我认为是附加的)?这本身会将您的O(n)时间添加/搜索更改为Omega(n ^ 2)(n是$value
的长度)而不是O(n)。
在一个特里,你通常在走绳子时走路,即根据当前字符而不是当前前缀找到下一个节点。
答案 1 :(得分:1)
我想这个实现是针对Key | value类型的插入和查找?这是一个处理[英语]单词的人。
class Trie {
static function insert_word(Node $root, $text)
{
$v = $root;
foreach(str_split($text) as $char) {
$next = $v->children[$char];
if ($next === null)
{
$v->children[$char] = $next = new Node();
}
$v = $next;
}
$v->leaf = true;
}
static function get_words_sorted(Node $node, $text)
{
$res = array();
for($ch = 0; $ch < 128; $ch++) {
$child = $node->children[chr($ch)];
if ($child !== null)
{
$res = array_merge($res, Trie::get_words_sorted($child, $text . chr($ch)));
}
}
if ($node->leaf === true)
{
$res[] = $text;
}
return $res;
}
static function search(Node $root, $text)
{
$v = $root;
while($v !== null)
{
foreach(str_split($text) as $char) {
$next = $v->children[$char];
if ($next === null)
{
return false;
}
else
{
$v = $next;
}
}
if($v->leaf === true)
{
return true;
}
else
{
return false;
}
}
return false;
}
}
class Node {
public $children;
public $leaf;
function __construct()
{
$children = Array();
}
}
使用示例
$root = new Node();
$words = Array("an", "ant", "all", "allot", "alloy", "aloe", "are", "ate", "be");
for ($i = 0; $i < sizeof($words); $i++)
{
Trie::insert_word($root, $words[$i]);
}
$search_words = array("alloy", "ant", "bee", "aren't", "allot");
foreach($search_words as $word)
{
if(Trie::search($root, $word) === true)
{
echo $word . " IS in my dictionary<br/>";
}
else
{
echo $word . " is NOT in my dictionary <br/>";
}
}