JavaScript:在前缀树中找出以给定前缀开头的十个单词

时间:2015-09-24 09:22:12

标签: javascript recursion closures trie prefix-tree

我有一个trie(也称为前缀树)。给定一个前缀,我想得到一个以前缀开头的十个单词列表。

这个问题的独特之处在于:我只想要以给定前缀开头的 10 个单词,而不是所有单词。鉴于此,可以进行优化。

我知道下面的代码可以正常工作。trie 中的每个节点都有 children 属性和 this_is_the_end_of_a_word 属性。例如,当您插入“hi”时,trie 看起来是这样的:

trie

问题:给定一个前缀,我想得到一个以前缀开头的十个单词列表。

我解决这个问题的方法是:按照prefix的字符向前移动前缀树,直到找到对应于prefix的最后一个字符的节点。现在,您应该在此节点上执行DFS,跟踪列表中包含this_is_the_end_of_a_word === true的节点。但是当你的列表长度等于10时你应该停止搜索,并返回列表。

我认为我的方法很合理,但是我在实现它时遇到了麻烦,特别是因为我想使用递归 DFS,所以我不确定如何在各次递归调用之间传递这个“全局”列表。我知道我应该使用闭包,但我是 JavaScript 的新手,不确定该怎么做。我尝试过的一个例子如下。

我的 Trie 类(我知道这段代码有效,放在这里是为了让你看到我是如何组织数据结构的。)

/**
 * Creates a trie (prefix tree) node. The node returned by the first call acts
 * as the root; every child created by insertWord is itself a Trie node.
 *
 * Works both as `Trie()` and `new Trie()` because the function explicitly
 * returns the object it builds.
 *
 * Node shape:
 *   children                  - plain object mapping next character -> child node
 *   this_is_the_end_of_a_word - true when the path from the root to this node
 *                               spells a word that was inserted
 *
 * @returns {Object} a trie node exposing insertWord, insertWords and contains
 */
var Trie = function() {

    var that = Object.create(Trie.prototype);
    that.children = {}; //mapping: next character -> child nodes
    that.this_is_the_end_of_a_word = false;

    /**
     * Inserts a single word, creating any missing nodes along its path.
     * @param {string} word - the word to add
     */
    that.insertWord = function(word) {

        var current_node = that;

        for (var i = 0; i < word.length; i++) {
            var c = word[i];
            //if character is not in the trie already, add it
            if (!(c in current_node.children)) {
                current_node.children[c] = Trie();
            }
            //update current_node
            current_node = current_node.children[c];
        }

        //after adding all the chars of the word,
        //you are at the end of a word
        current_node.this_is_the_end_of_a_word = true;
    };

    /**
     * Inserts every word of an array-like collection.
     * @param {string[]} words - the words to add
     */
    that.insertWords = function(words) {
        for (var i = 0; i < words.length; i++) {
            that.insertWord(words[i]);
        }
    };

    /**
     * Tells whether `word` was inserted as a complete word (a mere prefix of
     * an inserted word does not count).
     * @param {string} word - the word to look up
     * @returns {boolean} true if the exact word is stored in the trie
     */
    that.contains = function(word) {
        //start at the root
        var current_node = that;
        for (var i = 0; i < word.length; i++) {
            var c = word[i];

            //if the word's character isn't a child of the current_node,
            //the word isn't in the trie
            if (!(c in current_node.children)) {
                return false;
            }
            //move down the trie, update current_node
            current_node = current_node.children[c];
        }
        return current_node.this_is_the_end_of_a_word;
    };

    //seal instead of freeze: freeze would make this_is_the_end_of_a_word
    //read-only, so insertWord could never mark the end of a word (the
    //assignment is ignored in sloppy mode and throws in strict mode).
    //seal still prevents properties from being added or removed.
    Object.seal(that);
    return that;
};

我的第一种方法(有很多错误)

// NOTE(review): this is the asker's non-working first attempt, kept as posted.
// Problems visible in this snippet:
//  - num_words_to_go is assigned without a declaration, so it is an implicit
//    global; it is only ever decremented here and is not reset between searches.
//  - `words` is created afresh in every call, so results are not shared
//    between recursive calls.
//  - the final statement is not valid JavaScript (see the note next to it).
num_words_to_go = 10; 
//this global is bad practice; 
//I want to put this as the argument to a closure 
//so it's passed between recursive calls

that.getWords = function(start_node, prefix) {
   console.log(0);
   var words = [];

   //if start node is a word, add it
   // NOTE(review): this pushes the node object itself, not the string in `prefix`
   if (start_node.this_is_the_end_of_a_word) {
       words.push(start_node);
       num_words_to_go--;
   }

   // NOTE(review): children is initialised to {} by the Trie constructor and an
   // object is always truthy, so `!start_node.children` never triggers for such nodes
   if (num_words_to_go <= 0 || !start_node.children) {
       return words;
   }

   // NOTE(review): children is a plain object and has no forEach method;
   // forEach also expects a callback function (here it is handed the result of
   // a call), `currentValue` is not defined in this scope, and
   // `<character for this child>` is a placeholder rather than code.
   return start_node.children.forEach(
                              currentValue.getWords(
                                    currentValue, prefix + <character for this child>)); 

   /*I can't think of a nice way to write this without going through all of the children. 
   I know I don't need to, because I only need to find 10 words and get out. 
   This is why I was leaning towards the recursive DFS. 
   */

}

第二种方法:我还发现了一个我正在看的python示例: http://v1v3kn.tumblr.com/post/18238156967/roll-your-own-autocomplete-solution-using-tries 我尝试将他的示例翻译成JavaScript,但all_suffixes仍然出现问题。

// NOTE(review): the asker's non-working translation of a Python example, kept
// as posted. Intended to return the words stored below this node, each one
// prefixed with `prefix`.
// Problems visible in this snippet:
//  - `results` and `arr` are assigned without a declaration (implicit globals).
//  - `concat` and `reduce` are called as free functions but are not defined in
//    the code shown; presumably left over from the Python original.
that.all_suffixes = function (prefix){
    results = [];
    if (that.this_is_the_end_of_a_word) results.push(prefix);
    // NOTE(review): children is initialised to {} by the Trie constructor, which is
    // always truthy, so this guard never returns for such nodes
    if (!(that.children)) return results;
    // NOTE(review): results holds at most one entry at this point, so this
    // condition can never be true
    if (results.length > 2) return results;
    var callback = function(currentValue, i, array){
        return currentValue.all_suffixes(prefix+array[i]);
    }
    // NOTE(review): children is a plain object and has no forEach method; on
    // arrays forEach returns undefined, so `arr` would not hold the results anyway
    arr = that.children.forEach(callback, that);
        //[child.all_suffixes(prefix + char) for (char, child) in self.children.items()]
    return concat(reduce(concat, arr), results);        
}

 // NOTE(review): part of the asker's non-working second attempt, kept as posted.
 // Walks down the trie along the characters of `prefix`; returns [] as soon as
 // a character has no matching child, otherwise delegates to all_suffixes on
 // the node reached.
 that.autocomplete = function(prefix){
    // NOTE(review): assigned without a declaration, so this is an implicit global
    current_node = that;
    for(var i = 0; i < prefix.length; i++){
        var c = prefix[i];
        //if there is nothing in the trie with this prefix
        if (!(c in current_node.children)){
            return [];
        }
        current_node = current_node.children[c];
    }
    // NOTE(review): `list` is not defined in the code shown; presumably left
    // over from the Python original
    return list(current_node.all_suffixes(prefix))
 }

1 个答案:

答案 0 :(得分:0)

基本上我会使用您的模型并将新方法getWords(word[, count])应用于Trie类。我更改了方法contains,因为我还需要getWords中的功能。所以我创建了一个新方法getNode,它返回找到单词或部分的节点。

方法getWords首先查找单词(部分),然后遍历数据结构。找到单词后,会将其推送到结果集。如果结果集长度大于或等于所需长度,则终止迭代(因此Array.prototype.some)并停止fork的递归调用。

    /**
     * Collects words stored in the trie that start with `word`, walking the
     * subtree below the prefix node depth-first and stopping as soon as
     * `count` words have been found.
     *
     * @param {string} word - the prefix to search for
     * @param {number} [count] - maximum number of words to return; when
     *        omitted, every matching word is returned
     * @returns {string[]|undefined} the matching words in traversal order, or
     *          undefined when no node in the trie corresponds to `word`
     */
    that.getWords = function (word, count) {

        // an omitted count means "no limit"
        var limit = count === undefined ? Infinity : count,
            words = [],
            current_node = that.getNode(word);

        // Visits `n` (reached by spelling `w`) and its descendants.
        // Returns true once the limit is reached so that `some` stops
        // iterating and the recursion unwinds without visiting more nodes.
        function fork(n, w) {

            function child(c) {
                return fork(n.children[c], w + c);
            }

            // check before pushing, so a limit of 0 (or less) yields no words
            if (words.length >= limit) {
                return true;
            }
            if (n.isWord) {
                words.push(w);
            }
            return words.length >= limit || Object.keys(n.children).some(child);
        }

        if (current_node) {
            fork(current_node, word);
            return words;
        }
    };

附注:我已将this_is_the_end_of_a_word更改为isWord

**输入**

  1. 创建 Trie 的新实例。
  2. 插入一些单词进行测试。

**输出**

  1. 测试 trie 是否包含 'motor',返回 false。
  2. 测试 trie 是否包含 'te',返回 false。
  3. 测试 trie 是否包含 'ten',返回 true。
  4. 获取以 'ind' 开头的所有单词(共 8 个,显示 8 个)。
  5. 获取以 'in' 开头的前 10 个单词(共 16 个,显示 10 个)。
  6. 输出整个 trie。
    7. var Trie = function () {
      
          var that = Object.create(Trie.prototype);
          that.children = {}; //mapping: next character -> child nodes
          that.isWord = false;
      
          that.insertWord = function (word) {
              var current_node = that;
              for (var i = 0; i < word.length; i++) {
                  var c = word[i]
                  //if character is not in the trie already, add it
                  if (!(c in current_node.children)) {
                      current_node.children[c] = Trie();
                  }
                  //update current_node
                  current_node = current_node.children[c];
              };
      
              //after adding all the chars of the word,
              //you are at the end of a word
              current_node.isWord = true;
          }
      
          that.insertWords = function (words) {
              for (var i = 0; i < words.length; i++) {
                  that.insertWord(words[i]);
              }
          }
      
          that.getNode = function (word) {
              //start at the root
              var current_node = that;
              for (var i = 0; i < word.length; i++) {
                  var c = word[i];
      
                  //if the word's character isn't a child of the current_node,
                  //the word isn't in the trie
                  if (!(c in current_node.children)) {
                      return;
                  }
                  //move down the trie, update current_node
                  current_node = current_node.children[c];
              };
              return current_node;
          }
      
          that.contains = function (word) {
              var current_node = that.getNode(word);
              if (current_node) {
                  return current_node.isWord;
              }
              return false;
          }
      
          that.getWords = function (word, count) {
      
              function fork(n, w) {
      
                  function child(c) {
                      return fork(n.children[c], w + c);
                  }
      
                  n.isWord && words.push(w);
                  return words.length >= count || Object.keys(n.children).some(child);
              }
      
              var words = [],
                  current_node = that.getNode(word);
      
              if (current_node) {
                  fork(current_node, word);
                  return words;
              }
          }
      
          // freeze does lock the isWord property, which is not required here
          //Object.freeze(that);
          return that;
      }
      
      // Demo: fill a trie with sample words and print a few lookups to the page.
      var trie = new Trie();
      var sampleWords = [
          'car', 'cool', 'i', 'in', 'indeed', 'independence', 'india', 'indoor', 'induction',
          'industrial', 'industry', 'indwell', 'inferior', 'informal', 'inhale', 'inn',
          'inside', 'instance', 'intrepid', 'of', 'off', 'other', 'tea', 'ted', 'ten',
          'to', 'zoo', 'zoom'
      ];
      trie.insertWords(sampleWords);

      // writes a value followed by a line break
      function writeLine(value) {
          document.write(value + '<br>');
      }

      // writes a value as JSON indented by 4 spaces inside a <pre> element
      function writeJson(value) {
          document.write('<pre>' + JSON.stringify(value, null, 4) + '</pre>');
      }

      // membership tests
      writeLine(trie.contains('motor')); // false
      writeLine(trie.contains('te')); // false
      writeLine(trie.contains('ten')); // true

      // every word starting with 'ind', then at most 10 words starting with 'in'
      writeJson(trie.getWords('ind'));
      writeJson(trie.getWords('in', 10));

      // the whole trie
      writeJson(trie);