为什么这个C ++ Trie实现显示奇怪的行为?

时间:2016-06-29 02:47:57

标签: c++ algorithm data-structures trie

  

我实现了这个类来创建一个trie数据结构。功能

unsigned long Insert(string) //inserts the string in trie & return no of words in trie

void PrintAllWords(); // prints all words in trie separated by space in dictionary order
  

实现正常工作并打印从单词数量不是很大的英语字典单词的文本文件中插入的所有单词,但是当提供有大约350k单词的文件时,它只打印出abcd到z。

私人变量

struct TrieTree
{
    std::map<char,struct TrieTree*> map_child;
    std::map<char,unsigned long> map_count; //keeps incrementing count of char in map during insertion.
    bool _isLeaf=false;  // this flag is set true at node where word ends
};

struct TrieTree* _root=NULL;
unsigned long _wordCount=0;
unsigned long _INITIALIZE=1;
  

以下是驱动程序的完整实现。该程序是可执行的。

#include<iostream>
#include<map>
#include<fstream>
class Trie
{
private:

    struct TrieTree
    {
        std::map<char,struct TrieTree*> map_child;
        std::map<char,unsigned long> map_count;
        bool _isLeaf=false;
    };

    struct TrieTree* _root=NULL;
    unsigned long _wordCount=0;
    unsigned long _INITIALIZE=1;

    struct TrieTree* getNode()
    {
        return new TrieTree;
    };


    void printWords(struct TrieTree* Tptr,std::string pre)
    {
        if(Tptr->_isLeaf==true)
        {
            std::cout<<pre<<" ";
            return;
        }

        std::map<char,struct TrieTree*>::iterator it;
        it=Tptr->map_child.begin();
        while(it!=Tptr->map_child.end())
        {
            pre.push_back(it->first);
            printWords(it->second,pre);
            pre.erase(pre.length()-1);  //erase last prefix character
            it++;
        }

    }


public:

    Trie()
    {
        _root=getNode();
    }
    unsigned long WordCount()
    {
        return _wordCount;
    }
    unsigned long WordCount(std::string pre) //count words with prefix pre
    {
        if(WordCount()!=0)
        {
            struct TrieTree *Tptr=_root;
            std::map<char,unsigned long>::iterator it;
            char lastChar;
            for(int i=0;i<pre.length()-1;i++)
            {
                Tptr=Tptr->map_child[pre[i]];
            }
            lastChar=pre[pre.length()-1];
            it=Tptr->map_count.find(lastChar);
            if(it!=Tptr->map_count.end())
            {
                return Tptr->map_count[lastChar];
            }
            else
            {
                return 0;
            }
        }
        return 0;
    }

    unsigned long Insert(std::string key)   //return word count after insertion
    {
        struct TrieTree *Tptr =_root;
        std::map<char,struct TrieTree*>::iterator it;

        if(!SearchWord(key))
        {
            for(int level=0;level<key.length();level++)
            {
                it=Tptr->map_child.find(key[level]);
                if(it==Tptr->map_child.end())
                {
                    //alphabet does not exist in map
                    Tptr->map_child[key[level]]=getNode();  // new node with value pointing to it
                    Tptr->map_count[key[level]] = _INITIALIZE;
                    Tptr=Tptr->map_child[key[level]];       //assign pointer to newly obtained node
                    if(level==key.length()-1)
                        Tptr->_isLeaf=true;
                }
                else
                {   //alphabet exists at this level
                    Tptr->map_count[key[level]]++;
                    Tptr=Tptr->map_child[key[level]];
                }
            }
            _wordCount++;
        }
        return _wordCount;
    }

    bool SearchWord(std::string key)
    {
        struct TrieTree *Tptr =_root;
        std::map<char,struct TrieTree*>::iterator it;
        for(int level=0;level<key.length();level++)
        {
            it=Tptr->map_child.find(key[level]);
         //   cout<<" "<<Tptr->map_child.size()<<endl;  //test to count entries at each map level

            if(it!=Tptr->map_child.end())
            {
                Tptr=Tptr->map_child[key[level]];
            }
            else
            {
                return false;
            }
        }
        if(Tptr->_isLeaf==true)
            return true;
        return false;
    }

    void PrintAllWords()
    {   //print all words in trie in dictionary order
        struct TrieTree *Tptr =_root;
        if(Tptr->map_child.empty())
            {
                std::cout<<"Trie is Empty"<<std::endl;
                return;
            }

        printWords(Tptr,"");

    }
    void PrintAllWords(std::string pre)
    {   //print all words in trie with prefix pre in Dictionary order
        struct TrieTree *Tptr =_root;
        if(Tptr->map_child.empty())
            {
                std::cout<<"Trie is Empty"<<std::endl;
                return;
            }

        for(int i=0;i<pre.length();i++)
        {
            Tptr=Tptr->map_child[pre[i]];
        }

        printWords(Tptr,pre);

    }


};

int main(){
Trie t;

std::string str;
std::fstream fs;
fs.open("words.txt",std::ios::in);

while(fs>>str){
    t.Insert(str);
}

t.PrintAllWords();

return 0;
}
  

我不了解输出,请查看代码并提出修复建议。感谢

1 个答案:

答案 0 :(得分:0)

当您添加单词“a”时,如果树中没有以“a”开头的单词,您将添加一个“叶子”节点,其中“a”作为值。如果然后添加以“a”开头的单词,例如“an”,则将'n'节点添加为'a'节点的子节点。但是,当您打印所有单词时,在您点击叶节点时会停止递归,这意味着您将忽略以该单词开头的所有其他单词。

简单解决方案:从return移除printWords

同样,如果你在树中已经有了“an”,当你添加'a'时,你不会将它标记为叶子,所以它永远不会被输出。

简单解决方案:添加单词时设置_isLeaf,即使该节点已存在(例如Tptr->_isLeaf=true; <{1}}中的else子句添加Insert

我认为最好将_isLeaf更改为类似_isWord的内容,因为使用包含子项的叶节点似乎很奇怪。