Trie - 找到所有可能的句子

时间:2015-01-31 17:55:20

标签: c# trie

我正在用 C# 完成以下任务:给定一组字母和一本英文词典,找出用这些字母能够组成的所有单词组合。为此,我使用了 trie 数据结构——先搜索出一个单词,再用剩余的字母递归地搜索所有可能的后续单词。但是,这种做法在时间和空间上的开销都非常大。有什么更高效的处理办法吗?

EDIT 这是我准备的示例代码:

/// <summary>
/// Dictionary trie. Given a pool of letters it builds, level by level, the words that can be
/// spelled from the pool and, for each such word, the words that can be spelled from the
/// letters that word leaves over.
/// </summary>
class Trie
    {
        // Root of the trie; it is created with a null key, i.e. it stands for no letter.
        private Node root = new Node(null);

        /// <summary>Inserts a word into the trie.</summary>
        /// <param name="word">Word to insert; it is handed to the root node starting at character index 0.</param>
        public void AddWord(string word)
        {
            root.Add(word, 0);
        }

        /// <summary>
        /// Expands every word combination that can be spelled from <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Available letters; spaces are stripped before searching.</param>
        /// <remarks>
        /// NOTE(review): the tree is attached to a seed <see cref="Result"/> created here and is
        /// not returned or stored, so callers currently cannot observe the outcome.
        /// </remarks>
        public void GetCandidates(string input)
        {
            var seed = new Result { Rest = input };
            var level = new List<Result> { seed };

            Get(level);
        }

        /// <summary>
        /// For each entry that still has unused letters, collects the words spellable from those
        /// letters, stores them on the entry, and recurses into them.
        /// </summary>
        /// <param name="results">Entries of the current level; entries with a null/empty Rest are skipped.</param>
        private void Get(List<Result> results)
        {
            // Eager snapshot of the entries that still have letters left to spend.
            var pending = results.FindAll(entry => !string.IsNullOrEmpty(entry.Rest));

            foreach (var entry in pending)
            {
                var followUps = new List<Result>();
                var letters = entry.Rest.Replace(" ", string.Empty);

                // Always search from the root: each follow-up is a whole new word.
                root.GetWord(string.Empty, followUps, letters);
                entry.OhterWords = followUps;

                Get(followUps);
            }
        }
    }

    /// <summary>
    /// A single trie node: the letter it represents, whether a word ends here, and its children
    /// keyed by their letter.
    /// </summary>
    class Node
    {
        // Child nodes, keyed by the letter each child represents.
        protected Dictionary<char,Node> children = new Dictionary<char, Node>();

        /// <summary>True when the path from the root to this node spells a complete word.</summary>
        public bool End { get; private set; }

        /// <summary>Letter represented by this node; null for a node created without a letter (the root).</summary>
        public char? Key { get; private set; }

        /// <summary>Creates a node for the given letter.</summary>
        /// <param name="key">Letter of the node, or null for a root node.</param>
        public Node(char? key)
        {
            Key = key;
        }

        /// <summary>
        /// Inserts the characters of <paramref name="word"/> from position <paramref name="index"/>
        /// onwards below this node, creating missing nodes, and marks the last one as a word end.
        /// </summary>
        /// <param name="word">Word being inserted.</param>
        /// <param name="index">Position in <paramref name="word"/> of the first character to insert.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="index"/> is not a valid position in <paramref name="word"/> (this
        /// includes an empty word).
        /// </exception>
        public void Add(string word, int index)
        {
            if (word == null)
            {
                throw new System.ArgumentNullException("word");
            }

            // Rejecting "nothing to insert" also keeps this node from being flagged as a word
            // end for an empty suffix.
            if (index < 0 || index >= word.Length)
            {
                throw new System.ArgumentOutOfRangeException(
                    "index", index, "index must refer to a character inside a non-empty word.");
            }

            // Walk down iteratively instead of recursing once per character.
            var node = this;
            for (var i = index; i < word.Length; i++)
            {
                var letter = word[i];

                Node child;
                if (!node.children.TryGetValue(letter, out child))
                {
                    child = new Node(letter);
                    node.children.Add(letter, child);
                }

                node = child;
            }

            node.End = true;
        }

        /// <summary>
        /// Collects every word that can be spelled by descending from this node using only the
        /// letters in <paramref name="availableLetters"/>, each letter at most as often as it occurs.
        /// </summary>
        /// <param name="current">Prefix spelled by the ancestors of this node.</param>
        /// <param name="allWords">
        /// Output list; each found word is appended together with the letters left unused, unless
        /// an equal entry is already present.
        /// </param>
        /// <param name="availableLetters">Letters that may still be consumed.</param>
        public virtual void GetWord(string current,  List<Result> allWords, string availableLetters)
        {
            // A null Key (root) contributes nothing to the prefix.
            var newCurrent = string.Concat(current, Key);
            if (End)
            {
                var result = new Result()
                {
                    Rest = availableLetters,
                    Word = newCurrent,
                };

                if (!allWords.Contains(result))
                {
                    allWords.Add(result);
                }
            }

            for (var i = 0; i < availableLetters.Length; i++)
            {
                var letter = availableLetters[i];

                Node child;
                if (!children.TryGetValue(letter, out child))
                {
                    continue;
                }

                // A repeated letter leads to the same child with the same remaining letters
                // (the first occurrence is the one removed), so exploring it again would only
                // redo identical work. Descend once, at the first occurrence.
                if (availableLetters.IndexOf(letter) != i)
                {
                    continue;
                }

                child.GetWord(newCurrent, allWords, availableLetters.Remove(i, 1));
            }
        }
    }

    /// <summary>
    /// One step of a word combination: a word, the letters left over after spelling it, and the
    /// follow-up words found for those leftover letters. Two results are equal when both their
    /// <see cref="Word"/> and <see cref="Rest"/> match.
    /// </summary>
    class Result
    {
        /// <summary>
        /// Follow-up results built from <see cref="Rest"/>. The misspelled name is kept as-is
        /// because other code refers to it.
        /// </summary>
        public List<Result> OhterWords { get; set; }

        /// <summary>The word this result stands for.</summary>
        public string Word { get; set; }

        /// <summary>Letters still unused after <see cref="Word"/>.</summary>
        public string Rest { get; set; }

        /// <summary>Value equality on <see cref="Word"/> and <see cref="Rest"/> (ordinal string comparison).</summary>
        /// <param name="obj">Object to compare with.</param>
        /// <returns>True when <paramref name="obj"/> is a <see cref="Result"/> with the same Word and Rest.</returns>
        public override bool Equals(object obj)
        {
            var r = obj as Result;
            if (r == null)
            {
                return false;
            }

            return r.Word == Word && r.Rest == Rest;
        }

        /// <summary>
        /// Hash code consistent with <see cref="Equals(object)"/>: derived from the same two
        /// properties, so equal results land in the same bucket of hash-based collections.
        /// </summary>
        /// <remarks>
        /// Word and Rest are mutable; do not change them while the instance is stored in a
        /// hash-based collection.
        /// </remarks>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                var hash = 17;
                hash = hash * 31 + (Word != null ? Word.GetHashCode() : 0);
                hash = hash * 31 + (Rest != null ? Rest.GetHashCode() : 0);
                return hash;
            }
        }
    }

1 个答案:

答案 0 :(得分:0)

您可以尝试 Aho-Corasick 算法。它以 trie 为基础,并为节点增加后缀链接,可以看作 trie 数据结构的一种替代形式。