使预测文本算法更快

时间:2014-10-17 09:43:39

标签: c# regex windows backgroundworker

我正在开发一个 Windows Phone 拨号应用,并在其中实现了预测文本:用户点击键盘时,会列出与输入匹配的联系人。但预测太慢,而且会阻塞主线程,所以我改用了 BackgroundWorker,可性能问题依然存在。我的代码是:

// Handler for the dialer's text-changed event: copies the current text of
// `dialer` into MainPage.DialerText and starts bw1 unless it is already busy.
private void dialer_TextChanged(object sender, TextChangedEventArgs e)
    {
        MainPage.DialerText = dialer.Text;
        // NOTE(review): a change that arrives while bw1 is busy starts no new run.
        if(!bw1.IsBusy)
        bw1.RunWorkerAsync();
     }
 // DoWork handler for bw1: translates the digits in DialerText into a regex made
 // of keypad letter classes and stores it, wrapped in ".*", in MainPage.pattern.
 void bw1_DoWork(object sender, DoWorkEventArgs e)
    {
        try
        {
            // Regex character class for the letters on each keypad digit;
            // 0 and 1 map to the empty string and so add nothing to the pattern.
            var digitMap = new Dictionary<int, string>() {
    { 1, "" },
    { 2, "[abcABC]" },
    { 3, "[defDEF]" },
    { 4, "[ghiGHI]" },
    { 5, "[jklJKL]" },
    { 6, "[mnoMNO]" },
    { 7, "[pqrsPQRS]" },
    { 8, "[tuvTUV]" },
    { 9, "[wxyzWXYZ]" },
    { 0, "" },
};
            var enteredDigits = DialerText;
            // Lazy projection: int.Parse runs during the foreach below and throws
            // for any character that is not a digit.
            var charsAsInts = enteredDigits.ToCharArray().Select(x => int.Parse(x.ToString()));
            var regexBuilder = new StringBuilder();

            foreach (var val in charsAsInts)
            regexBuilder.Append(digitMap[val]);
            MainPage.pattern = regexBuilder.ToString();

            // Allow the letter sequence to appear anywhere inside the name.
            MainPage.pattern = ".*" + MainPage.pattern + ".*";


        }
        catch (Exception f)
        {
            // Every exception is ignored here; if one is thrown while the pattern is
            // being built, MainPage.pattern keeps the value it had before this call.
            //  MessageBox.Show(f.Message);
        }

    }
// RunWorkerCompleted handler for bw1: filters listobj and shows the result in
// SearchListbox. An item is kept when one of its phone numbers contains
// MainPage.DialerText or its FirstName matches MainPage.pattern.
void bw1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        // `|` does not short-circuit, so Regex.IsMatch is evaluated for every item,
        // including those whose phone number already matched.
        SearchListbox.ItemsSource = listobj.FindAll(x => x.PhoneNumbers.Any(a=>a.Contains(MainPage.DialerText)) | Regex.IsMatch(x.FirstName, MainPage.pattern));
    }

BackgroundWorker 仍然会阻塞我的主线程,所以我点击键盘时,输入的字符要过一会儿才出现在 TextBox 中。我希望输入能毫无延迟地显示在 TextBox 里,该怎么做?谢谢。

3 个答案:

答案 0 :(得分:3)

你想 非常快 吗?

停止直接处理单词列表并将其转换为更有效的数据结构。

对于任何大小的单词列表的快速查找(但在内存方面更昂贵),您应该构建一个包含整个单词列表的树结构。

根节点表示零拨号数字,并且连接到(最多)十个节点,其中连接节点的边表示为0到9按下的可能数字之一。

然后,每个节点都包含可以从根节点通过树的路径形成的可能单词,其中路径代表按下的数字。

这意味着搜索不再需要迭代整个单词列表,并且可以在很少的操作中完成。

下面用我在网上找到的一个约 10 万词的单词表来演示这一思路。在我的台式机上,一次搜索大约耗时 0.02 毫秒,又好又快。整个结构在内存中大约占用 50MB。

// Demo driver: fills a keypad trie from the online word list, times the same
// lookup five times, then prints the managed heap size.
void Main()
{
    var trie = new Node();

    //probably a bad idea, better to await in an async method
    var loading = LoadNode(trie);
    loading.Wait();

    //"acres" in text-ese (specifically chosen for ambiguity)
    const string keyPresses = "22737";

    //repeat the lookup a few times to get meaningful timings
    var runsLeft = 5;
    while(runsLeft > 0)
    {
        var timer = Stopwatch.StartNew();
        var matches = trie.Search(keyPresses);
        var elapsedMs = timer.Elapsed.TotalMilliseconds;

        Console.WriteLine("Search complete in {0} ms", elapsedMs);
        Console.WriteLine("Search for {0}:", keyPresses);
        foreach(var match in matches)
        {
            Console.WriteLine("Found {0}", match);
        }

        runsLeft--;
    }

    //collect first so the figure below reflects live objects only
    GC.Collect();
    var heapBytes = GC.GetTotalMemory(true);
    Console.WriteLine("Allocated {0} bytes", heapBytes);
}

// Downloads the English word list line by line and adds every entry that
// consists solely of the letters a-z (after lower-casing) to rootNode.
async Task LoadNode(Node rootNode)
{
    const string source =
    "http://www-01.sil.org/linguistics/wordlists/english/wordlist/wordsEn.txt";
    Console.WriteLine("Loading words from {0}", source);

    using(var client = new HttpClient())
    using(var body = await client.GetStreamAsync(source))
    using(var lines = new StreamReader(body))
    {
        var accepted = 0;
        for(var line = await lines.ReadLineAsync();
            line != null;
            line = await lines.ReadLineAsync())
        {
            var candidate = line.ToLowerInvariant();

            //entries containing anything but plain letters are skipped
            if(Regex.IsMatch(candidate,@"^[a-z]+$"))
            {
                rootNode.Add(candidate);
                accepted++;
            }
        }
        Console.WriteLine("Loaded {0} words", accepted);
    }
}

// One node of a keypad trie. The path from the root to a node is a sequence of
// key presses (0-9); the node stores every word that is spelled by exactly that
// sequence on a phone keypad.
class Node
{
    // Letters printed on each keypad digit; 0 and 1 carry no letters.
    static readonly Dictionary<int, string> digitMap = new Dictionary<int, string>() {
        { 1, "" },
        { 2, "abcABC" },
        { 3, "defDEF" },
        { 4, "ghiGHI" },
        { 5, "jklJKL" },
        { 6, "mnoMNO" },
        { 7, "pqrsPQRS" },
        { 8, "tuvTUV" },
        { 9, "wxyzWXYZ" },
        { 0, "" }};

    // Reverse lookup derived from digitMap: letter -> keypad digit.
    static readonly Dictionary<char,int> letterMap;
    static Node()
    {
        letterMap = digitMap
            .SelectMany(m => m.Value.Select(c=>new {ch = c, num = m.Key}))
            .ToDictionary(x => x.ch, x => x.num);
    }

    // Words whose complete key sequence ends at this node.
    readonly List<string> words = new List<string>();

    //the edges collection has exactly 10
    //slots which represent the numbers [0-9]
    readonly Node[] edges = new Node[10];

    // Words stored at this node.
    public IEnumerable<string> Words{get{
        return words;
    }}

    // Inserts word below this node, mapping word[pos..] to key presses.
    // An empty word is ignored.
    // Throws ArgumentNullException for a null word, ArgumentOutOfRangeException
    // when pos lies outside [0, word.Length], and KeyNotFoundException when the
    // word contains a character that is not an ASCII letter.
    public void Add(string word, int pos = 0)
    {
        if(word == null)
        {
            throw new ArgumentNullException("word");
        }
        if(pos < 0 || pos > word.Length)
        {
            throw new ArgumentOutOfRangeException("pos");
        }

        // Walk (and create) the path iteratively instead of recursing once per letter.
        var node = this;
        for(var i = pos; i < word.Length; ++i)
        {
            int edgeIndex = letterMap[word[i]];
            if(node.edges[edgeIndex] == null)
            {
                node.edges[edgeIndex] = new Node();
            }
            node = node.edges[edgeIndex];
        }

        if(word.Length > 0)
        {
            node.words.Add(word);
        }
    }

    // Returns the node in this subtree that stores the most words; among
    // equally populated nodes the one visited first wins.
    public Node FindMostPopulatedNode()
    {
        Stack<Node> stk = new Stack<Node>();
        stk.Push(this);
        Node biggest = null;
        while(stk.Count > 0)
        {
            var node = stk.Pop();
            if(biggest == null || node.words.Count > biggest.words.Count)
            {
                biggest = node;
            }
            foreach(var next in node.edges)
            {
                if(next != null)
                {
                    stk.Push(next);
                }
            }
        }
        return biggest;
    }

    // Returns the words spelled by exactly the given key presses, e.g. "22737".
    // An empty string yields the words stored at this node. A sequence with no
    // matching path, or one containing anything other than the digits 0-9
    // (such as '*' or '#'), yields an empty result.
    // Throws ArgumentNullException for a null sequence.
    public IEnumerable<string> Search(string numberSequenceString)
    {
        if(numberSequenceString == null)
        {
            throw new ArgumentNullException("numberSequenceString");
        }

        // Single pass over the string: one array lookup per key press.
        var node = this;
        foreach(var key in numberSequenceString)
        {
            int digit = key - '0';
            if(digit < 0 || digit > 9)
            {
                return Enumerable.Empty<string>();
            }
            node = node.edges[digit];
            if(node == null)
            {
                return Enumerable.Empty<string>();
            }
        }
        return node.words;
    }
}

答案 1 :(得分:2)

您可以进行一些优化以提高速度:

  • 不需要为正则表达式模式添加.*前缀和后缀,因为IsMatch会在字符串中的任何位置检测到匹配
  • 存放各段模式的局部 Dictionary<int,string> 可以替换为 static 数组
  • 将数字转换为int可以用减法替换
  • foreach循环和追加可以替换为string.Join

以下是:

// Regex character class for each keypad digit; the array index is the digit.
// 0 and 1 carry no letters and therefore add nothing to the pattern.
private static readonly string[] digitMap = new[] {
    ""
,   "",           "[abcABC]", "[defDEF]"
,   "[ghiGHI]",   "[jklJKL]", "[mnoMNO]"
,   "[pqrsPQRS]", "[tuvTUV]", "[wxyzWXYZ]"
};

// Returns the regex fragment for one dialed character. Characters outside
// 0-9 (e.g. '*', '#', '+') are treated like 0 and 1: they add nothing.
// Without this check they would index outside digitMap and throw.
private static string PatternFor(char c) {
    int digit = c - '0';
    return digit >= 0 && digit < digitMap.Length ? digitMap[digit] : "";
}

// DoWork handler: rebuilds MainPage.pattern from the current DialerText.
// No catch-all is needed because nothing here throws; a null DialerText is
// treated as empty, so the pattern is always refreshed rather than left stale.
void bw1_DoWork(object sender, DoWorkEventArgs e) {
    MainPage.pattern = string.Join("", (DialerText ?? "").Select(c => PatternFor(c)));
}

答案 2 :(得分:0)

我怀疑界面被阻塞的原因是:你的后台工作线程其实并没有做多少事情。

我估计 bw1_RunWorkerCompleted 方法(它在后台工作完成后于主线程上运行)才是耗时更长的部分!你能把其中的部分(或全部)代码移到 bw1_DoWork 方法中吗?显然,"listobj" 必须能被后台线程访问——只需把它传给 RunWorkerAsync,再通过 DoWorkEventArgs.Argument 属性取出即可……

// Regex character class for the letters on each keypad digit; 0 and 1 carry no
// letters. Built once instead of on every worker run.
private static readonly Dictionary<int, string> digitMap = new Dictionary<int, string>() {
    { 1, "" },
    { 2, "[abcABC]" },
    { 3, "[defDEF]" },
    { 4, "[ghiGHI]" },
    { 5, "[jklJKL]" },
    { 6, "[mnoMNO]" },
    { 7, "[pqrsPQRS]" },
    { 8, "[tuvTUV]" },
    { 9, "[wxyzWXYZ]" },
    { 0, "" },
};

// Text the most recently started search was run for. Assigned on the UI thread
// right before the worker starts and not changed again while it is running.
private string searchedText;

// Records the new dialer text and starts a search unless one is in flight.
private void dialer_TextChanged(object sender, TextChangedEventArgs e)
{
    MainPage.DialerText = dialer.Text;
    StartSearchIfIdle();
}

// Snapshots the current text and starts the worker; does nothing while it is busy.
private void StartSearchIfIdle()
{
    if (bw1.IsBusy)
    {
        return;
    }
    searchedText = MainPage.DialerText;
    bw1.RunWorkerAsync(listobj);
}

// Runs on the worker thread: builds the name pattern for the snapshotted text
// and filters the list passed through e.Argument. The matches go to e.Result.
// Exceptions are not swallowed; BackgroundWorker reports them through
// RunWorkerCompletedEventArgs.Error.
void bw1_DoWork(object sender, DoWorkEventArgs e)
{
    // Use one snapshot for both the pattern and the phone-number test, so the
    // two cannot disagree when the user keeps typing during the search.
    var enteredDigits = searchedText ?? "";

    var regexBuilder = new StringBuilder(".*");
    foreach (var key in enteredDigits)
    {
        // Keys without letters ('*', '#', '+', ...) add nothing, like 0 and 1.
        string letters;
        if (digitMap.TryGetValue(key - '0', out letters))
        {
            regexBuilder.Append(letters);
        }
    }
    regexBuilder.Append(".*");
    MainPage.pattern = regexBuilder.ToString();

    // Parse the pattern once rather than once per list item.
    var nameRegex = new Regex(MainPage.pattern);

    var contacts = (ListObjectType)e.Argument;
    // `||` skips the regex when a phone number already matched.
    e.Result = contacts.FindAll(x =>
        x.PhoneNumbers.Any(a => a.Contains(enteredDigits))
        || (x.FirstName != null && nameRegex.IsMatch(x.FirstName)));
}

// Runs on the UI thread once the worker has finished: shows the matches and,
// if the text changed while the worker was busy, searches again so the list
// always ends up matching what is in the dialer.
void bw1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    // Reading e.Result after a failed run throws, so check e.Error first.
    if (e.Error == null)
    {
        SearchListbox.ItemsSource = (IEnumerable<ListObjectItemType>)e.Result;
    }

    if (MainPage.DialerText != searchedText)
    {
        StartSearchIfIdle();
    }
}

注意:你显然需要把代码中强制转换所用的 ListObjectType 和 ListObjectItemType 替换为实际的类型!