Question

您好，我正在尝试读取单词列表上的标签。该列表包含类似这样的单词：TMConnects(MEN) what(WP) happened(VB) to(TO) your(ADV) my(ADV) tm(N) website(N) i(N) can(ADV) access(N) to(TO) view(N) my(ADV) bill(N)。我试图使用 if 语句和正则表达式来读取每个标签，这样就可以对单词进行适当的分类。

这是我试过的代码

 // Classifies every token in tokensList by the tag in parentheses that follows it.
 // Each token is tested against one regex per tag, in order; the first pattern that
 // matches decides which list receives the token.
 // NOTE: the tag literals in the patterns are lowercase and Regex.Match is called
 // without RegexOptions.IgnoreCase, so uppercase tags such as "(VB)" do not match.
 // NOTE: the snippet is truncated here; the remaining closing braces are not shown.
 foreach (string word in tokensList)
        {
            // Verbs: tokens tagged (vb)
            Match match_verb = Regex.Match(word, @"[a-zA-Z]+\(vb\)");
            if (match_verb.Success)
            {
                Console.WriteLine(word + "v");
                Verbs.Add(word);
            }
            else
            {
                // Nouns: tokens tagged (n)
                Match match_noun = Regex.Match(word, @"[a-zA-Z]+\(n\)");
                if (match_noun.Success)
                {
                    Console.WriteLine(word + "n");
                    Nouns.Add(word);
                }
                else
                {
                    // Adverbs: tokens tagged (adv)
                    Match match_adverb = Regex.Match(word, @"[a-zA-Z]+\(adv\)");
                    if (match_adverb.Success)
                    {
                        Console.WriteLine(word + "adv");
                        Adverbs.Add(word);
                    }
                    else
                    {
                        // Adjectives: tokens tagged (adj)
                        Match match_adj = Regex.Match(word, @"[a-zA-Z]+\(adj\)");
                        if (match_adj.Success)
                        {
                            Console.WriteLine(word + "adj");
                            Adjectives.Add(word);
                        }
                        else
                        {
                            // Mentions: tokens tagged (men)
                            Match match_men = Regex.Match(word, @"[a-zA-Z]+\(men\)");
                            if (match_men.Success)
                            {
                                Console.WriteLine(word + "men");
                                Mentions.Add(word);
                            }
                            else
                            {
                                // Objects: tokens tagged (knk)
                                Match match_obj = Regex.Match(word, @"[a-zA-Z]+\(knk\)");
                                if (match_obj.Success)
                                {
                                    Console.WriteLine(word + "obj");
                                    Objects.Add(word);
                                }
                                else
                                {
                                    // Features: tokens tagged (kt)
                                    Match match_feature = Regex.Match(word, @"[a-zA-Z]+\(kt\)");
                                    if (match_feature.Success)
                                    {
                                        Console.WriteLine(word + "ft");
                                        Features.Add(word);
                                    }
                                    else
                                    {
                                        // No pattern matched: the token is not added to any list.
                                        //break;
                                    }
                                }
                            }
                        }
                    }
                }

请帮帮我。

Answer 1

如果我把您提供的列表中括号内的所有文本改为小写，就能得到全部正确的结果。因此，我假设问题正如 Mark 所指出的那样：您的正则表达式区分大小写；要解决这个问题，只需提供 RegexOptions.IgnoreCase 选项即可。

也就是说，把 Regex.Match(word, @"[a-zA-Z]+\(vb\)") 改写成 Regex.Match(word, @"[a-z]+\(vb\)", RegexOptions.IgnoreCase)。

不过，同样按照 Mark 的建议，您目前的写法很可能过于针对某个具体情况。也就是说，与其为每个条件各写一个 if-else 分支（每次都要执行一次正则表达式匹配），不如使用一个更通用的正则表达式只做一次匹配，并利用 .NET 提供的工具来实现更灵活的解决方案。

    // Classify each token of the form "letters(tag)" with a single generic match,
    // then dispatch on the tag instead of running one regex per category.
    foreach (var token in tokensList)
    {
        // Two named groups: "word" holds the letters before the bracket (e.g. TMConnects)
        // and "type" holds whatever sits between the brackets (e.g. MEN).
        // IgnoreCase lets [a-z] accept upper-case letters as well.
        var tagged = Regex.Match(
            token,
            @"^(?<word>[a-z]+)\((?<type>[^\)]+)\)$",
            RegexOptions.IgnoreCase);

        // Tokens that do not have the "letters(tag)" shape are skipped.
        if (!tagged.Success)
        {
            continue;
        }

        // The full matched text, tag included.
        var taggedWord = tagged.Value;
        // Lower-case the tag so the case labels below can be written in one casing.
        var tag = tagged.Groups["type"].Value.ToLowerInvariant();

        // Tags without a case label fall through and the token is not stored anywhere.
        switch (tag)
        {
            case "vb":
                Console.WriteLine(taggedWord + "v");
                Verbs.Add(taggedWord);
                break;
            case "n":
                Console.WriteLine(taggedWord + "n");
                Nouns.Add(taggedWord);
                break;
            case "adv":
                Console.WriteLine(taggedWord + "adv");
                Adverbs.Add(taggedWord);
                break;
            case "adj":
                Console.WriteLine(taggedWord + "adj");
                Adjectives.Add(taggedWord);
                break;
            case "men":
                Console.WriteLine(taggedWord + "men");
                Mentions.Add(taggedWord);
                break;
            case "knk":
                Console.WriteLine(taggedWord + "obj");
                Objects.Add(taggedWord);
                break;
            case "kt":
                Console.WriteLine(taggedWord + "ft");
                Features.Add(taggedWord);
                break;
        }
    }

if语句具有不同的正则表达式参数

1 个答案: