您好,我正在尝试读取单词列表中每个单词所带的标签。该列表包含类似这样的单词:TMConnects(MEN) what(WP) happened(VB) to(TO) your(ADV) my(ADV) tm(N) website(N) i(N) can(ADV) access(N) to(TO) view(N) my(ADV) bill(N)。我试图使用 if 语句和正则表达式来读取每个标签,这样就可以对单词进行适当的分类。
这是我试过的代码
// Sort every token into a part-of-speech list according to the tag written in
// parentheses after the word (for example "word(vb)"). The tags are tried one
// at a time; the first pattern that matches decides which list receives the token.
// NOTE: every pattern spells its tag in lower case and Regex.Match is called
// without RegexOptions, so matching is case-sensitive: a token whose tag is
// upper-case, such as "(VB)", matches none of the branches below.
// The patterns are not anchored, so they may match anywhere inside the token.
foreach (string word in tokensList)
{
// Verbs: one or more ASCII letters followed by the literal tag "(vb)".
Match match_verb = Regex.Match(word, @"[a-zA-Z]+\(vb\)");
if (match_verb.Success)
{
// Print the whole token (tag included) followed by a short marker, then store it.
Console.WriteLine(word + "v");
Verbs.Add(word);
}
else
{
// Nouns: literal tag "(n)".
Match match_noun = Regex.Match(word, @"[a-zA-Z]+\(n\)");
if (match_noun.Success)
{
Console.WriteLine(word + "n");
Nouns.Add(word);
}
else
{
// Adverbs: literal tag "(adv)".
Match match_adverb = Regex.Match(word, @"[a-zA-Z]+\(adv\)");
if (match_adverb.Success)
{
Console.WriteLine(word + "adv");
Adverbs.Add(word);
}
else
{
// Adjectives: literal tag "(adj)".
Match match_adj = Regex.Match(word, @"[a-zA-Z]+\(adj\)");
if (match_adj.Success)
{
Console.WriteLine(word + "adj");
Adjectives.Add(word);
}
else
{
// Mentions: literal tag "(men)".
Match match_men = Regex.Match(word, @"[a-zA-Z]+\(men\)");
if (match_men.Success)
{
Console.WriteLine(word + "men");
Mentions.Add(word);
}
else
{
// Objects: literal tag "(knk)"; printed with the marker "obj".
Match match_obj = Regex.Match(word, @"[a-zA-Z]+\(knk\)");
if (match_obj.Success)
{
Console.WriteLine(word + "obj");
Objects.Add(word);
}
else
{
// Features: literal tag "(kt)"; printed with the marker "ft".
Match match_feature = Regex.Match(word, @"[a-zA-Z]+\(kt\)");
if (match_feature.Success)
{
Console.WriteLine(word + "ft");
Features.Add(word);
}
else
{
// None of the known tags matched: the token is not recorded anywhere.
//break;
}
}
}
}
}
}
// NOTE(review): the snippet as shown stops here with two blocks still open
// (the first "else" and the foreach itself) - the paste appears to be truncated.
请帮帮我。
答案 0 :(得分:1)
如果我把您提供的列表中括号内的所有文本都改成小写,您的代码就能得到完全正确的结果。因此,我假设问题正如 Mark 所说:您的正则表达式区分大小写。要解决这个问题,您只需要提供 RegexOptions.IgnoreCase 选项。
也就是说,把 Regex.Match(word, @"[a-zA-Z]+\(vb\)") 改成 Regex.Match(word, @"[a-z]+\(vb\)", RegexOptions.IgnoreCase)。
不过,同样按照 Mark 的建议,您目前的做法很可能过于针对某一种特定情况。也就是说,与其为每个条件各写一个 if-else 语句(每次都尝试执行一次正则表达式匹配),不如使用一个更通用的正则表达式只执行一次匹配,并利用 .NET 提供的工具来得到更灵活的解决方案。
// Classify every token with one general match instead of one regex per tag.
// The pattern accepts only tokens of the exact form letters(tag) and exposes
// two named groups:
//   "word" - the bare word, e.g. TMConnects (not needed below)
//   "type" - the text between the parentheses, e.g. MEN
// RegexOptions.IgnoreCase makes [a-z] accept upper-case letters as well.
// The query is lazy: each token is matched only when the loop reaches it.
var taggedTokens =
    from token in tokensList
    let tagged = Regex.Match(
        token,
        @"^(?<word>[a-z]+)\((?<type>[^\)]+)\)$",
        RegexOptions.IgnoreCase)
    // Tokens that do not have the expected shape are skipped.
    where tagged.Success
    select tagged;

foreach (var hit in taggedTokens)
{
    // The complete token text, tag included.
    var entry = hit.Value;
    // Lower-case the tag once so each comparison below is a plain equality test.
    var tag = hit.Groups["type"].Value.ToLowerInvariant();

    if (tag == "vb")
    {
        Console.WriteLine(entry + "v");
        Verbs.Add(entry);
    }
    else if (tag == "n")
    {
        Console.WriteLine(entry + "n");
        Nouns.Add(entry);
    }
    else if (tag == "adv")
    {
        Console.WriteLine(entry + "adv");
        Adverbs.Add(entry);
    }
    else if (tag == "adj")
    {
        Console.WriteLine(entry + "adj");
        Adjectives.Add(entry);
    }
    else if (tag == "men")
    {
        Console.WriteLine(entry + "men");
        Mentions.Add(entry);
    }
    else if (tag == "knk")
    {
        Console.WriteLine(entry + "obj");
        Objects.Add(entry);
    }
    else if (tag == "kt")
    {
        Console.WriteLine(entry + "ft");
        Features.Add(entry);
    }
    // Any other tag is ignored.
}