我正在尝试编写一个电子邮件主题行解析器,用户可以在其中定义自己的解析规则。规则会匹配主题行中的成员名称,然后用该名称进行查找。问题在于,成员名称本身可能包含解析规则所使用的分隔符。
// Example rule: any text found between two '>' delimiters is a candidate member name.
// Users can make up any parsing rule, so this is only one example.
string sampleRule = ">{member}>";
// Rule-parsing code is omitted. At this point we know we are looking
// for a member and which prefix/postfix delimiters surround it.
string prefix = ">";
string postfix = ">";
// Note that member>Name3 is a valid member name.
string subject = "Subject>memberName1>memberName2>member>Name3>EndSubject";
// The delimiters are user-defined text, so they are escaped before being
// embedded in the pattern; otherwise a delimiter such as "|" or "(" would be
// read as regex syntax. The whole expression sits inside a lookahead, so each
// match is zero-width and the postfix of one candidate can also serve as the
// prefix of the next one.
string pattern = "(?=" + Regex.Escape(prefix) + "([a-z].+?)" + Regex.Escape(postfix) + ")";
Match m = Regex.Match(subject, pattern);
while (m.Success)
{
    // Capture group 1 holds a possible member name.
    Console.WriteLine(m.Groups[1].Value);
    m = m.NextMatch();
}
// The output needs to be
// memberName1
// memberName2
// member>Name3
// It is currently
// memberName1
// memberName2
// member
// Note that spanning bad matches are ok, for example
// memberName1>memberName2 or memberName1>memberName2>member>Name3
答案 0 :(得分:0)
这是一个使用常规正则表达式和递归的脆弱尝试:
/// <summary>
/// Demo program that lists candidate member names located between
/// user-defined prefix/suffix delimiters in an e-mail subject line.
/// </summary>
static class Program
{
    /// <summary>
    /// Runs the sample subject through <see cref="Find"/> and prints every candidate.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string prefix = ">";
        string suffix = ">";
        string subject =
            "Subject>memberName1>memberName2>member>Name3>EndSubject";

        foreach (string item in Find(subject, true, prefix, suffix))
        {
            Console.WriteLine(item);
        }
        /* The output is:
        memberName1>memberName2
        member>Name3 *match
        memberName1 *match
        memberName2 *match
        member
        Name3
        */
    }

    /// <summary>
    /// Collects candidate names from <paramref name="subject"/> by alternating
    /// between two patterns on each level of recursion.
    /// </summary>
    /// <param name="subject">Text to search.</param>
    /// <param name="toggle">
    /// <c>true</c> to look for "word, delimiter, word" spans that have the prefix
    /// immediately before and the suffix immediately after them;
    /// <c>false</c> to break the text into runs of word characters.
    /// </param>
    /// <param name="prefix">Literal delimiter expected before a candidate.</param>
    /// <param name="suffix">Literal delimiter expected after a candidate.</param>
    /// <returns>
    /// The distinct, non-empty matches at this level followed by the matches
    /// found recursively inside each of them (with the pattern toggled).
    /// </returns>
    private static IEnumerable<string> Find(
        string subject,
        bool toggle,
        string prefix,
        string suffix)
    {
        // The delimiters are user-defined literals; escape them so characters
        // such as "|", "(" or "." are not interpreted as regex syntax.
        string open = Regex.Escape(prefix);
        string close = Regex.Escape(suffix);

        string spanPattern =
            @"(?<=" + open + @")(?>([\w]*(" + open + "|" + close + @")[\w]*))(?=" + close + ")";
        string wordPattern = @"[\w]*";

        List<string> values = Regex
            .Matches(subject, toggle ? spanPattern : wordPattern)
            .Cast<Match>()
            .Select(m => m.Value)
            .ToList();

        // This level's matches come first, then whatever the toggled pattern
        // finds inside each match. The word pattern also yields empty matches,
        // which are filtered out before de-duplication.
        return values
            .Concat(values.SelectMany(v => Find(v, !toggle, prefix, suffix)))
            .Where(s => !String.IsNullOrEmpty(s))
            .Distinct();
    }
}
注意:我不确定在您的示例中,`member>Name3` 中的 `>` 应被视为前缀还是后缀。
[编辑]这是另一种不使用正则表达式的方法。它考虑到 `member>Name3` 中的 `>` 既可能是前缀,也可能是后缀:
// Both delimiters are used as plain split separators (no regex involved).
var separators = new[] { prefix, suffix };

// Split the subject and drop the first and last pieces.
// NOTE(review): this assumes the subject does not itself start or end with a
// delimiter, so the outermost pieces are never enclosed on both sides - confirm.
var pieces = subject.Split(separators, StringSplitOptions.RemoveEmptyEntries);
var innerTokens = pieces
    .Skip(1)
    .Take(Math.Max(0, pieces.Length - 2))
    .ToList();

// Distinct single-token candidates, in order of first appearance.
var firstResult = innerTokens
    .Distinct()
    .ToList();

// A member name may contain one delimiter, so also offer every pair of
// neighbouring tokens re-joined with each separator. Pairing is done on the
// ordered, non-deduplicated token list: removing duplicates first would make
// tokens look adjacent when they are not (and hide real neighbours) whenever
// a token occurs more than once in the subject.
var result = innerTokens
    .Zip(innerTokens.Skip(1), (a, b) =>
        separators.Select(s => String.Format("{0}{1}{2}", a, s, b)))
    .SelectMany(joined => joined)
    .Union(firstResult)
    .ToList();