下面的函数运行良好,但我现在面临的挑战是:让它不仅返回匹配所在的位置,还要返回匹配到的内容是什么。代码如下:
txtFilePattern 是一个以竖线(|)分隔的文件扩展名列表;txtKeywords 是一个多行文本框,存放我要查找的关键字;txtPatterns 与 txtKeywords 类似,但存放的是正则表达式模式。
这是我自己用 C# 实现 Grep 的一个小实验。
/// <summary>
/// Recursively scans the directory named in txtSelectedDirectory for files whose
/// extension matches the pipe-separated list in txtFilePattern, and collects every
/// line that contains one of the keywords from txtKeywords (case-insensitively) or
/// matches one of the regular expressions from txtPatterns.
/// Increments the fileCount member once for every file that is opened.
/// </summary>
/// <returns>
/// One tuple per matching line: the file path with the selected directory replaced
/// by "..", the zero-based index of the line within its file, and the line text.
/// </returns>
/// <exception cref="UnauthorizedAccessException">Logged to the console and rethrown.</exception>
/// <exception cref="PathTooLongException">Logged to the console and rethrown.</exception>
private List<Tuple<String, Int32, String>> ScanDocuments2()
{
    Regex searchPattern = new Regex(@"$(?<=\.(" + txtFilePattern.Text + "))", RegexOptions.IgnoreCase);

    // Blank lines in the text boxes are not search terms.
    List<string> keywords = txtKeywords.Lines.Where(text => text.Length > 0).ToList();
    List<Regex> patterns = txtPatterns.Lines
        .Where(text => text.Length > 0)
        .Select(text => new Regex(text))
        .ToList();

    try
    {
        var files = Directory.EnumerateFiles(txtSelectedDirectory.Text, "*.*", SearchOption.AllDirectories)
            .Where(f => searchPattern.IsMatch(f));
        var lines = new List<Tuple<String, Int32, String>>();

        foreach (string file in files)
        {
            fileCount++;
            using (var reader = new StreamReader(file))
            {
                var counter = 0;
                String line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Case-insensitive search without allocating lowered copies of the
                    // line and keyword for every comparison.
                    bool keywordHit = keywords.Any(
                        keyword => line.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0);

                    if (keywordHit || patterns.Any(pattern => pattern.IsMatch(line)))
                    {
                        // Cleans up the file path for the grid.
                        string tmpfile = file.Replace(txtSelectedDirectory.Text, "..");
                        lines.Add(Tuple.Create(tmpfile, counter, line));
                    }

                    counter++;
                }
            }
        }

        return lines;
    }
    catch (UnauthorizedAccessException UAEx)
    {
        Console.WriteLine(UAEx.Message);
        // A bare "throw" keeps the original stack trace; "throw UAEx" would reset it.
        throw;
    }
    catch (PathTooLongException PathEx)
    {
        Console.WriteLine(PathEx.Message);
        throw;
    }
}
问题是:如何确定返回的每个元组是由哪个关键字或模式匹配到的?
答案 0 :(得分:1)
可以引入一个新变量来保存匹配到的模式,并使用 FirstOrDefault 代替 Any。这样,只要新变量不为 null,你就得到了匹配的模式,并且可以把它放进元组中一并返回。
例如:
...
new List<Tuple<String, Int32, String, Regex>>()
...
// Read the file line by line until the reader reports end of input (null).
while ((line = reader.ReadLine()) != null)
{
// First pattern that matches this line, or null when no pattern matches.
Regex matchingReg = patterns.FirstOrDefault(pattern => pattern.IsMatch(line));
// A line is reported when any keyword occurs in it (compared in lower case) or a pattern matched.
if (keywords.Any(keyword => line.ToLower().Contains(keyword.ToLower())) || matchingReg != null)
{
//cleans up the file path for grid
string tmpfile = file.Replace(txtSelectedDirectory.Text, "..");
// matchingReg is null here when the line was accepted only because of a keyword.
accumulator.Add(Tuple.Create(tmpfile, counter, line, matchingReg));
}
// Advanced for every line read, matched or not.
counter++;
}
...
答案 1 :(得分:1)
下面是一些重构后的代码。肯尼斯的思路是正确的。
/// <summary>
/// Collects the matches from every file selected by the search pattern, using the
/// keywords and regex patterns currently entered in txtKeywords and txtPatterns.
/// </summary>
/// <returns>All matches found, in the order the files were enumerated.</returns>
/// <exception cref="UnauthorizedAccessException">Logged to the console and rethrown.</exception>
/// <exception cref="PathTooLongException">Logged to the console and rethrown.</exception>
private IEnumerable<LineMatch> ScanDocuments2()
{
    Regex fileFilter = GetSearchPattern();

    // Materialize once so the same lists are reused for every file.
    var keywords = GetKeywords(txtKeywords.Lines).ToList();
    var patterns = GetPatterns(txtPatterns.Lines).ToList();

    try
    {
        var matches = new List<LineMatch>();
        foreach (var file in GetFiles(fileFilter))
        {
            matches.AddRange(EnumerateFile(file, keywords, patterns));
        }

        return matches;
    }
    catch (UnauthorizedAccessException accessError)
    {
        Console.WriteLine(accessError.Message);
        throw;
    }
    catch (PathTooLongException pathError)
    {
        Console.WriteLine(pathError.Message);
        throw;
    }
}
/// <summary>
/// Lazily reads <paramref name="file"/> line by line and yields one
/// <see cref="LineMatch"/> for every line that contains a keyword
/// (case-insensitively) or matches a regex pattern.
/// </summary>
/// <param name="file">Path of the file to scan.</param>
/// <param name="keywords">Plain-text keywords to look for in each line.</param>
/// <param name="patterns">Regular expressions to test against each line.</param>
/// <returns>
/// The matching lines. Counter holds the zero-based index of the line within the
/// file; Pattern and Keyword hold the first pattern / keyword that matched, or null.
/// </returns>
private IEnumerable<LineMatch> EnumerateFile(string file, IEnumerable<string> keywords, IEnumerable<Regex> patterns)
{
    var counter = 0;
    foreach (var line in File.ReadLines(file))
    {
        // Advance for every line, not only for matches, so the value identifies
        // the line's position in the file.
        var lineIndex = counter++;

        var matchingRegex = patterns.FirstOrDefault(p => p.IsMatch(line));
        var keyword = keywords.FirstOrDefault(
            k => line.IndexOf(k, StringComparison.OrdinalIgnoreCase) >= 0);

        if (keyword == null && matchingRegex == null)
        {
            continue;
        }

        // Cleans up the file path for display.
        string tmpfile = file.Replace(txtSelectedDirectory.Text, "..");

        yield return new LineMatch
        {
            Counter = lineIndex,
            File = tmpfile,
            Line = line,
            // Regex.ToString() returns the pattern text passed to the constructor.
            Pattern = matchingRegex == null ? null : matchingRegex.ToString(),
            Keyword = keyword
        };
    }
}
/// <summary>
/// Lazily enumerates every file under the directory named in txtSelectedDirectory
/// (including subdirectories) whose path matches <paramref name="searchPattern"/>.
/// </summary>
/// <param name="searchPattern">Regex tested against each full file path.</param>
/// <returns>The matching file paths.</returns>
private IEnumerable<string> GetFiles(Regex searchPattern)
{
    string root = txtSelectedDirectory.Text;
    IEnumerable<string> candidates = Directory.EnumerateFiles(root, "*.*", SearchOption.AllDirectories);
    return candidates.Where(path => searchPattern.IsMatch(path));
}
/// <summary>
/// Lazily filters out the empty entries of <paramref name="keywordtext"/>.
/// </summary>
/// <param name="keywordtext">Raw keyword lines; may contain empty strings.</param>
/// <returns>The non-empty entries, in their original order.</returns>
private IEnumerable<string> GetKeywords(IEnumerable<string> keywordtext)
{
    foreach (string candidate in keywordtext)
    {
        if (candidate.Length > 0)
        {
            yield return candidate;
        }
    }
}
/// <summary>
/// Lazily compiles every non-empty entry of <paramref name="patterntext"/> into a
/// <see cref="Regex"/>.
/// </summary>
/// <param name="patterntext">Raw regex pattern lines; empty strings are skipped.</param>
/// <returns>One <see cref="Regex"/> per non-empty entry, in the original order.</returns>
/// <exception cref="ArgumentException">
/// Thrown during enumeration when an entry is not a valid regular expression.
/// </exception>
private IEnumerable<Regex> GetPatterns(IEnumerable<string> patterntext)
{
    foreach (var pattern in patterntext)
    {
        if (pattern.Length <= 0)
        {
            continue;
        }

        yield return new Regex(pattern);
    }
}
/// <summary>
/// Builds the case-insensitive regex used to select files, inserting the text of
/// txtFilePattern as the alternation group that must follow a literal dot at the
/// end of the input.
/// </summary>
/// <returns>The compiled file-selection regex.</returns>
private Regex GetSearchPattern()
{
    string extensions = txtFilePattern.Text;
    string expression = string.Format(@"$(?<=\.({0}))", extensions);
    return new Regex(expression, RegexOptions.IgnoreCase);
}
/// <summary>
/// Describes a single line reported by the scan: where it was found and which
/// keyword and/or regex pattern caused it to be reported.
/// </summary>
public class LineMatch
{
/// <summary>Counter value recorded by the scanner for this match.</summary>
public int Counter { get; set; }
/// <summary>Path of the file containing the line, as prepared for display by the scanner.</summary>
public string File { get; set; }
/// <summary>Text of the matching line.</summary>
public string Line { get; set; }
/// <summary>Text of the regex pattern that matched, or null when no pattern matched.</summary>
public string Pattern { get; set; }
/// <summary>Keyword found in the line, or null when no keyword matched.</summary>
public string Keyword { get; set; }
}