我需要开发一个实用程序:它接受一个文件夹路径,该文件夹中包含多个日志/文本文件(每个约 200 MB),然后遍历所有文件,从包含目标内容的行中提取四个元素。
我尝试了多种解决方案,它们对较小的文件都有效;但当我加载较大的文件时,Windows 窗体要么直接挂起,要么显示 "OutOfMemory Exception"。请帮忙。
解决方案1:
// Solution 1: reads each file in the selected folder into memory in full, then appends
// every timestamp match to Path.Text.
string textFile;
// Timestamp pattern: 4-digit year starting with 1 or 2, month 01-12, day 01-31
// (separated by '-' or '/'), then 'T' or a whitespace character, then HH:mm:ss
// (hours 00-23, minutes and seconds 00-59).
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// NOTE: result is assigned but never read in this snippet; only SelectedPath is checked.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
string[] files = Directory.GetFiles(fbd.SelectedPath);
System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
foreach (string fileName in files)
{
// File.ReadAllText returns the complete file content as one string, so the whole
// file is held in memory before any matching starts.
textFile = File.ReadAllText(fileName);
MatchCollection mc = Regex.Matches(textFile, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// Each += produces a new string made of the previous text plus the appended part.
// NOTE(review): Path is presumably a text control on the form - confirm.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
解决方案2:
// Solution 2: streams each file line by line through a StreamReader and appends every
// timestamp match to Path.Text.
// Timestamp pattern: 4-digit year starting with 1 or 2, month 01-12, day 01-31
// (separated by '-' or '/'), then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// NOTE: result is assigned but never read; SelectedPath is used below without any check.
DialogResult result = fbd.ShowDialog();
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{
const Int32 BufferSize = 512;
using (var fileStream = File.OpenRead(file))
// The third argument (true) asks the reader to detect the encoding from a byte order mark.
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))
{
String line;
// ReadLine returns null at end of stream, which ends the loop.
while ((line = streamReader.ReadLine()) != null)
{
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// Each += produces a new string made of the previous text plus the appended part.
// NOTE(review): Path is presumably a text control on the form - confirm.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// NOTE: the closing brace of the outer foreach is not included in this snippet.
解决方案3:
// Solution 3: reads one file line by line until EndOfStream and appends every timestamp
// match to Path.Text; any exception raised while reading is shown in a message box.
// Timestamp pattern: 4-digit year starting with 1 or 2, month 01-12, day 01-31
// (separated by '-' or '/'), then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// NOTE: neither result nor fbd is used again in this snippet.
DialogResult result = fbd.ShowDialog();
// NOTE(review): file is not declared in this snippet - presumably the loop variable of an
// enclosing foreach over the selected folder; confirm.
using (StreamReader r = new StreamReader(file))
{
try
{
// The initial value is overwritten by the first ReadLine call.
string line = String.Empty;
while (!r.EndOfStream)
{
line = r.ReadLine();
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// Each += produces a new string made of the previous text plus the appended part.
// NOTE(review): Path is presumably a text control on the form - confirm.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
catch (Exception ex)
{
// Catches every exception type and only displays its message.
MessageBox.Show(ex.Message);
}
}
答案 0 :(得分:0)
有几点需要注意:
1. 不要反复执行 Path.Text += ...。我假设这只是测试代码,之后应该会被丢弃。
2. 使用简单的 File.ReadLines 调用;就您的情况而言,文件读取速度没有实际差别。
以下是实现上述建议的示例代码:
// Streams every file in the selected folder line by line and collects all timestamp
// matches into buf instead of appending them to a UI control.
//
// Timestamp pattern: 4-digit year starting with 1 or 2, month 01-12, day 01-31
// (separated by '-' or '/'), then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
var buf = new List<string>();
// Build the regex once and reuse it for every line.
var re2 = new Regex(re1, RegexOptions.Compiled);
// re1 accepts both '-' and '/' between the date parts, so the pre-filter must accept both.
char[] dateSeparators = { '-', '/' };
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// Skip the scan when the dialog was cancelled or no folder was chosen;
// Directory.GetFiles throws on an empty path.
if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
    foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {
        // File.ReadLines enumerates lazily, so only the current line is materialized.
        foreach (var line in File.ReadLines(file)) {
            // Cheap pre-filter: a match needs a date separator followed later by ':'.
            // Lines without both cannot match, so the regex is not run on them.
            int indx = line.IndexOfAny(dateSeparators);
            if (indx == -1 || line.IndexOf(':', indx + 1) == -1)
                continue;
            foreach (Match m in re2.Matches(line)) {
                buf.Add(m.Value + Environment.NewLine); //Temporary, Just to check the output
            }
        }
    }
}
答案 1 :(得分:0)
您用于调试输出的 Path 文本可能拼接了大量字符串。请把 += 拼接改为使用 StringBuilder,看看这是否就是导致内存问题的原因。
另外,您是否考虑过使用 MS Log Parser 2.2 作为替代方案?