我需要尽可能快地打开并读取数千个文件。
我在 13,592 个文件上运行了一些测试,发现方法1比方法2快一些。这些文件的大小通常在 800 字节到 4 kB 之间。我想知道能做些什么来让这个 I/O 密集型的过程更快?
Method 1:
Run 1: 3:05 (don't know what happened here)
Run 2: 1:55
Run 3: 2:06
Run 4: 2:02
Method 2:
Run 1: 2:04
Run 2: 2:08
Run 3: 2:04
Run 4: 2:12
以下是代码:
/// <summary>
/// Helpers for loading a text file completely into a single string.
/// </summary>
public class FileOpenerUtil
{
    /// <summary>
    /// Pause between two attempts while the file cannot be opened yet, so that
    /// waiting does not spin a CPU core or flood the console with messages.
    /// </summary>
    private const int RetryDelayMilliseconds = 50;

    /// <summary>
    /// Reads the whole file line by line and returns its content with every line
    /// terminated by a single '\n'.
    /// </summary>
    /// <remarks>
    /// The call retries forever while opening or reading the file fails with an
    /// <see cref="IOException"/> (which includes <see cref="FileNotFoundException"/>)
    /// or an <see cref="UnauthorizedAccessException"/>; it only returns once the
    /// file could be read completely.
    /// </remarks>
    /// <param name="fullFilePath">Path of the file to read.</param>
    /// <returns>The file content, one '\n' after each line and without any '\r'.</returns>
    public static string ReadFileToString(string fullFilePath)
    {
        while (true)
        {
            try
            {
                // Method 1
                using (StreamReader sr = File.OpenText(fullFilePath))
                {
                    // A StringBuilder keeps the accumulation linear; "+=" on a string
                    // copies everything read so far on every single line.
                    System.Text.StringBuilder message = new System.Text.StringBuilder();
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        message.Append(line).Append('\n');
                    }
                    return RemoveCarriageReturn(message.ToString());
                }
                // Method 2
                /*using (File.Open(fullFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    Console.WriteLine("Output file {0} ready.", fullFilePath);
                    string[] lines = File.ReadAllLines(fullFilePath);
                    //Every new line under the previous line
                    string fullMessage = lines.Aggregate("", (current, s) => current + s + "\n");
                    return RemoveCarriageReturn(fullMessage);
                    //ninject kernel
                }*/
                // Method 3
            }
            catch (IOException ex)
            {
                // FileNotFoundException derives from IOException, so a missing file
                // is reported here as well.
                Console.WriteLine("Output file {0} not yet ready ({1})", fullFilePath, ex.Message);
            }
            catch (UnauthorizedAccessException ex)
            {
                Console.WriteLine("Output file {0} not yet ready ({1})", fullFilePath, ex.Message);
            }

            // Only reached after a failed attempt: wait briefly before trying again.
            System.Threading.Thread.Sleep(RetryDelayMilliseconds);
        }
    }

    /// <summary>
    /// Removes every '\r' from a string.
    /// </summary>
    /// <param name="message">The text that has to be changed</param>
    /// <returns>The changed text</returns>
    private static string RemoveCarriageReturn(string message)
    {
        return message.Replace("\r", "");
    }
}
我正在读取的文件是 .HL7 文件,如下所示:
MSH | ^~\& | |||| OAZIS || 20150430235954 ADT ^ A03 | 23669166 | P | |||||| 2.3 ASCII EVN | A03 | 20150430235954 |||| 201504302359 PID | 1 || 6001144000 || LastName ^ FirstName ^^^ Mevr.| LastName ^ FirstName | 19600114 | F ||| GStreetName Number ^^ City ^^ PostalCode ^ B ^ H || 09/3444556 ^^ PH~0476519246echtg ^ ^ CP || NL | M || 28783409 ^^^^ VN | 0000000000 | |||||| 60011402843 ||||乙ñ PD1 |||| 003847 ^名字^姓|||||||| |||Ñ0 PV1 | 1 | O | FDAG ^ 000 ^ 053 ^ 001 ^ 0 ^ 2 | NULL || FDAG ^ 000 ^ 053 ^ 001 | 003847 ^名字^姓|| 006813 ^名字^姓| 1900 | 00 ||||| | 006813 ^名字^姓| 0 | 28783409 ^^^^ VN | 1 ^ 20150430 | 01 ||||||||||||||| 1 | 1 || d ||||| 201504301336 | 201504302359 OBX | 1 | CE | KIND_OF_DIS | RCM | 1 ^ 1 Op medisch advies OBX | 2 | CE | DESTINATION_DIS | RCM | 1 ^ 1 Terug naar huis
打开文件后,我用j4jayant's HL7 parser解析字符串并关闭文件。
答案 0 :(得分:6)
我使用了50,000个不同大小的文件(500到1024字节)。
测试1 :您的方法1 File.ReadAllLines(fullFilePath)
秒:3,4658937968113
测试2 :您的方法2 File.ReadAllText(fullFilePath);
秒:5,5008349279222
测试3 :BinaryReader b = new BinaryReader; b.ReadString();
秒:3,30782645637133
测试4 :Windows FileReader
秒:5,85779941381009
测试5 :StreamReader sr = File.OpenText(fullFilePath); sr.ReadToEnd();
(https://msdn.microsoft.com/en-us/library/2d9wy99d.aspx)
秒:3,07036554759848
测试6 :StreamReader sr = File.OpenText(fullFilePath); sr.ReadToEnd();
秒:3,31464109255517
测试7 :StreamReader sr = File.OpenText(fullFilePath); sr.ReadLine();
秒:3,3364683664508
测试8 :Parallel.For using code File.ReadAllText(fullFilePath);
秒:3,40426888695317
测试9 :FileStream + BufferedStream + StreamReader
秒:4,02871911079061
测试10 :File.ReadAllText(fullFilePath);
秒:0,89543632235447
单线程下结果最好的是测试5 和测试3。
测试3 使用的是:BinaryReader b = new BinaryReader; b.ReadString();
测试5 使用的是:StreamReader sr = File.OpenText(fullFilePath); sr.ReadToEnd();
int maxFiles = 50000;
// Pick the file from the loop index that Parallel.For hands to each iteration.
// A shared counter incremented inside the body is not thread-safe: iterations run
// concurrently, so the same file could be read twice and other files skipped.
Parallel.For(0, maxFiles, x =>
{
    Util.Method1("readtext_" + x + ".txt"); // your read method
});
(https://msdn.microsoft.com/en-us/library/2d9wy99d.aspx)
如果可以使用多线程,测试10 是迄今为止最快的。
示例:
StreamReader sr = File.OpenText(fullFilePath);
sr.ReadLine();
使用RAMMap清空备用列表时:
测试1 :您的方法1 File.ReadAllLines(fullFilePath)
秒:15,1785750622961
测试2 :您的方法2 File.ReadAllText(fullFilePath);
秒:17,650864469466
测试3 :BinaryReader b = new BinaryReader; b.ReadString();
秒:14,8985912878328
测试4 :Windows FileReader
秒:18,1603815767866
测试5 :StreamReader sr = File.OpenText(fullFilePath); sr.ReadToEnd();
秒:14,5059765845334
测试6 :StreamReader sr = File.OpenText(fullFilePath); sr.ReadToEnd();
秒:14,8649786336991
测试7 :StreamReader sr = File.OpenText(fullFilePath); sr.ReadLine();
秒:14,830567197641
测试8 :Parallel.For() using code File.ReadAllText(fullFilePath);
秒:14,9965866575751
测试9 :FileStream + BufferedStream + StreamReader
秒:15,7336450516575
测试10 :File.ReadAllText(fullFilePath);
秒:4,11343060325439
答案 1 :(得分:1)
我已经应用了评论中的所有代码。方法1似乎仍然是最快的。
/// <summary>
/// Helpers for loading a text file completely into a single string.
/// </summary>
public class FileOpenerUtil
{
    /// <summary>
    /// Pause between two attempts while the file cannot be opened yet, so that
    /// waiting does not spin a CPU core or flood the console with messages.
    /// </summary>
    private const int RetryDelayMilliseconds = 50;

    /// <summary>
    /// Reads the whole file line by line and returns its content with every line
    /// terminated by a single '\n'.
    /// </summary>
    /// <remarks>
    /// The call retries forever while opening or reading the file fails with an
    /// <see cref="IOException"/> (which includes <see cref="FileNotFoundException"/>)
    /// or an <see cref="UnauthorizedAccessException"/>; it only returns once the
    /// file could be read completely.
    /// </remarks>
    /// <param name="fullFilePath">Path of the file to read.</param>
    /// <returns>The file content, one '\n' after each line and without any '\r'.</returns>
    public static string ReadFileToString(string fullFilePath)
    {
        while (true)
        {
            try
            {
                // Method 1
                using (StreamReader sr = File.OpenText(fullFilePath))
                {
                    StringBuilder message = new StringBuilder();
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        message.Append(line).Append('\n');
                    }
                    return RemoveCarriageReturn(message.ToString());
                }
                // Method 2
                /*
                string[] lines = File.ReadAllLines(fullFilePath);
                string fullMessage = lines.Aggregate("", (current, s) => current + s + "\n");
                return RemoveCarriageReturn(fullMessage);*/
                // Method 3
                /*
                string s = File.ReadAllText(fullFilePath);
                return RemoveCarriageReturn(s);*/
            }
            catch (IOException ex)
            {
                // FileNotFoundException derives from IOException, so a missing file
                // is reported here as well.
                Console.WriteLine("Output file {0} not yet ready ({1})", fullFilePath, ex.Message);
            }
            catch (UnauthorizedAccessException ex)
            {
                Console.WriteLine("Output file {0} not yet ready ({1})", fullFilePath, ex.Message);
            }

            // Only reached after a failed attempt: wait briefly before trying again.
            System.Threading.Thread.Sleep(RetryDelayMilliseconds);
        }
    }

    /// <summary>
    /// Removes every '\r' from a string.
    /// </summary>
    /// <param name="message">The text that has to be changed</param>
    /// <returns>The changed text</returns>
    private static string RemoveCarriageReturn(string message)
    {
        return message.Replace("\r", "");
    }
}