我用 C# 对大文件(容量至少为 500MB)进行分割和合并。
我必须将文件拆分为数千个文件,将这些文件分类到某些组中,然后按每个组合并这些文件。
最小文件数为10,000。
我使用Stream.CopyTo()方法实现了merge函数。这是主要部分。
// Merge step: copy every input file, in sequence order, into one output stream.
using (Stream destination = File.OpenWrite(outputFilePath))
{
    int total = filePaths.Count();
    int index = 0;
    while (index < total)
    {
        // Look up the path at the current position, then append that file's bytes.
        using (Stream source = File.OpenRead(filePaths.ElementAt(index)))
        {
            source.CopyTo(destination);
        }
        index++;
    }
}
我已经测试了我的程序,将500MB分成2组的17000个文件,并将每组8500个文件合并为一个文件。
合并部分大约需要80秒。我认为这相当慢,因为分割同一个文件只需要大约15~20秒。
有没有比我的代码更快的方法?
答案 0 :(得分:1)
也许尝试压缩文件?
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.IO.Compression;
/// <summary>
/// Minimal demo: writes a string to a GZip-compressed file and reads it back.
/// </summary>
class Program {
    /// <summary>
    /// Writes <paramref name="data"/> to <paramref name="filename"/> as GZip-compressed text.
    /// </summary>
    /// <param name="filename">Path of the file to create.</param>
    /// <param name="data">Text to compress and store.</param>
    static void SaveCompressedFile(string filename, string data) {
        // The using blocks close every stream even when Write throws;
        // disposing the writer first flushes its text into the GZip stream.
        using (FileStream fileStream = new FileStream(filename, FileMode.Create, FileAccess.Write))
        using (GZipStream compressionStream = new GZipStream(fileStream, CompressionMode.Compress))
        using (StreamWriter writer = new StreamWriter(compressionStream)) {
            writer.Write(data);
        }
    }

    /// <summary>
    /// Reads a file written by <see cref="SaveCompressedFile"/> and returns its text.
    /// </summary>
    /// <param name="filename">Path of the GZip-compressed file to read.</param>
    /// <returns>The decompressed text content.</returns>
    static string LoadCompressedFile(string filename) {
        // Same disposal guarantee as above: the file handle is released
        // even if decompression fails part-way through.
        using (FileStream fileStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
        using (GZipStream compressionStream = new GZipStream(fileStream, CompressionMode.Decompress))
        using (StreamReader reader = new StreamReader(compressionStream)) {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Round-trips a sample string through a compressed file; I/O errors are printed.
    /// </summary>
    static void Main(string[] args) {
        try {
            string filename = "compressedFile.txt";
            string sourceString = "Source String";
            SaveCompressedFile(filename, sourceString);
            string recoveredString = LoadCompressedFile(filename);
        } catch (IOException ex) {
            Console.WriteLine(ex.ToString());
        }
    }
}
另请参阅压缩目录的示例。
using System;
using System.Text;
using System.IO;
using System.IO.Compression;
namespace CmprDir
{
    /// <summary>
    /// Command-line tool that packs a directory tree into a single GZip stream
    /// and unpacks such a stream back into a directory.
    /// </summary>
    /// <remarks>
    /// Each entry is stored as: name length in chars (Int32), the name as 2-byte
    /// chars, content length in bytes (Int32), then the content bytes. Integers
    /// and chars are encoded with BitConverter.
    /// </remarks>
    class Program
    {
        delegate void ProgressDelegate(string sMessage);

        /// <summary>
        /// Appends one file (relative name followed by content) to the compressed stream.
        /// </summary>
        /// <param name="sDir">Root directory being compressed.</param>
        /// <param name="sRelativePath">Path of the file relative to <paramref name="sDir"/>; stored as the entry name.</param>
        /// <param name="zipStream">Open compression stream to write to.</param>
        static void CompressFile(string sDir, string sRelativePath, GZipStream zipStream)
        {
            //Compress file name
            char[] chars = sRelativePath.ToCharArray();
            zipStream.Write(BitConverter.GetBytes(chars.Length), 0, sizeof(int));
            foreach (char c in chars)
            {
                zipStream.Write(BitConverter.GetBytes(c), 0, sizeof(char));
            }
            //Compress file content
            byte[] bytes = File.ReadAllBytes(Path.Combine(sDir, sRelativePath));
            zipStream.Write(BitConverter.GetBytes(bytes.Length), 0, sizeof(int));
            zipStream.Write(bytes, 0, bytes.Length);
        }

        /// <summary>
        /// Reads until <paramref name="count"/> bytes are in the buffer or the stream ends.
        /// </summary>
        /// <returns>The number of bytes actually read (less than count only at end of stream).</returns>
        static int ReadFully(Stream stream, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count)
            {
                // A single Read call may legally return fewer bytes than requested.
                int read = stream.Read(buffer, total, count - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }

        /// <summary>
        /// Reads exactly <paramref name="count"/> bytes or throws if the stream ends first.
        /// </summary>
        /// <exception cref="EndOfStreamException">The stream ended inside an entry.</exception>
        static void ReadExact(Stream stream, byte[] buffer, int count)
        {
            if (ReadFully(stream, buffer, count) < count)
            {
                throw new EndOfStreamException("Unexpected end of compressed data.");
            }
        }

        /// <summary>
        /// Rejects entry paths that would resolve outside the extraction directory.
        /// </summary>
        /// <exception cref="InvalidDataException">The entry path escapes <paramref name="sDir"/>.</exception>
        static void EnsureInsideDirectory(string sDir, string sFilePath)
        {
            // An empty target means "relative to the current directory" for Path.Combine.
            string sRoot = Path.GetFullPath(sDir.Length == 0 ? "." : sDir);
            string sSeparator = Path.DirectorySeparatorChar.ToString();
            if (!sRoot.EndsWith(sSeparator, StringComparison.Ordinal))
            {
                sRoot += sSeparator;
            }
            string sFullPath = Path.GetFullPath(sFilePath);
            if (!sFullPath.StartsWith(sRoot, StringComparison.Ordinal))
            {
                throw new InvalidDataException("Entry path is outside the target directory: " + sFilePath);
            }
        }

        /// <summary>
        /// Extracts the next entry from the compressed stream into <paramref name="sDir"/>.
        /// </summary>
        /// <param name="sDir">Directory that receives the extracted file.</param>
        /// <param name="zipStream">Open decompression stream positioned at an entry boundary.</param>
        /// <param name="progress">Optional callback invoked with the entry name; may be null.</param>
        /// <returns>true if an entry was extracted; false if the stream had no more entries.</returns>
        /// <exception cref="EndOfStreamException">The stream ended in the middle of an entry.</exception>
        /// <exception cref="InvalidDataException">An entry has a negative length or an unsafe path.</exception>
        static bool DecompressFile(string sDir, GZipStream zipStream, ProgressDelegate progress)
        {
            //Decompress file name
            byte[] bytes = new byte[sizeof(int)];
            int iHeaderRead = ReadFully(zipStream, bytes, sizeof(int));
            if (iHeaderRead == 0)
            {
                // Clean end of archive: no further entries.
                return false;
            }
            if (iHeaderRead < sizeof(int))
            {
                throw new EndOfStreamException("Unexpected end of compressed data.");
            }
            int iNameLen = BitConverter.ToInt32(bytes, 0);
            if (iNameLen < 0)
            {
                throw new InvalidDataException("Negative entry name length.");
            }
            bytes = new byte[sizeof(char)];
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < iNameLen; i++)
            {
                ReadExact(zipStream, bytes, sizeof(char));
                sb.Append(BitConverter.ToChar(bytes, 0));
            }
            string sFileName = sb.ToString();
            if (progress != null)
            {
                progress(sFileName);
            }
            // The name comes from the archive, so validate it before touching the disk.
            string sFilePath = Path.Combine(sDir, sFileName);
            EnsureInsideDirectory(sDir, sFilePath);
            //Decompress file content
            bytes = new byte[sizeof(int)];
            ReadExact(zipStream, bytes, sizeof(int));
            int iFileLen = BitConverter.ToInt32(bytes, 0);
            if (iFileLen < 0)
            {
                throw new InvalidDataException("Negative entry content length.");
            }
            bytes = new byte[iFileLen];
            ReadExact(zipStream, bytes, bytes.Length);
            string sFinalDir = Path.GetDirectoryName(sFilePath);
            // GetDirectoryName yields an empty string for a bare file name; nothing to create then.
            if (!string.IsNullOrEmpty(sFinalDir))
            {
                Directory.CreateDirectory(sFinalDir);
            }
            using (FileStream outFile = new FileStream(sFilePath, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                outFile.Write(bytes, 0, iFileLen);
            }
            return true;
        }

        /// <summary>
        /// Compresses every file under <paramref name="sInDir"/> (recursively) into <paramref name="sOutFile"/>.
        /// </summary>
        /// <param name="sInDir">Directory to compress.</param>
        /// <param name="sOutFile">Path of the compressed file to create.</param>
        /// <param name="progress">Optional callback invoked with each relative path; may be null.</param>
        static void CompressDirectory(string sInDir, string sOutFile, ProgressDelegate progress)
        {
            string[] sFiles = Directory.GetFiles(sInDir, "*.*", SearchOption.AllDirectories);
            // Skip the directory prefix plus one separator; accept either separator
            // style so a trailing '/' does not eat the first character of each name.
            char cLast = sInDir[sInDir.Length - 1];
            bool bEndsWithSeparator = cLast == Path.DirectorySeparatorChar || cLast == Path.AltDirectorySeparatorChar;
            int iDirLen = bEndsWithSeparator ? sInDir.Length : sInDir.Length + 1;
            using (FileStream outFile = new FileStream(sOutFile, FileMode.Create, FileAccess.Write, FileShare.None))
            using (GZipStream str = new GZipStream(outFile, CompressionMode.Compress))
            {
                foreach (string sFilePath in sFiles)
                {
                    string sRelativePath = sFilePath.Substring(iDirLen);
                    if (progress != null)
                    {
                        progress(sRelativePath);
                    }
                    CompressFile(sInDir, sRelativePath, str);
                }
            }
        }

        /// <summary>
        /// Extracts every entry of <paramref name="sCompressedFile"/> into <paramref name="sDir"/>.
        /// </summary>
        /// <param name="sCompressedFile">Path of a file produced by CompressDirectory.</param>
        /// <param name="sDir">Directory that receives the extracted files.</param>
        /// <param name="progress">Optional callback invoked with each entry name; may be null.</param>
        static void DecompressToDirectory(string sCompressedFile, string sDir, ProgressDelegate progress)
        {
            using (FileStream inFile = new FileStream(sCompressedFile, FileMode.Open, FileAccess.Read, FileShare.None))
            using (GZipStream zipStream = new GZipStream(inFile, CompressionMode.Decompress, true))
            {
                while (DecompressFile(sDir, zipStream, progress))
                {
                    // Each call extracts one entry; loop until the stream is exhausted.
                }
            }
        }

        /// <summary>
        /// Entry point. Compresses when the first argument is an existing directory,
        /// decompresses when it is an existing file.
        /// </summary>
        /// <returns>0 on success, 1 on bad arguments or any error.</returns>
        public static int Main(string[] argv)
        {
            if (argv.Length != 2)
            {
                Console.WriteLine("Usage: CmprDir.exe <in_dir compressed_file> | <compressed_file out_dir>");
                return 1;
            }
            string sDir;
            string sCompressedFile;
            bool bCompress = false;
            try
            {
                if (Directory.Exists(argv[0]))
                {
                    sDir = argv[0];
                    sCompressedFile = argv[1];
                    bCompress = true;
                }
                else if (File.Exists(argv[0]))
                {
                    sCompressedFile = argv[0];
                    sDir = argv[1];
                    bCompress = false;
                }
                else
                {
                    Console.Error.WriteLine("Wrong arguments");
                    return 1;
                }
                if (bCompress)
                {
                    CompressDirectory(sDir, sCompressedFile, (fileName) => { Console.WriteLine("Compressing {0}...", fileName); });
                }
                else
                {
                    DecompressToDirectory(sCompressedFile, sDir, (fileName) => { Console.WriteLine("Decompressing {0}...", fileName); });
                }
                return 0;
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine(ex.Message);
                return 1;
            }
        }
    }
}
答案 1 :(得分:1)
您的代码看起来不错,但 ElementAt 是一种代码异味。请先把序列转换为数组,再用 [i] 按索引访问。既然有 1 万个元素,我敢肯定这里浪费了很多时间。
答案 2 :(得分:0)
为什么不直接使用Stream.CopyTo()方法?
/// <summary>
/// Concatenates every file in a directory that matches a name pattern into one output file.
/// </summary>
/// <param name="inputDirectoryPath">Directory searched for input files.</param>
/// <param name="inputFileNamePattern">Search pattern passed to Directory.GetFiles.</param>
/// <param name="outputFilePath">Path of the combined file to create.</param>
private static void CombineMultipleFilesIntoSingleFile(string inputDirectoryPath, string inputFileNamePattern, string outputFilePath)
{
    string[] sources = Directory.GetFiles(inputDirectoryPath, inputFileNamePattern);
    Console.WriteLine("Number of files: {0}.", sources.Length);

    using (FileStream merged = File.Create(outputFilePath))
    {
        for (int index = 0; index < sources.Length; index++)
        {
            string sourcePath = sources[index];
            using (FileStream part = File.OpenRead(sourcePath))
            {
                // CopyTo also has an overload that takes a buffer size as a second argument.
                part.CopyTo(merged);
            }
            Console.WriteLine("The file {0} has been processed.", sourcePath);
        }
    }
}
OR
分块进行:
// Chunked merge: append each input file to output.dat through a fixed-size buffer.
const int chunkSize = 2 * 1024; // 2KB
// Example input names; replace with the files to merge, in the desired order.
var inputFiles = new[] { "file1.dat", "file2.dat", "file3.dat" };
// One buffer is reused for every file instead of allocating a new one per file.
var buffer = new byte[chunkSize];
using (var output = File.Create("output.dat"))
{
    foreach (var file in inputFiles)
    {
        using (var input = File.OpenRead(file))
        {
            int bytesRead;
            // Read returns 0 at end of file; write only the bytes actually read.
            while ((bytesRead = input.Read(buffer, 0, buffer.Length)) > 0)
            {
                output.Write(buffer, 0, bytesRead);
            }
        }
    }
}