在 C# 中将大量文件合并为一个文件的最快方法是什么?

时间:2016-04-19 05:24:10

标签: c# .net file-io merge

我用 C# 对大文件(大小至少为 500MB)进行拆分和合并。

我必须将文件拆分为数千个文件,将这些文件分类到某些组中,然后按每个组合并这些文件。

最小文件数为10,000。

我使用Stream.CopyTo()方法实现了merge函数。这是主要部分。

// Concatenates every file listed in filePaths, in order, into outputFilePath.
// NOTE(review): File.OpenWrite opens an existing file without truncating it, so if
// outputFilePath already exists and is longer than the merged data, stale bytes
// presumably remain at the end — confirm whether File.Create is wanted instead.
using (Stream writer = File.OpenWrite(outputFilePath))
{
      // Count() is evaluated once up front so the loop bound is fixed.
      int fileNum = filePaths.Count();
      for (int i = 0; i < fileNum; i++)
      {
           // NOTE(review): the type of filePaths is not shown here. If it is not an
           // indexable list, ElementAt(i) walks the sequence from the start on every
           // iteration — verify against the declaration.
           using (Stream reader = File.OpenRead(filePaths.ElementAt(i)))
           { reader.CopyTo(writer); }
       }
}

我已经测试了我的程序:将一个500MB的文件拆分为17000个文件(分为2组),再将每组的8500个文件合并为一个文件。

合并部分大约需要80秒。相比之下,拆分同一个文件只需要大约15~20秒,所以我认为合并太慢了。

有没有比我的代码更快的方法?

3 个答案:

答案 0 :(得分:1)

也许尝试压缩文件?

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.IO.Compression;

/// <summary>
/// Minimal demo: writes a string to a GZip-compressed file and reads it back.
/// </summary>
class Program {
    /// <summary>
    /// Compresses <paramref name="data"/> with GZip and writes it to
    /// <paramref name="filename"/>, replacing any existing file.
    /// </summary>
    /// <param name="filename">Path of the compressed file to create.</param>
    /// <param name="data">Text to compress and store.</param>
    static void SaveCompressedFile(string filename, string data) {
        // Stacked using blocks flush and close every stream (innermost first)
        // even when Write throws, so no file handle is leaked.
        using (FileStream fileStream = new FileStream(filename, FileMode.Create, FileAccess.Write))
        using (GZipStream compressionStream = new GZipStream(fileStream, CompressionMode.Compress))
        using (StreamWriter writer = new StreamWriter(compressionStream)) {
            writer.Write(data);
        }
    }

    /// <summary>
    /// Reads a GZip-compressed text file and returns its decompressed content.
    /// </summary>
    /// <param name="filename">Path of the compressed file to read.</param>
    /// <returns>The full decompressed text.</returns>
    static string LoadCompressedFile(string filename) {
        using (FileStream fileStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
        using (GZipStream compressionStream = new GZipStream(fileStream, CompressionMode.Decompress))
        using (StreamReader reader = new StreamReader(compressionStream)) {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Round-trips a sample string through a compressed file; I/O errors are
    /// printed to the console instead of crashing the program.
    /// </summary>
    static void Main(string[] args) {
        try {
            string filename = "compressedFile.txt";
            string sourceString = "Source String";
            SaveCompressedFile(filename, sourceString);
            // The recovered value is not used further; the call demonstrates the read path.
            string recoveredString = LoadCompressedFile(filename);
        } catch (IOException ex) {
            Console.WriteLine(ex.ToString());
        }
    }
}

Source

另请参阅压缩目录的示例。

using System;
using System.Text;
using System.IO;
using System.IO.Compression;

namespace CmprDir
{
  /// <summary>
  /// Command-line tool that packs a directory tree into a single GZip stream and
  /// unpacks it again. Each entry is stored as: name length (int), the name as
  /// raw chars, content length (int), content bytes.
  /// </summary>
  class Program
  {
    /// <summary>Callback invoked with the relative path of each file being processed.</summary>
    delegate void ProgressDelegate(string sMessage);

    /// <summary>
    /// Reads from <paramref name="stream"/> until <paramref name="count"/> bytes
    /// have been stored in <paramref name="buffer"/> or the stream ends.
    /// </summary>
    /// <returns>The number of bytes actually read (less than count only at end of stream).</returns>
    static int ReadFully(Stream stream, byte[] buffer, int count)
    {
      // Stream.Read may return fewer bytes than requested (decompression streams
      // routinely do), so keep reading until the request is satisfied.
      int total = 0;
      while (total < count)
      {
        int read = stream.Read(buffer, total, count - total);
        if (read == 0)
          break;
        total += read;
      }
      return total;
    }

    /// <summary>
    /// Reads exactly <paramref name="count"/> bytes or throws.
    /// </summary>
    /// <exception cref="EndOfStreamException">The stream ended before count bytes were read.</exception>
    static void ReadOrThrow(Stream stream, byte[] buffer, int count)
    {
      if (ReadFully(stream, buffer, count) < count)
        throw new EndOfStreamException("Unexpected end of compressed data.");
    }

    /// <summary>
    /// Appends one file (relative name followed by content) to the compressed stream.
    /// </summary>
    /// <param name="sDir">Root directory the relative path is resolved against.</param>
    /// <param name="sRelativePath">Path of the file relative to <paramref name="sDir"/>; stored in the archive.</param>
    /// <param name="zipStream">Open compression stream to write to.</param>
    static void CompressFile(string sDir, string sRelativePath, GZipStream zipStream)
    {
      //Compress file name
      char[] chars = sRelativePath.ToCharArray();
      zipStream.Write(BitConverter.GetBytes(chars.Length), 0, sizeof(int));
      foreach (char c in chars)
        zipStream.Write(BitConverter.GetBytes(c), 0, sizeof(char));

      //Compress file content
      byte[] bytes = File.ReadAllBytes(Path.Combine(sDir, sRelativePath));
      zipStream.Write(BitConverter.GetBytes(bytes.Length), 0, sizeof(int));
      zipStream.Write(bytes, 0, bytes.Length);
    }

    /// <summary>
    /// Extracts the next entry from the compressed stream into <paramref name="sDir"/>.
    /// </summary>
    /// <param name="sDir">Destination root directory.</param>
    /// <param name="zipStream">Open decompression stream positioned at an entry boundary.</param>
    /// <param name="progress">Optional callback receiving the entry name; may be null.</param>
    /// <returns>true if an entry was extracted; false when the stream has no further entry.</returns>
    /// <exception cref="EndOfStreamException">The stream ended in the middle of an entry.</exception>
    /// <exception cref="InvalidDataException">An entry header contains a negative length.</exception>
    static bool DecompressFile(string sDir, GZipStream zipStream, ProgressDelegate progress)
    {
      //Decompress file name
      byte[] bytes = new byte[sizeof(int)];
      // Fewer than four bytes here means there is no further entry.
      if (ReadFully(zipStream, bytes, sizeof(int)) < sizeof(int))
        return false;

      int iNameLen = BitConverter.ToInt32(bytes, 0);
      if (iNameLen < 0)
        throw new InvalidDataException("Negative file name length in compressed data.");

      bytes = new byte[sizeof(char)];
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < iNameLen; i++)
      {
        ReadOrThrow(zipStream, bytes, sizeof(char));
        char c = BitConverter.ToChar(bytes, 0);
        sb.Append(c);
      }
      string sFileName = sb.ToString();
      if (progress != null)
        progress(sFileName);

      //Decompress file content
      bytes = new byte[sizeof(int)];
      ReadOrThrow(zipStream, bytes, sizeof(int));
      int iFileLen = BitConverter.ToInt32(bytes, 0);
      if (iFileLen < 0)
        throw new InvalidDataException("Negative file length in compressed data.");

      bytes = new byte[iFileLen];
      ReadOrThrow(zipStream, bytes, bytes.Length);

      // NOTE(review): sFileName comes straight from the archive. A crafted name
      // (rooted path or ".." segments) could place the file outside sDir — add a
      // containment check before using this on untrusted archives.
      string sFilePath = Path.Combine(sDir, sFileName);
      string sFinalDir = Path.GetDirectoryName(sFilePath);
      if (!Directory.Exists(sFinalDir))
        Directory.CreateDirectory(sFinalDir);

      using (FileStream outFile = new FileStream(sFilePath, FileMode.Create, FileAccess.Write, FileShare.None))
        outFile.Write(bytes, 0, iFileLen);

      return true;
    }

    /// <summary>
    /// Compresses every file under <paramref name="sInDir"/> (recursively) into <paramref name="sOutFile"/>.
    /// </summary>
    /// <param name="sInDir">Directory to compress.</param>
    /// <param name="sOutFile">Path of the archive to create (overwritten if present).</param>
    /// <param name="progress">Optional callback receiving each relative path; may be null.</param>
    static void CompressDirectory(string sInDir, string sOutFile, ProgressDelegate progress)
    {
      string[] sFiles = Directory.GetFiles(sInDir, "*.*", SearchOption.AllDirectories);

      // Length of the prefix to strip from each full path. Either separator may
      // terminate the directory; otherwise one separator follows the directory name.
      char cLast = sInDir[sInDir.Length - 1];
      bool bEndsWithSeparator = cLast == Path.DirectorySeparatorChar || cLast == Path.AltDirectorySeparatorChar;
      int iDirLen = bEndsWithSeparator ? sInDir.Length : sInDir.Length + 1;

      using (FileStream outFile = new FileStream(sOutFile, FileMode.Create, FileAccess.Write, FileShare.None))
      using (GZipStream str = new GZipStream(outFile, CompressionMode.Compress))
        foreach (string sFilePath in sFiles)
        {
          string sRelativePath = sFilePath.Substring(iDirLen);
          if (progress != null)
            progress(sRelativePath);
          CompressFile(sInDir, sRelativePath, str);
        }
    }

    /// <summary>
    /// Extracts every entry of <paramref name="sCompressedFile"/> into <paramref name="sDir"/>.
    /// </summary>
    /// <param name="sCompressedFile">Archive produced by <see cref="CompressDirectory"/>.</param>
    /// <param name="sDir">Destination directory; subdirectories are created as needed.</param>
    /// <param name="progress">Optional callback receiving each entry name; may be null.</param>
    static void DecompressToDirectory(string sCompressedFile, string sDir, ProgressDelegate progress)
    {
      using (FileStream inFile = new FileStream(sCompressedFile, FileMode.Open, FileAccess.Read, FileShare.None))
      using (GZipStream zipStream = new GZipStream(inFile, CompressionMode.Decompress, true))
      {
        // All the work happens in DecompressFile; loop until it reports no more entries.
        while (DecompressFile(sDir, zipStream, progress))
        {
        }
      }
    }

    /// <summary>
    /// Entry point. With an existing directory as the first argument the directory is
    /// compressed into the second argument; with an existing file as the first argument
    /// that file is decompressed into the second argument.
    /// </summary>
    /// <returns>0 on success, 1 on bad arguments or any error.</returns>
    public static int Main(string[] argv) 
    {
      if (argv.Length != 2)
      {
        Console.WriteLine("Usage: CmprDir.exe <in_dir compressed_file> | <compressed_file out_dir>");
        return 1;
      }

      string sDir;
      string sCompressedFile;
      bool bCompress = false;
      try
      {
        if (Directory.Exists(argv[0]))
        {
          sDir = argv[0];
          sCompressedFile = argv[1];
          bCompress = true;
        }
        else
          if (File.Exists(argv[0]))
          {
            sCompressedFile = argv[0];
            sDir = argv[1];
            bCompress = false;
          }
          else
          {
            Console.Error.WriteLine("Wrong arguments");
            return 1;
          }

        if (bCompress)
          CompressDirectory(sDir, sCompressedFile, (fileName) => { Console.WriteLine("Compressing {0}...", fileName); });
        else
          DecompressToDirectory(sCompressedFile, sDir, (fileName) => { Console.WriteLine("Decompressing {0}...", fileName); });

        return 0;
      }
      catch (Exception ex)
      {
        Console.Error.WriteLine(ex.Message);
        return 1;
      }
    }
  }
}

Source

答案 1 :(得分:1)

您的代码看起来不错,但 ElementAt 是一种代码异味(code smell)。请先将集合转换为数组,然后用 [i] 访问。如果你有 10K 个元素,我敢肯定这会浪费大量时间。

答案 2 :(得分:0)

为什么不直接使用Stream.CopyTo()方法?

     /// <summary>
     /// Concatenates every file in a directory whose name matches a search pattern
     /// into one output file, reporting progress on the console.
     /// </summary>
     /// <param name="inputDirectoryPath">Directory that is searched for input files.</param>
     /// <param name="inputFileNamePattern">Search pattern passed to Directory.GetFiles.</param>
     /// <param name="outputFilePath">File that receives the concatenated content; created or overwritten.</param>
     private static void CombineMultipleFilesIntoSingleFile(string inputDirectoryPath, string inputFileNamePattern, string outputFilePath)
{
    // The list of inputs is captured before the output file is created.
    string[] sourcePaths = Directory.GetFiles(inputDirectoryPath, inputFileNamePattern);
    Console.WriteLine("Number of files: {0}.", sourcePaths.Length);

    using (FileStream destination = File.Create(outputFilePath))
    {
        for (int index = 0; index < sourcePaths.Length; index++)
        {
            string currentPath = sourcePaths[index];

            using (FileStream source = File.OpenRead(currentPath))
            {
                // CopyTo also has an overload taking an explicit buffer size.
                source.CopyTo(destination);
            }

            Console.WriteLine("The file {0} has been processed.", currentPath);
        }
    }
}

或者

分块进行:

// Merges the listed input files into "output.dat" by copying fixed-size chunks.
const int chunkSize = 2 * 1024; // 2KB
// Placeholder paths — replace with the files to merge, in the desired order.
var inputFiles = new[] { "file1.dat", "file2.dat", "file3.dat" };
using (var output = File.Create("output.dat"))
{
    // One buffer is reused for every file instead of allocating a new one per file.
    var buffer = new byte[chunkSize];
    foreach (var file in inputFiles)
    {
        using (var input = File.OpenRead(file))
        {
            int bytesRead;
            // Read returns 0 at end of file; only the bytes actually read are written.
            while ((bytesRead = input.Read(buffer, 0, buffer.Length)) > 0)
            {
                output.Write(buffer, 0, bytesRead);
            }
        }
    }
}
相关问题