我试图编写一个将文件拆分为固定大小的块的方法,但我无法超越 2147483590 的限制( 整数创建缓冲区时每个块的.MaxValue - 57 )因为Byte
构造函数只接受整数。
我已经读过其他S.O.的建议。回答谈到创建小块(例如:100 mb),然后附加块以获得真正期望的GB块大小,但我不知道这是正确的方式还是如何&# 34;附加"大块。
Somone可以帮助我?,这就是我所做的:
Public Sub SplitFile(ByVal InputFile As String,
ByVal ChunkSize As Long,
Optional ByVal ChunkName As String = Nothing,
Optional ByVal ChunkExt As String = Nothing,
Optional ByVal Overwrite As Boolean = False)
' FileInfo instance of the input file.
Dim fInfo As New IO.FileInfo(InputFile)
' The total amount of chunks to create.
Dim ChunkCount As Integer = CInt(Math.Floor(fInfo.Length / ChunkSize))
' The remaining bytes of the last chunk.
Dim LastChunkSize As Long = fInfo.Length - (ChunkCount * ChunkSize)
' The Buffer to read the chunks.
Dim ChunkBuffer As Byte() = New Byte(ChunkSize - 1L) {}
' The Buffer to read the last chunk.
Dim LastChunkBuffer As Byte() = New Byte(LastChunkSize - 1L) {}
' A zero-filled string to enumerate the chunk files.
Dim Zeros As String = String.Empty
' The given filename for each chunk.
Dim ChunkFile As String = String.Empty
' The chunk file basename.
ChunkName = If(String.IsNullOrEmpty(ChunkName),
IO.Path.Combine(fInfo.DirectoryName, IO.Path.GetFileNameWithoutExtension(fInfo.Name)),
IO.Path.Combine(fInfo.DirectoryName, ChunkName))
' The chunk file extension.
ChunkExt = If(String.IsNullOrEmpty(ChunkExt),
fInfo.Extension.Substring(1I),
ChunkExt)
' If ChunkSize is bigger than filesize then...
If ChunkSize >= fInfo.Length Then
Throw New OverflowException("'ChunkSize' should be smaller than the Filesize.")
Exit Sub
' ElseIf ChunkSize > 2147483590I Then ' (Integer.MaxValue - 57)
' Throw New OverflowException("'ChunkSize' limit exceeded.")
' Exit Sub
End If ' ChunkSize <>...
' If not file-overwrite is allowed then...
If Not Overwrite Then
For ChunkIndex As Integer = 0I To (ChunkCount)
Zeros = New String("0", CStr(ChunkCount).Length - CStr(ChunkIndex + 1).Length)
' If chunk file already exists then...
If IO.File.Exists(String.Format("{0}.{1}.{2}", ChunkName, Zeros & CStr(ChunkIndex + 1I), ChunkExt)) Then
Throw New IO.IOException(String.Format("File already exist: {0}", ChunkFile))
Exit Sub
End If ' IO.File.Exists
Next ChunkIndex
End If ' Overwrite
' Open the file to start reading bytes.
Using InputStream As New IO.FileStream(fInfo.FullName, IO.FileMode.Open)
Using BinaryReader As New IO.BinaryReader(InputStream)
BinaryReader.BaseStream.Seek(0L, IO.SeekOrigin.Begin)
For ChunkIndex As Integer = 0I To ChunkCount
Zeros = New String("0", CStr(ChunkCount).Length - CStr(ChunkIndex + 1).Length)
ChunkFile = String.Format("{0}.{1}.{2}", ChunkName, Zeros & CStr(ChunkIndex + 1I), ChunkExt)
If ChunkIndex <> ChunkCount Then ' Read the ChunkSize bytes.
InputStream.Position = (ChunkSize * CLng(ChunkIndex))
BinaryReader.Read(ChunkBuffer, 0I, ChunkSize)
Else ' Read the remaining bytes of the LastChunkSize.
InputStream.Position = (ChunkSize * ChunkIndex) + 1
BinaryReader.Read(LastChunkBuffer, 0I, LastChunkSize)
End If ' ChunkIndex <> ChunkCount
' Create the chunk file to Write the bytes.
Using OutputStream As New IO.FileStream(ChunkFile, IO.FileMode.Create)
Using BinaryWriter As New IO.BinaryWriter(OutputStream)
If ChunkIndex <> ChunkCount Then
BinaryWriter.Write(ChunkBuffer)
Else
BinaryWriter.Write(LastChunkBuffer)
End If
OutputStream.Flush()
End Using ' BinaryWriter
End Using ' OutputStream
' Report the progress...
' RaiseEvent ProgressChanged(CDbl((100I / ChunkCount) * ChunkIndex))
Next ChunkIndex
End Using ' BinaryReader
End Using ' InputStream
End Sub
答案 0 :(得分:2)
重新考虑你的方法。要分割文件,您只需要一个小缓冲区。最多以1MB的块读写。不需要更多。使用您的方法,您可以同时在RAM中缓冲2GB,但不需要缓冲整个块。只需跟踪读取和写入每个文件的总字节数。
从技术上讲,你可以使用单字节缓冲区,但效率很低。
如果您真的想要调整性能,请尝试使用循环缓冲区或具有独立读写线程的独立缓冲区来重叠IO,以便您可以并行读写。一旦你的读取填充一个缓冲区,你可以让一个写线程开始写它,而你的读线程继续另一个缓冲区。我们的想法是消除串行锁定步骤&#34;使用单个缓冲区。
答案 1 :(得分:2)
正如我在评论中写的那样,您可以将数据写入块,直到它们的大小足够大。在循环中使用较小的缓冲区(我从您的问题中获取了一些代码部分)完成读取,同时计算已经写入了多少字节。
' Open the file to start reading bytes.
Using InputStream As New IO.FileStream(fInfo.FullName, IO.FileMode.Open)
Using BinaryReader As New IO.BinaryReader(InputStream)
Dim OneMegabyte As Integer = 1024 * 1024 'Defines length of one MB
'Account for cases where a chunksize smaller than one MegaByte is requested
Dim BufferSize As Integer
If ChunkSize < OneMegabyte Then
BufferSize = CInt(ChunkSize)
Else
BufferSize = OneMegabyte
End If
Dim BytesWritten As Long = 0 'Counts the length of the current file
Dim ChunkIndex As Integer = 0 'Keep track of the number of chunks
While InputStream.Position < InputStream.Length
ChunkFile = String.Format("{0}.{1}.{2}", ChunkName, Zeros & CStr(ChunkIndex + 1I), ChunkExt) 'Define filename
BytesWritten = 0 'Reset length counter
' Create the chunk file to Write the bytes.
Using OutputStream As New IO.FileStream(ChunkFile, IO.FileMode.Create)
Using BinaryWriter As New IO.BinaryWriter(OutputStream)
While BytesWritten < ChunkSize AndAlso InputStream.Position < InputStream.Length 'Read until you have reached the end of the input
Dim ReadBytes() As Byte = BinaryReader.ReadBytes(BufferSize) 'Read one megabyte
BinaryWriter.Write(ReadBytes) 'Write this megabyte
BytesWritten += ReadBytes.Count 'Increment size counter
End While
OutputStream.Flush()
End Using ' BinaryWriter
End Using ' OutputStream
ChunkIndex += 1 'Increment file counter
End While
End Using ' BinaryReader
End Using ' InputStream