我有一个简单的程序,它读取.txt文件,然后将其拆分为许多文件" pMaxRows"行数。这些.txt文件很大 - 有些接近25Gb。现在它的运行速度不够快,我觉得应该有办法通过一次读/写多行来提高效率,但我对vb.net streamreader / streamwriter不是很有经验。
代码如下:
Public Sub Execute(ByVal pFileLocation As String, _
ByVal pMaxRows As Int32)
Dim sr As IO.StreamReader
Dim Row As String
Dim SourceRowCount As Int64
Dim TargetRowCount As int64
Dim TargetFileNumber As Int32
''Does the file exist in that location?
If IO.File.Exists(pFileLocation) = False Then
Throw New Exception("File does not exist at " & pFileLocation)
End If
''Split FileLocation into FileName and Folder Location
Dim arrFileLoc() As String = pFileLocation.Split("\")
Dim i As Integer = arrFileLoc.Length - 1
Dim FileName As String = arrFileLoc(i)
Dim FileLocationLength As Integer = pFileLocation.Length
Dim FileNameLength As Integer = FileName.Length
Dim Folder As String = pFileLocation.Remove(FileLocationLength - FileNameLength, FileNameLength)
''Read the file
sr = New IO.StreamReader(pFileLocation)
SourceRowCount = 0
TargetRowCount = 0
TargetFileNumber = 1
''Create First Target File Name
Dim TargetFileName As String
TargetFileName = TargetFileNumber & "_" & FileName
''Open streamreader and start reading lines
Do While Not sr.EndOfStream
''if it hits the target number of rows:
If (TargetRowCount = pMaxRows) Then
''Advance target file number
TargetFileNumber += 1
''Create New file with target file number
TargetFileName = TargetFileNumber & "_" & FileName
''Set target row count back to 0
TargetRowCount = 0
End If
''Read line
Row = sr.ReadLine()
''Write line
Using sw As New StreamWriter(Folder & TargetFileName, True)
sw.WriteLine(Row)
End Using
SourceRowCount += 1
TargetRowCount += 1
Loop
End Sub
有人有什么建议吗?如果以前已经回答过,那么即使把我带到正确的地方也会非常感激