''' <summary>
''' Reads <paramref name="stream"/> line by line and collects the lines into a list,
''' stopping at the end of the stream or once the list holds
''' <paramref name="limit"/> entries.
''' </summary>
''' <param name="stream">Text stream to read; it is consumed from its current position.</param>
''' <param name="limit">Size at which harvesting stops. The check runs after each line is processed, so at least one line is read whenever the stream is not empty.</param>
''' <param name="unique">When True, lines are added through AddIfNotExist instead of Add.</param>
''' <param name="FirstSectionAsKey">Currently has no effect: both of its branches performed the same call. Kept so existing callers still compile.</param>
''' <param name="prob">A line is kept only when Rnd() is below this value, so the default of 1 keeps every line.</param>
''' <returns>The harvested lines, in the order they were read.</returns>
Public Function fileToColHarvest(ByRef stream As Scripting.TextStream, Optional ByRef limit As Integer = 2000000, Optional ByRef unique As Boolean = False, Optional ByRef FirstSectionAsKey As Boolean = False, Optional ByRef prob As Double = 1) As Generic.List(Of String)
    Dim harvested As New Generic.List(Of String)
    Do While Not stream.AtEndOfStream
        ' Pump the message queue so the UI stays responsive during long reads.
        System.Windows.Forms.Application.DoEvents()
        Dim buffer As String = stream.ReadLine
        If Microsoft.VisualBasic.Rnd() < prob Then
            If unique Then
                ' NOTE(review): AddIfNotExist is defined elsewhere; presumably it
                ' appends only when the value is not already present - confirm.
                harvested.AddIfNotExist(buffer)
            Else
                harvested.Add(buffer)
            End If
        End If
        If harvested.Count >= limit Then
            Exit Do
        End If
    Loop
    Return harvested
End Function
基本上,我想摆脱 Scripting.TextStream。
另外,我想逐行读取文本。
答案 0 :(得分:3)
您可以使用 StreamReader 及其异步方法 ReadLineAsync。
异步方法将取代“丑陋”Application.DoEvents()
''' <summary>
''' Asynchronously reads the file at <paramref name="pathToFile"/> line by line and
''' returns the sampled lines in file order, stopping once
''' <paramref name="limit"/> lines have been collected.
''' </summary>
''' <param name="pathToFile">Path of the text file to read. It is opened read-only.</param>
''' <param name="limit">Number of collected lines at which reading stops. The check runs after a line is added.</param>
''' <param name="isUnique">When True, only the first occurrence of each distinct line is collected.</param>
''' <param name="isFirstSectionAsKey">Not used; kept so the parameter list matches the synchronous version.</param>
''' <param name="prob">A line is kept only when VBMath.Rnd() is below this value, so 1 keeps every line.</param>
''' <returns>A task producing the collected lines.</returns>
Public Async Function FileToColHarvest(
        pathToFile As String,
        limit As Integer,
        isUnique As Boolean,
        isFirstSectionAsKey As Boolean,
        prob As Single) As Task(Of List(Of String))
    Dim lines = New List(Of String)()
    ' Membership set for unique mode; the list above keeps the file order,
    ' which enumerating a HashSet does not guarantee.
    Dim seen = New HashSet(Of String)()
    ' Ask for read access only so read-only files can be opened.
    Using stream As New FileStream(pathToFile, FileMode.Open, FileAccess.Read, FileShare.Read)
        Using reader As New StreamReader(stream)
            While Not reader.EndOfStream
                ' Awaiting the read returns control to the caller while I/O is
                ' pending, which replaces Application.DoEvents().
                Dim line = Await reader.ReadLineAsync()
                ' Same sampling rule as the synchronous version: keep when Rnd() < prob.
                If VBMath.Rnd() >= prob Then Continue While
                ' HashSet.Add returns False for a value that is already present.
                If isUnique AndAlso Not seen.Add(line) Then Continue While
                lines.Add(line)
                If lines.Count >= limit Then Exit While
            End While
        End Using
    End Using
    Return lines
End Function
与问题无直接关系:isUnique 参数把这个方法分成了两套不同的逻辑,所以我建议不要使用该参数,而是引入两个不同的方法:
FileToColHarvest(...)
FileToColHarvestWithUniqueOnly(...)