此程序要搜索的文件通常非常大(最大可达 12 GB)。我当前的搜索方法(读取一行,检查该行,如果命中就把它写入结果文件)对于大小适中的文件(几 MB 左右)效果很好。但对于我的“大”测试文件(约 2.5 GB),搜索一次大约需要 12 分钟。


  • 异步方法
  • 任务
  • TPL数据流
  • 这些方法的某些组合


'Searches the loaded file
    'Click handler for the search button.
    'Asks the user where to save the results, then streams the source file (filepath) line by
    'line, copies every line accepted by validChromosome into the results file and finally
    'reports the hit count and the elapsed search time.
    Private Sub searchBtn_Click(sender As Object, e As EventArgs) Handles searchBtn.Click
        'Prompts user to enter title of file to be created
        exportFD.Title = "Save as. . ."
        exportFD.Filter = "Text Files(*.txt)|*.txt" 'Limits user to only saving as .txt file

        'The dialog has to be shown to obtain a result; a cancelled dialog means there is nothing to do
        If exportFD.ShowDialog() = DialogResult.Cancel Then
            Return
        End If

        Dim strFileName As String = exportFD.FileName
        Dim searchHits As Integer = 0

        'Start timing only now so the time spent in the save dialog is not reported as search time
        Dim watch As Stopwatch = Stopwatch.StartNew()

        'Using blocks flush and close both files even if the search throws part-way through
        Using reader As New IO.StreamReader(filepath)
            Using writer As New IO.StreamWriter(strFileName, False)

                'Skip first line of SOURCE text file for search, but use it to write column headers to file
                Dim currentLine As String = reader.ReadLine()

                'ReadLine returns Nothing for an empty source file; then there is no header and no data
                If currentLine IsNot Nothing Then
                    Dim columnLine() As String = currentLine.Split(ControlChars.Tab)

                    'Never index past the columns actually present in the header line
                    Dim lastColumn As Integer = Math.Min(colCount, columnLine.Length) - 1

                    'First: Insert column names into NEW text file
                    For col As Integer = 0 To lastColumn
                        writer.Write(columnLine(col) & vbTab)
                    Next
                    'Terminate the header row so the first hit starts on its own line
                    writer.WriteLine()

                    'Search whole file, line by line. ReadLine returning Nothing is the end-of-file
                    'signal; Peek() > 0 would also stop at a NUL character.
                    currentLine = reader.ReadLine()
                    Do While currentLine IsNot Nothing
                        If validChromosome(currentLine) Then
                            'A hit: copy the line unchanged into the results file
                            writer.WriteLine(currentLine)
                            searchHits += 1
                        End If

                        'next line
                        currentLine = reader.ReadLine()
                    Loop
                End If

            End Using
        End Using

        watch.Stop()

        'Tell user how many lines matched and where the file was saved
        searchTxtB.Text = searchHits.ToString()
        MsgBox("Searched in: " + watch.Elapsed.ToString() + " and saved to: " + strFileName)

    End Sub

    'Checks whether one tab-delimited line satisfies every search criterion the user enabled.
    'The token at index N of the line is compared against the N-th control of criteriaPanel,
    'which is expected to be a GroupBox holding the child controls looked up by name below.
    'Returns True only if every enabled criterion matches; the first failing criterion, or any
    'exception raised while evaluating a token, makes the result False.
    Private Function validChromosome(chromString As String) As Boolean

        'Split line by delimiter
        Dim readRow() As String = Split(chromString, vbTab)

        'Iterate through string tokens and compare
        For token As Integer = 0 To readRow.Length - 1
            Try
                Dim currentGroupBox As GroupBox = DirectCast(criteriaPanel.Controls.Item(token), GroupBox)
                Dim checkedParameter As CheckBox = DirectCast(currentGroupBox.Controls("CheckBox"), CheckBox)

                'User does not want to search this parameter, so this token cannot disqualify the line
                If Not checkedParameter.Checked Then
                    Continue For
                End If

                Dim numericRadio As RadioButton = DirectCast(currentGroupBox.Controls("NumericRadio"), RadioButton)

                If numericRadio.Checked Then
                    'Searching by number
                    Dim value As Decimal
                    Dim lowerBox As NumericUpDown = DirectCast(currentGroupBox.Controls("NumericBoxLower"), NumericUpDown)
                    Dim upperBox As NumericUpDown = DirectCast(currentGroupBox.Controls("NumericBoxUpper"), NumericUpDown)

                    Dim lowerInclusiveCheck As CheckBox = DirectCast(currentGroupBox.Controls("NumericInclusiveLowerCheckBox"), CheckBox)
                    Dim upperInclusiveCheck As CheckBox = DirectCast(currentGroupBox.Controls("NumericInclusiveUpperCheckBox"), CheckBox)

                    'A token that is not a number can never satisfy a numeric criterion
                    If Not Decimal.TryParse(readRow(token), value) Then
                        Return False
                    End If

                    'Not within the given range user inputted for numeric search
                    If Not withinRange(value, lowerBox.Value, upperBox.Value, lowerInclusiveCheck.Checked, upperInclusiveCheck.Checked) Then
                        Return False
                    End If

                Else
                    'Searching by text
                    Dim textBox As TextBox = DirectCast(currentGroupBox.Controls("TextBox"), TextBox)

                    'If the comparison failed, then this chromosome is not valid.
                    If Not [String].Equals(readRow(token), textBox.Text, StringComparison.OrdinalIgnoreCase) Then
                        Return False
                    End If
                End If

            Catch ex As Exception

                'Simple error checking: a token that cannot be evaluated (for example no matching
                'criteria control for this index) means the line is not treated as a hit.
                Return False

            End Try
        Next

        'No enabled criterion rejected the line
        Return True

    End Function

    'Checks whether value lies between lower and upper.
    'inclusiveLower / inclusiveUpper choose, independently for each bound, whether a value
    'exactly equal to that bound counts as inside the range (True) or outside it (False).
    'Returns True only when both the lower-bound and the upper-bound check pass.
    Private Function withinRange(value As Decimal, lower As Decimal, upper As Decimal, inclusiveLower As Boolean, inclusiveUpper As Boolean) As Boolean
        'Each bound uses exactly one comparison: inclusive (>= / <=) or exclusive (> / <)
        Dim lowerCheck As Boolean = If(inclusiveLower, value >= lower, value > lower)
        Dim upperCheck As Boolean = If(inclusiveUpper, value <= upper, value < upper)

        Return lowerCheck AndAlso upperCheck

    End Function



TPL Dataflow 似乎非常适合这种场景,特别是因为它很容易支持 async。

