使用多线程或Parallel.ForEach加速搜索文件

时间:2016-11-11 17:58:26

标签: vb.net multithreading parallel.foreach

我写了一个代码来搜索文件和文件夹,并且(检查插入的单词的所有可能组合)我有一个sub,它给出了插入字符串的所有排列。

我的问题是,我要为每个排列的字符串重复代码(对于4个单词,这意味着24次),并且我尝试使用MultiThreading来加速代码。

我已经阅读了很多例子,但由于很多原因,我无法真正理解逻辑(有些例子是在C中;任何例子都是用不同的逻辑编写的)

我已经尝试了

Parallel.For  
Parallel.ForEach 
ThreadPool

但在将List(包含所有结果)设置为列表框的数据源之前,我无法等待所有线程。

我的代码逻辑是:
通过拆分搜索字符串获取单词
如果搜索类型是"任何顺序的所有单词"然后我得到了所有的排列 我开始搜索每个排列的字符串

我不想在问题中添加太多代码,但我认为在这种情况下有必要了解我的工作方式:

Private Sub Btn_Search_Click(sender As Object, e As EventArgs) Handles Btn_Search.Click
    Select Case True
        Case RBtn_Exact.Checked
            StartSearch(Me.TB_Pattern.Text.Trim)
        Case RBtn_AllInOrder.Checked
            Dim Pattern As String = ""
            For Each Word As String In Me.TB_Pattern.Text.Split(New Char() {" "c})
                If Word.Trim <> "" Then Pattern &= "*" & Word.Trim
            Next
            Pattern &= "*"
            StartSearch(Pattern)
            endsearch()
        Case RBtn_AllWithoutOrder.Checked
            Dim WordHash As New HashSet(Of String)
            For Each Word As String In Split(Me.TB_Pattern.Text, " ")
                If Word.Trim <> "" Then WordHash.Add(Word.Trim)
            Next
            If WordHash.Count > 5 Then
                MessageBox.Show("Max 5 words allowed for this kind of search", MessageBoxButtons.OK, MessageBoxIcon.Exclamation)
                Exit Sub
            End If
            'Get permutations into an array
            StringPermutations()
            'I need to add "*" at the end of each permutated string
            For S As Integer = 0 To PermutationsArr.Length - 1
                PermutationsArr(S) &= "*"
            Next
            'This is for searching without MultiThreading
            For Each Pattern As String In PermutationsArr
                StartSearch(Pattern)
            Next
            'This is my last test
            'Parallel.ForEach(PermutationsArr,
            '                    Sub(Pattern)
            '                        StartSearch(Pattern)
            '                    End Sub
            '                 )
            'Task.WaitAll()
            endsearch()
        Case RBtn_AnyWord.Checked
            Dim WordHash As New HashSet(Of String)
            For Each Word As String In Split(Me.TB_Pattern.Text, " ")
                If Word.Trim <> "" Then WordHash.Add(Word.Trim)
            Next
            If WordHash.Count > 5 Then
                MessageBox.Show("Max 5 words allowed for this kind of search", MessageBoxButtons.OK, MessageBoxIcon.Exclamation)
                Exit Sub
            End If
            For Each Word As String In WordHash
                StartSearch(pattern:="*" & Word & "*")
            Next
            endsearch()
    End Select
End Sub

Private Sub StartSearch(ByVal pattern As String)
    'Search for files
    If Me.CBox_Files.Checked Then
        FileSearch(Me.TB_StartFolder.Text, pattern)
    End If
    'Search for folders
    If Me.CBox_Folders.Checked Then
        ProcessDir(Me.TB_StartFolder.Text, pattern)

        DirSearch(Me.TB_StartFolder.Text, pattern)
    End If
End Sub

Sub endsearch()
    Me.Btn_Search.Text = "Start"
    Me.Btn_Search.BackColor = Me.BackColor
    If Me.LB_Files.Items.Count > 0 Then
        Me.Lbl_FilesFound.Text = Me.LB_Files.Items.Count.ToString
        Me.Lbl_FilesFound.Visible = True
    End If
    If Me.LB_Folders.Items.Count > 0 Then
        Me.Lbl_DirFound.Text = Me.LB_Folders.Items.Count.ToString
        Me.Lbl_DirFound.Visible = True
    End If
End Sub

Sub DirSearch(ByVal sDir As String, ByVal Pattern As String)
    Try
        For Each Dir As String In Directory.GetDirectories(sDir)
            Try
                For Each D As String In Directory.GetDirectories(Dir, Pattern)
                    Try
                        If LimitReached(LB_Folders) Then
                            Me.Lbl_LimitReached.Visible = True
                            Exit Sub
                        Else
                            If Me.CBox_Folders.Checked AndAlso Not LB_Folders.Items.Contains(D) Then LB_Folders.Items.Add(D)
                        End If
                    Catch ex As Exception
                        Continue For
                    End Try
                Next
                DirSearch(Dir, Pattern)
            Catch ex As Exception
                Continue For
            End Try
        Next
    Catch ex As Exception
    End Try
End Sub
Sub FileSearch(ByVal sDir As String, ByVal Pattern As String)
    Dim d As String = ""
    Try
        For Each f As String In Directory.GetFiles(sDir, Pattern)
            Try
                If LimitReached(LB_Files) Then
                    Me.Lbl_LimitReached.Visible = True
                    Exit Sub
                Else
                    If Me.CBox_LastModRange.Checked Then
                        If Me.CBox_Files.Checked AndAlso IntoRangeDate(f) AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    Else
                        If Me.CBox_Files.Checked AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    End If
                End If
            Catch ex As Exception
                Continue For
            End Try
        Next
        'Search for subfolders
        For Each d In Directory.GetDirectories(sDir)
            Try
                ProcessDir(d, Pattern)
            Catch ex As Exception
            End Try
            Try
                FileSearch(d, Pattern)
            Catch ex As Exception
            End Try
        Next
    Catch excpt As System.Exception
    End Try
End Sub

Private Sub ProcessDir(d As String, ByVal Pattern As String)
    Try
        For Each f As String In Directory.GetFiles(d, Pattern)
            Try
                If LimitReached(LB_Files) Then
                    Me.Lbl_LimitReached.Visible = True
                    Exit Sub
                Else
                    If Me.CBox_LastModRange.Checked Then
                        If Me.CBox_Files.Checked AndAlso IntoRangeDate(f) AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    Else
                        If Me.CBox_Files.Checked AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
                    End If
                End If
            Catch ex As Exception
                Continue For
            End Try
        Next
    Catch ex As System.Exception
    End Try
    Try
        For Each d In Directory.GetDirectories(d, Pattern)
            Try
                If Me.CBox_Folders.Checked AndAlso Not LB_Folders.Items.Contains(d) Then LB_Folders.Items.Add(d)
            Catch ex As Exception
                Continue For
            End Try
        Next
    Catch ex As Exception
    End Try
End Sub

修改
在我的代码下面获取排列(我知道它有一个特定的逻辑,但它工作,它似乎足够快):

Private Sub StringPermutations()
    Try
        Dim WordHash As New HashSet(Of String)
        For Each Word As String In Split(Me.TB_Pattern.Text, " ")
            If Word.Trim <> "" Then WordHash.Add(Word.Trim)
        Next
        Dim WordList As List(Of String) = WordHash.ToList
        ReDim PermutationsArr(Factorial(WordList.Count) - 1)
        AddString(WordList, 0)
    Catch ex As Exception
        MsgBox(ex.ToString)
    End Try
End Sub

Private Function Factorial(ByVal Num As Integer) As Integer
    Try
        If Num > 0 AndAlso Num < 12 Then
            Dim Result As Int32 = 1
            Do
                Result *= Num
                Num -= 1
            Loop Until Num <= 1
            Return Result
        Else
            Return 0
        End If
    Catch ex As Exception
        Return Nothing
    End Try
End Function

Private Sub AddString(ByVal WordList As List(Of String), ByVal StartId As Integer)
    Try
        Dim InsLoop As Integer = Factorial(WordList.Count - 1)
        If InsLoop = 0 Then InsLoop = 1
        For Each Word As String In WordList
            For InsWord As Integer = 1 To InsLoop
                PermutationsArr(StartId + InsWord - 1) &= "*" & Word
            Next
            If WordList.Count > 1 Then
                Dim Remaining As New List(Of String)
                For Each RemWord As String In WordList
                    If RemWord <> Word Then Remaining.Add(RemWord)
                Next
                AddString(Remaining, StartId)
            End If
            StartId += InsLoop
        Next
    Catch ex As Exception
        MsgBox(ex.ToString)
    End Try
End Sub

1 个答案:

答案 0 :(得分:1)

这是我的Form类,基于你的,但大大简化了。我使用Tasks进行多线程处理,ConcurrentDictionarys使用容量限制,并发级别和没有重复项来捕获结果,并在最后一次调用中填充Listbox,以最小化UI更新和相关的缓慢。并发级别是为了提供ConcurrentDictionary而生成的任务数。

function quickPrevious(e) {
   var pageToGoTo = (currentPage.page - 2) <= 0 ? 0 : (currentPage.page - 2);
   table.page(pageToGoTo).draw(false);
}

递归可以避免.Net的Imports System.Text.RegularExpressions Public Class SearchForm Private FoldersList As Concurrent.ConcurrentDictionary(Of String, Object) Private FilesList As Concurrent.ConcurrentDictionary(Of String, Object) Private Tasks As New List(Of Task) Private Words As New List(Of String) Private StopWatch As New Stopwatch ' Capacity of the ConcurrentDictionary objects ' Set this from user input on form to limit # of results returned Private Capacity As Int32 = 0 Private PermutationsArr() As String = Nothing Private Sub Btn_Search_Click(sender As Object, e As EventArgs) Handles Btn_Search.Click Btn_Search.Text = "Wait" ' Capacity of the ConcurrentDictionary objects ' Set this from user input on form to limit # of results returned Capacity = 10000 Tasks.Clear() Words.Clear() LB_Folders.DataSource = Nothing LB_Files.DataSource = Nothing Me.Refresh() StopWatch.Restart() Words.AddRange(Regex.Split(Regex.Replace(Me.TB_Pattern.Text.Trim, "\*", String.Empty), "\s+")) Select Case True Case String.IsNullOrWhiteSpace(Me.TB_Pattern.Text.Trim) MsgBox("Too few words", vbOKOnly, "Oops") Case Words.Count < 1 MsgBox("Too few words", vbOKOnly, "Oops") Case Words.Count > 5 MsgBox("Too many words", vbOKOnly, "Oops") Case Me.CBox_LastModRange.Checked AndAlso Me.DT_ModRangeEnd.Value < Me.DT_ModRangeStart.Value MsgBox("Range Start must precede Range End", vbOKOnly, "Oops") Case Me.RBtn_Exact.Checked FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity) FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity) With Join(Words.ToArray) If Me.CBox_Folders.Checked Then ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True SearchFolders(Me.TB_StartFolder.Text, .ToString, True) Else ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True) End If End If End With Case Me.RBtn_AllInOrder.Checked FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity) FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity) With String.Format("*{0}*", Join(Words.ToArray, "*")) If Me.CBox_Folders.Checked Then ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True SearchFolders(Me.TB_StartFolder.Text, .ToString, True) Else ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True) End If End With Case Me.RBtn_AllWithoutOrder.Checked StringPermutations() ' Math.Min caps the concurrency level at 40 FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(Math.Min(40, PermutationsArr.Count), Capacity) FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(Math.Min(40, PermutationsArr.Count), Capacity) For Each Pattern As String In PermutationsArr If Me.CBox_Folders.Checked Then ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True SearchFolders(Me.TB_StartFolder.Text, Pattern, True) 'Tasks.Add(Task.Run(Sub() SearchFolders(Me.TB_StartFolder.Text, Pattern))) Else ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, Pattern, True, True) End If Next Case Me.RBtn_AnyWord.Checked FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(Words.Count, Capacity) FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(Words.Count, Capacity) For Each Word In Words With String.Format("*{0}*", Word) If Me.CBox_Folders.Checked Then ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True SearchFolders(Me.TB_StartFolder.Text, .ToString, True) Else ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True) End If End With Next End Select Task.WaitAll(Tasks.ToArray) Debug.Print("Tasks Completed in {0}", StopWatch.Elapsed.ToString) Debug.Print("Adding {0} Folders", FoldersList.Keys.Count.ToString) Me.LB_Folders.DataSource = FoldersList.Keys Debug.Print("Adding {0} Files", FilesList.Keys.Count.ToString) Me.LB_Files.DataSource = FilesList.Keys Btn_Search.Text = "Search" End Sub Private Sub SearchFolders(FolderPath As String, Pattern As String, Optional FirstCall As Boolean = False) Try Dim Folders() As String = IO.Directory.GetDirectories(FolderPath) For Each Folder As String In Folders Dim SubFolders() As String = IO.Directory.GetDirectories(Folder, Pattern) For Each SubFolder As String In SubFolders Select Case True Case Not FilesList.Count < Capacity Exit For Case Not Me.CBox_LastModRange.Checked FoldersList.TryAdd(SubFolder, Nothing) Case FolderInModRange(Folder) FoldersList.TryAdd(SubFolder, Nothing) End Select Next If Me.CBox_Files.Checked Then ' Do NOT call this with Recursive = True from here! SearchFiles(Folder, Pattern) End If If FirstCall Then ' Perform multithreaded Recursion Tasks.Add(Task.Run(Sub() SearchFolders(Folder, Pattern))) Else ' Perform deep recursion within task thread...don't branch further SearchFolders(Folder, Pattern) End If Next Catch ex As UnauthorizedAccessException ' Access Denied Catch ex As Exception Debug.Print("SearchFiles: {0}", ex.ToString) End Try End Sub Private Sub SearchFiles(FolderPath As String, Pattern As String, Optional Recursive As Boolean = False, Optional FirstCall As Boolean = False) ' Recursive and FirstCall should only be True if NOT doing SearchFolders ' Recursive should only be True if called from the main thread or this method to continue the deep dive ' FirstCall should only be True if called from the main thread Try For Each Filename As String In IO.Directory.GetFiles(FolderPath, Pattern) Select Case True Case Not FilesList.Count < Capacity Exit For Case Not Me.CBox_LastModRange.Checked FilesList.TryAdd(Filename, Nothing) Case FileInModRange(Filename) FilesList.TryAdd(Filename, Nothing) End Select Next If Recursive Then Try Dim Folders() As String = IO.Directory.GetDirectories(FolderPath) For Each Folder As String In Folders If FirstCall Then ' Perform multithreaded Recursion Tasks.Add(Task.Run(Sub() SearchFiles(Folder, Pattern, Recursive))) Else ' Perform deep recursion within task thread...don't branch further SearchFiles(Folder, Pattern, Recursive) End If Next Catch ex As Exception ' Access Denied - Does this happen? Debug.Print("Recursive FolderPath: {0}", ex.Message) End Try End If Catch ex As UnauthorizedAccessException ' Access Denied Catch ex As Exception Debug.Print("SearchFiles: {0}", ex.ToString) End Try End Sub Private Function FolderInModRange(Folder As String) As Boolean Try With New IO.DirectoryInfo(Folder) Select Case True Case .LastWriteTime < Me.DT_ModRangeStart.Value Return False Case .LastWriteTime > Me.DT_ModRangeEnd.Value Return False Case Else Return True End Select End With Catch ex As Exception Debug.Print("FolderInModRange: {0}{1}{2}", Folder, Environment.NewLine, ex.ToString) End Try ' Only if exception is thrown Return False End Function Private Function FileInModRange(Filename As String) As Boolean Try With New IO.FileInfo(Filename) Select Case True Case .LastWriteTime < Me.DT_ModRangeStart.Value Return False Case .LastWriteTime > Me.DT_ModRangeEnd.Value Return False Case Else Return True End Select End With Catch ex As IO.PathTooLongException ' Path Too Long Catch ex As Exception Debug.Print("FileInModRange: {0}{1}{2}", Filename, Environment.NewLine, ex.ToString) End Try ' Only if exception is thrown Return False End Function End Class UnauthorizedAccessException方法在遇到用户无权访问的文件夹时生成的GetDirectories错误。

参考文献: