优化子目录中的递归文件搜索速度?

时间:2015-05-28 15:41:49

标签: performance vba excel-vba excel

我正在寻找使用Excel宏以递归方式搜索文件模式的子目录的最快的方法。 Excel VBA似乎相当缓慢。

到目前为止我尝试过的事情(有些基于其他stackoverflow建议):

  • 独家使用Dir来递归子目录并在每个文件夹中搜索filepattern。 (最慢)
  • 使用Folder.Files集合迭代FileSystemObject文件夹,针对filepattern检查每个文件。 (更好,但仍然很慢)
  • 通过FileSystemObject文件夹迭代,然后使用Dir检查文件模式的每个文件夹(目前为止速度最快,但每个文件仍需要几秒钟,如果可能的话我想优化)

我查看了My.Computer.FileSystem.GetFiles,这看起来很完美(允许您指定通配符模式并使用单个命令搜索子文件夹) - 但它似乎不受支持我只能在VB中使用Excel VBA中的Excel VBA。

我目前正在使用下面的FindFile Sub,它目前具有最佳性能。如果有人有关于如何进一步改进这一点的建议,我将非常感激!

Option Explicit
Private Declare Function GetTickCount Lib "kernel32" () As Long


Function Recurse(sPath As String, targetName As String) As String

    Dim FSO As New FileSystemObject
    Dim myFolder As Folder
    Dim mySubFolder As Folder
    Dim myFile As File

    On Error Resume Next
    Set myFolder = FSO.GetFolder(sPath)
    If Err.Number <> 0 Then
        MsgBox "Error accessing " & sPath & ". The macro will abort."
        Err.Clear
        Exit Function
    End If
    On Error GoTo 0

    Dim foundFolderPath As String
    Dim foundFileName As String
    foundFolderPath = ""
    foundFileName = ""

    For Each mySubFolder In myFolder.SubFolders

        foundFileName = Dir(mySubFolder.Path & "\" & targetName & "*")
        If foundFileName <> vbNullString Then
            foundFolderPath = mySubFolder.Path & "\" & foundFileName
        End If

        If foundFolderPath <> vbNullString Then
            Recurse = foundFolderPath
            Exit Function
        End If

        foundFolderPath = Recurse(mySubFolder.Path, targetName)

        If foundFolderPath <> vbNullString Then
            Recurse = foundFolderPath
            Exit Function
        End If
    Next

End Function


Sub FindFile()

    Dim start As Long
    start = GetTickCount()

    Dim targetName As String
    Dim targetPath As String
    targetName = Range("A1").Value 'Target file name without extension
    targetPath = "C:\Example\" & Range("B1").Value 'Subfolder name

    Dim target As String
    target = Recurse(targetPath, targetName)

    Dim finish As Long
    finish = GetTickCount()

    MsgBox "found: " & target & vbNewLine & vbNewLine & (finish - start) & " milliseconds"

End Sub

基于已接受的答案更新了文件搜索功能

此版本的FindFile()的执行速度是我在上述问题中最初粘贴的方法的两倍。正如下面的帖子中所讨论的,这应该适用于32位或64位版本的Excel 2010及更新版本。

Option Explicit

Private Declare PtrSafe Function FindClose Lib "kernel32" (ByVal hFindFile As LongPtr) As Long
Private Declare PtrSafe Function FindFirstFileW Lib "kernel32" (ByVal lpFileName As LongPtr, ByVal lpFindFileData As LongPtr) As LongPtr
Private Declare PtrSafe Function FindNextFileW Lib "kernel32" (ByVal hFindFile As LongPtr, ByVal lpFindFileData As LongPtr) As LongPtr

Private Type FILETIME
  dwLowDateTime  As Long
  dwHighDateTime As Long
End Type

Const MAX_PATH  As Long = 260
Const ALTERNATE As Long = 14

' Can be used with either W or A functions
' Pass VarPtr(wfd) to W or simply wfd to A
Private Type WIN32_FIND_DATA
  dwFileAttributes As Long
  ftCreationTime   As FILETIME
  ftLastAccessTime As FILETIME
  ftLastWriteTime  As FILETIME
  nFileSizeHigh    As Long
  nFileSizeLow     As Long
  dwReserved0      As Long
  dwReserved1      As Long
  cFileName        As String * MAX_PATH
  cAlternate       As String * ALTERNATE
End Type

Private Const FILE_ATTRIBUTE_DIRECTORY As Long = 16 '0x10
Private Const INVALID_HANDLE_VALUE As LongPtr = -1

Function Recurse(folderPath As String, fileName As String)
    Dim fileHandle    As LongPtr
    Dim searchPattern As String
    Dim foundPath     As String
    Dim foundItem     As String
    Dim fileData      As WIN32_FIND_DATA

    searchPattern = folderPath & "\*"

    foundPath = vbNullString
    fileHandle = FindFirstFileW(StrPtr(searchPattern), VarPtr(fileData))
    If fileHandle <> INVALID_HANDLE_VALUE Then
        Do
            foundItem = Left$(fileData.cFileName, InStr(fileData.cFileName, vbNullChar) - 1)

            If foundItem = "." Or foundItem = ".." Then 'Skip metadirectories
            'Found Directory
            ElseIf fileData.dwFileAttributes And FILE_ATTRIBUTE_DIRECTORY Then
                foundPath = Recurse(folderPath & "\" & foundItem, fileName)
            'Found File
            'ElseIf StrComp(foundItem, fileName, vbTextCompare) = 0 Then 'these seem about equal
            ElseIf InStr(1, foundItem, fileName, vbTextCompare) > 0 Then 'for performance
                foundPath = folderPath & "\" & foundItem
            End If

            If foundPath <> vbNullString Then
                Recurse = foundPath
                Exit Function
            End If

        Loop While FindNextFileW(fileHandle, VarPtr(fileData))
    End If

    'No Match Found
    Recurse = vbNullString
End Function

Sub FindFile()

    Dim targetName As String
    Dim targetPath As String
    targetName = Range("A4").Value
    targetPath = "C:\Example\" & Range("B4").Value

    Dim target As String
    target = Recurse(targetPath, targetName)

    MsgBox "found: " & target

End Sub

3 个答案:

答案 0 :(得分:0)

使用FindFirstFile或FindFirstFileEx。内置的本机API的执行速度比VBA快得多。

答案在于stackoverflow:https://stackoverflow.com/a/3865850/2250183
如该答案中所述,您可以在此处找到示例代码:http://www.xtremevbtalk.com/showpost.php?p=1157418&postcount=4

更新了64位支持的示例

此代码适用于Excel 2010及更高版本的64位和32位。它不适用于早期版本的Excel。如果您打算使用64位版本,我建议您阅读documentation on 64 bit support in VBA。该文档还说明了如何添加对早期版本的Excel的支持。

Option Explicit

Private Declare PtrSafe Function FindClose Lib "kernel32" (ByVal hFindFile As LongPtr) As Long
Private Declare PtrSafe Function FindFirstFileW Lib "kernel32" (ByVal lpFileName As LongPtr, ByVal lpFindFileData As LongPtr) As LongPtr
Private Declare PtrSafe Function FindNextFileW Lib "kernel32" (ByVal hFindFile As LongPtr, ByVal lpFindFileData As LongPtr) As LongPtr

Private Type FILETIME
  dwLowDateTime  As Long
  dwHighDateTime As Long
End Type

Const MAX_PATH  As Long = 260
Const ALTERNATE As Long = 14

' Can be used with either W or A functions
' Pass VarPtr(wfd) to W or simply wfd to A
Private Type WIN32_FIND_DATA
  dwFileAttributes As Long
  ftCreationTime   As FILETIME
  ftLastAccessTime As FILETIME
  ftLastWriteTime  As FILETIME
  nFileSizeHigh    As Long
  nFileSizeLow     As Long
  dwReserved0      As Long
  dwReserved1      As Long
  cFileName        As String * MAX_PATH
  cAlternate       As String * ALTERNATE
End Type

Private Const INVALID_HANDLE_VALUE As LongPtr = -1

Private Sub Form_Load()
  Dim hFile     As LongPtr
  Dim sFileName As String
  Dim wfd       As WIN32_FIND_DATA

  sFileName = "c:\*.*" ' Can be up to 32,767 chars

  hFile = FindFirstFileW(StrPtr(sFileName), VarPtr(wfd))

  If hFile <> INVALID_HANDLE_VALUE Then
    Do While FindNextFileW(hFile, VarPtr(wfd))
      Debug.Print Left$(wfd.cFileName, InStr(wfd.cFileName, vbNullChar) - 1)
    Loop

    FindClose hFile
  End If
End Sub

答案 1 :(得分:0)

我遇到了类似的性能问题,我解决了上面建议的win API函数,我的问题与你的问题略有不同,因为我不需要递归搜索目录树,我只是将文件名从给定文件夹中拉出来集合,但你可以调整我的代码:

'for windows API call to FindFirstFileEx
Private Const INVALID_HANDLE_VALUE = -1
Private Const MAX_PATH = 260

Private Type FILETIME
    dwLowDateTime   As Long
    dwHighDateTime  As Long
End Type

Private Type WIN32_FIND_DATA
    dwFileAttributes    As Long
    ftCreationTime      As FILETIME
    ftLastAccessTime    As FILETIME
    ftLastWriteTime     As FILETIME
    nFileSizeHigh       As Long
    nFileSizeLow        As Long
    dwReserved0         As Long
    dwReserved1         As Long
    cFileName           As String * MAX_PATH
    cAlternate          As String * 14
End Type

Private Const FIND_FIRST_EX_CASE_SENSITIVE  As Long = 1
'MSDN: "This value is not supported until Windows Server 2008 R2 and Windows 7."
Private Const FIND_FIRST_EX_LARGE_FETCH     As Long = 2

Private Enum FINDEX_SEARCH_OPS
    FindExSearchNameMatch
    FindExSearchLimitToDirectories
    FindExSearchLimitToDevices
End Enum

Private Enum FINDEX_INFO_LEVELS
    FindExInfoStandard
    FindExInfoBasic 'MSDN: "This value is not supported until Windows Server 2008 R2 and Windows 7."
    FindExInfoMaxInfoLevel
End Enum

Private Declare Function FindFirstFileEx Lib "kernel32" Alias "FindFirstFileExA" ( _
ByVal lpFileName As String, ByVal fInfoLevelId As Long, lpFindFileData As WIN32_FIND_DATA, _
    ByVal fSearchOp As Long, ByVal lpSearchFilter As Long, ByVal dwAdditionalFlags As Long) As Long
Private Declare Function FindNextFile Lib "kernel32" Alias "FindNextFileA" ( _
    ByVal hFindFile As Long, lpFindFileData As WIN32_FIND_DATA) As Long
Private Declare Function FindClose Lib "kernel32" (ByVal hFindFile As Long) As Long


Private Function GetFileNames(ByVal sPath As String) As Collection

    Dim fileInfo    As WIN32_FIND_DATA  'buffer for file info
    Dim hFile       As Long             'file handle
    Dim colFiles    As New Collection

    sPath = sPath & "*.*"

    hFile = FindFirstFileEx(sPath & vbNullChar, FindExInfoBasic, fileInfo, FindExSearchNameMatch, 0&, FIND_FIRST_EX_LARGE_FETCH)

    If hFile <> INVALID_HANDLE_VALUE Then
        Do While FindNextFile(hFile, fileInfo)
            colFiles.Add Left(fileInfo.cFileName, InStr(fileInfo.cFileName, vbNullChar) - 1)
        Loop

        FindClose hFile
    End If

    Set GetFileNames = colFiles

End Function

答案 2 :(得分:0)

我在这个问题上花了几天时间,想出了这段代码。从 reddit 帖子中获取第一部分(感谢)并对其进行了一些修改。我插入了一些不同的目录情况,并且搜索的时间相似。

在 223 个文件夹中找到。 FindFast:23.42 秒,递归:24.14 秒。 如果我在最后一个文件夹中选择一个文件来检查 FindFast,我们会在 387 个文件夹中找到。 FindFast:62.82 秒,递归:0.3 秒。所以文件夹检查的顺序是不一样的。

有一些差异需要注意。我的代码的最初目的是根据通配符(例如“*_ThisName.xlsx”)获取所有 xl 文件。它最终在 9 秒内给了我所有 41。我能够为我的多文件搜索节省 10 秒是因为我可以指定我要查找的文件位于名为“Working”的子目录中,并且我将目录计数限制为 10 个深度.我注释掉了这个测试的这些限制,它增加了 10 秒才能找到一个文件。

我仍然希望我们能进一步缩短搜索时间。

    Function FindFast(TargetFolder As String, Patt As String)
       
        Dim Folder As Object, SubFolder As Object, File As Object
        Dim FQueue As New Collection

       'Test view all folders:
'       Dim FolderColl As New Collection
        
       Dim Count As Integer

        Dim fl As String
           
        With CreateObject("Scripting.FileSystemObject")

            FQueue.Add .GetFolder(TargetFolder)
            Do While FQueue.Count > 0
               Set Folder = FQueue(1)
               FQueue.Remove 1
                'Code for individual folder
                For Each SubFolder In Folder.subFolders

                    'Test view all folders:
                    FolderColl.Add SubFolder

                    'Only 10 folders deep
     '               Count = Len(SubFolder) - Len(Replace(SubFolder, "\", ""))
     '               If Count < 13 Then
                        FQueue.Add SubFolder
                   
             '           ' Only look for the file in Working folder
               '        If InStr(1, SubFolder, "Working") > 1 Then
    
                            fl = Dir(SubFolder & "\" & Patt)
                ' Added as exact match return.  Otherwise will find all with pattern match
                If fl <> "" Then
                    FindFast = SubFolder & "\" & fl
                    Exit Function
                End If
                               
                 '       End If
        '             End If
                Next SubFolder
            Loop

'   Test view all folders:
' Dim i As Long
'For i = 1 To FolderColl.Count
'                                Range("A" & i).value = FolderColl(i)
'Next i           

        End With
         FindFast = vbNullString
    End Function
    
    
    Sub FindFile()
        Dim StartTime As Double
        Dim SecondsElapsed As Double
        Dim target As String
    
        Dim targetName As String
        Dim targetPath As String
        targetName = "5-3-21_Order_Sent.xlsx"
       '    Patt = "*_Order_Sent.xlsx"
        ' or wild extension   Patt = "*_ThisName.*"
    
        targetPath = "\\Fulfill\Company\Orders\Completed"

        StartTime = Timer    
        target = FindFast(targetPath, targetName)
        Debug.Print target
        SecondsElapsed = Round(Timer - StartTime, 2)
        Debug.Print "FindFast: " & SecondsElapsed & " Secs"
    
        MsgBox "found FindFast: " & target & " - " & SecondsElapsed & " Secs"       

        StartTime = Timer   
        target = Recurse(targetPath, targetName)
        Debug.Print target
    
         SecondsElapsed = Round(Timer - StartTime, 2)
        Debug.Print "Recurse: " & SecondsElapsed & " Secs"
    
         MsgBox "found Recurse: " & target & " - " & SecondsElapsed & " Secs"
    
    End Sub

所以在上面的这种形式中,它正在加载所有文件夹,按子文件夹筛选它们,子文件夹从最旧到最新的文件夹。在一种形式中,我希望在最新文件夹中的文件中查找记录,目录越旧,可能性越小。在另一种用法中,我希望获得所有文件的列表,并在可能是最新的文件管理器中查找记录,但可能更旧,文件越旧,可能性越小。