VBA - 从联机目录中的所有子文件夹获取所有文件路径

时间:2014-01-24 22:11:35

标签: excel internet-explorer vba directory filepath

我想编写一段VBA代码,允许我检索位于特定在线目录中所有子文件夹中的所有文件的路径。

例如,给定的在线目录是http://cran.r-project.org/doc/所以我希望代码打开Internet Explorer并转到此目录并检查此子文件夹的所有子文件夹和所有子文件夹等,并检索所有文件的路径和它们被创建的日期。

我非常了解VBA,但在将其与其他应用程序相结合方面我几乎没有经验。

提前感谢所有线索。

2 个答案:

答案 0 :(得分:0)

这是一个如何做到这一点的例子。 代码从root-url开始,按标记名称搜索表元素。采用第一个表并循环遍历除前三个和最后一个(不包含任何数据)之外的行。在数据行中,然后获取名称和日期值的第二个和第三个单元格。如果name以斜杠结尾,那么它被认为是一个文件夹,递归调用函数'ReadPage',并将root加上文件夹名称的url传递给它。如果单元格值中没有斜杠,那么它被认为是日期单元格,然后我们可以打印有关路径和日期的信息。

函数'ReadPage'的每个实例都会打开自己的IE窗口,因此最终关闭此窗口。 HTH。

' Add reference to Microsoft Internet Controls and to Microsoft HTML Object Library

Option Explicit

Private Const rootUrl As String = "http://cran.r-project.org/doc/"

Public Sub main()

    On Error GoTo err_main

    ReadPage rootUrl

err_main:

    If Err.Number <> 0 Then _
        MsgBox Err.Description, vbCritical

End Sub

Private Sub ReadPage(url As String)

    Dim browser As SHDocVw.InternetExplorer
    Set browser = New SHDocVw.InternetExplorer
    browser.Visible = True
    browser.navigate url

    Do Until (browser.readyState = 4 And Not browser.Busy)
        DoEvents
    Loop

    Dim document As MSHTML.HTMLDocument
    Set document = browser.document

    Dim tables As MSHTML.IHTMLElementCollection
    Set tables = document.getElementsByTagName("Table")
    If tables.Length <= 0 Then
        Debug.Print "No tables found in " + url
        GoTo quit_me
    End If

    Dim table As MSHTML.HTMLTable
    Set table = tables(0)

    Dim row As MSHTML.HTMLTableRow
    Dim cellName As MSHTML.HTMLTableCell
    Dim cellDate As MSHTML.HTMLTableCell
    Dim rowIndex

    rowIndex = 0
    For Each row In table.Rows
        rowIndex = rowIndex + 1
        If rowIndex <= 3 Or rowIndex = table.Rows.Length Then GoTo continue

        Set cellName = row.Cells(1)
        If cellName Is Nothing Then GoTo quit_me

        If Right(cellName.innerText, 1) = "/" Then
            ' We are on folder
            ReadPage url & cellName.innerText
            Debug.Print "Finished with Url " & url & cellName.innerText
        Else
            ' We are on file
            Set cellDate = row.Cells(2)
            If cellDate Is Nothing Then GoTo quit_me

            Debug.Print url & cellName.innerText & " : " & cellDate.innerText
        End If
continue:
    Next row

quit_me:
    If Not browser Is Nothing Then
        browser.Quit
        Set browser = Nothing
    End If
End Sub

答案 1 :(得分:0)

这里只有一个IE实例的另一个方法。在这种情况下,文件会立即打印,但文件夹会被记住到一个集合中并单独进行。这是因为在浏览器导航到另一个页面之后,存储在像表这样的变量中的引用将不起作用并且需要重新设置。这种方式只需要表引用,但不需要第一个循环中的行。 HTH。

Option Explicit

Private Const rootUrl As String = "http://cran.r-project.org/doc/"

Public Sub main()

    On Error GoTo err_main

    Dim browser As SHDocVw.InternetExplorer
    Set browser = New SHDocVw.InternetExplorer

    ReadPage browser, rootUrl

err_main:

    If Err.Number <> 0 Then _
        MsgBox Err.Description, vbCritical

    If Not browser Is Nothing Then
        browser.Quit
        Set browser = Nothing
    End If

End Sub

Private Sub ReadPage(browser As SHDocVw.InternetExplorer, urlParam As String)

    Dim table As MSHTML.HTMLTable
    Set table = TableElement(browser, urlParam)

    If table Is Nothing Then
        Debug.Print "No tables found in " + urlParam, vbCritical
        GoTo go_back
    End If

    Static logRow As Integer
    Dim row As MSHTML.HTMLTableRow
    Dim cellName As MSHTML.HTMLTableCell
    Dim cellDate As MSHTML.HTMLTableCell
    Dim rowIndex
    Dim urls As Collection

    Set urls = New Collection
    rowIndex = 0

    For Each row In table.Rows
        rowIndex = rowIndex + 1
        If rowIndex <= 3 Or _
           rowIndex = table.Rows.Length Then _
            GoTo continue

        Set cellName = row.Cells(1)
        If cellName Is Nothing Then GoTo go_back

        If Right(cellName.innerText, 1) = "/" Then
            ' We are on folder, remember it
            urls.Add urlParam & cellName.innerText

        Else
            ' We are on file
            Set cellDate = row.Cells(2)
            If cellDate Is Nothing Then GoTo go_back

            logRow = logRow + 1
            Range("a" & logRow) = _
                urlParam & cellName.innerText & _
                " : " & cellDate.innerText
        End If
continue:
    Next row

    Dim url
    ' loopt through folders
    For Each url In urls
        ReadPage browser, CStr(url)
    Next url

go_back:
    If Not urlParam = rootUrl Then
        Set table = TableElement(browser, isGoBack:=True)
    End If
End Sub

Private Function TableElement(browser As SHDocVw.InternetExplorer, _
    Optional urlParam As String, Optional isGoBack = False) _
    As MSHTML.HTMLTable

    If isGoBack Then
        browser.GoBack
    Else
        browser.navigate urlParam
    End If

    Do Until (browser.readyState = 4 And Not browser.Busy)
        DoEvents
    Loop

    On Error Resume Next
    Set TableElement = browser.document.getElementsByTagName("Table")(0)
    On Error GoTo 0
End Function