我想编写一段VBA代码,允许我检索位于特定在线目录中所有子文件夹中的所有文件的路径。
例如,给定的在线目录是http://cran.r-project.org/doc/所以我希望代码打开Internet Explorer并转到此目录并检查此子文件夹的所有子文件夹和所有子文件夹等,并检索所有文件的路径和它们被创建的日期。
我非常了解VBA,但在将其与其他应用程序相结合方面我几乎没有经验。
提前感谢所有线索。
答案 0 :(得分:0)
这是一个如何做到这一点的例子。 代码从root-url开始,按标记名称搜索表元素。采用第一个表并循环遍历除前三个和最后一个(不包含任何数据)之外的行。在数据行中,然后获取名称和日期值的第二个和第三个单元格。如果name以斜杠结尾,那么它被认为是一个文件夹,递归调用函数'ReadPage',并将root加上文件夹名称的url传递给它。如果单元格值中没有斜杠,那么它被认为是日期单元格,然后我们可以打印有关路径和日期的信息。
函数'ReadPage'的每个实例都会打开自己的IE窗口,因此最终关闭此窗口。 HTH。
' Add reference to Microsoft Internet Controls and to Microsoft HTML Object Library
Option Explicit
Private Const rootUrl As String = "http://cran.r-project.org/doc/"
Public Sub main()
On Error GoTo err_main
ReadPage rootUrl
err_main:
If Err.Number <> 0 Then _
MsgBox Err.Description, vbCritical
End Sub
Private Sub ReadPage(url As String)
Dim browser As SHDocVw.InternetExplorer
Set browser = New SHDocVw.InternetExplorer
browser.Visible = True
browser.navigate url
Do Until (browser.readyState = 4 And Not browser.Busy)
DoEvents
Loop
Dim document As MSHTML.HTMLDocument
Set document = browser.document
Dim tables As MSHTML.IHTMLElementCollection
Set tables = document.getElementsByTagName("Table")
If tables.Length <= 0 Then
Debug.Print "No tables found in " + url
GoTo quit_me
End If
Dim table As MSHTML.HTMLTable
Set table = tables(0)
Dim row As MSHTML.HTMLTableRow
Dim cellName As MSHTML.HTMLTableCell
Dim cellDate As MSHTML.HTMLTableCell
Dim rowIndex
rowIndex = 0
For Each row In table.Rows
rowIndex = rowIndex + 1
If rowIndex <= 3 Or rowIndex = table.Rows.Length Then GoTo continue
Set cellName = row.Cells(1)
If cellName Is Nothing Then GoTo quit_me
If Right(cellName.innerText, 1) = "/" Then
' We are on folder
ReadPage url & cellName.innerText
Debug.Print "Finished with Url " & url & cellName.innerText
Else
' We are on file
Set cellDate = row.Cells(2)
If cellDate Is Nothing Then GoTo quit_me
Debug.Print url & cellName.innerText & " : " & cellDate.innerText
End If
continue:
Next row
quit_me:
If Not browser Is Nothing Then
browser.Quit
Set browser = Nothing
End If
End Sub
答案 1 :(得分:0)
这里只有一个IE实例的另一个方法。在这种情况下,文件会立即打印,但文件夹会被记住到一个集合中并单独进行。这是因为在浏览器导航到另一个页面之后,存储在像表这样的变量中的引用将不起作用并且需要重新设置。这种方式只需要表引用,但不需要第一个循环中的行。 HTH。
Option Explicit
Private Const rootUrl As String = "http://cran.r-project.org/doc/"
Public Sub main()
On Error GoTo err_main
Dim browser As SHDocVw.InternetExplorer
Set browser = New SHDocVw.InternetExplorer
ReadPage browser, rootUrl
err_main:
If Err.Number <> 0 Then _
MsgBox Err.Description, vbCritical
If Not browser Is Nothing Then
browser.Quit
Set browser = Nothing
End If
End Sub
Private Sub ReadPage(browser As SHDocVw.InternetExplorer, urlParam As String)
Dim table As MSHTML.HTMLTable
Set table = TableElement(browser, urlParam)
If table Is Nothing Then
Debug.Print "No tables found in " + urlParam, vbCritical
GoTo go_back
End If
Static logRow As Integer
Dim row As MSHTML.HTMLTableRow
Dim cellName As MSHTML.HTMLTableCell
Dim cellDate As MSHTML.HTMLTableCell
Dim rowIndex
Dim urls As Collection
Set urls = New Collection
rowIndex = 0
For Each row In table.Rows
rowIndex = rowIndex + 1
If rowIndex <= 3 Or _
rowIndex = table.Rows.Length Then _
GoTo continue
Set cellName = row.Cells(1)
If cellName Is Nothing Then GoTo go_back
If Right(cellName.innerText, 1) = "/" Then
' We are on folder, remember it
urls.Add urlParam & cellName.innerText
Else
' We are on file
Set cellDate = row.Cells(2)
If cellDate Is Nothing Then GoTo go_back
logRow = logRow + 1
Range("a" & logRow) = _
urlParam & cellName.innerText & _
" : " & cellDate.innerText
End If
continue:
Next row
Dim url
' loopt through folders
For Each url In urls
ReadPage browser, CStr(url)
Next url
go_back:
If Not urlParam = rootUrl Then
Set table = TableElement(browser, isGoBack:=True)
End If
End Sub
Private Function TableElement(browser As SHDocVw.InternetExplorer, _
Optional urlParam As String, Optional isGoBack = False) _
As MSHTML.HTMLTable
If isGoBack Then
browser.GoBack
Else
browser.navigate urlParam
End If
Do Until (browser.readyState = 4 And Not browser.Busy)
DoEvents
Loop
On Error Resume Next
Set TableElement = browser.document.getElementsByTagName("Table")(0)
On Error GoTo 0
End Function