试图了解如何以特定方式与网站互动。这是我正在处理的较大代码的一部分,该代码将遍历ContractorID列表。我需要在这里做的是以下事情:
找到显示“经审计的财务进行UFR归档”的链接,然后单击它。 (如果不存在,请结束该子项)
在随后的页面上,找到在“文档类别”下标识为“ UFR Excel模板”的链接,然后单击它。 (在这种情况下,该链接显示为“ 15-UFR18.xls”,但是由于没有一致的链接命名方案,因此,始终必须如上所述通过“文档类别”下的标签来标识正确的链接。 t存在,退出子目录。)
在随后的页面上,单击顶部的“下载”链接,然后将文件保存在以下文件路径下(该文件路径将在此时创建):C:\ Documents \ 042786217 \ 2018。
编辑:下面的代码将我带到单击下载按钮的位置,然后出现“打开/保存/取消”对话框。在那附近,只需要弄清楚如何将文件保存到特定路径即可。
Option Explicit
Sub UFRScraper()
If MsgBox("UFR Scraper will run now. Do you wish to continue?", vbYesNo) = vbNo Then Exit Sub
Dim IE As Object
Dim objElement As Object
Dim objCollection As Object
Dim ele As Object
Dim tbl_Providers As ListObject: Set tbl_Providers = ThisWorkbook.Worksheets("tbl_ProviderList").ListObjects("tbl_Providers")
Dim FEIN As String: FEIN = ""
Dim FEINList As Range: Set FEINList = tbl_Providers.ListColumns("FEIN").DataBodyRange
Dim ProviderName As String: ProviderName = ""
Dim ProviderNames As Range: Set ProviderNames = tbl_Providers.ListColumns("Provider Name").DataBodyRange
Dim FiscalYear As String: FiscalYear = ""
Dim urlUFRDetails As String: urlUFRDetails = ""
Dim i As Integer
' Create InternetExplorer Object
Set IE = CreateObject("InternetExplorer.Application")
' Show (True)/Hide (False) IE
IE.Visible = True
i = 1
For i = 1 To 3 'Limited to 3 during testing. Change when ready.
FEIN = FEINList(i, 1)
ProviderName = ProviderNames(i, 1)
urlUFRDetails = "https://ufr.osd.state.ma.us/WebAccess/SearchDetails.asp?ContractorID=" & FEIN & "&FilingYear=2018&nOrgPage=1&Year=2018"
IE.Navigate urlUFRDetails
' Wait while IE loading...
'IE ReadyState = 4 signifies the webpage has loaded (the first loop is set to avoid inadvertently skipping over the second loop)
Do While IE.ReadyState = 4: DoEvents: Loop 'Do While
Do Until IE.ReadyState = 4: DoEvents: Loop 'Do Until
'Step 2 is done here
Dim filingFound As Boolean: filingFound = False
For Each ele In IE.Document.getElementsByTagName("a")
If ele.innerText = "UFR Filing with Audited Financials" Then
filingFound = True
IE.Navigate ele.href
Do While IE.ReadyState = 4: DoEvents: Loop 'Do While
Do Until IE.ReadyState = 4: DoEvents: Loop 'Do Until
Exit For
End If
Next ele
If filingFound = False Then
GoTo Skip
End If
'Step 3
Dim j As Integer: j = 0
Dim UFRFileFound As Boolean: UFRFileFound = False
For Each ele In IE.Document.getElementsByTagName("li")
j = j + 1
If ele.innerText = "UFR Excel Template" Then
UFRFileFound = True
IE.Navigate "https://ufr.osd.state.ma.us/WebAccess/documentviewact.asp?counter=" & j - 4
Do While IE.ReadyState = 4: DoEvents: Loop 'Do While
Do Until IE.ReadyState = 4: DoEvents: Loop 'Do Until
Exit For
End If
Next ele
If UFRFileFound = False Then
GoTo Skip
End If
'Step 4
IE.Document.getElementById("LinkButton2").Click
'**Built in wait time to avoid accidentally overloading server with repeated quick requests during development and testing**
Skip:
Application.Wait (Now + TimeValue("0:00:03"))
MsgBox "Loop " & i & " complete."
Next i
'Unload IE
IE.Quit
Set IE = Nothing
Set objElement = Nothing
Set objCollection = Nothing
MsgBox "Process complete!"
End Sub
答案 0 :(得分:1)
我已经尝试了一些漫长的方法进行步骤3。但目前无法提供完整的下载代码(在一次成功的手动尝试之后),甚至无法手动尝试导致消息“无法检索文件”(也许是服务器端限制)
代码只会带您进入xlx文件中包含href
的单元格
Dim doc As HTMLDocument
Dim Tbl As HTMLTable, Cel As HTMLTableCell, Rw As HTMLTableRow, Col As HTMLTableCol
Set doc = IE.document
For Each ele In IE.document.getElementsByClassName("boxedContent")
For Each Tbl In ele.getElementsByTagName("table")
For Each Rw In Tbl.Rows
For Each Cel In Rw.Cells
'Debug.Print Cel.innerText
If InStr(1, Cel.innerText, "UFR Excel Template") > 0 Then
Debug.Print Rw.Cells(2).innerText & " - " & Rw.Cells(2).innerHTML
End If
Next
Next Rw
Next Tbl
Next
一旦href
可用PtrSafe
函数或WinHTTPrequest
或其他方法可以用来下载文件。在这种情况下,欢迎并渴望从@QHarr等专家那里获得一些更有效的答案。