我正在尝试从网页上抓取一张表格。下面的代码能够执行到
Set hTable = html.querySelector
这一行,但执行之后 hTable
仍然是 Nothing(没有取到任何元素)。
有人能建议如何正确定位页面上的数据表格吗?
' Downloads the Morningstar fund screener page with a synchronous XMLHTTP GET,
' looks up the results table in the returned markup and copies its cells into
' "Sheet1" underneath a fixed header row.
'
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' the Microsoft HTML Object Library.
Public Sub GetSomeData()
    Const URL As String = "https://www.morningstar.co.uk/uk/screener/fund.aspx#?filtersSelectedValue=%7B%22analystRatingScale%22:%7B%22id%22:%225%22%7D,%22starRating%22:%7B%22id%22:%225%22%7D%7D&page=1&perPage=10&sortField=legalName&sortOrder=asc&universeId=FOCAN$$ALL"

    ' hTable is declared As Object rather than HTMLTable so that a match on a
    ' non-<table> element does not raise a type-mismatch error on assignment.
    Dim html As HTMLDocument, hTable As Object, ws As Worksheet, headers()
    Dim td As Object, tr As Object, r As Long, c As Long

    headers = Array("Tick", "Fund", "1 Day", "1 Week", "1 Month", "3 Months", "6 Months")
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False          ' False = synchronous request
        .send
        ' Do not try to parse an error page as if it were the results.
        If .Status <> 200 Then
            MsgBox "Request failed: HTTP " & .Status & " " & .statusText, vbExclamation
            Exit Sub
        End If
        html.body.innerHTML = .responseText
    End With

    ' NOTE(review): as written this selector matches an element whose TAG is
    ' "ID" and whose class is "ec-screener-...". An id lookup would be
    ' "#ec-screener-..." and a class lookup ".ec-screener-..." - confirm
    ' against the page markup. Also note that everything after "#" in URL is a
    ' fragment, which is not transmitted in the HTTP request, so the response
    ' may not contain script-rendered results at all.
    Set hTable = html.querySelector("ID.ec-screener-results-view-container-section-panel-table-securities")

    ' querySelector yields Nothing when no element matches; iterating over it
    ' would otherwise fail with run-time error 91.
    If hTable Is Nothing Then
        MsgBox "Results table not found in the downloaded HTML.", vbExclamation
        Exit Sub
    End If

    r = 1
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        For Each tr In hTable.getElementsByTagName("tr")
            r = r + 1: c = 1
            ' The first two <tr> elements (r = 2 and r = 3) are skipped; the
            ' third lands on worksheet row 2, directly below the headers.
            If r > 3 Then
                For Each td In tr.getElementsByTagName("td")
                    ' Column 2 gets a leading apostrophe so Excel stores the
                    ' value as text instead of reinterpreting it.
                    .Cells(r - 2, c) = IIf(c = 2, "'" & td.innerText, td.innerText)
                    c = c + 1
                Next
            End If
        Next
    End With
End Sub
答案 0 :(得分:0)
由于页面上有很多动态元素(内容由 JavaScript 在浏览器中生成),因此无法使用 MSXML2.XMLHTTP 从此页面中读取任何数据。唯一的方法是使用 IE 或 Selenium 这类可以执行 JavaScript 的环境。
通过保存生成的HTML文档并打开它来自己查看:
' Diagnostic helper: downloads the Morningstar fund screener page with a
' synchronous XMLHTTP GET and writes the parsed body markup to a local file so
' the HTML actually returned by the server can be inspected.
'
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' the Microsoft HTML Object Library.
Public Sub GetSomeData()
    Const URL As String = "https://www.morningstar.co.uk/uk/screener/fund.aspx#?filtersSelectedValue=%7B%22analystRatingScale%22:%7B%22id%22:%225%22%7D,%22starRating%22:%7B%22id%22:%225%22%7D%7D&page=1&perPage=10&sortField=legalName&sortOrder=asc&universeId=FOCAN$$ALL"
    Dim html As HTMLDocument
    Dim fileNum As Integer

    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False          ' False = synchronous request
        .send
        html.body.innerHTML = .responseText
    End With

    '-----
    ' FreeFile returns an unused file number, avoiding a clash with a file
    ' that some other code already opened as #1.
    fileNum = FreeFile
    Open "E:\MorningStar.htm" For Output As #fileNum 'Use your own path
    Print #fileNum, html.body.outerHTML
    ' Close only this file; a bare Close would close every open file.
    Close #fileNum
    '-----
End Sub