VBA-Web抓取无法获取HTMLElement innerText

时间:2018-08-26 16:09:05

标签: excel vba web-scraping

我正在尝试使用excel VBA取消汇率,但无法获取所需的innerText值。我不明白为什么,因为其他网站也可以使用相同的技术。

URL-https://www.nbs.rs/export/sites/default/internet/english/scripts/kl_srednji.html

Sub GetCurr()

Dim tempHTMLDoc As New MSHTML.HTMLDocument
Dim HTMLCurrency As MSHTML.IHTMLElementCollection
Dim HTMLRows As MSHTML.IHTMLElementCollection
Dim HTMLDate As MSHTML.IHTMLElementCollection
Dim HTMLElem As MSHTML.IHTMLElement
Dim connectionTest As Boolean
Dim EUR, CZK, HRK, HUF, PLN, RON, RSD As String
Dim myURL As String
Dim i As Long

connectionTest = True
myURL = "https://www.nbs.rs/export/sites/default/internet/english/scripts/kl_srednji.html"

Call WebConnection(tempHTMLDoc, connectionTest, myURL)
If connectionTest = False Then Exit Sub

Set HTMLDate = tempHTMLDoc.getElementsByTagName("span")
'Debug.Print HTMLDate.Length

For Each HTMLElem In HTMLDate 'I am looking for which element contains the date (can not find)
  Debug.Print HTMLElem.innerText
Next HTMLElem

'I am trying to get the necessary currencies
Set HTMLRows = tempHTMLDoc.getElementsByTagName("tr")

Debug.Print HTMLRows.Length

For i = 0 To HTMLRows.Length - 1 'If lenght > 0

    Set HTMLCurrency = HTMLRows(i).getElementsByTagName("td")

    If HTMLCurrency.Length > 4 Then 'each currency contains 5 "td" tags

        Select Case HTMLCurrency(2).innerText
            Case "EUR"
                EUR = HTMLCurrency(4).innerText
            Case "HRK"
                HRK = HTMLCurrency(4).innerText
            Case "HUF"
                HUF = HTMLCurrency(4).innerText
            Case "PLN"
                PLN = HTMLCurrency(4).innerText
            Case "RON"
                RON = HTMLCurrency(4).innerText
            Case "CZK"
                CZK = HTMLCurrency(4).innerText
        End Select

    End If

Next i

Debug.Print "EUR - ", EUR; vbNewLine; "HRK - ", HRK; vbNewLine; "HUF - ", HUF; vbNewLine; "PLN - ", PLN; vbNewLine; _
            "RON - ", RON; vbNewLine; "CZK - ", CZK

End Sub

'============================================================================

Sub WebConnection(HTMLDoc As MSHTML.HTMLDocument, ConnTest As Boolean, URL As String)

Dim XMLPage As New MSXML2.XMLHTTP60
Dim errorMsg As VbMsgBoxResult

On Error GoTo CONNECTION_ERROR

XMLPage.Open "GET", URL, False
XMLPage.send

On Error GoTo 0

If XMLPage.Status <> 200 Then
    errorMsg = MsgBox("There is something wrong with webpage. Do you want to try to continue?", vbYesNo + vbCritical, "ERROR")
    If errorMsg = vbNo Then
        ConnTest = False
        Exit Sub
    End If
End If

HTMLDoc.body.innerHTML = XMLPage.responseText
Exit Sub

CONNECTION_ERROR:
MsgBox "There is something wrong with the connection.", vbCritical, "ERROR"
ConnTest = False
Exit Sub

End Sub

我试图使用id(index:srednjiKursList:tbody_element)或类名(tableCell),但它不起作用。该网站的构建方式不同

1 个答案:

答案 0 :(得分:2)

您的原始链接(称为登录页面)是动态加载的。您的ToOwned请求太快了,无法检索所需的信息。

您可以使用其他网址。

当您进入登录页面时,您会看到它实际上向以下页面发出了XMLHTTP GET请求:

get request

以上是使用fiddler的结果,但是您可以使用例如Chrome开发工具( F12 )检查网络流量。

您可以将该URL直接输入到您的代码中,并且效果很好。


整个表格:

您还可以按以下方式获取整个表格:

GET

结果示例:

results



仅列出货币:

您还可以根据表结构使用一些数学运算来仅获取列出的那些元素。

Option Explicit
Public Sub GetInfo()
    Dim html As New HTMLDocument, hTable As HTMLTable, clipboard As Object
    Const URL = "https://www.nbs.rs/kursnaListaModul/srednjiKurs.faces?lang=eng"

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .send
        html.body.innerHTML = StrConv(.responseBody, vbUnicode)
    End With

    Set hTable = html.getElementById("index:srednjiKursLista")
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    clipboard.SetText hTable.outerHTML
    clipboard.PutInClipboard
    ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
End Sub

使用剪贴板:

以下行:

Option Explicit
Public Sub GetInfo()
    Dim html As New HTMLDocument, hTable As HTMLTable, clipboard As Object
    Const URL = "https://www.nbs.rs/kursnaListaModul/srednjiKurs.faces?lang=eng"

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .send
        html.body.innerHTML = StrConv(.responseBody, vbUnicode)
    End With

    Set hTable = html.getElementById("index:srednjiKursLista")

    Dim list As Object, i As Long
    Dim EUR As Double, CZK As Double, HRK As Double, HUF As Double, PLN As Double, RON As Double, RSD As Double
    Set list = hTable.querySelectorAll("td")
    For i = 2 To list.Length - 1 Step 5
        Select Case list.item(i).innerText
        Case "EUR"
            EUR = list.item(i + 2).innerText
        Case "HRK"
            HRK = list.item(i + 2).innerText
        Case "HUF"
            HUF = list.item(i + 2).innerText
        Case "PLN"
            PLN = list.item(i + 2).innerText
        Case "RON"
            RON = list.item(i + 2).innerText
        Case "CZK"
            CZK = list.item(i + 2).innerText
        End Select
    Next

    Debug.Print "EUR - ", EUR; vbNewLine; "HRK - ", HRK; vbNewLine; "HUF - ", HUF; vbNewLine; "PLN - ", PLN; vbNewLine; _
                                                                                                      "RON - ", RON; vbNewLine; "CZK - ", CZK
End Sub

向Microsoft Forms对象库添加了后期绑定引用,以便您可以访问剪贴板。

您也可以将用户窗体添加到您的项目中,或者进入VBE>工具>引用> Microsoft Forms对象库以进行访问:

Forms