VBA-网页抓取找不到正确的GET请求

时间:2018-08-27 13:56:09

标签: excel vba web-scraping xmlhttprequest

我的问题与其他问题VBA - web scraping can not get HTMLElement innerText有关。我有类似的问题

网站URL-https://www.hnb.hr/en/core-functions/monetary-policy/exchange-rate-list/exchange-rate-list

我需要获取货币参考的日期和所选的值。问题是我找不到最终生成这些值的正确GET请求。我发现它与POST请求有关:

POST / EN /芯功能/货币政策/汇率列表/汇率列表?p_p_id = tecajnalistacontroller_WAR_hnbtecajnalistaportlet&p_p_lifecycle = 2&p_p_state =正常&p_p_mode =视图&p_p_resource_id = getTecajnaAjaxDataURL&p_p_cacheability = cacheLevelPage&p_p_col_id =柱-2 p_p_col_count = 2 HTTP / 1.1

我想使用一种通过id,class或tag进行获取的技术-但要再次使用,只要GET URL请求太快而无法检索所需的信息

1 个答案:

答案 0 :(得分:2)

XMLHTTP请求和API:

我将使用他们的API,如下所示。我有一些帮助函数来帮助解析响应。在GetDict函数中,可以设置感兴趣的货币。在GetRate函数中,可以指定感兴趣的汇率。如果未指定,则默认为"median_rate"

调用API:

  

要获取特定日期的价格,请对[   以下网址:

     

http://hnbex.eu/api/v1/rates/daily/?date=YYYY-MM-DD

     

date参数是可选的。如果未设置,则为当前日期(今天)   使用。

您可以使用JSON parser来解析JSON响应,但是我发现使用SplitJSON字符串中获取所需信息更加简单。如果您熟悉JSON,我将很高兴地提供一个JSON解析示例。

Option Explicit

Public Sub GetInfo()
    'http://hnbex.eu/api/v1/
    Dim strJSON As String, http As Object, json As Object
    Const URL As String = "http://hnbex.eu/api/v1/rates/daily/"

    Set http = CreateObject("MSXML2.XMLHTTP")
    With http
        .Open "GET", URL, False
        .send
        strJSON = .responseText
    End With
    'Set json = JsonConverter.ParseJson(strJSON) '<== You could parse the JSON using a JSON parse such as [JSONConverter][1]

    Dim currencyDict As Object
    Set currencyDict = GetDict

    Dim key As Variant, dictKeys As Variant, result As Variant
    For Each key In currencyDict.keys
        result = GetRate(strJSON, key)
        If Not IsError(result) Then currencyDict(key) = result
        result = vbNullString
    Next key

    PrintDictionary currencyDict

End Sub

Public Function GetDict() As Object '<== You could adapt to pass currencies as string arguments to the function. Or even a string array.
    Dim dict As Object
    Set dict = CreateObject("Scripting.Dictionary")
    dict.Add "EUR", vbNullString
    dict.Add "CZK", vbNullString
    dict.Add "HRK", vbNullString
    dict.Add "HUF", vbNullString
    dict.Add "PLN", vbNullString
    dict.Add "RON", vbNullString
    dict.Add "RSD", vbNullString
    Set GetDict = dict
End Function

Public Function GetRate(ByVal json As String, ByVal key As Variant, Optional ByVal rate As String = "median_rate") As Variant
    Dim arr() As String, tempString As String
    On Error GoTo Errhand
    arr = Split(json, """currency_code"": " & Chr$(34) & key & Chr$(34))
    tempString = arr(1)
    tempString = Split(arr(1), Chr$(34) & rate & Chr$(34) & ":")(1)
    tempString = Split(tempString, ",")(0)
    GetRate = tempString
    Exit Function
Errhand:
    GetRate = CVErr(xlErrNA)
End Function

Public Sub PrintDictionary(ByVal dict As Object)
    Dim key As Variant
    For Each key In dict.keys
        Debug.Print key & " : " & dict(key)
    Next
End Sub

Internet Explorer:

您可以使用带有显式等待元素的循环,以在页面上显示(或填充)

Option Explicit
Public Sub GetInfo()
    Dim IE As New InternetExplorer, t As Date, hTable As HTMLTable, clipboard As Object
    Const WAIT_TIME_SECS As Long = 5
    t = Timer

    With IE
        .Visible = True
        .navigate "https://www.hnb.hr/en/core-functions/monetary-policy/exchange-rate-list/exchange-rate-list"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Do
            DoEvents
            On Error Resume Next
            Set hTable = .document.getElementById("records_table")
            On Error GoTo 0
            If Timer - t > WAIT_TIME_SECS Then Exit Do
        Loop While hTable Is Nothing

        If hTable Is Nothing Then
            .Quit
            Exit Sub
        End If
        Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
        clipboard.SetText hTable.outerHTML
        clipboard.PutInClipboard
        ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
        .Quit                                    '<== Remember to quit application
    End With
End Sub