使用其类提取HTML元素值

时间:2019-06-07 09:10:46

标签: html excel vba web-scraping

我研究了此链接Extract Table from Webpage in Excel using VBA中提供的解决方案,它非常有帮助。但是我需要在每个HTML Table单元格(td)中提取具有特定类的元素。

  1. URL为:https://www.betfair.com/exchange/plus/football/competition/11997260

  2. HTML表类为:coupon-table

  3. 要提取的价格嵌套在单元格内。它在Span元素中使用“ bet-button-price”类。这就是我需要提取到Excel工作表中每个单元格中的数据。

这是表结构的屏幕截图:

enter image description here

如果能将每个单元格中的价格提取到表格中,我将不胜感激。

Public Sub GetInfo()
Const URL As String = "https://www.betfair.com/exchange/plus/football/competition/11997260"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
headers = Array("Countries", "Prices")
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
    .Open "GET", URL, False
    .send
    html.body.innerHTML = .responseText
End With
 Set hTable = html.querySelector("table.coupon-table")
Dim Td As Object, Tr As Object, r As Long, c As Long

r = 1
With ws
    .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    For Each Tr In hTable.getElementsByTagName("tr")
        r = r + 1: c = 1
        If r > 3 Then
            For Each Td In Tr.getElementsByClassName("bet-button-price")
                .Cells(r - 2, c) = IIf(c = 2, "'" & Td.innerText, Td.innerText)
                c = c + 1
            Next
        End If
    Next
End With

结束子

2 个答案:

答案 0 :(得分:0)

您尝试过getElementByClassName(“ yourclassname”)吗?

答案 1 :(得分:0)

我将使用页面用于更新返回json的那些值的同一端点。然后使用json解析器提取所需的值。

我使用jsonconverter.bas-您将链接中的代码添加到名为JsonConverter的模块中,然后转到VBE>工具>引用>添加对Microsoft脚本运行时的引用。

在json的VBA中,[]表示可以通过For Each或索引访问的集合,{}表示可以通过键或For Each访问的字典。

我将结果存储在一个数组中,然后一遍一遍地写出来以提高效率。您可以写到Activesheet.Cells(2,1),然后根据需要在第1行中添加标题。

您可以查看示例响应json here

Option Explicit
Public Sub GetPrices()
    Dim s As String, json As Object, p As String

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.betfair.com/www/sports/exchange/readonly/v1/bymarket?_ak=nzIFcwyWhrlwYMrh&alt=json&currencyCode=GBP&locale=en_GB&marketIds=1.157348157,1.157348529,1.157347785,1.157347909,1.157348405,1.157348653,1.157348281,1.157349025,1.159492425,1.157348777,1.157348033,1.157348901,1.157350197,1.157350445,1.157351280,1.157349949&rollupLimit=10&rollupModel=STAKE&types=MARKET_STATE,MARKET_RATES,MARKET_DESCRIPTION,EVENT,RUNNER_DESCRIPTION,RUNNER_STATE,RUNNER_EXCHANGE_PRICES_BEST,RUNNER_METADATA,MARKET_LICENCE,MARKET_LINE_RANGE_INFO", False
        .send
        s = .responseText
        Set json = JsonConverter.ParseJson(s)
    End With

    Dim runners As Object, runner As Object, results(), r As Long
    Set runners = json("eventTypes")(1)("eventNodes")

    ReDim results(1 To runners.Count, 1 To 7)
    For Each runner In runners
        r = r + 1
        results(r, 1) = runner("event")("eventName")
        results(r, 2) = runner("marketNodes")(1)("runners")(1)("exchange")("availableToBack")(1)("price")
        results(r, 3) = runner("marketNodes")(1)("runners")(1)("exchange")("availableToLay")(1)("price")
        results(r, 4) = runner("marketNodes")(1)("runners")(3)("exchange")("availableToBack")(1)("price")
        results(r, 5) = runner("marketNodes")(1)("runners")(3)("exchange")("availableToLay")(1)("price")
        results(r, 6) = runner("marketNodes")(1)("runners")(2)("exchange")("availableToBack")(1)("price")
        results(r, 7) = runner("marketNodes")(1)("runners")(2)("exchange")("availableToLay")(1)("price")
    Next
    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

结果:

enter image description here