如何使用excel vba中的getElementsByClassName从网页获取数据

时间:2017-05-14 15:50:04

标签: excel-vba vba excel

我正在尝试使用以下代码从excel vba中的https://in.finance.yahoo.com/quotes/ADANIENT.BO获取数据,但它似乎无法正常工作。

Private Sub mysub()
'Use References> Microsoft Internet Controls and Microsoft HTML Object Library

Dim IE As InternetExplorer, doc As HTMLDocument, quote As String
Dim URL As String
Set IE = CreateObject("internetExplorer.application")

URL = "https://in.finance.yahoo.com/quotes/ADANIENT.BO"
IE.navigate (URL)
Do
    DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE

Set doc = IE.document

      quote = doc.getElementById("JB3wv").getElementsByClassName("-fsw9 _16zJc")(0).getElementsByClassName("_3Bucv")(0).innerText
      'quote = doc.getElementById("JB3wv").getElementsByTagName("table")(0).getElementsByTagName("tr")(0).getElementsByTagName("td")(1).getElementsByClassName("_3Bucv").innerText
Debug.Print quote
IE.Application.Quit
End Sub

您可以转到网址https://in.finance.yahoo.com/quotes/ADANIENT.BO并检查检查元素的最后价格

    <div class="JB3wv"><table class="-fsw9 _16zJc" data-test="contentTable">      <tbody><tr data-index="0" data-key="ADANIENT.BO" data-test-key="ADANIENT.BO" class=""><td class="_2VvFs"><span><label class="_120DQ _2z7ql"><input name="rowToggle" value="on" data-rapid_p="14" data-v9y="1" type="checkbox"><i></i></label><a class="_61PYt " title="ADANIENT.BO" href="/quote/ADANIENT.BO" data-rapid_p="15" data-v9y="1">ADANIENT.BO</a></span></td><td style="font-weight: 700;"><span class="_3Bucv" style="font-weight: 700;">121.60</span></td><td style="font-weight: 700;"><span class="_3Bucv _2ZN-S" style="font-weight: 700;">+1.50</span></td><td style="font-weight: 700;"><span class="_3Bucv _2ZN-S" style="font-weight: 700;">+1.25%</span></td><td style="text-align: left;">INR</td><td><span>3:56 PM IST</span></td><td style="font-weight: 700;"><span class="_3Bucv" style="font-weight: 700;">1.98m</span></td><td>-</td><td>1.45m</td><td style="text-align: left;"><canvas style="width: 140px; height: 23px;" width="140" height="23"></canvas></td><td style="text-align: left;"><canvas style="width: 140px; height: 23px;" width="140" height="23"></canvas></td><td style="text-align: left;"><canvas style="width: 70px; height: 25px;" width="70" height="25"></canvas></td><td style="font-weight: 700;"><span class="_3Bucv" style="font-weight: 700;">0</span></td></tr></tbody></table></div>

2 个答案:

答案 0 :(得分:0)

在finance.yahoo.com最近更改后,似乎正确的网址是

https://finance.yahoo.com/quote/ADANIENT.BO

要通过IE自动化从网页DOM中检索所需的值,您可以使用以下代码:

Option Explicit

Sub GetLastPriceIE()

    Dim sURL As String
    Dim sHeader As String
    Dim sQuote As String

    sURL = "https://in.finance.yahoo.com/quote/ADANIENT.BO"
    ' Open IE
    With CreateObject("InternetExplorer.Application")
        ' Navigate URL
        .Visible = True
        .Navigate sURL
        ' Wait IE
        Do While .readyState < 3 Or .Busy
            DoEvents
        Loop
        ' Wait document
        Do While .document.readyState <> "complete"
            DoEvents
        Loop
        ' Wait target element
        Do While IsNull(.document.getElementById("quote-header-info"))
            DoEvents
        Loop
        ' Retrieve quote header info inner text
        sHeader = .document.getElementById("quote-header-info").innerText
        .Quit
    End With
    ' Create RegEx
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        ' Set pattern to match values like "121.55+1.15 (+0.96 %)"
        .Pattern = "^\s*(\d+\.\d+)\s*[+-]\d+\.\d+\s*\(\s*[\+-]\d+\.\d+\s*%\s*\)\s*$"
        With .Execute(sHeader)
            If .Count = 1 Then
                sQuote = .Item(0).SubMatches(0)
            Else
                sQuote = "N/A"
            End If
        End With
    End With
    Debug.Print sQuote

End Sub

您可以通过https://query1.finance.yahoo.com/v7/finance/quote?lang=en-US&fields=regularMarketPrice&symbols=ADANIENT.BO之类的网址获取API的最新价格,以下是示例代码:

Option Explicit

Sub GetLastPricesXHR()

    Dim aSymbols
    Dim aPrices
    Dim i As Long

    ' Put symbols into array
    aSymbols = Array("ADANIENT.BO", "NTPC.BO", "BHEL.BO")
    ' Retrieve prices
    aPrices = ParseLastPricesXHR(aSymbols)
    ' Output
    For i = 0 To UBound(aSymbols)
        Debug.Print aSymbols(i), aPrices(i)
    Next

End Sub

Function ParseLastPricesXHR(aSymbols)

    Dim sResp As String
    Dim aChunks
    Dim i As Long
    Dim sChunk As String
    Dim aPrices
    Dim sPrice As String

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://query1.finance.yahoo.com/v7/finance/quote?lang=en-US&fields=regularMarketPrice&symbols=" & Join(aSymbols, "%2C"), False
        .Send
        sResp = .ResponseText
    End With
    aChunks = Split(sResp, """regularMarketPrice"":", UBound(aSymbols) + 2)
    If UBound(aChunks) <> UBound(aSymbols) + 1 Then
        MsgBox "Wrong response"
        End
    End If
    ReDim aPrices(UBound(aSymbols))
    For i = 0 To UBound(aSymbols)
        sChunk = aChunks(i + 1)
        sPrice = Split(sChunk, ",", 2)(0)
        aPrices(i) = sPrice
    Next
    ParseLastPricesXHR = aPrices

End Function

简短说明。在Chrome中浏览网址https://in.finance.yahoo.com/quotes/ADANIENT.BO。打开“开发人员工具”窗口( F12 ),“网络”选项卡,并在加载页面后检查记录的XHR。您可以在Previev / Response选项卡中找到一个包含相关数据的XHR(通过页面https://in.finance.yahoo.com/quotes/ADANIENT.BO/view/v1中的值121.6搜索):

preview

查看“标题”选项卡:您可以在此处找到常规参数中的URL以及详细的查询字符串参数:

headers

有些参数似乎是可选的,所以我在上面的代码中省略了它们。尽管响应实际上是JSON,但使用Split进行解析对于一个值来说简单而有效。

通过API和JSON解析从Yahoo Finance检索更多数据 - 通过解析HTML内容this answer检查this answer

答案 1 :(得分:0)

Sub getLastPrice()

Set ie = CreateObject("InternetExplorer.Application")

ie.Visible = True

ie.navigate "https://in.finance.yahoo.com/quotes/ADANIENT.BO"

Do While ie.busy Or ie.readystate <> 4
DoEvents
Loop

Application.Wait Now + TimeValue("00:00:02") '~~> give another 2 seconds

Set tbls = ie.document.getElementsByTagName("table")

Dim k As Integer
k = 0

For Each tbl In tbls '~~> looping in order to find the exact table

'~~> Be aware that the table's class value has one white space at the end.

If tbl.getAttribute("class") = "_2VeNv " Then Set tbl = tbls(k): Exit For

k = k + 1

Next

last_Price = tbl.Rows(1).Cells(1).Children(0).innertext 

debug.print last_Price '~~> result value = 195.10

'Explain ~~> table(k) second row(=second tr) second cell(=second td) first tag(span) innerText

End Sub