Excel VBA报废-HTML表格不可见

时间:2019-02-02 20:34:01

标签: html excel vba web-scraping

我想从获取数据“https://in.tradingview.com/symbols/NSE-ABB/technicals/”使用Excel VBA网站再杀沉绵我歌厅响应,但body.innerHTML没有显示所需的表,但在铬,如果我检查的页面,我能够看到带有名称的表格。

代码有什么问题?

With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
    WriteTxtFile sResponse
    With html
        .body.innerHTML = sResponse
        Set tElementC = .getElementsByClassName("table-1i1M26QY- maTable-27Z4Dq6Y- tableWithAction-2OCRQQ8y-")(0).getElementsByTagName("td")

    End With

URL-> https://in.tradingview.com/symbols/NSE-ABB/technicals/ 要访问的类名称=“ table-1i1M26QY- maTable-27Z4Dq6Y- tableWithAction-2OCRQQ8y-”

2 个答案:

答案 0 :(得分:4)

https://in.tradingview.com/symbols/NSE-ABB/technicals/提供的链接提供的网页源HTML不包含必要的数据,它使用AJAX。该网站提供了sorta API。响应以JSON格式返回。因此,您需要首先进行一些逆向工程,以了解网站的工作方式。在浏览器中,e。 G。 Chrome,按 F12 打开DevTools,导航到该网页,转到“网络”标签,将过滤器设置为XHR,如下图所示:

network tab

检查记录的响应。其中一个具有最大大小的文件实际上包含了所有必要的数据:

json response

要制作这样的XHR,您还需要保留整个有效负载结构,并添加相关的标头:

headers and form data

在“表单数据”部分中,数组内有很多引号字段标题,因此您可以选择实际需要的标题。您可能会找到更多可用的标题,单击Initiator链接(上面的第一个屏幕截图),您将看到启动该XHR的JS代码。单击底部的漂亮打印 {} 以使代码可读。在搜索框中输入您已经从“表单数据”中拉出的任何标题,例如。 G。 Recommend.Other,然后在代码中找到其他代码:

quote field titles

这里是VBA示例,显示了如何进行这种抓取。 JSON.bas模块导入VBA项目中以进行JSON处理。

Option Explicit

Sub Test()

    Dim aQuoteFieldTitles()
    Dim aQuoteFieldData()
    Dim sPayload As String
    Dim sJSONString As String
    Dim vJSON
    Dim sState As String
    Dim i As Long

    ' Put the necessary field titles into array
    aQuoteFieldTitles = Array( _
        "name", "description", "country", "type", "after_tax_margin", "average_volume", "average_volume_30d_calc", "average_volume_60d_calc", "average_volume_90d_calc", "basic_eps_net_income", "beta_1_year", "beta_3_year", "beta_5_year", "current_ratio", "debt_to_assets", "debt_to_equity", "dividends_paid", "dividends_per_share_fq", _
        "dividends_yield", "dps_common_stock_prim_issue_fy", "earnings_per_share_basic_ttm", "earnings_per_share_diluted_ttm", "earnings_per_share_forecast_next_fq", "earnings_per_share_fq", "earnings_release_date", "earnings_release_next_date", "ebitda", "enterprise_value_ebitda_ttm", "enterprise_value_fq", "exchange", "expected_annual_dividends", _
        "gross_margin", "gross_profit", "gross_profit_fq", "industry", "last_annual_eps", "last_annual_revenue", "long_term_capital", "market_cap_basic", "market_cap_calc", "net_debt", "net_income", "number_of_employees", "number_of_shareholders", "operating_margin", _
        "pre_tax_margin", "preferred_dividends", "price_52_week_high", "price_52_week_low", "price_book_ratio", "price_earnings_ttm", "price_revenue_ttm", "price_sales_ratio", "quick_ratio", "return_of_invested_capital_percent_ttm", "return_on_assets", "return_on_equity", "return_on_invested_capital", "revenue_per_employee", "sector", _
        "eps_surprise_fq", "eps_surprise_percent_fq", "total_assets", "total_capital", "total_current_assets", "total_debt", "total_revenue", "total_shares_outstanding_fundamental", "volume", "relative_volume", "pre_change", "post_change", "close", "open", "high", "low", "gap", "price_earnings_to_growth_ttm", "price_sales", "price_book_fq", _
        "price_free_cash_flow_ttm", "float_shares_outstanding", "total_shares_outstanding", "change_from_open", "change_from_open_abs", "Perf.W", "Perf.1M", "Perf.3M", "Perf.6M", "Perf.Y", "Perf.YTD", "Volatility.W", "Volatility.M", "Volatility.D", "RSI", "RSI7", "ADX", "ADX+DI", "ADX-DI", "ATR", "Mom", "High.All", "Low.All", "High.6M", "Low.6M", _
        "High.3M", "Low.3M", "High.1M", "Low.1M", "EMA5", "EMA10", "EMA20", "EMA30", "EMA50", "EMA100", "EMA200", "SMA5", "SMA10", "SMA20", "SMA30", "SMA50", "SMA100", "SMA200", "Stoch.K", "Stoch.D", "MACD.macd", "MACD.signal", "Aroon.Up", "Aroon.Down", "BB.upper", "BB.lower", "goodwill", "debt_to_equity_fq", "CCI20", "DonchCh20.Upper", _
        "DonchCh20.Lower", "HullMA9", "AO", "Pivot.M.Classic.S3", "Pivot.M.Classic.S2", "Pivot.M.Classic.S1", "Pivot.M.Classic.Middle", "Pivot.M.Classic.R1", "Pivot.M.Classic.R2", "Pivot.M.Classic.R3", "Pivot.M.Fibonacci.S3", "Pivot.M.Fibonacci.S2", "Pivot.M.Fibonacci.S1", "Pivot.M.Fibonacci.Middle", "Pivot.M.Fibonacci.R1", _
        "Pivot.M.Fibonacci.R2", "Pivot.M.Fibonacci.R3", "Pivot.M.Camarilla.S3", "Pivot.M.Camarilla.S2", "Pivot.M.Camarilla.S1", "Pivot.M.Camarilla.Middle", "Pivot.M.Camarilla.R1", "Pivot.M.Camarilla.R2", "Pivot.M.Camarilla.R3", "Pivot.M.Woodie.S3", "Pivot.M.Woodie.S2", "Pivot.M.Woodie.S1", "Pivot.M.Woodie.Middle", "Pivot.M.Woodie.R1", _
        "Pivot.M.Woodie.R2", "Pivot.M.Woodie.R3", "Pivot.M.Demark.S1", "Pivot.M.Demark.Middle", "Pivot.M.Demark.R1", "KltChnl.upper", "KltChnl.lower", "P.SAR", "Value.Traded", "MoneyFlow", "ChaikinMoneyFlow", "Recommend.All", "Recommend.MA", "Recommend.Other", "Stoch.RSI.K", "Stoch.RSI.D", "W.R", "ROC", "BBPower", "UO", "Ichimoku.CLine", _
        "Ichimoku.BLine", "Ichimoku.Lead1", "Ichimoku.Lead2", "VWMA", "ADR", "RSI[1]", "Stoch.K[1]", "Stoch.D[1]", "CCI20[1]", "ADX-DI[1]", "AO[1]", "Mom[1]", "Rec.Stoch.RSI", "Rec.WR", "Rec.BBPower", "Rec.UO", "Rec.Ichimoku", "Rec.VWMA", "Rec.HullMA9" _
    )

    ' Field titles exactly as in the table MOVING AVERAGES
    ' aQuoteFieldTitles = Array("EMA5", "SMA5", "EMA10", "SMA10", "EMA20", "SMA20", "EMA30", "SMA30", "EMA50", "SMA50", "EMA100", "SMA100", "EMA200", "SMA200", "Ichimoku.BLine", "VWMA", "HullMA9")

    ' Compose payload
    sPayload = "{""symbols"":{""tickers"":[""NSE:ABB""],""query"":{""types"":[]}},""columns"":" & JSON.Serialize(aQuoteFieldTitles) & "}"
    ' Retrieve JSON response
    With CreateObject("MSXML2.XMLHTTP")
        .Open "POST", "https://scanner.tradingview.com/india/scan", True
        .setRequestHeader "content-type", "application/x-www-form-urlencoded"
        .setRequestHeader "user-agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        .setRequestHeader "content-length", Len(sPayload)
        .send (sPayload)
        Do Until .readyState = 4: DoEvents: Loop
        sJSONString = .responseText
    End With
    ' Parse JSON response
    JSON.Parse sJSONString, vJSON, sState
    ' Check response validity
    Select Case True
        Case sState <> "Object"
            MsgBox "Invalid JSON response"
        Case IsNull(vJSON("data"))
            MsgBox vJSON("error")
        Case Else
            ' Output data to worksheet #1
            aQuoteFieldData = vJSON("data")(0)("d")
            With ThisWorkbook.Sheets(1)
                .Cells.Delete
                .Cells.WrapText = False
                For i = 0 To UBound(aQuoteFieldTitles)
                    .Cells(i + 1, 1).Value = aQuoteFieldTitles(i)
                    .Cells(i + 1, 2).Value = aQuoteFieldData(i)
                Next
                .Columns.AutoFit
            End With
            MsgBox "Completed"
    End Select

End Sub

对我来说输出如下:

output

顺便说一句,类似的方法适用于in other answers

答案 1 :(得分:0)

如评论中所述,javascript必须在页面上运行才能更新所需的内容。似乎没有免费可用的API。您可以使用浏览器。您需要转到VBE>工具>引用>添加对Microsoft Internet控件的引用。

Option Explicit
Public Sub GetInfo()
    Dim IE As InternetExplorer, ws As Worksheet, hTable As Object, tRow As Object, td As Object, r As Long, c As Long, headers()
    headers = Array("name", "value", "action")
    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set IE = New InternetExplorer
    With IE
        .Visible = True
        .Navigate2 "https://in.tradingview.com/symbols/NSE-ABB/technicals/"

        While .Busy Or .readyState < 4: DoEvents: Wend
        Set hTable = IE.document.querySelector("table + .tableWithAction-2OCRQQ8y-")
        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        For Each tRow In hTable.getElementsByTagName("tr")
            r = r + 1: c = 1
            For Each td In tRow.getElementsByTagName("td")
                ws.Cells(r, c).Value = td.innerText
                c = c + 1
            Next td
        Next tRow
        .Quit
    End With
End Sub