Excel VBA 代码:如何修改才能抓取其他页面中的其他表格?

时间:2018-09-19 10:39:22

标签: html excel vba excel-vba web-scraping

一个简短的问题:如何修改一段代码,以获取其他网页中表格的内容。此前我在这里得到了很好的指导,了解了如何根据标题获取某些表格内容,而且效果很好——再次感谢 'QHarr' 的帮助。

我要从中获取表详细信息的URL是:

https://finance.yahoo.com/quote/AAPL/?p=AAPL

这是一段获取我想要的数据的代码:

' Reads the ticker symbols listed in column A (row 3 downwards) of worksheet
' "100", requests the Yahoo Finance quote page for each one through clsHTTP,
' and writes the header row to row 2 and each ticker's values from column B
' onwards in the ticker's own row.
'
' Relies on GetLastRow and on the project class clsHTTP (GetHTMLDoc / GetInfo),
' neither of which is defined in this procedure.
Sub GetYahooInfo100()
Dim tickers(), ticker As Long, lastRow As Long, headers()
Dim wsSource As Worksheet, http As clsHTTP, html As HTMLDocument
Dim Results(), i As Long, endPoint As Long
Dim symbol As String

Set wsSource = ThisWorkbook.Worksheets("100")
Set http = New clsHTTP

headers = Array("Ticker", "Previous Close", "Open", "Bid", "Ask", "Day's Range", "52 Week Range", "Volume", "Avg. Volume", "Market Cap", "Beta", "PE Ratio (TTM)", "EPS (TTM)", _
                "Earnings Date", "Forward Dividend & Yield", "Ex-Dividend Date", "1y Target Est")

With wsSource
    lastRow = GetLastRow(wsSource, 1)
    Select Case lastRow
    Case Is < 3
        ' No tickers to process; screen updating has not been switched off yet,
        ' so leaving here does not leave Excel in a frozen-display state.
        Exit Sub
    Case 3
        ' Explicit 1-based bounds so the single-ticker array has the same shape
        ' as the array returned by Range.Value in the multi-row case.
        ReDim tickers(1 To 1, 1 To 1): tickers(1, 1) = .Range("A3").Value
    Case Is > 3
        tickers = .Range("A3:A" & lastRow).Value
    End Select

    Application.ScreenUpdating = False

    ' Results is 0-based while tickers is 1-based: ticker n lives in Results(n - 1).
    ReDim Results(0 To UBound(tickers, 1) - 1)
    endPoint = UBound(headers)

    For ticker = LBound(tickers, 1) To UBound(tickers, 1)
        ' Default for blank cells and for tickers whose request fails.
        Results(ticker - 1) = vbNullString
        If Not IsEmpty(tickers(ticker, 1)) Then
            symbol = tickers(ticker, 1)
            ' Best effort per ticker: a failed request leaves that row blank
            ' instead of aborting the run. Error trapping is limited to these
            ' statements so unrelated errors are no longer silently swallowed.
            On Error Resume Next
            Set html = http.GetHTMLDoc("https://finance.yahoo.com/quote/" & symbol & "/?p=" & symbol)
            If Err.Number = 0 Then Results(ticker - 1) = http.GetInfo(html, endPoint)
            Err.Clear
            On Error GoTo 0
            Set html = Nothing
        End If
    Next

    .Cells(2, 1).Resize(1, UBound(headers) + 1) = headers
    For i = LBound(Results) To UBound(Results)
        ' NOTE(review): width kept as in the original (endPoint - 1); confirm it
        ' matches the number of items clsHTTP.GetInfo returns.
        .Cells(3 + i, 2).Resize(1, endPoint - 1) = Results(i)
    Next
End With
Application.ScreenUpdating = True
End Sub

下面还有另外一段:

' Returns the row number reached by End(xlUp) when starting from the bottom
' cell of the given column on worksheet ws.
'   ws           - worksheet to inspect
'   columnNumber - 1-based column index (defaults to column 1)
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
    GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function

就像我说的,其中一位成员对确定如何做到这一点非常有帮助。我已经尝试过修改这段代码,以从统计信息页面中提取另一组数据,如下所示:

https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL

但是我一定是遗漏了什么。除非是我对这些表格的引用不正确,否则我实在想不出原因。我想知道是否可以一次抓取每个表格中的所有数据字段,而不是逐个字段抓取(那样容易出错)。

希望有人可以提供帮助。

非常感谢。

1 个答案:

答案 0 :(得分:2)

尝试以下操作:

Option Explicit

' Reads the ticker symbols listed in column A (row 3 downwards) of worksheet
' "Sheet1", downloads the Yahoo Finance key-statistics page for each one with
' MSXML2.XMLHTTP, and writes every second "tbody td" cell text into the
' ticker's row starting at column B. The header row is written to row 2.
' Blank ticker cells get "N/A" in column B.
'
' Relies on GetLastRow and on the HTMLDocument type (Microsoft HTML Object
' Library reference).
Public Sub GetYahooInfo()
    Dim tickers(), ticker As Long, lastRow As Long, headers()
    Dim wsSource As Worksheet, http As Object, html As New HTMLDocument
    Dim tables As Object, i As Long, counter As Long
    Dim sResponse As String, docStart As Long
    headers = Array("Previous Close", "Open", "Bid", "Ask", "Day's Range", "52 Week Range", "Volume", "Avg. Volume", "Market Cap", _
                    "Beta", "PE Ratio (TTM)", "EPS (TTM)", "Earnings Date", "Forward Dividend & Yield", "Ex-Dividend Date", "1y Target Est", _
                    "Market Cap (intraday)", "Enterprise Value", "Trailing P/E", "Forward P/E", "PEG Ratio (5 yr expected)", "Price/Sales (ttm)", _
                    "Price/Book (mrq)", "Enterprise Value/Revenue", "Enterprise Value/EBITDA", "Fiscal Year Ends", "Most Recent Quarter (mrq)", _
                    "Profit Margin", "Operating Margin (ttm)", "Return on Assets (ttm)", "Return on Equity (ttm)", "Revenue (ttm)", "Revenue Per Share (ttm)", _
                    "Quarterly Revenue Growth (yoy)", "Gross Profit (ttm)", "EBITDA", "Net Income Avi to Common (ttm)", "Diluted EPS (ttm)", _
                    "Quarterly Earnings Growth (yoy)", "Total Cash (mrq)", "Total Cash Per Share (mrq)", "Total Debt (mrq)", _
                    "Total Debt/Equity (mrq)", "Current Ratio (mrq)", "Book Value Per Share (mrq)", "Operating Cash Flow (ttm)", _
                    "Levered Free Cash Flow (ttm)", "Beta", "52-Week Change", "S&P500 52-Week Change", "52 Week High", "52 Week Low", _
                    "50-Day Moving Average", "200-Day Moving Average", "Avg Vol (3 month)", "Avg Vol (10 day)", _
                    "Shares Outstanding", "Float", "% Held by Insiders", "% Held by Institutions", "Shares Short (Aug 31, 2018)", _
                    "Short Ratio (Aug 31, 2018)", "Short % of Float (Aug 31, 2018)", "Short % of Shares Outstanding (Aug 31, 2018)", _
                    "Shares Short (prior month Jul 31, 2018)", "Forward Annual Dividend Rate", "Forward Annual Dividend Yield", _
                    "Trailing Annual Dividend Rate", "Trailing Annual Dividend Yield", "5 Year Average Dividend Yield", "Payout Ratio", _
                    "Dividend Date", "Ex-Dividend Date", "Last Split Factor (new per old)", "Last Split Date")

    Set wsSource = ThisWorkbook.Worksheets("Sheet1") '<== Change as appropriate to sheet containing the tickers

    With wsSource
        lastRow = GetLastRow(wsSource, 1)
        Select Case lastRow
        Case Is < 3
            ' Nothing to do; screen updating has not been switched off yet.
            Exit Sub
        Case 3
            ' Explicit 1-based bounds so the single-ticker array has the same
            ' shape as the array returned by Range.Value in the multi-row case.
            ReDim tickers(1 To 1, 1 To 1): tickers(1, 1) = .Range("A3").Value
        Case Is > 3
            tickers = .Range("A3:A" & lastRow).Value
        End Select

        Application.ScreenUpdating = False
        Set http = CreateObject("MSXML2.XMLHTTP")

        ' The header row does not depend on the ticker, so write it once.
        .Cells(2, 2).Resize(1, UBound(headers) + 1) = headers

        For ticker = LBound(tickers, 1) To UBound(tickers, 1)
            ' All cell writes go through wsSource (ThisWorkbook) rather than an
            ' unqualified Worksheets("Sheet1"), which would resolve against
            ' whichever workbook happens to be active.
            If IsEmpty(tickers(ticker, 1)) Then
                .Cells(2 + ticker, 2) = "N/A"
            Else
                With http
                    .Open "GET", "https://finance.yahoo.com/quote/" & tickers(ticker, 1) & "/key-statistics?p=" & tickers(ticker, 1), False
                    ' Old If-Modified-Since date is sent so a cached response is not reused.
                    .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
                    .send
                    sResponse = StrConv(.responseBody, vbUnicode)
                End With

                ' Drop anything before the doctype; Mid$ raises error 5 for a
                ' start position of 0, so only trim when the marker is present.
                docStart = InStr(1, sResponse, "<!DOCTYPE ")
                If docStart > 0 Then sResponse = Mid$(sResponse, docStart)
                html.body.innerHTML = sResponse

                Set tables = html.querySelectorAll("tbody td")
                counter = 2

                ' Every second cell (index 1, 3, 5, ...) is written to the sheet.
                For i = 1 To tables.Length - 1 Step 2
                    .Cells(ticker + 2, counter) = tables(i).innerText
                    If InStr(tables(i).innerText, "Last Split Date") > 0 Then Exit For
                    ' Look ahead only while a following cell exists; VBA's Or
                    ' does not short-circuit, hence the nested test.
                    If i + 1 < tables.Length Then
                        If InStr(tables(i + 1).innerText, "Last Split Date") > 0 Then Exit For
                    End If
                    counter = counter + 1
                Next
                ' html is declared As New, so it is re-created on next use.
                Set html = Nothing: Set tables = Nothing
            End If
        Next
    End With
    Application.ScreenUpdating = True
End Sub

' Returns the row number reached by End(xlUp) when starting from the bottom
' cell of the requested column on the supplied worksheet.
'   ws           - worksheet to inspect
'   columnNumber - 1-based column index (defaults to column 1)
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    Dim bottomCell As Range

    Set bottomCell = ws.Cells(ws.Rows.Count, columnNumber)
    GetLastRow = bottomCell.End(xlUp).Row
End Function

样本结果:

enter image description here