无法获取Web废表以填充多个单元格VBA

时间:2019-01-22 19:57:43

标签: vba web-scraping html-table

我正在尝试通过VBA从网站上抓取一张桌子。我能够从表格中收集所有数据,但是我只能将其填充到单个单元格中,而不是按照网站上的格式来分解信息。本质上,数据应分为四列,然后沿着与数据一样多的行下降。我知道我在这里遗漏了一些简单的东西,但我只是不能将手指放在上面。请帮助!编码如下:

Sub WebScrape()

Dim objIE As InternetExplore'

Dim ele As Object
Dim y As Integer
objIE.navigate "http://www.uscfinvestments.com/holdings/usci"
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

y = 1
For Each ele In objIE.document.getElementById("holdingsTableDiv").getElementsByTagName("table")
    Debug.Print ele.textContent
    Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
Next
ActiveWorkbook.Save

End Sub

2 个答案:

答案 0 :(得分:1)

我在下面展示了许多方法。就个人而言,我更喜欢使用API​​的最后一个。


使用剪贴板:

如果要显示在页面上,一种简单的好方法就是将表格复制到剪贴板并粘贴

Option Explicit
Public Sub GetInfo()
    Dim ie As InternetExplorer, hTable As Object, clipboard As Object, ws As Worksheet, t As Date
    Const MAX_WAIT_SEC As Long = 10
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    Set ie = New InternetExplorer

    With ie
        .Visible = True
        .Navigate2 "http://www.uscfinvestments.com/holdings/usci"

        While .Busy Or .readyState < 4: DoEvents: Wend

        t = Timer
        Do
            On Error Resume Next
            Set hTable = .document.getElementById("holdingsTableID")
            On Error GoTo 0
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While hTable Is Nothing

        If Not hTable Is Nothing Then
            clipboard.SetText hTable.outerHTML
            clipboard.PutInClipboard
            ws.Cells(1, 1).PasteSpecial
        End If
        .Quit
    End With
End Sub

循环表:

如果要循环表的行和列,则可以使用行类名和行号来确定写出方式

Option Explicit  
Public Sub GetInfo()
    Dim ie As InternetExplorer, hTable As Object, ws As Worksheet, t As Date
    Const MAX_WAIT_SEC As Long = 10
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set ie = New InternetExplorer

    With ie
        .Visible = True
        .Navigate2 "http://www.uscfinvestments.com/holdings/usci"

        While .Busy Or .readyState < 4: DoEvents: Wend

        t = Timer
        Do
            On Error Resume Next
            Set hTable = .document.getElementById("holdingsTableID")
            On Error GoTo 0
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While hTable Is Nothing

        If Not hTable Is Nothing Then
            WriteTable hTable, 1, ws
        End If
        .Quit
    End With
End Sub

Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tRow As Object, td As Object, r As Long, c As Long, th As Object
    r = startRow
    With ws
        For Each tRow In hTable.getElementsByTagName("tr")
            c = 1
            If r = startRow Or tRow.className = "subHeader" Then
                For Each th In tRow.getElementsByTagName("th")
                    .Cells(r, c) = th.innerText
                    c = c + 1
                Next
            Else
                For Each td In tRow.getElementsByTagName("td")
                    .Cells(r, c).Value = td.innerText
                    c = c + 1
                Next td
            End If
            r = r + 1
        Next tRow
    End With
End Sub

API:

有一个以json格式提供数据的API

https://cssecure.alpsinc.com/api/v1//holding/usci

它需要验证。我正在使用jsonconverter.bas来解析返回的json。下载并添加.bas后,您需要进入VBE>工具>参考>添加对Microsoft Scripting Runtime的参考。

Option Explicit
Public Sub GetValues()
    Dim json As Object, authorization As String
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://www.uscfinvestments.com/uscfinvestments-template/assets/javascript/api_key.php", False
        .send
        authorization = Split(Split(.responseText, "'")(1), "'")(0)
        .Open "GET", "https://cssecure.alpsinc.com/api/v1//holding/usci", False
        .setRequestHeader "Authorization", authorization
        .send
        Set json = JsonConverter.ParseJson(.responseText)
    End With
    Dim arr(), headers(), item As Object, r As Long
    headers = Array("Security", "Quantity", "Price", "Market Value")
    r = 1
    ReDim arr(1 To json.Count, 1 To 4)
    For Each item In json
        arr(r, 1) = item("name")
        arr(r, 2) = item("shares")
        Dim test As String
        If IsNull(item("contractprice")) Then
           arr(r, 3) = item("settlementprice")
        Else
            arr(r, 3) = item("contractprice")
        End If
        arr(r, 4) = item("marketvalue")
        r = r + 1
    Next
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(arr, 1), UBound(arr, 2)) = arr
    End With
End Sub

答案 1 :(得分:0)

这里没有什么可添加的。 QHarr已经涵盖了它们。我使用硬编码延迟而不是显式等待,以使脚本不太冗长。

Sub GetContent()
    Dim Html As HTMLDocument, elem As Object, tRow As Object, C&, R&

    With New InternetExplorer
        .Visible = False
        .navigate "http://www.uscfinvestments.com/holdings/usci"
        While .Busy = True Or .readyState < 4: DoEvents: Wend
        Set Html = .Document
    End With

    Application.Wait Now + TimeValue("00:00:03") 'make it 05 if it fails somehow

    For Each elem In Html.getElementById("holdingsTableID").Rows
        For Each tRow In elem.Cells
            C = C + 1: ThisWorkbook.Worksheets("Sheet1").Cells(R + 1, C) = tRow.innerText
        Next tRow
        C = 0: R = R + 1
    Next elem
End Sub