我正在尝试通过VBA从网站上抓取一张桌子。我能够从表格中收集所有数据,但是我只能将其填充到单个单元格中,而不是按照网站上的格式来分解信息。本质上,数据应分为四列,然后沿着与数据一样多的行下降。我知道我在这里遗漏了一些简单的东西,但我只是不能将手指放在上面。请帮助!编码如下:
Sub WebScrape()
Dim objIE As InternetExplore'
Dim ele As Object
Dim y As Integer
objIE.navigate "http://www.uscfinvestments.com/holdings/usci"
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
y = 1
For Each ele In objIE.document.getElementById("holdingsTableDiv").getElementsByTagName("table")
Debug.Print ele.textContent
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
Next
ActiveWorkbook.Save
End Sub
答案 0 :(得分:1)
我在下面展示了许多方法。就个人而言,我更喜欢使用API的最后一个。
使用剪贴板:
如果要显示在页面上,一种简单的好方法就是将表格复制到剪贴板并粘贴
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, hTable As Object, clipboard As Object, ws As Worksheet, t As Date
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "http://www.uscfinvestments.com/holdings/usci"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set hTable = .document.getElementById("holdingsTableID")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While hTable Is Nothing
If Not hTable Is Nothing Then
clipboard.SetText hTable.outerHTML
clipboard.PutInClipboard
ws.Cells(1, 1).PasteSpecial
End If
.Quit
End With
End Sub
循环表:
如果要循环表的行和列,则可以使用行类名和行号来确定写出方式
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, hTable As Object, ws As Worksheet, t As Date
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "http://www.uscfinvestments.com/holdings/usci"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set hTable = .document.getElementById("holdingsTableID")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While hTable Is Nothing
If Not hTable Is Nothing Then
WriteTable hTable, 1, ws
End If
.Quit
End With
End Sub
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, td As Object, r As Long, c As Long, th As Object
r = startRow
With ws
For Each tRow In hTable.getElementsByTagName("tr")
c = 1
If r = startRow Or tRow.className = "subHeader" Then
For Each th In tRow.getElementsByTagName("th")
.Cells(r, c) = th.innerText
c = c + 1
Next
Else
For Each td In tRow.getElementsByTagName("td")
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
End If
r = r + 1
Next tRow
End With
End Sub
API:
有一个以json格式提供数据的API
https://cssecure.alpsinc.com/api/v1//holding/usci
它需要验证。我正在使用jsonconverter.bas来解析返回的json。下载并添加.bas后,您需要进入VBE>工具>参考>添加对Microsoft Scripting Runtime
的参考。
Option Explicit
Public Sub GetValues()
Dim json As Object, authorization As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://www.uscfinvestments.com/uscfinvestments-template/assets/javascript/api_key.php", False
.send
authorization = Split(Split(.responseText, "'")(1), "'")(0)
.Open "GET", "https://cssecure.alpsinc.com/api/v1//holding/usci", False
.setRequestHeader "Authorization", authorization
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
Dim arr(), headers(), item As Object, r As Long
headers = Array("Security", "Quantity", "Price", "Market Value")
r = 1
ReDim arr(1 To json.Count, 1 To 4)
For Each item In json
arr(r, 1) = item("name")
arr(r, 2) = item("shares")
Dim test As String
If IsNull(item("contractprice")) Then
arr(r, 3) = item("settlementprice")
Else
arr(r, 3) = item("contractprice")
End If
arr(r, 4) = item("marketvalue")
r = r + 1
Next
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(arr, 1), UBound(arr, 2)) = arr
End With
End Sub
答案 1 :(得分:0)
这里没有什么可添加的。 QHarr已经涵盖了它们。我使用硬编码延迟而不是显式等待,以使脚本不太冗长。
Sub GetContent()
Dim Html As HTMLDocument, elem As Object, tRow As Object, C&, R&
With New InternetExplorer
.Visible = False
.navigate "http://www.uscfinvestments.com/holdings/usci"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .Document
End With
Application.Wait Now + TimeValue("00:00:03") 'make it 05 if it fails somehow
For Each elem In Html.getElementById("holdingsTableID").Rows
For Each tRow In elem.Cells
C = C + 1: ThisWorkbook.Worksheets("Sheet1").Cells(R + 1, C) = tRow.innerText
Next tRow
C = 0: R = R + 1
Next elem
End Sub