需要一些帮助来从forexfactory.com 抓取一个简单的表格。下面的代码有效,但格式非常不稳定,而且那里的列似乎有一个无法正常工作的列。我知道我使用的代码非常新手。乐于改进。
Sub Pulldata2()
Dim ieObj As InternetExplorer
Dim appIE As Object
Dim htmlEle As IHTMLElement
Dim i As Integer
Dim strSheet As String
Dim LastRow As Long
strSheet = Sheet1.Range("A3")
Set ieObj = New InternetExplorer
ieObj.Visible = False
ieObj.navigate Sheet1.Range("A3").Value
Application.ScreenUpdating = False
Application.DisplayStatusBar = False
Application.Wait Now + TimeValue("00:00:03")
Sheet2.Activate
For Each htmlEle In ieObj.document.getElementsByClassName("calendar__table")(0).getElementsByTagName("tr")
LastRow = Cells(Rows.Count, "A").End(xlUp).Row
i = 1 + LastRow
With ActiveSheet
.Range("A" & i).Value = htmlEle.Children(1).textContent
.Range("B" & i).Value = htmlEle.Children(2).textContent
.Range("C" & i).Value = htmlEle.Children(3).textContent
.Range("D" & i).Value = htmlEle.Children(4).textContent
.Range("E" & i).Value = htmlEle.Children(5).textContent
.Range("F" & i).Value = htmlEle.Children(6).textContent
.Range("G" & i).Value = htmlEle.Children(7).textContent
.Range("H" & i).Value = htmlEle.Children(8).textContent
End With
i = i + 1
On Error Resume Next
Next htmlEle
End Sub
答案 0 :(得分:0)
首先要注意的是,内容存在于视图源中,这意味着它可以被 xmlhttp 请求而无需求助于浏览器的开销。
接下来要注意的是,每个目标列都可以由特定的类作为目标。为了从正确的行开始,还必须指定忽略标题的父类。
由于 css 选择器很长但并不复杂,我通过循环生成它。
我使用 css 选择器返回目标列中的所有 td 节点。我根据要检索的列数使用一点数学循环该列表(请注意,我不是在此 nodeList 中检索日期;我已经在脚本的前面获取了该列表,然后将其添加到占位符的第一列)。这样我只用所需的列重新创建表。
Option Explicit
Public Sub WriteTable()
'tools > references > Microsoft HTML Object Library
Dim html As MSHTML.HTMLDocument, xhr As Object
Set xhr = CreateObject("MSXML2.XMLHTTP")
Set html = New MSHTML.HTMLDocument
With xhr
.Open "GET", "https://www.forexfactory.com/", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End With
Dim dateText As String
dateText = html.querySelector(".flexDatePicker").Value
Dim css As String, base As String, classSet()
base = ", .calendar__row > " 'parent class to prefix each child class
classSet = Array(".calendar__time", ".calendar__currency", ".calendar__event", _
".calendar__actual", ".calendar__forecast", ".calendar__previous") 'classes to match on
css = ".calendar__row > " & Join$(classSet, base) 'final selector
Dim tableCells As Object, r As Long, res As Long, c As Long, results()
Set tableCells = html.querySelectorAll(css)
r = 1
ReDim results(1 To tableCells.Length / 6, 1 To 7) 'calc size of results array based on 6 columns + 1 for date (previously calculated)
results(1, 1) = dateText
For c = 0 To tableCells.Length - 1
res = c Mod 6
results(r, res + 2) = tableCells.Item(c).innerText
If res = 5 Then r = r + 1 'new row in array
Next
'array fill down time and populate blank date rows
For r = UBound(results, 1) To LBound(results, 1) + 1 Step -1
results(r, 2) = IIf(results(r, 2) = vbNullString, results(r - 1, 2), results(r, 2))
results(r, 1) = dateText
Next
Dim headers()
headers = Array("Date", "Time", "Currency", "Event", "Actual", "Forecast", "Previous")
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
阅读:
更传统的方法可能如下所示(尽管需要重构以减少嵌套级别):
Option Explicit
Public Sub WriteTable()
'tools > references > Microsoft HTML Object Library
Dim html As MSHTML.HTMLDocument, xhr As Object
Set xhr = CreateObject("MSXML2.XMLHTTP")
Set html = New MSHTML.HTMLDocument
With xhr
.Open "GET", "https://www.forexfactory.com/", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End With
Dim dateText As String
dateText = html.querySelector(".flexDatePicker").Value
Dim table As MSHTML.HTMLTable, r As Long, c As Long, results() As Variant, n As Long
Set table = html.querySelector(".calendar__table")
ReDim results(1 To table.Rows.Length - 4, 1 To 7) 'calc size of results array based on 6 columns + 1 for date (previously calculated)
results(1, 1) = dateText
Dim i As Long
For r = 4 To table.Rows.Length - 1
n = 2
If table.Rows(r).Children.Length = 10 Then
If table.Rows(r).Children(4).innerText <> vbNullString Then
i = i + 1
For c = 1 To table.Rows(r).Children.Length - 1
Select Case c
Case 1, 2, 4, 6, 7, 8
results(i, n) = table.Rows(r).Children(c).innerText
n = n + 1
End Select
Next
End If
End If
Next
results = Application.Transpose(results)
ReDim Preserve results(1 To 7, 1 To i)
results = Application.Transpose(results)
'array fill down time and populate blank date rows
For r = UBound(results, 1) To LBound(results, 1) + 1 Step -1
results(r, 2) = IIf(results(r, 2) = vbNullString, results(r - 1, 2), results(r, 2))
results(r, 1) = dateText
Next
Dim headers() As Variant
headers = Array("Date", "Time", "Currency", "Event", "Actual", "Forecast", "Previous")
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub