在我提出问题之前,我是一名业余编码员,除了VBA在ms办公室应用程序之外基本没有任何有意义的经验(我知道 - noob!)
我正在尝试创建一个使用VBA将数据导入excel的网络抓取工具,根据我在以下代码摘录中的评论,我能够找到的最好的就是获胜的答案{ {3}}
下面,我使用invest.com作为一个例子,但实际上我的项目将跨越多个站点,并将提供给每天更新的矩阵,并在事件到期时自我蚕食 - 因此我宁愿在代码端预先填充工作负载,以尽可能少地为我提供输入(对我而言)。
考虑到这一点,我可以问一下是否有办法做以下任何事情(支撑自己,这将是一些令人畏惧的基本知识):
有没有办法可以导航到网址并在该网页上的每个表格上运行for each
循环(没有任何已知的ID)?这是为了加快我的代码速度,以尽量减少我的输入,因为会有相当多的数据要更新,我计划在刷新时放置一个2分钟的循环触发器。
不是做我在下面做过的事情,而是可以引用一个表而不是一行,然后按照单元格(2,5)的方式做一些.value来返回值第1行第4列? (假设两个维度中的数组索引都从0开始?)除此之外,我的第一列(在某些方面我的主键)在所有源上的顺序可能不同,所以有一种方法我可以做相当于Columns("A:A").Find(What:=[Primary key], After:=Cells(1, 1), LookIn:=xlValues, LookAt:=xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=True, SearchFormat:=False).Row
找到表中哪一行与我正在寻找的相关?
代码:
Sub Scraper()
Dim appIE, allRowOfData As Object
' As per https://stackoverflow.com/questions/27066963/scraping-data-from-website-using-vba
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Navigate "http://uk.investing.com/rates-bonds/financial-futures" 'Sample page
.Visible = False
End With
Do While appIE.Busy
Application.Wait (Now + TimeValue("0:00:01")) 'If page not open, wait a second befor trying again
Loop
Set allRowOfData = appIE.document.getElementById("pair_8907")
'tr id="[ID of row within table]"
Dim myValue As String: myValue = allRowOfData.Cells(8).innerHTML
'The 8 is the column number of the table
'(note: column numbers start at 0 so the 9th column should have "8" entered here
Set appIE = Nothing
Range("A1").Value = myValue
End Sub
答案 0 :(得分:2)
如果要使用Excel函数导航表,为什么不先将表转储到工作表上,这段代码适用于我
Option Explicit
Sub Scraper()
Dim appIE As Object
' As per http://stackoverflow.com/questions/27066963/scraping-data-from-website-using-vba
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Navigate "http://uk.investing.com/rates-bonds/financial-futures" 'Sample page
.Visible = True
End With
Do While appIE.Busy
DoEvents
Application.Wait (Now + TimeValue("0:00:01")) 'If page not open, wait a second befor trying again
Loop
'Debug.Print TypeName(appIE.document)
Dim doc As Object 'MSHTML.HTMLDocument
Set doc = appIE.document
'* appIE busy is good but you need to wait for the whole document to completely load and initialise so use this
While doc.readyState <> "complete"
DoEvents
Wend
'* we can select all the tables because they share the same CSS class name
Dim tablesSelectedByClass As Object 'MSHTML.HTMLElementCollection
Set tablesSelectedByClass = doc.getElementsByClassName("genTbl")
'* you can change this, it was just convenient for me to add sheets to my workbook
Dim shNewResults As Excel.Worksheet
Set shNewResults = ThisWorkbook.Worksheets.Add
Dim lRowCursor As Long '* this controls pasting down the sheet
lRowCursor = 1
Dim lTableIndexLoop As Long
For lTableIndexLoop = 0 To tablesSelectedByClass.Length - 1
Dim tableLoop As Object 'MSHTML.HTMLTable
Set tableLoop = tablesSelectedByClass.Item(lTableIndexLoop)
If LenB(tableLoop.ID) > 0 Then '* there are some extra nonsense tables, this subselects
Dim sParentColumn As String, objParentColumn As Object ' MSHTML.HTMLSemanticElement
Set objParentColumn = FindMyColumn(tableLoop, sParentColumn) '* need to understand is table on left hand or right hand side
Dim vHeader As Variant: vHeader = Empty
If sParentColumn = "leftColumn" Then
'* tables on the left have a preceding H3 element with the table's description
Dim objH3Headers As Object
Set objH3Headers = objParentColumn.getElementsByTagName("H3")
vHeader = objH3Headers.Item(lTableIndexLoop).innerText
Else
'* tables on the right have a hidden attribute we can use
vHeader = tableLoop.Attributes.Item("data-gae").Value
If Len(vHeader) > 3 Then
vHeader = Mid$(vHeader, 4)
Mid$(vHeader, 1, 1) = Chr(Asc(Mid$(vHeader, 1, 1)) - 32)
End If
End If
'* tables on the right do not have column headers
Dim bHasColumnHeaders As Boolean
bHasColumnHeaders = (tableLoop.ChildNodes.Length = 2)
Dim vTableCells() As Variant '* this will be our table data container which we will paste in one go
Dim lRowCount As Long: lRowCount = 0
Dim lColumnCount As Long: lColumnCount = 0
Dim lDataHeadersSectionIdx As Long: lDataHeadersSectionIdx = 0
Dim objColumnHeaders As Object: Set objColumnHeaders = Nothing
If bHasColumnHeaders Then
Set objColumnHeaders = tableLoop.ChildNodes.Item(0).ChildNodes.Item(0)
lRowCount = lRowCount + 1
lDataHeadersSectionIdx = 1
Else
lDataHeadersSectionIdx = 0
End If
Dim objDataRows As Object 'MSHTML.HTMLElementCollection
Set objDataRows = tableLoop.ChildNodes.Item(lDataHeadersSectionIdx).ChildNodes
lColumnCount = objDataRows.Item(0).ChildNodes.Length
lRowCount = lRowCount + objDataRows.Length
ReDim vTableCells(1 To lRowCount, 1 To lColumnCount) As Variant
'* we have them get the column headers
Dim lColLoop As Long
If bHasColumnHeaders Then
For lColLoop = 1 To lColumnCount
vTableCells(1, lColLoop) = objColumnHeaders.ChildNodes.Item(lColLoop - 1).innerText
Next
End If
'* get the data cells
Dim lRowLoop As Long
For lRowLoop = 1 To lRowCount - VBA.IIf(bHasColumnHeaders, 1, 0)
For lColLoop = 1 To lColumnCount
vTableCells(lRowLoop + VBA.IIf(bHasColumnHeaders, 1, 0), lColLoop) = objDataRows.Item(lRowLoop - 1).ChildNodes.Item(lColLoop - 1).innerText
Next
Next
'* paste our table description
shNewResults.Cells(lRowCursor, 1).Value2 = vHeader
lRowCursor = lRowCursor + 1
'* paste our table data
shNewResults.Cells(lRowCursor, 1).Resize(lRowCount, lColumnCount).Value2 = vTableCells
lRowCursor = lRowCursor + lRowCount + 1
End If
Next
End Sub
Function FindMyColumn(ByVal node As Object, ByRef psColumn As String) As Object
'* this code ascends the DOM looking for "column" in the id of each node
While InStr(1, node.ID, "column", vbTextCompare) = 0 And Not node.ParentNode Is Nothing
DoEvents
Set node = node.ParentNode
Wend
If InStr(1, node.ID, "column", vbTextCompare) > 0 Then
Set FindMyColumn = node
psColumn = CStr(node.ID)
End If
End Function
顺便说一句,如果你交易很多经纪人致富而你变穷,经纪费用从长远来看确实会产生影响。