我有一些代码可用于从网页上抓取数据,但是网页已更改,无法再使用。该代码应该在内部事务表上进行计算,但是getelementsbytagname("td")
不再返回所有单元格。
我猜是因为它是嵌入页面或其他内容中的页面,但是我无法终生解决它,因为我对html不太熟悉。示例网页为gurufocus.com/stock/lmb/insider
。
我的代码如下:
Sub getStatements()
Dim wb As Object
Dim doc As Object
Dim incomeStmtURLs As Variant
Dim sURL As String
Dim allCells As IHTMLElementCollection
Dim aCell As HTMLTableCell
Dim i As Integer
Dim loginBoxData As String
Application.DisplayAlerts = False
Call ToggleEvents(False)
incomeStmtURLs = Range("Sheet1!h1:h2").Value
For i = 1 To UBound(incomeStmtURLs)
Set wb = CreateObject("internetExplorer.Application")
sURL = incomeStmtURLs(i, 1)
wb.navigate sURL
wb.Visible = False
While wb.Busy
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
'HTML document
Set doc = wb.document
On Error GoTo err_clear
' gets all cell and looks for date format,
' goes from new transaction to old so once gets to older than a year it exits for loop
' checks nextSibling from date is a buy and if so does calculations, by taking further value sin row
' for priceThisTime have to get rid of $ symbol for calculation
Set allCells = doc.getElementsByTagName("td")
For Each aCell In allCells
MsgBox (aCell.innerText)
If aCell.innerText Like "####-##-##" = True Then
If CDate(aCell.innerText) >= Date - 365 Then
If aCell.NextSibling.innerText = "Buy" Then
buys = buys + 1
sharesThisTime = CDec(aCell.NextSibling.NextSibling.innerText)
priceThisTime = aCell.NextSibling.NextSibling.NextSibling.NextSibling.innerText
totalPrice = totalPrice + (sharesThisTime * CDec(Right(priceThisTime, Len(priceThisTime) - 1)))
shareCount = shareCount + sharesThisTime
End If
Else
Exit For
End If
End If
Next aCell
Sheet6.Cells(i + 1, 2) = buys
If (shareCount <> 0) Then
Sheet6.Cells(i + 1, 3).Value = totalPrice / shareCount
End If
buys = 0
totalPrice = 0
shareCount = 0
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
Next i
Call ToggleEvents(True)
End Sub
答案 0 :(得分:0)
以下内容专门针对该表并检索所有td元素。我认为您的逻辑可能可以以列号为基础,但无论如何都可以应用(以防万一我也将表设置为变量)。
我将每页的结果设置为100,但是您可以将该行注释掉
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
Dim ie As New InternetExplorer, lastDropDrownItemIndex As Long, dropDown As Object, t As Date
Const MAX_WAIT_SEC As Long = 10
With ie
.Visible = True
.Navigate2 "https://www.gurufocus.com/stock/lmb/insider"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
Set dropDown = .document.querySelectorAll(".el-dropdown-menu__item")
lastDropDrownItemIndex = dropDown.Length - 1
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While lastDropDrownItemIndex < 1
If dropDown.Length = 0 Then Exit Sub
dropDown.item(lastDropDrownItemIndex).Click 'comment me out if don't want 100 results per page
Dim tds As Object, table As Object
Set tds = .document.getElementsByClassName("data-table")(0).getElementsByTagName("td")
Set table = .document.getElementsByClassName("data-table")
Stop
.Quit
End With
End Sub