pastebin.com/p9m5tMMw
您好,我正在尝试从网站上抓取数据。我已经通过粘贴容器附加了代码。但是,例如,我需要的文本“ NET SALES”在tr,td,div下。我很难提取这些数据。如果是NET SALES,我想在第4个td内获取div的值。我已经附加了到目前为止的代码。生成的报告也是动态的,因此很难为其确定正确的ID。可以使用表ID“ ctl00_cpMain_rptMain_fixedTable”,但我无法获得代码来浏览所有这些元素,以到达包含tr和所需元素的tbody。谢谢
Sub GetData()
Dim IE As Object
Dim ListOfRows As Object
Dim CellsInsideRow As Object
Dim DivsInsideCell As Object
Dim StrInsideDiv As Object
Dim tRows As Object
Dim tCells As Object
Dim tDivs As Object
Dim Content As Object
Dim ContentArea As Object
Dim ThingInDiv As Object
Dim tThings
Dim t As Integer
Dim s As String
Dim Tbody As Object
Dim ttDivs As HTMLDivElement
Dim DivsInsideOfDivs As Object
Set IE = CreateObject("InternetExplorer.Application")
'Dim IE As SHDocVw.InternetExplorer'
Dim HtmlDoc As MSHTML.HTMLDocument
Dim htmlinput As MSHTML.IHTMLElement
Set IE = New SHDocVw.InternetExplorer
' Open Home Page'
With IE
.Visible = True
.navigate "https://ballout.sage-nexgen.com/report/SDateXParam.aspx?r=204"
Do While IE.readyState <> READYSTATE_COMPLETE
Loop
End With
' Identify Source Code'
Set HtmlDoc = IE.document
'Mention this bug'
' Input Username and Password'
'On Error Resume Next
'IE.document.forms("aspnetForm").elements("ctl00$cpMain$logMain$UserName").Value = "**********"
'IE.document.forms("aspnetForm").elements("ctl00$cpMain$logMain$Password").Value = "******"
'IE.document.forms("aspnetForm").elements("ctl00$cpMain$logMain$LoginButton").Click
Do While IE.Busy
DoEvents
Loop
IE.document.forms("aspnetForm").elements("ctl00$cpMain$clbStores$0").Click
Do While IE.Busy
DoEvents
Loop
IE.document.forms("aspnetForm").elements("ctl00$cpMain$StartDate").Value = Range("A1").Text '
Do While IE.Busy
DoEvents
Loop
IE.document.getElementById("ctl00_cpMain_cmdRun2").Click
Do While IE.Busy
DoEvents
Loop
On Error Resume Next
Set ListOfRows = IE.document.getElementsByTagName("tr")
Debug.Print ListOfRows.Length
For Each tRows In ListOfRows
Set CellsInsideRow = tRows.getElementsByTagName("td")
For Each tCells In CellsInsideRow
Set DivsInsideCell = tCells.getElementsByTagName("div")
For Each tDivs In DivsInsideCell
Set ttDivs = tDivs
'If Trim(ttDivs.innerText) = "AVERAGE NET SALE" Then
'Debug.Print Trim(ttDivs.innerText)
'End If
Next tDivs
Next tCells
Next tRows
'Set ListOfRows = Content.getElementsByTagName("tr")
'MsgBox (Len(ListOfRows))
'For Each tRows In ListOfRows
' Set CellsInsideRow = tRows.getElementsByTagName("td")
' For Each tCells In CellsInsideRow
'
' Set DivsInsideCell = tCells.getElementsByTagName("div")
'Next tCells
'Next tRows
End Sub
答案 0 :(得分:0)
HTML有点混乱。在目标HTMLTableCell内是另一个表,并且该表内的许多div是cels。我只是通过换行符分割目标cells.InnerText
并处理分割后的数据。比尝试解密此HTML要容易得多。
Sub ProcessData(IE As InternetExplorerMedium)
Const AverageNetSalesText As String = "AVERAGE NET SALE"
Dim doc As HTMLDocument, tbl As HTMLTable, ListRows As Object, tr As HTMLTableRow, td As HTMLTableCell
Dim data As Variant
Set doc = IE.Document
Set tbl = doc.getElementById("ctl00_cpMain_rptMain_fixedTable")
Set ListRows = tbl.getElementsByTagName("TR")
For Each tr In ListRows
If tr.Cells.Length >= 2 Then
Set td = tr.Cells(2)
If Not td Is Nothing Then
If InStr(td.innerText, "AVERAGE") > 0 Then
data = Split(td.innerText, vbNewLine)
Dim AverageNetSales As Single
If UBound(data) >= 15 Then
If InStr(data(15), AverageNetSalesText) > 0 Then
data(15) = Trim(data(15))
AverageNetSales = Right(data(15), Len(data(15)) - Len(AverageNetSalesText))
'Do something
Range("A17") = AverageNetSales
Exit Sub
End If
End If
End If
End If
End If
Next
End Sub
答案 1 :(得分:0)
您有嵌套表,但是在html中有一个ID为content
的表,其中包含所有信息。您可以收集该表中的行(tr
)并循环查找感兴趣的字符串。如果在行上使用.innerText
,则将获得财务金额以及文字说明。否则,您可以从行内的td
元素中提取子信息。类似于以下内容:
Dim html As HTMLDocument, tableRows As Object, i As Long, rowOfInterest As Object
Set html = ie.document
Dim a As Object
Set tableRows = html.querySelectorAll("#content tr")
For i = 0 To tableRows.Length - 1
If InStr(tableRows.Item(i).innerText, "NET SALES") > 0 Then
Set rowOfInterest = tableRows.Item(i)
Exit For
End If
Next
If Not rowOfInterest Is Nothing Then
Debug.Print tableRows.Item(i).innerText
Debug.Print tableRows.Item(i).getElementsByTagName("td")(1).innerText
End If
行信息示例:
通过javascript填充值,因此您需要留出足够的时间进行填充。