使用VBA在嵌套表和Divs中获取值

时间:2018-11-01 21:46:00

标签: html excel vba excel-vba web-scraping

我尝试过多个已发布的解决方案,但仍未能让代码正常工作。 我想把网站上的值复制/粘贴到Excel工作表中。 下面是部分HTML:我要获取的值始终位于嵌套结构的最底层。

<DIV style="HEIGHT:100%;WIDTH:100%" ID="oReportDiv">
    <DIV style="HEIGHT:100%;WIDTH:100%;" class="ap">
        <TABLE CELLSPACING="0" CELLPADDING="0">
            <TR>
                <TD ID="oReportCell">
                    <TABLE CELLSPACING="0" CELLPADDING="0">
                        <TR>
                            <TD class="a106xBc">
                                <DIV class="a106xB">
                                    <TABLE CELLSPACING="0" CELLPADDING="0" BORDER="0" COLS="9" LANG="en-US" class="r10">


                                        <TR VALIGN="top">

                                            <TD COLSPAN="3">
                                                <TABLE CELLSPACING="0" CELLPADDING="0" COLS="4" BORDER="0" style="border-collapse:collapse;" class="a103">


                                                    <TR VALIGN="top">
                                                        <TD style="HEIGHT:5.33mm;" class="a67c r14">&nbsp;</TD>
                                                        <TD style="" class="a71c">
                                                            <DIV style="word-wrap:break-word;text-decoration:none;" class="a71">Get This Value</DIV>
                                                        </TD>

<TR VALIGN="top">
                                                        <TD style="HEIGHT:5.33mm;" class="a67c r14">&nbsp;</TD>
                                                        <TD style="" class="a71c">
                                                            <DIV style="word-wrap:break-word;text-decoration:none;" class="a71">Second Value and so on</DIV>
                                                        </TD>

                                                    </TR>

下面是我尝试过的代码,运行时遇到的错误是: 对象不支持此属性或方法

Sub GrabLastNames()
' Opens the report page in Internet Explorer, prints the text of the
' elements matched by "td.a71c .a71" to the Immediate window, then tries to
' copy values into Sheet3 (columns A-D, one row per element) and saves the
' active workbook.
'
' NOTE: this is the asker's attempt. The For Each line below is kept as
' posted because it is the subject of the question: getElementsById is not
' a DOM method (only getElementById exists), which is the most likely source
' of the reported "Object doesn't support this property or method" error.

'dimension (set aside memory for) our variables
Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
Dim div
Dim nodeList As Object, i As Long

'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = True

'navigate to page with needed data (URL left blank in the post)
objIE.navigate ""
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

'we will output data to excel, starting on row 1
y = 1

'collect every element of class 'a71' found inside a 'td' of class 'a71c'
Set nodeList = objIE.document.querySelectorAll("td.a71c .a71")

'print the text of each matched element
For i = 0 To nodeList.Length - 1
    Debug.Print nodeList.Item(i).innerText
Next

'evaluate each element, one at a time, using 'ele' variable
'NOTE: getElementsById does not exist - this call fails at run time
For Each ele In objIE.document.getElementById("oReportDiv").getElementsById("a71")

    Debug.Print ele.textContent
    'each 'tr' (table row) element contains 4 children ('td') elements
    'put text of 1st 'td' in col A
    Sheets("Sheet3").Range("A" & y).Value = ele.Children(0).textContent
    'put text of 2nd 'td' in col B
    Sheets("Sheet3").Range("B" & y).Value = ele.Children(1).textContent
    'put text of 3rd 'td' in col C
    Sheets("Sheet3").Range("C" & y).Value = ele.Children(2).textContent
    'put text of 4th 'td' in col D
    Sheets("Sheet3").Range("D" & y).Value = ele.Children(3).textContent
    'increment row counter by 1
    y = y + 1
'repeat until last ele has been evaluated
Next

'save the Excel workbook
ActiveWorkbook.Save

End Sub

感谢您的反馈

1 个答案:

答案 0 :(得分:0)

尝试

' Select every element of class "a71" that sits inside a <td class="a71c">
' and print each one's text to the Immediate window.
Dim nodeList As Object
Dim i As Long

Set nodeList = objIE.document.querySelectorAll("td.a71c .a71")

With nodeList
    For i = 0 To .Length - 1
        Debug.Print .item(i).innerText
    Next i
End With

' Alternative selector: look for class "a71" elements anywhere beneath the
' element whose id is "oReportCell".
Set nodeList = objIE.document.querySelectorAll("#oReportCell .a71")

,然后执行与上述相同的循环。

另一个需要考虑的问题是:是否存在需要先处理的父级 iframe/frame。


完整代码如下:

Option Explicit

Public Sub GrabLastNames()
    ' Loads the report page in Internet Explorer, waits (up to MAX_WAIT_SEC
    ' seconds) for elements matching "#oReportCell .a71" to appear, and writes
    ' each element's innerText into row 1 of Sheet3, one value per column
    ' starting at column A. The browser is closed afterwards.
    '
    ' Requires a reference to "Microsoft Internet Controls" for the
    ' early-bound InternetExplorer type.
    Dim objIE As InternetExplorer, nodeList As Object, i As Long
    Dim t As Single, elapsed As Single, matchCount As Long
    Const MAX_WAIT_SEC As Long = 5
    Const SECONDS_PER_DAY As Long = 86400
    Set objIE = New InternetExplorer

    With objIE
        .Visible = True
        .navigate "https://jetblue.rosterapps.com/GenerateReport.aspx?command=6Rqw0HG%2FJQvTea2EXxyC%2BSDRGUzjdE2GHNUaN5rcKVPdKRpHBhRkfYa3c%2FnPLk58WnTYk6kBq1OZHQYsWqgM8g%3D%3D&action=render"

        Do While .Busy = True Or .readyState <> 4: DoEvents: Loop

        ' Timer returns seconds since midnight as a Single.
        t = Timer
        Do
            DoEvents
            matchCount = 0
            ' The document may not be queryable yet; ignore errors and retry.
            On Error Resume Next
            Set nodeList = .document.querySelectorAll("#oReportCell .a71")
            If Not nodeList Is Nothing Then matchCount = nodeList.Length
            On Error GoTo 0

            ' querySelectorAll returns an empty list (not Nothing) when
            ' nothing matches, so wait for at least one match rather than
            ' for a non-Nothing result.
            If matchCount > 0 Then Exit Do

            elapsed = Timer - t
            ' Timer resets to 0 at midnight; compensate so the timeout holds.
            If elapsed < 0 Then elapsed = elapsed + SECONDS_PER_DAY
            If elapsed > MAX_WAIT_SEC Then Exit Do
        Loop

        If matchCount > 0 Then
            With ThisWorkbook.Worksheets("Sheet3")
                For i = 0 To matchCount - 1
                    .Cells(1, i + 1).Value = nodeList.item(i).innerText
                Next i
            End With
        End If
        .Quit
    End With
End Sub