使用 VBA 从网站 Screener.in 中抓取表格

时间:2021-07-27 05:36:31

标签: html excel vba internet-explorer web-scraping

我正在使用下面的代码抓取表格,但我注意到表格中有些行带有 + 按钮,这些行的明细数据只有在单击 + 按钮之后才会显示,因此没有被写入 Excel。

请帮我获取表格的全部数据,包括单击 + 按钮后才展开的那些行。

' Copies the second <table> of the Screener.in COFORGE consolidated page into
' worksheet "sheet1" of this workbook.
'
' Layout written to the sheet:
'   * every <th> of the table goes to row 1, left to right;
'   * the <td> cells of the n-th <tr> go to worksheet row n, left to right
'     (a <tr> that contains no <td> cells writes nothing).
'
' This routine does not click anything on the page, so rows that the page only
' shows after a "+" button is pressed are not captured.
'
' Requires a project reference to the Microsoft HTML Object Library
' (HTMLTable, HTMLTableRow, HTMLTableCell).
' Any run-time error is re-raised to the caller after the browser is closed.
Sub get_table()

    Const PAGE_URL As String = "https://www.screener.in/company/COFORGE/consolidated/"
    Const HEADER_ROW As Long = 1
    Const READYSTATE_COMPLETE As Long = 4

    Dim ie As Object
    Dim tbl As HTMLTable
    Dim tr As HTMLTableRow
    Dim td As HTMLTableCell
    Dim th As HTMLTableCell          ' <th> elements are table cells, not rows
    Dim mySH As Worksheet

    ' Long instead of Integer: Integer overflows at 32767.
    Dim trcounter As Long
    Dim tdcounter As Long
    Dim thcounter As Long

    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo CleanUp

    Set ie = CreateObject("internetexplorer.application")
    ie.Visible = True
    ie.navigate PAGE_URL

    ' Wait until the browser reports the document as fully loaded.
    Do While ie.Busy = True: DoEvents: Loop
    Do Until ie.readyState = READYSTATE_COMPLETE: DoEvents: Loop

    ' Index 1 = second table in the document (the collection is zero-based).
    Set tbl = ie.document.getElementsByTagName("table")(1)
    If tbl Is Nothing Then
        Err.Raise vbObjectError + 513, "get_table", _
                  "The page does not contain a second <table> element."
    End If

    Set mySH = ThisWorkbook.Sheets("sheet1")

    ' Header cells -> row 1
    thcounter = 1
    For Each th In tbl.getElementsByTagName("th")
        mySH.Cells(HEADER_ROW, thcounter).Value = th.innerText
        thcounter = thcounter + 1
    Next th

    ' Data cells -> one worksheet row per <tr>
    trcounter = 1
    For Each tr In tbl.getElementsByTagName("tr")
        tdcounter = 1
        For Each td In tr.getElementsByTagName("td")
            mySH.Cells(trcounter, tdcounter).Value = td.innerText
            tdcounter = tdcounter + 1
        Next td
        trcounter = trcounter + 1
    Next tr

CleanUp:
    ' Reached on success (Err.Number = 0) and on failure alike, so the browser
    ' process is always released. Error details are saved before any further call.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description

    If Not ie Is Nothing Then
        ie.Quit
        Set ie = Nothing
    End If

    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription

End Sub

1 个答案:

答案 0 :(得分:0)

您可以先使用 InStr 函数检查每个 <td> 是否包含 + 按钮,如果包含,则单击该按钮。单击完表格中所有的 + 按钮之后,再读取数据并写入工作表。

工作示例如下:

' Copies the second <table> of the Screener.in COFORGE consolidated page into
' worksheet "sheet1" of this workbook, after first clicking the button found in
' any table cell so that rows hidden behind the page's "+" buttons are included.
'
' Steps:
'   1. Open the page in Internet Explorer and wait for it to finish loading.
'   2. Write every <th> of the table to row 1, left to right.
'   3. Collect the first <button> of every <td> that contains one, then click them.
'   4. Wait for the table's row count to stop changing.
'   5. Write the <td> cells of the n-th <tr> to worksheet row n.
'
' Requires a project reference to the Microsoft HTML Object Library
' (HTMLTable, HTMLTableRow, HTMLTableCell).
' Any run-time error is re-raised to the caller after the browser is closed.
Sub LOADIE()

    Const PAGE_URL As String = "https://www.screener.in/company/COFORGE/consolidated/"
    Const HEADER_ROW As Long = 1
    Const READYSTATE_COMPLETE As Long = 4
    Const QUIET_SECONDS As Long = 2       ' row count must be unchanged this long
    Const TIMEOUT_SECONDS As Long = 15    ' upper bound on the total wait

    Dim ieA As Object
    Dim doc As Object
    Dim tbl As HTMLTable
    Dim tr As HTMLTableRow
    Dim td As HTMLTableCell
    Dim th As HTMLTableCell          ' <th> elements are table cells, not rows
    Dim mySH As Worksheet

    Dim expanders As Collection
    Dim firstButton As Object
    Dim btn As Variant

    Dim trcounter As Long
    Dim tdcounter As Long
    Dim thcounter As Long

    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    On Error GoTo CleanUp

    Set ieA = CreateObject("InternetExplorer.Application")
    ieA.Visible = True
    ieA.navigate PAGE_URL
    Do While ieA.Busy Or ieA.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Set doc = ieA.document
    ' Index 1 = second table in the document (the collection is zero-based).
    Set tbl = doc.getElementsByTagName("table")(1)
    If tbl Is Nothing Then
        Err.Raise vbObjectError + 513, "LOADIE", _
                  "The page does not contain a second <table> element."
    End If

    Set mySH = ThisWorkbook.Sheets("sheet1")

    ' Header cells -> row 1
    thcounter = 1
    For Each th In tbl.getElementsByTagName("th")
        mySH.Cells(HEADER_ROW, thcounter).Value = th.innerText
        thcounter = thcounter + 1
    Next th

    ' Snapshot the buttons BEFORE clicking any of them: clicking changes the
    ' table, and the collections returned by getElementsByTagName are live.
    Set expanders = New Collection
    For Each td In tbl.getElementsByTagName("td")
        ' vbTextCompare: the tag name's letter case in innerHTML is not relied on.
        If InStr(1, td.innerHTML, "<button", vbTextCompare) > 0 Then
            Set firstButton = td.getElementsByTagName("button")(0)
            If Not firstButton Is Nothing Then expanders.Add firstButton
        End If
    Next td

    For Each btn In expanders
        btn.Click
    Next btn

    ' NOTE(review): assumes the extra rows are inserted into this same table
    ' some time after the click - confirm against the live page.
    If expanders.Count > 0 Then
        WaitForRowCountToSettle tbl, QUIET_SECONDS, TIMEOUT_SECONDS
    End If

    ' Data cells -> one worksheet row per <tr>
    trcounter = 1
    For Each tr In tbl.getElementsByTagName("tr")
        tdcounter = 1
        For Each td In tr.getElementsByTagName("td")
            mySH.Cells(trcounter, tdcounter).Value = td.innerText
            tdcounter = tdcounter + 1
        Next td
        trcounter = trcounter + 1
    Next tr

CleanUp:
    ' Reached on success (Err.Number = 0) and on failure alike, so the browser
    ' process is always released. Error details are saved before any further call.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description

    If Not ieA Is Nothing Then
        ieA.Quit
        Set ieA = Nothing
    End If

    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription
End Sub

' Returns once the number of rows in tbl has stayed the same for quietSeconds,
' or after timeoutSeconds at the latest. Keeps Excel responsive via DoEvents.
Private Sub WaitForRowCountToSettle(ByVal tbl As Object, _
                                    ByVal quietSeconds As Long, _
                                    ByVal timeoutSeconds As Long)
    Dim deadline As Date
    Dim quietUntil As Date
    Dim lastCount As Long
    Dim currentCount As Long

    deadline = DateAdd("s", timeoutSeconds, Now)
    quietUntil = DateAdd("s", quietSeconds, Now)
    lastCount = tbl.Rows.Length

    Do While Now < deadline
        DoEvents
        currentCount = tbl.Rows.Length
        If currentCount <> lastCount Then
            ' Rows are still arriving: restart the quiet period.
            lastCount = currentCount
            quietUntil = DateAdd("s", quietSeconds, Now)
        ElseIf Now >= quietUntil Then
            Exit Do
        End If
    Loop
End Sub