从IE中解析表格并导入Excel

时间:2018-10-04 01:22:22

标签: excel vba excel-vba web-scraping

Sub Web_Table_Option_Two()
    ' Opens the finviz screener page in Internet Explorer and writes the cell text
    ' of every <table> element found in the page body to "Sheet1" of this workbook.
    ' Tables are written one below the other, separated by one blank row.
    '
    ' Requires references to "Microsoft Internet Controls" and
    ' "Microsoft HTML Object Library".
    '
    ' Any runtime error is re-raised to the caller, but only after the Internet
    ' Explorer instance has been closed so that no orphaned browser process remains.
    Dim HTMLDoc As New HTMLDocument
    Dim objTables As Object     ' collection of all <table> elements
    Dim objTable As Object      ' table currently being written
    Dim objRow As Object        ' row currently being written
    Dim ws As Worksheet
    Dim lngTable As Long
    Dim lngRow As Long
    Dim lngCol As Long
    Dim ActRw As Long           ' sheet-row offset at which the current table starts
    Dim objIE As InternetExplorer
    Dim lngErr As Long, strErrSrc As String, strErrMsg As String

    ' Resolve the target sheet first: if it is missing we fail before a browser is started.
    Set ws = ThisWorkbook.Sheets("Sheet1")

    Set objIE = New InternetExplorer
    On Error GoTo Fail

    objIE.navigate "https://finviz.com/screener.ashx?v=152"

    Do Until objIE.readyState = 4 And Not objIE.Busy
        DoEvents
    Loop
    Application.Wait (Now + TimeValue("0:00:03")) 'wait for java script to load

    ' Work on a detached copy of the rendered markup.
    HTMLDoc.body.innerHTML = objIE.document.body.innerHTML

    Set objTables = HTMLDoc.body.getElementsByTagName("table")
    For lngTable = 0 To objTables.Length - 1
        Set objTable = objTables(lngTable)
        For lngRow = 0 To objTable.Rows.Length - 1
            Set objRow = objTable.Rows(lngRow)
            For lngCol = 0 To objRow.Cells.Length - 1
                ws.Cells(ActRw + lngRow + 1, lngCol + 1).Value = objRow.Cells(lngCol).innerText
            Next lngCol
        Next lngRow
        ' Advance past this table plus one blank separator row.
        ActRw = ActRw + objTable.Rows.Length + 1
    Next lngTable

Done:
    ' Always close the browser, on success and on failure alike.
    On Error Resume Next
    objIE.Quit
    On Error GoTo 0
    If lngErr <> 0 Then Err.Raise lngErr, strErrSrc, strErrMsg
    Exit Sub

Fail:
    ' Remember the error, leave the active handler, then clean up and re-raise.
    lngErr = Err.Number: strErrSrc = Err.Source: strErrMsg = Err.Description
    Resume Done
End Sub

`

我试图用上面的代码从网站上抓取Stock Screener数据,但该表格在HTML代码中没有任何标识,因此我不确定应该如何只获取这一部分信息。目前,这段代码会把页面上所有表格的内容全部抓取下来。

1 个答案:

答案 0 :(得分:2)

如果只需要底部表格的信息,可以使用下面的代码:获取tbody标签的集合,并按索引取出其中所需的那一个,从而避免按table标签选择时附带的所有多余内容。

我建议使用XMLHTTP请求,因为它更快。注意,两种方法所需的tbody索引并不相同。


XMLHTTP请求:

Option Explicit
Public Sub GetTable()
    ' Downloads the finviz screener page with a synchronous XMLHTTP GET request,
    ' loads the response into an in-memory HTMLDocument and passes the tbody
    ' element at index 9 to WriteTable, targeting "Sheet1" of this workbook.
    '
    ' Requires a reference to "Microsoft HTML Object Library".
    '
    ' Raises:
    '   vbObjectError + 513  if the server does not answer with a 2xx status.
    '   vbObjectError + 514  if the page contains no tbody element at index 9.
    Const TBODY_INDEX As Long = 9
    Dim sResponse As String, html As HTMLDocument, hTable As Object

    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://finviz.com/screener.ashx?v=152", False
        ' Presumably sent to discourage a cached copy being served - confirm if caching matters.
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .send
        ' Fail loudly on an error page instead of parsing it as if it were the screener.
        If .Status < 200 Or .Status >= 300 Then
            Err.Raise vbObjectError + 513, "GetTable", _
                      "Request failed with HTTP status " & .Status & " " & .statusText
        End If
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    html.body.innerHTML = sResponse
    Set hTable = html.getElementsByTagName("tbody")(TBODY_INDEX)
    ' The index is tied to the current page layout; report a layout change explicitly
    ' rather than letting WriteTable fail on an unset object.
    If hTable Is Nothing Then
        Err.Raise vbObjectError + 514, "GetTable", _
                  "No tbody element found at index " & TBODY_INDEX & "; the page layout may have changed."
    End If

    WriteTable hTable, 1, ThisWorkbook.Worksheets("Sheet1")
End Sub

Public Sub WriteTable(ByVal hTable As Object, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
    ' Writes the innerText of every <td> found under each <tr> of hTable to a worksheet.
    '
    ' Parameters:
    '   hTable   - HTML element whose descendant <tr> elements are written, one sheet row each.
    '   startRow - row offset; the first <tr> is written to row startRow + 1.
    '              NOTE(review): row startRow itself is never written - confirm whether it is
    '              intentionally left free (e.g. for a header line).
    '   ws       - target worksheet; defaults to the active sheet when omitted.
    '
    ' Each row's cells are written left to right starting at column 1.
    Dim rowNodes As Object, cellNodes As Object
    Dim rowNode As Object, cellNode As Object
    Dim outRow As Long, outCol As Long

    If ws Is Nothing Then Set ws = ActiveSheet

    outRow = startRow
    Set rowNodes = hTable.getElementsByTagName("tr")
    For Each rowNode In rowNodes
        outRow = outRow + 1
        outCol = 0
        Set cellNodes = rowNode.getElementsByTagName("td")
        For Each cellNode In cellNodes
            outCol = outCol + 1
            ws.Cells(outRow, outCol).Value = cellNode.innerText
        Next cellNode
    Next rowNode
End Sub

Internet Explorer(使用上面的WriteTable子过程):

Option Explicit
Public Sub GetInfo()
    ' Opens the finviz screener page in a visible Internet Explorer window and passes
    ' the tbody element at index 13 of the loaded document to WriteTable, targeting
    ' "Sheet1" of this workbook.
    '
    ' Requires references to "Microsoft Internet Controls" and
    ' "Microsoft HTML Object Library".
    '
    ' Raises:
    '   vbObjectError + 514  if the page contains no tbody element at index 13.
    ' Any runtime error is re-raised to the caller, but only after the browser has
    ' been closed so that no orphaned Internet Explorer process remains.
    Const TBODY_INDEX As Long = 13
    Dim IE As InternetExplorer, hTable As Object
    Dim errNum As Long, errSrc As String, errMsg As String

    Set IE = New InternetExplorer
    On Error GoTo Fail

    IE.Visible = True
    IE.navigate "https://finviz.com/screener.ashx?v=152"

    ' Yield to the message loop until the page has finished loading.
    While IE.Busy Or IE.readyState < 4: DoEvents: Wend

    Set hTable = IE.document.getElementsByTagName("tbody")(TBODY_INDEX)
    ' The index is tied to the current page layout; report a layout change explicitly
    ' rather than letting WriteTable fail on an unset object.
    If hTable Is Nothing Then
        Err.Raise vbObjectError + 514, "GetInfo", _
                  "No tbody element found at index " & TBODY_INDEX & "; the page layout may have changed."
    End If

    WriteTable hTable, 1, ThisWorkbook.Worksheets("Sheet1")

Done:
    ' Always close the browser, on success and on failure alike.
    On Error Resume Next
    IE.Quit
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, errSrc, errMsg
    Exit Sub

Fail:
    ' Remember the error, leave the active handler, then clean up and re-raise.
    errNum = Err.Number: errSrc = Err.Source: errMsg = Err.Description
    Resume Done
End Sub

输出:

enter image description here


引用(VBE > 工具 > 引用):

  1. Microsoft Internet Controls
  2. Microsoft HTML Object Library