从Web获取数据

时间:2016-06-23 07:19:29

标签: excel vba web-scraping

我想通过excel的链接从网页上获取表格:

http://www.soccerstats.com/timing.asp?league=spain

问题是excel只获取“Total”选项卡上的数据,而我需要两个单独的表“HOME”和“AWAY”

tabs

如何使用VBA获取数据?

1 个答案:

答案 0 :(得分:0)

这很有趣。表Overall, Home, Away实际上都在iframe内。

我已经使用过Selenium basic VBA,但是您可以轻松地做到这一点。我找到iframe并切换到它。对pageSource的检查显示,实际上所有表都可以通过标签table从此处访问,并且感兴趣的表位于索引1-3。

在安装selenium basic之后,您需要添加对硒类型库的引用。

全部写出来的代码:

Option Explicit
Public Sub GetTables()
    Dim d As WebDriver, i As Long, doc As New HTMLDocument, hTables As Object
    Set d = New ChromeDriver
    Const URL = "http://www.soccerstats.com/timing.asp?league=spain"
    Application.ScreenUpdating = False
    With d
        .Start "Chrome"
        .get URL
        On Error Resume Next
        .FindElementByCss("button[onclick*=setCookielocal]").Click
        On Error GoTo 0

        .SwitchToFrame "pmatch"                  '<== Switch to iFrame
        doc.body.innerHTML = .PageSource
        Set hTables = doc.getElementsByTagName("table")

        For i = 1 To 3
            WriteTable hTables(i), GetLastRow(ActiveSheet, 1) + 1, ActiveSheet
        Next i
        .Quit
        Application.ScreenUpdating = True
    End With
End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, R As Long, C As Long, tBody As Object
    R = startRow
    With ws
        Dim headers As Object, header As Object, columnCounter As Long
        Set headers = hTable.getElementsByTagName("th")
        For Each header In headers
            columnCounter = columnCounter + 1
            .Cells(startRow, columnCounter) = header.innerText
        Next header
        startRow = startRow + 1
        Set tBody = hTable.getElementsByTagName("tbody")
        For Each tSection In tBody               'HTMLTableSection
            Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
            For Each tr In tRow
                R = R + 1
                Set tCell = tr.getElementsByTagName("td")
                C = 1
                For Each td In tCell             'DispHTMLElementCollection
                    .Cells(R, C).Value = "'" & td.innerText 'HTMLTableCell
                    C = C + 1
                Next td
            Next tr
        Next tSection
    End With
End Sub