通过网络抓取获得Excel

时间:2018-11-03 20:38:17

标签: excel vba web-scraping

我想将所有6个表从this website拉到我的工作簿中。 (vs全部,vs PG,vs SG,vs SF,vs PF,vs C)当我尝试使用excel中的from web选项并选择表时,它只是拉入标题。这是为什么?目前,我有一个“立即粘贴”按钮,然后转到网站,将其复制并单击使用宏创建的“粘贴”按钮,以清除当前信息并粘贴新值。我想消除我不得不手动转到网站并复制表格的麻烦。除了“来自网络”之外,还有另一种方法吗?

1 个答案:

答案 0 :(得分:3)

确保选择正确的表格。有两个表元素。第一个只是标题。第二个是标题+信息。我不确定您可以使用此方法来获取所有选项卡,因为URL不会更改,并且内容是javascript更新的。您可以看到API是否有任何提供,尽管它是由希望在颁发API密钥之前与您联系的人员阻止的。

任何简单的方法都是去VBE>工具>引用>添加对Microsoft Internet Control的引用,然后使用Internet Explorer导航到页面。

您可以使用CSS selector通过其id来定位表,并使用另一个CSS选择器class选择器来定位所有选项卡链接,以便单击它们以更新每个表的链接标签。

Option Explicit

Public Sub GetInfo()
    Dim IE As New InternetExplorer, hTable As HTMLTable
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")

    Dim clipboard As Object, tabs As Object, iTab As Long
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")

    With IE
        .Visible = True
        .navigate "https://swishanalytics.com/optimus/nba/daily-fantasy-team-defensive-ranks-position"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Set tabs = .document.querySelectorAll(".position.fastClick")

        For iTab = 0 To tabs.Length - 1

            If iTab > 0 Then
                tabs.item(iTab).Click
                While .Busy Or .readyState < 4: DoEvents: Wend

            End If

            clipboard.SetText .document.querySelector("#stat-table").outerHTML
            clipboard.PutInClipboard

            With ws
                .Cells(GetLastRow(ws, 1) + 2, 1).PasteSpecial
            End With
        Next
        .Quit
    End With

End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

如果您不想使用剪贴板复制粘贴表,则可以循环其行和行内的表格单元格。

Option Explicit

Public Sub GetInfo()
    Dim IE As New InternetExplorer, ws As Worksheet, tabs As Object, iTab As Long
    Application.ScreenUpdating = False
    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With IE
        .Visible = True
        .navigate "https://swishanalytics.com/optimus/nba/daily-fantasy-team-defensive-ranks-position"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Set tabs = .document.querySelectorAll(".position.fastClick")

        For iTab = 0 To tabs.Length - 1
            If iTab > 0 Then
                tabs.item(iTab).Click
                While .Busy Or .readyState < 4: DoEvents: Wend
            End If

            WriteTable .document.querySelector("#stat-table"), GetLastRow(ws, 1) + 2, ws
        Next
        .Quit
    End With
    Application.ScreenUpdating = True
End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)

    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, C As Long, tBody As Object
    r = startRow
    With ws
        Dim headers As Object, header As Object, columnCounter As Long
        Set headers = hTable.getElementsByTagName("th")
        For Each header In headers
            columnCounter = columnCounter + 1
            .Cells(startRow, columnCounter) = header.innerText
        Next header
        startRow = startRow + 1
        Set tBody = hTable.getElementsByTagName("tbody")
        For Each tSection In tBody
            Set tRow = tSection.getElementsByTagName("tr")
            For Each tr In tRow
                r = r + 1
                Set tCell = tr.getElementsByTagName("td")
                C = 1
                For Each td In tCell
                    .Cells(r, C).Value = td.innerText
                    C = C + 1
                Next td
            Next tr
        Next tSection
    End With
End Sub