Exce VBA-从获取构建网站获取链接

时间:2019-02-11 15:20:19

标签: html excel vba web-scraping fetch

我一直在尝试从“ http://builds.reicast.com/”中获取数据,但是问题是网站也在获取信息(这是我真正需要的; Master开发版本的URL)。我觉得加载Javascript提取的延迟阻碍了我这一边的提取过程。另外,我尝试了几种不同的获取构建URL的方法,但它们从未出现(我假设它与前面提到的问题相同)。

这是它的样子:

Sub FetchData()
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://builds.reicast.com/", Destination:=Range( _
        "$A$1"))
        .Name = "master"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With
End Sub

printscreen

如何获取它来加载“ master dev-builds部分”? 谢谢你的时间!

1 个答案:

答案 0 :(得分:0)

您可以使用通过VBE>工具>引用添加的Microsoft Internet Explorer控件库,并包含定时循环以确保链接存在,例如。

Option Explicit
Public Sub GetLinks()
    Dim ie As New InternetExplorer, commits As Object, t As Date
    Const MAX_WAIT_SEC As Long = 10
    With ie
        .Visible = True
        .Navigate2 "http://builds.reicast.com/"
        While .Busy Or .readyState < 4: DoEvents: Wend
        t = Timer
        Do
            Set commits = ie.document.querySelectorAll(".commit [href]")
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While commits.Length = 0

        Debug.Print commits.Length
        Dim i As Long
        For i = 0 To commits.Length - 1
            With ActiveSheet
                .Cells(i + 1, 1) = commits.item(i).innerText
                .Cells(i + 1, 2) = commits.item(i).getAttribute("href")
            End With
        Next
        Stop '<==Delete me later
        .Quit
    End With
End Sub

如果要写出整个表:

Option Explicit
Public Sub GetTable()
    Dim ie As New InternetExplorer, hTable As Object, t As Date, headers(), ws As Worksheet
    Const MAX_WAIT_SEC As Long = 10
    headers = Array("Commit", "Date", "Android", "Win_x86", "Win_x64")
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "http://builds.reicast.com/"
        While .Busy Or .readyState < 4: DoEvents: Wend
        t = Timer
        Do
            On Error Resume Next
            Set hTable = ie.document.querySelector("#builds table")
            On Error GoTo 0
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While hTable Is Nothing
        Writetable hTable, 1, ws
        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Quit
    End With
End Sub

Public Sub Writetable(ByVal hTable As Object, ByVal startRow As Long, ByVal ws As Worksheet)
    Dim tr As Object, td As Object, r As Long, c As Long
    For Each tr In hTable.getElementsByTagName("tr")
        r = r + 1: c = 1
        If r > 2 Then
            For Each td In tr.getElementsByTagName("td")
                Select Case c
                Case 1, 3, 4, 5
                    ws.Cells(r - 1, c) = td.FirstChild
                Case Else
                    ws.Cells(r - 1, c) = td.innerText
                End Select
                c = c + 1
            Next
        End If
    Next
End Sub

示例输出: