从网站以特定格式(Excel VBA)获取数据

时间:2018-07-28 01:17:52

标签: excel vba excel-vba

我正在尝试通过使用以下代码从网站(https://www.baseball-reference.com/teams/ARI/2017-schedule-scores.shtml)中提取数据:

  Sub GetBaseballReferenceData()

'created loop so we can loop through all different team url's
Dim x As Integer
Dim i As Integer
For i = 1 To 30

x = Cells(Rows.Count, 2).End(xlUp).Row

x = x + 2


'gets the team abbreviation that we use in our url
Team = Cells(i, "A")


'these two strings are used for url, they don't change
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"



Dim qt As QueryTable

Dim ws As Worksheet

Set ws = ActiveSheet



'uses Url to return data
Set qt = ws.QueryTables.Add(Connection:="URL;" & bbref_site & Team & year_schedule_scores & ".shtml", Destination:=Cells(x, 2))

qt.Refresh BackgroundQuery:=False


Next i

End Sub

运行代码时,它可以工作并为我获取所需的信息。但是,W / L列应采用这种格式(1-2、2-3、3-0),而应格式化为日期。当我尝试将其重新格式化为文本时,它将返回错误代码。最初如何从网站上提取想要的数据作为文本?

感谢所有帮助!

2 个答案:

答案 0 :(得分:0)

我稍微更改了代码

编辑:添加了qt.WebDisableDateRecognition

Option Explicit 

Sub GetBaseballReferenceData()

'created loop so we can loop through all different team url's
Dim x As Integer
Dim i As Integer
Dim Team As String
Dim qt As QueryTable
Dim ws As Worksheet
Dim WLRange As Range

'these two strings are used for url, they don't change
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"

Set ws = ActiveSheet


    For i = 1 To 1

        x = Cells(Rows.Count, 2).End(xlUp).Row

        x = x + 2

        'gets the team abbreviation that we use in our url
        Team = Cells(i, "A")


        'uses Url to return data
        Set qt = ws.QueryTables.Add(Connection:="URL;" & bbref_site & Team & year_schedule_scores & ".shtml", Destination:=Cells(x, 2))
        qt.WebDisableDateRecognition = True

        qt.Refresh False
        'qt.Refresh BackgroundQuery:=False


    Next i

End Sub

答案 1 :(得分:0)

您也可以使用XHR

Option Explicit
Public Sub GetSchedules()
    Dim x As Long, i As Long, URL As String, Team As String
    Const bbref_site As String = "https://www.baseball-reference.com/teams/"
    Const year_schedule_scores As String = "/2017-schedule-scores"
    Dim sResponse As String, HTML As New HTMLDocument, wsSchedule As Worksheet, wsTeam As Worksheet
    Dim wb As Workbook: Set wb = ThisWorkbook
    Set wsSchedule = wb.Worksheets("Schedules"): Set wsTeam = wb.Worksheets("TeamNames")
    wsSchedule.Cells.ClearContents
    Application.ScreenUpdating = False
    Dim http As Object: Set http = CreateObject("MSXML2.XMLHTTP")
    With wsTeam
        For i = 1 To 30
            Team = .Cells(i, "A")
            URL = bbref_site & Team & year_schedule_scores & ".shtml"
            http.Open "GET", URL, False
            http.send
            sResponse = StrConv(http.responseBody, vbUnicode)
            sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
            With HTML
                .body.innerHTML = sResponse
            End With
            WriteTable HTML, GetLastRow(wsSchedule, 1) + 2, wsSchedule
        Next i
        Application.ScreenUpdating = True
    End With
End Sub

Public Sub WriteTable(ByVal HTML As HTMLDocument, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
    Dim headers As Object, i As Long, columnCounter As Long
    Dim columnInfo As Object, rowCounter As Long
    With ws
        Set headers = HTML.querySelectorAll("#team_schedule thead th")
        For i = 0 To headers.Length - 1
            columnCounter = columnCounter + 1
            .Cells(startRow, columnCounter) = headers.item(i).innerText
        Next i
        Set columnInfo = HTML.querySelectorAll("#team_schedule tbody tr td")
        columnCounter = 2
        For i = 0 To columnInfo.Length - 1
            If i Mod 20 = 0 Then
                rowCounter = rowCounter + 1
                columnCounter = 2
                .Cells(startRow + rowCounter, 1) = rowCounter
            Else
                columnCounter = columnCounter + 1
            End If
            If columnCounter = 11 Then
                .Cells(startRow + rowCounter, columnCounter) = Chr$(39) & columnInfo.item(i).innerText
            Else
                .Cells(startRow + rowCounter, columnCounter) = columnInfo.item(i).innerText
            End If
        Next i
    End With
End Sub