我正在尝试通过使用以下代码从网站(https://www.baseball-reference.com/teams/ARI/2017-schedule-scores.shtml)中提取数据:
Sub GetBaseballReferenceData()
'created loop so we can loop through all different team url's
Dim x As Integer
Dim i As Integer
For i = 1 To 30
x = Cells(Rows.Count, 2).End(xlUp).Row
x = x + 2
'gets the team abbreviation that we use in our url
Team = Cells(i, "A")
'these two strings are used for url, they don't change
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"
Dim qt As QueryTable
Dim ws As Worksheet
Set ws = ActiveSheet
'uses Url to return data
Set qt = ws.QueryTables.Add(Connection:="URL;" & bbref_site & Team & year_schedule_scores & ".shtml", Destination:=Cells(x, 2))
qt.Refresh BackgroundQuery:=False
Next i
End Sub
运行代码时,它可以工作并为我获取所需的信息。但是,W / L列应采用这种格式(1-2、2-3、3-0),而应格式化为日期。当我尝试将其重新格式化为文本时,它将返回错误代码。最初如何从网站上提取想要的数据作为文本?
感谢所有帮助!
答案 0 :(得分:0)
我稍微更改了代码
编辑:添加了qt.WebDisableDateRecognition
Option Explicit
Sub GetBaseballReferenceData()
'created loop so we can loop through all different team url's
Dim x As Integer
Dim i As Integer
Dim Team As String
Dim qt As QueryTable
Dim ws As Worksheet
Dim WLRange As Range
'these two strings are used for url, they don't change
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"
Set ws = ActiveSheet
For i = 1 To 1
x = Cells(Rows.Count, 2).End(xlUp).Row
x = x + 2
'gets the team abbreviation that we use in our url
Team = Cells(i, "A")
'uses Url to return data
Set qt = ws.QueryTables.Add(Connection:="URL;" & bbref_site & Team & year_schedule_scores & ".shtml", Destination:=Cells(x, 2))
qt.WebDisableDateRecognition = True
qt.Refresh False
'qt.Refresh BackgroundQuery:=False
Next i
End Sub
答案 1 :(得分:0)
您也可以使用XHR
Option Explicit
Public Sub GetSchedules()
Dim x As Long, i As Long, URL As String, Team As String
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"
Dim sResponse As String, HTML As New HTMLDocument, wsSchedule As Worksheet, wsTeam As Worksheet
Dim wb As Workbook: Set wb = ThisWorkbook
Set wsSchedule = wb.Worksheets("Schedules"): Set wsTeam = wb.Worksheets("TeamNames")
wsSchedule.Cells.ClearContents
Application.ScreenUpdating = False
Dim http As Object: Set http = CreateObject("MSXML2.XMLHTTP")
With wsTeam
For i = 1 To 30
Team = .Cells(i, "A")
URL = bbref_site & Team & year_schedule_scores & ".shtml"
http.Open "GET", URL, False
http.send
sResponse = StrConv(http.responseBody, vbUnicode)
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With HTML
.body.innerHTML = sResponse
End With
WriteTable HTML, GetLastRow(wsSchedule, 1) + 2, wsSchedule
Next i
Application.ScreenUpdating = True
End With
End Sub
Public Sub WriteTable(ByVal HTML As HTMLDocument, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
Dim headers As Object, i As Long, columnCounter As Long
Dim columnInfo As Object, rowCounter As Long
With ws
Set headers = HTML.querySelectorAll("#team_schedule thead th")
For i = 0 To headers.Length - 1
columnCounter = columnCounter + 1
.Cells(startRow, columnCounter) = headers.item(i).innerText
Next i
Set columnInfo = HTML.querySelectorAll("#team_schedule tbody tr td")
columnCounter = 2
For i = 0 To columnInfo.Length - 1
If i Mod 20 = 0 Then
rowCounter = rowCounter + 1
columnCounter = 2
.Cells(startRow + rowCounter, 1) = rowCounter
Else
columnCounter = columnCounter + 1
End If
If columnCounter = 11 Then
.Cells(startRow + rowCounter, columnCounter) = Chr$(39) & columnInfo.item(i).innerText
Else
.Cells(startRow + rowCounter, columnCounter) = columnInfo.item(i).innerText
End If
Next i
End With
End Sub