我正在一个项目上运行有关NFL球员统计数据的一些分析模型。我下面有一些其他用户传递给我的代码。这段代码获取了我在Sheet1上拥有的链接列表,该列表名为“ PlayerList”,并为每个播放器创建了一个新标签并提取其通过的统计信息。所有链接均指向《职业橄榄球参考》。我可以更改此代码以提取除四分卫以外的所有位置的所有必要数据。对于QB,我想提取传递的统计信息表以及紧急和接收的统计信息表。任何帮助将不胜感激。作为参考,这里有一些示例链接:
https://www.pro-football-reference.com/players/R/RodgAa00.htm https://www.pro-football-reference.com/players/B/BreeDr00.htm
下面是代码:
Option Explicit
Public Sub GetInfo()
Di If InStr(links(link, 1), "https://") > 0 Then
Set html = GetHTMLDoc(links(link, 1))
Set hTable = html.getElementById("passing")
If Not hTable Is Nothing Then
playerName = GetNameAbbr(links(link, 1))
Set ws = AddPlayerSheet(playerName)
WriteTableToSheet hTable, ws
FixTable ws
End If
End If
Next
Application.ScreenUpdating = True
End Sub
Public Function GetHTMLDoc(ByVal url As String) As HTMLDocument
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
html.body.innerHTML = sResponse
Set GetHTMLDoc = html
End Function
Public Sub WriteTableToSheet(ByVal hTable As HTMLTable, ByVal ws As Worksheet)
Dim x As Long, y As Long
With hTable
For x = 0 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
If y = 6 Or y = 7 Then
ws.Cells(x + 4, y + 1).Value = Chr$(39) & .Rows(x).Cells(y).innerText
Else
ws.Cells(x + 4, y + 1).Value = .Rows(x).Cells(y).innerText
End If
Next y
Next x
End With
End Sub
Public Function GetNameAbbr(ByVal url As String)
Dim tempArr() As String
tempArr = Split(url, "/")
GetNameAbbr = Left$(tempArr(UBound(tempArr)), 6)
End Function
Public Function AddPlayerSheet(ByVal playerName As String) As Worksheet
Dim ws As Worksheet
If SheetExists(playerName) Then
Application.DisplayAlerts = False
ThisWorkbook.Worksheets(playerName).Delete
Application.DisplayAlerts = True
End If
Set ws = ThisWorkbook.Worksheets.Add
ws.Name = playerName
Set AddPlayerSheet = ws
End Function
Public Function SheetExists(ByVal playerName As String) As Boolean
SheetExists = Evaluate("ISREF('" & playerName & "'!A1)")
End Function
Public Sub FixTable(ByVal ws As Worksheet)
Dim found As Range, numSummaryRows As Long
With ws
Set found = .Columns("A").Find("Career")
If found Is Nothing Then Exit Sub
numSummaryRows = .Cells(.Rows.Count, "A").End(xlUp).Row - found.Row
numSummaryRows = IIf(numSummaryRows = 0, 1, numSummaryRows + 1)
Debug.Print found.Offset(, 1).Resize(numSummaryRows, 30).Address, ws.Name
found.Offset(, 1).Resize(numSummaryRows, 30).Copy found.Offset(, 2)
found.Offset(, 1).Resize(numSummaryRows, 1).ClearContents
End With
End Subm html As New HTMLDocument, links(), link As Long, wsSourceSheet As Worksheet
Dim hTable As HTMLTable, ws As Worksheet, playerName As String
Set wsSourceSheet = ThisWorkbook.Worksheets("PlayerList")
Application.ScreenUpdating = False
With wsSourceSheet
links = .Range("C2:C" & .Cells(.Rows.Count, "C").End(xlUp).Row).Value
End With
For link = LBound(links, 1) To UBound(links, 1)
答案 0 :(得分:3)
您是否需要使用VBA进行此操作? Excel完全能够导入组织良好的数据,例如该页面上的[几个]表。
在数据标签下,点击From Web
,然后输入网站URL。
接下来,您将选择所需的表。别生气-只能满足您的需求,但是您可以通过启用复选框来选择多个表。
解析和整理页面上的所有数据可能需要几分钟的时间...
回到工作表后,您将在右侧看到查询。右键单击查询,然后选择Load To...
,然后选择Table
和表数据的位置。您可以自定义许多其他属性。有一些教程介绍了您可以做什么。
要自定义的更多内容隐藏在两个功能区选项卡中,这些功能区选项卡仅在您单击表时出现,即设计和查询。
我认为,还有一种方法可以创建一个球员列表,然后在输入URL时使用Advanced
选项,以允许您动态选择所需的任何球员,而只添加一次表。 。但是我还没有弄清楚那部分内容。
我不是体育迷,但是我认为数据会在整个季节中变化,因此使用这样的表格的好处是,一旦您按需要设置了工作表,便可以进行一些设置每次您打开工作簿,按计划,手动,或从不自动更新;适当的话。
Google“ Excel网络查询 ”以查找有关使用查询时可用的众多选项的更多信息(又称“ 获取并转换”)以提取和整理您的数据。
也许这可以考虑替代Excel中已内置的编码功能。
祝您好运,“开始运动!”
答案 1 :(得分:1)
是的,使用VBA这样做是有原因的。实际上至少有五个.....
虽然可以使用内置工具来处理此问题,但我爱我一些强力查询,它不再是 “开箱即用” ,但需要知道如何在某种程度上用M编码和/或仍然要使用某些VBA。
如果将此按钮绑定到工作表上的按钮,则可以在需要时轻松按一下以刷新,将其链接到workbook_open事件以在打开时刷新,甚至让Windows Scheduler打开工作簿并在特定时间刷新(这样您知道 VBA仍然可以帮到您!尽管可能得到了我的朋友们(又名Windows)的小帮助。
对于每个页面下面的表,XHR似乎有点太快了,但是不是绝望,您可以使用Internet Explorer,并稍作延迟以确保Rushing & Receiving
表格已填充,或者使用Selenium自动化浏览器(我使用过Chrome,但可以使用Internet Explorer)。尽管这比XHR慢,但通过运行无头浏览器实例,我们可以提高效率。
在这里使用VBA,它将在您运行时为您提供不同的选项卡,并仅选择所需的那些表。基于sheet1上C2中的链接。
Option Explicit
Public Sub GetInfo()
Dim d As New ChromeDriver
Dim html As New HTMLDocument, links(), link As Long, wsSourceSheet As Worksheet, clipboard As Object
Dim hTablePass As HTMLTable, hTableRushReceive As HTMLTable, ws As Worksheet, playerName As String
Set wsSourceSheet = ThisWorkbook.Worksheets("Sheet1") '<change to sheet containing links
Application.ScreenUpdating = False
With wsSourceSheet
If .Cells(.Rows.Count, "C").End(xlUp).Row = 2 Then
ReDim links(1 To 1, 1 To 1): links(1, 1) = .Range("C2")
Else
links = .Range("C2:C" & .Cells(.Rows.Count, "C").End(xlUp).Row).Value
End If
End With
For link = LBound(links, 1) To UBound(links, 1)
If InStr(links(link, 1), "https://") > 0 Then
With d
.AddArgument "--headless"
.get links(link, 1)
html.body.innerHTML = .PageSource
Set hTablePass = html.querySelector("#all_passing #passing")
Set hTableRushReceive = html.querySelector("#all_rushing_and_receiving #rushing_and_receiving")
playerName = GetNameAbbr(links(link, 1))
Set ws = AddPlayerSheet(playerName)
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
If Not hTablePass Is Nothing Then
clipboard.SetText Replace$(Replace$(hTablePass.outerHTML, "--></DIV>", vbNullString), "<!--", vbNullString)
clipboard.PutInClipboard
ws.Cells(GetLastRow(ws, 1), 1).PasteSpecial
End If
If Not hTableRushReceive Is Nothing Then
clipboard.SetText hTableRushReceive.outerHTML
clipboard.PutInClipboard
ws.Cells(GetLastRow(ws, 1) + 2, 1).PasteSpecial
End If
End With
End If
Next
Application.ScreenUpdating = True
End Sub
Public Function GetNameAbbr(ByVal url As String) As String
Dim tempArr() As String
tempArr = Split(url, "/")
GetNameAbbr = Left$(tempArr(UBound(tempArr)), 6)
End Function
Public Function AddPlayerSheet(ByVal playerName As String) As Worksheet
Dim ws As Worksheet
If SheetExists(playerName) Then
Application.DisplayAlerts = False
ThisWorkbook.Worksheets(playerName).Delete
Application.DisplayAlerts = True
End If
Set ws = ThisWorkbook.Worksheets.Add
ws.Name = playerName
Set AddPlayerSheet = ws
End Function
Public Function SheetExists(ByVal playerName As String) As Boolean '<== *@Rory
SheetExists = Evaluate("ISREF('" & playerName & "'!A1)")
End Function
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
参考:
硒基本下载:
* 功能改编自@Rory