import time

# Scrapes per-game three-point stats (player, team, 3PA, 3P%) from the
# stats.nba.com "traditional" player table, one season at a time. A
# Selenium-driven Chrome session renders the page and BeautifulSoup parses the
# rendered HTML. The combined result ends up in `data_df`.
url = "https://stats.nba.com/players/traditional/?sort=PTS&dir=-1&Season=2018-19&SeasonType=Regular%20Season"
d = webdriver.Chrome(ChromeDriverManager().install())
d.get(url)

# Column order of the final frame. This is a list, not a set: a set has no
# defined order and is rejected as `columns=` by recent pandas versions.
columns = ['Player', 'Team', '3PA', '3P%', '3PaTotal', 'Season']


def _wait_for_table(driver, timeout=20.0, poll=0.5):
    """Return the first <table> of the rendered page once it contains data cells.

    The page builds its table with JavaScript, so right after a navigation or a
    season change `find('table')` can return None (this is what raised
    "'NoneType' object has no attribute 'find_all'"). Poll the page source until
    the table is present instead of parsing it immediately.

    Raises:
        TimeoutError: if no populated table shows up within `timeout` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        table = BeautifulSoup(driver.page_source, 'html.parser').find('table')
        if table is not None and table.find('td') is not None:
            return table
        if time.monotonic() >= deadline:
            raise TimeoutError('stats table did not render within %s seconds' % timeout)
        time.sleep(poll)


# Make sure the initial page (and its season drop-down) has rendered.
_wait_for_table(d)

# Initializes data frame to store player data
data_df = pd.DataFrame(columns=columns)
season_frames = []
for yearCount in range(0, 20):
    # Select the season *before* scraping it. Selecting after the scrape (as
    # before) re-scraped the current season on the second pass.
    dropDown = Select(d.find_element_by_name("Season"))
    dropDown.select_by_index(yearCount)
    # Heuristic pause so the previous season's table is not picked up before
    # the page starts re-rendering.
    time.sleep(1)

    # NOTE(review): assumes drop-down index 0 is 2018-19 and each following
    # entry is one season earlier -- confirm against the live page.
    start_year = 2018 - yearCount
    seasonStr = '%02d/%02d' % (start_year % 100, (start_year + 1) % 100)

    # NOTE(review): 11 pages is hard-coded; seasons with fewer pages will
    # re-read their last page. Exact duplicates are dropped at the end.
    for pageCounter in range(0, 11):
        # Scrapes all of the data putting it into headers
        soup = _wait_for_table(d)
        headers = [i.text for i in soup.find_all('th')]
        # First <tr> is the header row; it is skipped.
        _, *data = [[i.text for i in b.find_all('td')] for b in soup.find_all('tr')]
        final_data = [i for i in data if len(i) > 1]
        # Creates a dictionary of headers
        data_attrs = [dict(zip(headers, i)) for i in final_data]
        # Collects stats that are used for graph and adds them to the result
        temp_df = pd.DataFrame({'Player': [i['PLAYER'] for i in data_attrs],
                                'Team': [i['TEAM'] for i in data_attrs],
                                '3PA': [i['3PA'] for i in data_attrs],
                                '3P%': [i['3P%'] for i in data_attrs],
                                '3PaTotal': 0,
                                'Season': seasonStr})
        season_frames.append(temp_df)
        # Goes to next page
        nxt = d.find_element_by_class_name("stats-table-pagination__next")
        nxt.click()

# One concat at the end instead of DataFrame.append per page (append copies the
# whole frame each time and no longer exists in pandas >= 2.0).
if season_frames:
    data_df = pd.concat(season_frames, ignore_index=True)[columns]
    data_df = data_df.drop_duplicates().reset_index(drop=True)
我的错误代码:
Traceback (most recent call last): File "C:/Users/brenn/PycharmProjects/NBAstats/venv/Lib/site-packages/Player 3-Point.py", line 44, in <module> headers, [_, *data] = [i.text for i in soup.find_all('th')], [[i.text for i in b.find_all('td')] for b in soup.find_all('tr')]
AttributeError: 'NoneType' object has no attribute 'find_all'
我在尝试从 NBA 网站收集过去几个赛季的数据时遇到了问题。我的代码可以收集当前赛季的所有球员数据(逐页翻页,没有任何问题)。但是,当我通过下拉菜单切换到往年赛季再收集数据时,它就不起作用了。如果我直接使用往年赛季的 URL,而不通过下拉菜单导航,则可以毫无问题地收集数据。另外,在 Selenium 打开的 Chrome 标签页中,页面确实切换到了上一个赛季,但在读取数据时出错。
答案 0 :(得分:0)
我喜欢处理体育数据!
我想提出一种略有不同的方法。数据通过请求URL呈现,该URL将返回json响应。您可以使用该查询参数遍历季节(从1996年开始)。然后,您可以将整个内容转储到数据帧中,并根据需要过滤/操作该数据帧。
import requests
import pandas as pd
# Pulls per-game player stats for every season from 1996-97 through 2018-19
# from the stats.nba.com JSON endpoint and stacks them into one DataFrame,
# `results`, with an extra 'Season' column ('96/97' ... '18/19').
request_url = 'https://stats.nba.com/stats/leaguedashplayerstats'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}

# Query parameters shared by every request; only 'Season' changes, so the
# dict is built once and 'Season' is overridden per iteration.
base_params = {
    'College': '',
    'Conference': '',
    'Country': '',
    'DateFrom': '',
    'DateTo': '',
    'Division': '',
    'DraftPick': '',
    'DraftYear': '',
    'GameScope': '',
    'GameSegment': '',
    'Height': '',
    'LastNGames': '0',
    'LeagueID': '00',
    'Location': '',
    'MeasureType': 'Base',
    'Month': '0',
    'OpponentTeamID': '0',
    'Outcome': '',
    'PORound': '0',
    'PaceAdjust': 'N',
    'PerMode': 'PerGame',
    'Period': '0',
    'PlayerExperience': '',
    'PlayerPosition': '',
    'PlusMinus': 'N',
    'Rank': 'N',
    'Season': '',
    'SeasonSegment': '',
    'SeasonType': 'Regular Season',
    'ShotClockRange': '',
    'StarterBench': '',
    'TeamID': '0',
    'TwoWay': '0',
    'VsConference': '',
    'VsDivision': '',
    'Weight': '',
}

season_frames = []
for yearCount in range(1996, 2019):
    # Two-digit year the season ends in, e.g. 1999 -> 0 (rendered as '00').
    next_yy = (yearCount + 1) % 100
    seasonStr = '%02d/%02d' % (yearCount % 100, next_yy)   # label, e.g. '99/00'
    season_query = '%d-%02d' % (yearCount, next_yy)        # API value, e.g. '1999-00'
    params = dict(base_params, Season=season_query)

    # A timeout keeps a stalled connection from hanging the script forever;
    # raise_for_status surfaces HTTP errors instead of failing later in .json().
    response = requests.get(request_url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    jsonObj = response.json()

    result_set = jsonObj['resultSets'][0]
    cols = result_set['headers']
    rows = result_set['rowSet']

    # Build the season frame in one call rather than appending row by row
    # (DataFrame.append copied the frame per row and was removed in pandas 2.0).
    temp_df = pd.DataFrame(rows, columns=cols)
    temp_df['Season'] = seasonStr
    print ('Aquired %s stats' %(seasonStr))
    season_frames.append(temp_df)

# Single concat with a fresh 0..n-1 index.
results = pd.concat(season_frames, ignore_index=True)
输出:
print(results)
PLAYER_ID PLAYER_NAME ... CFPARAMS Season
0 1489 None ... 1489, 96/97
1 902 None ... 902, 96/97
2 2179 None ... 2179, 96/97
3 1049 None ... 1049, 96/97
4 775 None ... 775, 96/97
5 93 None ... 93, 96/97
6 920 A.C. Green ... 920,1610612742 96/97
7 243 Aaron McKie ... 243,1610612765 96/97
8 1425 Aaron Williams ... 1425,1610612763 96/97
9 768 Acie Earl ... 768,1610612749 96/97
10 228 Adam Keefe ... 228,1610612762 96/97
11 154 Adrian Caldwell ... 154,1610612755 96/97
12 673 Alan Henderson ... 673,1610612737 96/97
13 1059 Aleksandar Djordjevic ... 1059,1610612757 96/97
14 275 Allan Houston ... 275,1610612752 96/97
15 947 Allen Iverson ... 947,1610612755 96/97
16 297 Alonzo Mourning ... 297,1610612748 96/97
17 175 Alton Lister ... 175,1610612738 96/97
18 1043 Amal McCaskill ... 1043,1610612753 96/97
19 692 Andrew DeClercq ... 692,1610612744 96/97
20 457 Andrew Lang ... 457,1610612749 96/97
21 358 Anfernee Hardaway ... 358,1610612753 96/97
22 924 Anthony Goldwire ... 924,1610612743 96/97
23 193 Anthony Mason ... 193,1610612766 96/97
24 292 Anthony Miller ... 292,1610612737 96/97
25 324 Anthony Peeler ... 324,1610612763 96/97
26 156 Antoine Carr ... 156,1610612762 96/97
27 952 Antoine Walker ... 952,1610612738 96/97
28 213 Antonio Davis ... 213,1610612754 96/97
29 176 Antonio Harvey ... 176,1610612760 96/97
... ... ... ... ...
10599 204020 Tyler Johnson ... 204020,1610612756 18/19
10600 1628399 Tyler Lydon ... 1628399,1610612743 18/19
10601 1627755 Tyler Ulis ... 1627755,1610612741 18/19
10602 203092 Tyler Zeller ... 203092,1610612737 18/19
10603 201936 Tyreke Evans ... 201936,1610612754 18/19
10604 1627820 Tyrone Wallace ... 1627820,1610612746 18/19
10605 2199 Tyson Chandler ... 2199,1610612747 18/19
10606 1626145 Tyus Jones ... 1626145,1610612750 18/19
10607 2617 Udonis Haslem ... 2617,1610612748 18/19
10608 203506 Victor Oladipo ... 203506,1610612754 18/19
10609 1713 Vince Carter ... 1713,1610612737 18/19
10610 1629053 Vincent Edwards ... 1629053,1610612745 18/19
10611 1627735 Wade Baldwin IV ... 1627735,1610612757 18/19
10612 201961 Wayne Ellington ... 201961,1610612765 18/19
10613 1627782 Wayne Selden ... 1627782,1610612741 18/19
10614 1628976 Wendell Carter Jr. ... 1628976,1610612741 18/19
10615 1628411 Wes Iwundu ... 1628411,1610612753 18/19
10616 202325 Wesley Johnson ... 202325,1610612764 18/19
10617 202083 Wesley Matthews ... 202083,1610612754 18/19
10618 203115 Will Barton ... 203115,1610612743 18/19
10619 1626161 Willie Cauley-Stein ... 1626161,1610612758 18/19
10620 1626195 Willy Hernangomez ... 1626195,1610612766 18/19
10621 201163 Wilson Chandler ... 201163,1610612746 18/19
10622 1627812 Yogi Ferrell ... 1627812,1610612758 18/19
10623 1629139 Yuta Watanabe ... 1629139,1610612763 18/19
10624 1628380 Zach Collins ... 1628380,1610612757 18/19
10625 203897 Zach LaVine ... 203897,1610612741 18/19
10626 1629155 Zach Lofton ... 1629155,1610612765 18/19
10627 2585 Zaza Pachulia ... 2585,1610612765 18/19
10628 1627753 Zhou Qi ... 1627753,1610612745 18/19
[10629 rows x 66 columns]