使用Selenium Select导航下拉菜单

时间:2019-03-21 02:09:20

标签: python pandas selenium web-scraping beautifulsoup

# Open the NBA per-game player stats page (2018-19 regular season) in a
# Selenium-driven Chrome session; BeautifulSoup parses d.page_source below.
url = "https://stats.nba.com/players/traditional/?sort=PTS&dir=-1&Season=2018-19&SeasonType=Regular%20Season"
d = webdriver.Chrome(ChromeDriverManager().install())
d.get(url)

# Initializes the data frame that accumulates the scraped player data.
# NOTE(review): `columns` is given a set literal, which has no defined order;
# the column order is only pinned by the re-selection inside the loop.
data_df= pd.DataFrame(columns={'Player','Team','3PA','3P%','3PaTotal','Season'})

# Outer loop: one pass per entry of the "Season" dropdown (20 passes).
for yearCount in range(0,20):
    # NOTE(review): `season` is reset to [18, 19] on every pass, so seasonStr
    # is always "18/19" regardless of which dropdown entry is selected.
    season = [18,19]
    seasonStr = str(season[0])+"/"+str(season[1])
    # Inner loop: scrape the current page, then click "next" (11 passes).
    for pageCounter in range(0,11):
        # Parse the browser's current page source and take its first <table>.
        # find('table') returns None when the source contains no <table>,
        # which makes the next line raise AttributeError.
        # NOTE(review): presumably the table is not rendered yet right after
        # the dropdown selection -- confirm; no wait is performed here.
        soup = BeautifulSoup(d.page_source, 'html.parser').find('table')
        # headers: text of every <th>; data: one list of <td> texts per <tr>,
        # with the first <tr> discarded by the `_` unpacking target.
        headers, [_, *data] = [i.text for i in soup.find_all('th')], [[i.text for i in b.find_all('td')] for b in soup.find_all('tr')]
        # Keep only rows that contain more than one cell.
        final_data = [i for i in data if len(i) > 1]

        # Pairs each row's cells with the header texts, one dict per row.
        data_attrs = [dict(zip(headers, i)) for i in final_data]

        # Collects the stats that are used for the graph.
        players = [i['PLAYER'] for i in data_attrs]
        teams = [i['TEAM'] for i in data_attrs]
        threePointAttempts = [i['3PA'] for i in data_attrs]
        threePointPercentage = [i['3P%'] for i in data_attrs]

        # Adds the collected rows to the accumulated data frame; '3PaTotal'
        # is filled with 0 and 'Season' with seasonStr for every row.
        temp_df = pd.DataFrame({'Player': players,
                                'Team': teams,
                                '3PA': threePointAttempts,
                                '3P%': threePointPercentage,
                                '3PaTotal' : 0,
                                'Season' : seasonStr})
        data_df = data_df.append(temp_df, ignore_index=True)
        # Re-select the columns to fix their order.
        data_df = data_df[['Player','Team','3PA','3P%','3PaTotal','Season']]

        # Goes to the next page by clicking the pagination "next" element.
        nxt = d.find_element_by_class_name("stats-table-pagination__next")
        nxt.click()

    # Pick the dropdown option at position yearCount in the element named
    # "Season"; the next outer pass then scrapes whatever page results.
    dropDown = Select(d.find_element_by_name("Season"))
    dropDown.select_by_index(yearCount)
  

我收到的错误信息:

     

Traceback (most recent call last):
  File "C:/Users/brenn/PycharmProjects/NBAstats/venv/Lib/site-packages/Player 3-Point.py", line 44, in <module>
    headers, [_, *data] = [i.text for i in soup.find_all('th')], [[i.text for i in b.find_all('td')] for b in soup.find_all('tr')]

     

AttributeError: 'NoneType' object has no attribute 'find_all'

我在 NBA 网站上尝试收集过去几个赛季的数据时遇到了问题。我的代码可以收集当前赛季的所有球员数据(逐页翻页抓取,没有任何问题)。但是,当我尝试通过下拉菜单切换到往年赛季来收集数据时,它就不起作用了。如果我直接使用上一个赛季的 URL,而不通过下拉菜单导航,则可以毫无问题地收集数据。另外,在 Selenium 打开的 Chrome 标签页中,页面确实切换到了上一个赛季,但在尝试读取数据时仍然出错。

1 个答案:

答案 0 :(得分:0)

我喜欢处理体育数据!

我想提出一种略有不同的方法。页面上的数据是通过请求一个返回 JSON 响应的 URL 来获取的。您可以利用该 URL 的 Season 查询参数遍历各个赛季(从 1996 年开始)。然后,您可以将全部结果放入一个 DataFrame 中,并根据需要对其进行筛选/处理。

import requests
import pandas as pd


# Query the JSON endpoint behind the stats.nba.com player table once per
# season and stack every season's rows into a single DataFrame, `results`.
request_url = 'https://stats.nba.com/stats/leaguedashplayerstats'

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}

# Query parameters shared by every request. Only 'Season' changes between
# iterations, so the dict is built once and 'Season' is overridden per
# request (overriding an existing key keeps its position in the dict).
base_params = {
    'College': '',
    'Conference': '',
    'Country': '',
    'DateFrom': '',
    'DateTo': '',
    'Division': '',
    'DraftPick': '',
    'DraftYear': '',
    'GameScope': '',
    'GameSegment': '',
    'Height': '',
    'LastNGames': '0',
    'LeagueID': '00',
    'Location': '',
    'MeasureType': 'Base',
    'Month': '0',
    'OpponentTeamID': '0',
    'Outcome': '',
    'PORound': '0',
    'PaceAdjust': 'N',
    'PerMode': 'PerGame',
    'Period': '0',
    'PlayerExperience': '',
    'PlayerPosition': '',
    'PlusMinus': 'N',
    'Rank': 'N',
    'Season': '',  # filled in for each request below
    'SeasonSegment': '',
    'SeasonType': 'Regular Season',
    'ShotClockRange': '',
    'StarterBench': '',
    'TeamID': '0',
    'TwoWay': '0',
    'VsConference': '',
    'VsDivision': '',
    'Weight': '',
}

# One DataFrame per season; concatenated once after the loop instead of
# growing a frame row by row (quadratic, and DataFrame.append no longer
# exists in pandas 2.x).
season_frames = []
for yearCount in range(1996, 2019):
    next_year_suffix = (yearCount + 1) % 100
    # Short label stored in the 'Season' column, e.g. 1996 -> '96/97'.
    seasonStr = '%02d/%02d' % (yearCount % 100, next_year_suffix)
    # Value sent as the 'Season' query parameter, e.g. 1996 -> '1996-97'.
    season_query = '%d-%02d' % (yearCount, next_year_suffix)

    params = dict(base_params, Season=season_query)

    # The timeout keeps a stalled connection from hanging the script, and
    # raise_for_status surfaces HTTP errors before JSON decoding is tried.
    response = requests.get(request_url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    jsonObj = response.json()

    result_set = jsonObj['resultSets'][0]
    cols = result_set['headers']
    rows = result_set['rowSet']

    # Build the whole season's frame in a single call.
    temp_df = pd.DataFrame(rows, columns=cols)
    temp_df['Season'] = seasonStr
    season_frames.append(temp_df)

    print ('Aquired %s stats' %(seasonStr))

# Stack all seasons and renumber the index 0..N-1.
results = pd.concat(season_frames, ignore_index=True)

输出:

print(results)
      PLAYER_ID            PLAYER_NAME  ...              CFPARAMS Season
0          1489                   None  ...                 1489,  96/97
1           902                   None  ...                  902,  96/97
2          2179                   None  ...                 2179,  96/97
3          1049                   None  ...                 1049,  96/97
4           775                   None  ...                  775,  96/97
5            93                   None  ...                   93,  96/97
6           920             A.C. Green  ...        920,1610612742  96/97
7           243            Aaron McKie  ...        243,1610612765  96/97
8          1425         Aaron Williams  ...       1425,1610612763  96/97
9           768              Acie Earl  ...        768,1610612749  96/97
10          228             Adam Keefe  ...        228,1610612762  96/97
11          154        Adrian Caldwell  ...        154,1610612755  96/97
12          673         Alan Henderson  ...        673,1610612737  96/97
13         1059  Aleksandar Djordjevic  ...       1059,1610612757  96/97
14          275          Allan Houston  ...        275,1610612752  96/97
15          947          Allen Iverson  ...        947,1610612755  96/97
16          297        Alonzo Mourning  ...        297,1610612748  96/97
17          175           Alton Lister  ...        175,1610612738  96/97
18         1043         Amal McCaskill  ...       1043,1610612753  96/97
19          692        Andrew DeClercq  ...        692,1610612744  96/97
20          457            Andrew Lang  ...        457,1610612749  96/97
21          358      Anfernee Hardaway  ...        358,1610612753  96/97
22          924       Anthony Goldwire  ...        924,1610612743  96/97
23          193          Anthony Mason  ...        193,1610612766  96/97
24          292         Anthony Miller  ...        292,1610612737  96/97
25          324         Anthony Peeler  ...        324,1610612763  96/97
26          156           Antoine Carr  ...        156,1610612762  96/97
27          952         Antoine Walker  ...        952,1610612738  96/97
28          213          Antonio Davis  ...        213,1610612754  96/97
29          176         Antonio Harvey  ...        176,1610612760  96/97
        ...                    ...  ...                   ...    ...
10599    204020          Tyler Johnson  ...     204020,1610612756  18/19
10600   1628399            Tyler Lydon  ...    1628399,1610612743  18/19
10601   1627755             Tyler Ulis  ...    1627755,1610612741  18/19
10602    203092           Tyler Zeller  ...     203092,1610612737  18/19
10603    201936           Tyreke Evans  ...     201936,1610612754  18/19
10604   1627820         Tyrone Wallace  ...    1627820,1610612746  18/19
10605      2199         Tyson Chandler  ...       2199,1610612747  18/19
10606   1626145             Tyus Jones  ...    1626145,1610612750  18/19
10607      2617          Udonis Haslem  ...       2617,1610612748  18/19
10608    203506         Victor Oladipo  ...     203506,1610612754  18/19
10609      1713           Vince Carter  ...       1713,1610612737  18/19
10610   1629053        Vincent Edwards  ...    1629053,1610612745  18/19
10611   1627735        Wade Baldwin IV  ...    1627735,1610612757  18/19
10612    201961        Wayne Ellington  ...     201961,1610612765  18/19
10613   1627782           Wayne Selden  ...    1627782,1610612741  18/19
10614   1628976     Wendell Carter Jr.  ...    1628976,1610612741  18/19
10615   1628411             Wes Iwundu  ...    1628411,1610612753  18/19
10616    202325         Wesley Johnson  ...     202325,1610612764  18/19
10617    202083        Wesley Matthews  ...     202083,1610612754  18/19
10618    203115            Will Barton  ...     203115,1610612743  18/19
10619   1626161    Willie Cauley-Stein  ...    1626161,1610612758  18/19
10620   1626195      Willy Hernangomez  ...    1626195,1610612766  18/19
10621    201163        Wilson Chandler  ...     201163,1610612746  18/19
10622   1627812           Yogi Ferrell  ...    1627812,1610612758  18/19
10623   1629139          Yuta Watanabe  ...    1629139,1610612763  18/19
10624   1628380           Zach Collins  ...    1628380,1610612757  18/19
10625    203897            Zach LaVine  ...     203897,1610612741  18/19
10626   1629155            Zach Lofton  ...    1629155,1610612765  18/19
10627      2585          Zaza Pachulia  ...       2585,1610612765  18/19
10628   1627753                Zhou Qi  ...    1627753,1610612745  18/19

[10629 rows x 66 columns]