如何修复循环,以便将所有已抓取的数据附加到其列表中?

时间:2019-01-03 00:28:59

标签: python python-2.7 loops web-scraping

我正在编写的脚本用于抓取 NHL API,它应该抓取某个赛季(2017020001–2017021271)中的所有比赛。我刚学完,才意识到每个列表中只追加了最后一场比赛的数据。也就是说,我只得到了 2017021271 这一场比赛,而其余 1270 场比赛都没有。

我的代码看起来像这样,我做错了什么?

我知道问题出在我那写得很糟糕的循环上,但我不知道该如何解决。感谢您的理解!

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

#Create Empty lists
# player_id / goalie_id map 'home'/'away' to whatever the API returns under
# 'skaters' / 'goalies' for that side (iterated below as player ids).
player_id = {}
goalie_id = {}

# Accumulators for skaters; one element is appended per processed skater.
person = []
position = []
skaterstats = []

# Accumulators for goalies.
goalie_person=[]
goalie_position=[]
goalie_stats=[]

# Team-level values; appended to from BOTH the skater and the goalie sections.
team = []
team_goals = []
matchid = []

#Connect to NHL-API
# range(2017020001, 2017020100, 1) covers game ids 2017020001..2017020099.
# NOTE: the loop body is only the three indented lines below. Every iteration
# overwrites game_data, so once the loop ends game_data holds the response of
# the LAST requested game only. Everything after this loop is at column 0 and
# therefore runs a single time, against that last game.
for game_id in range(2017020001, 2017020100, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    r = requests.get(url)
    game_data = r.json()

#Get Keys for Players/Goalies
# Runs once, after the fetch loop has finished (see note above).
for homeaway in ['home','away']:
    player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
    player_id[homeaway] = player_dict

for homeaway in ['home','away']:
    goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
    goalie_id[homeaway] = goalie_dict 

#Get PlayerStats/TeamStats
# NOTE: as pasted, the body of the inner `for playerID` loop is not indented
# deeper than its header, so Python rejects this with an IndentationError.
for homeaway in player_id:
    for playerID in player_id[homeaway]:
    # Team-level values for the side this player belongs to.
    play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
    play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')                
    play_dict_gameid = game_data.get('gamePk')

    # Per-player entries are stored under the key 'ID' + <player id>.
    play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
    play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
    play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')

    #Append TeamStats to Empty list
    team.append(play_dict_teamname)
    team_goals.append(play_dict_teamgoals)
    matchid.append(play_dict_gameid)

    #Append PlayerStats to Empty list
    person.append(play_dict_person)
    position.append(play_dict_position)
    if play_dict_skaterstats: 
        skaterstats.append(play_dict_skaterstats)
    # No (or empty) skaterStats: build a placeholder whose values are all None.
    if not play_dict_skaterstats:
        play_dict_skaterstats = {}
        play_dict_skaterstats['timeOnIce'] = None
        play_dict_skaterstats['assists'] = None
        play_dict_skaterstats['goals'] = None
        play_dict_skaterstats['shots'] = None
        play_dict_skaterstats['hits'] = None
        play_dict_skaterstats['powerPlayGoals'] = None
        play_dict_skaterstats['powerPlayAssists'] = None
        play_dict_skaterstats['penaltyMinutes'] = None
        play_dict_skaterstats['faceOffPct'] = None
        play_dict_skaterstats['faceOffWins'] = None
        play_dict_skaterstats['faceoffTaken'] = None
        play_dict_skaterstats['takeaways'] = None
        play_dict_skaterstats['giveaways'] = None
        play_dict_skaterstats['shortHandedGoals'] = None
        play_dict_skaterstats['shortHandedAssists'] = None
        play_dict_skaterstats['blocked'] = None
        play_dict_skaterstats['plusMinus'] = None
        play_dict_skaterstats['evenTimeOnIce'] = None
        play_dict_skaterstats['powerPlayTimeOnIce'] = None
        play_dict_skaterstats['shortHandedTimeOnIce'] = None

# NOTE: at column 0 this append is outside every loop above, so it executes
# once in total rather than once per player without stats.
skaterstats.append(play_dict_skaterstats)

#Get GoalieStats
# NOTE: same indentation problem as the skater section: the body of the
# inner `for goalieID` loop is not indented below its header.
for homeaway in goalie_id:
    for goalieID in goalie_id[homeaway]:
    play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
    play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')                
    play_dict_gameid = game_data.get('gamePk')

    # The chained assignments below also rebind play_dict_person each time;
    # that name is not read again afterwards in this script.
    goalie_dict_person = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
    goalie_dict_position = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
    goalie_dict_stats = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')

    #Append GoalieStats to Empty list
    goalie_person.append(goalie_dict_person)
    goalie_position.append(goalie_dict_position)
    # Unlike the skater section, no placeholder is added when stats are missing.
    if goalie_dict_stats: 
        goalie_stats.append(goalie_dict_stats)

    #Append TeamStats to Empty list
    team.append(play_dict_teamname)
    team_goals.append(play_dict_teamgoals)
    matchid.append(play_dict_gameid)

#Create DataFrames for all lists
# Each accumulator list becomes its own DataFrame.
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)

df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)

df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)

1 个答案:

答案 0 :(得分:2)

我修正了缩进,把需要对每场比赛执行的代码都放进了最外层的 for 循环中,请看看这样能否解决您的问题:

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

#Create Empty lists
# player_id / goalie_id map 'home'/'away' to the id lists of the game that is
# currently being processed; they are overwritten on every iteration.
player_id = {}
goalie_id = {}

# One element per skater, across all games. These three lists stay aligned.
person = []
position = []
skaterstats = []

# One element per goalie, across all games (see note on goalie_stats below).
goalie_person=[]
goalie_position=[]
goalie_stats=[]

# NOTE(review): these receive one element per skater AND one per goalie
# (as in the original script), so they are longer than `person` and are not
# row-aligned with it. Confirm that this is what the analysis expects.
team = []
team_goals = []
matchid = []

# Keys of the all-None placeholder row used for a skater without 'skaterStats'.
SKATER_STAT_KEYS = [
    'timeOnIce',
    'assists',
    'goals',
    'shots',
    'hits',
    'powerPlayGoals',
    'powerPlayAssists',
    'penaltyMinutes',
    'faceOffPct',
    'faceOffWins',
    'faceoffTaken',
    'takeaways',
    'giveaways',
    'shortHandedGoals',
    'shortHandedAssists',
    'blocked',
    'plusMinus',
    'evenTimeOnIce',
    'powerPlayTimeOnIce',
    'shortHandedTimeOnIce',
]

# Fixed iteration order, so rows are appended home-first for every game
# regardless of dict ordering in the running Python version.
SIDES = ['home', 'away']

#Connect to NHL-API
# Everything that must happen per game lives inside this loop; otherwise only
# the last fetched game would be processed.
for game_id in range(2017020001, 2017020100, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    r = requests.get(url)
    game_data = r.json()

    # Look the shared sub-trees up once per game instead of once per value.
    boxscore_teams = game_data.get('liveData').get('boxscore').get('teams')
    play_dict_gameid = game_data.get('gamePk')

    #Get Keys for Players/Goalies
    for homeaway in SIDES:
        player_id[homeaway] = boxscore_teams.get(homeaway).get('skaters')
        goalie_id[homeaway] = boxscore_teams.get(homeaway).get('goalies')

    for homeaway in SIDES:
        side = boxscore_teams.get(homeaway)
        play_dict_teamname = side.get('team').get('name')
        play_dict_teamgoals = side.get('teamStats').get('teamSkaterStats').get('goals')
        side_players = side.get('players')

        #Get PlayerStats/TeamStats
        for playerID in player_id[homeaway]:
            player_entry = side_players.get('ID' + str(playerID))

            #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

            #Append PlayerStats to Empty list
            person.append(player_entry.get('person'))
            position.append(player_entry.get('position'))

            # Exactly one stats row per skater: the real stats when present,
            # otherwise a fresh all-None placeholder. Appending inside the
            # loop keeps skaterstats aligned with person/position.
            play_dict_skaterstats = player_entry.get('stats').get('skaterStats')
            if not play_dict_skaterstats:
                play_dict_skaterstats = dict.fromkeys(SKATER_STAT_KEYS)
            skaterstats.append(play_dict_skaterstats)

        #Get GoalieStats
        for goalieID in goalie_id[homeaway]:
            goalie_entry = side_players.get('ID' + str(goalieID))

            #Append GoalieStats to Empty list
            goalie_person.append(goalie_entry.get('person'))
            goalie_position.append(goalie_entry.get('position'))

            # NOTE(review): kept as in the original - a goalie without
            # 'goalieStats' adds no row here, which would leave goalie_stats
            # shorter than goalie_person. Confirm whether padding is wanted.
            goalie_dict_stats = goalie_entry.get('stats').get('goalieStats')
            if goalie_dict_stats:
                goalie_stats.append(goalie_dict_stats)

            #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

#Create DataFrames for all lists
# Built once, after every game has been collected.
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)

df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)

df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)