下面的代码除了一个问题之外一切正常:运行后,每个 URL 的数据会被返回两次,而不是一次。有人能解释一下我哪里做错了吗?
import requests
import csv
from random import choice
import pandas as pd
# Script from the question: fetch one day of NBA speed/distance stats per date
# listed in NBADates.csv and accumulate everything into Stats.csv.
# NOTE(review): the indentation of this listing was lost; the statements after
# the `for` line presumably form the loop body — confirm against the original.

# Endpoint template; {date} is substituted into both DateFrom and DateTo.
url_template = "https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=SpeedDistance&Season=2017-18&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
# Accumulator for the rows of every request.
lineup_df = pd.DataFrame()
# Input table; the code below reads its "Date" and "Date2" columns.
df = pd.read_csv('NBADates.csv')
# The return value is discarded, so this call has no effect on `df`.
df.to_dict('series')
# One request URL per value in the "Date" column.
url_list=[url_template.format(date=date) for date in df.loc[ : ,"Date"]]
for url in url_list:
data = requests.get(url, headers={
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',})
# Column names and data rows come from the first entry of 'resultSets'.
headers = data.json()['resultSets'][0]['headers']
stats = data.json()['resultSets'][0]['rowSet']
stats_df = pd.DataFrame(stats, columns=headers)
# BUG (the subject of the question): this builds a *list* holding one copy of
# stats_df for every value in df["Date2"], not just the value belonging to the
# current URL. With N rows in NBADates.csv, each URL's rows are appended N times.
stats_df=[stats_df.assign(Date2=Date2) for Date2 in df.loc[ : ,"Date2"]]
# Append to the big dataframe
lineup_df = lineup_df.append(stats_df, ignore_index=True)
# Write the accumulated rows to Stats.csv.
lineup_df.to_csv("Stats.csv")
编辑:这是输入文件 NBADates.csv 的内容(print(df) 的输出):
print(df)
Date Date2
0 10%2F17%2F2017 10/17/2017
1 10%2F18%2F2017 10/18/2017
答案 0 :(得分:1)
这是问题所在:
stats_df=[stats_df.assign(Date2=Date2) for Date2 in df.loc[ : ,"Date2"]]
对于输入文件中的每个 Date2,这一行都会把 stats_df 中的行复制一遍。我想您只需要与刚刚下载的网址相对应的那个 Date2,而不是 df 中的每一个 Date2。请使用该网址在 url_list 中的索引来访问 df 中对应的行。
import requests
import csv
from random import choice
import pandas as pd
# Fetch one day of NBA speed/distance player stats for every date listed in
# NBADates.csv, tag each day's rows with its readable date, and write the
# combined table to Stats.csv.

# Endpoint template; {date} is substituted into both DateFrom and DateTo, so
# each request covers exactly one day.
url_template = "https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=SpeedDistance&Season=2017-18&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="

# "Date" holds the URL-encoded date used in the query; "Date2" holds the
# readable form of the same date used to label the output rows.
df = pd.read_csv('NBADates.csv')
url_list = [url_template.format(date=date) for date in df["Date"]]

# Browser-like User-Agent sent with every request.
request_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
}

# Collect the per-day frames and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole
# accumulator on every iteration.
daily_frames = []
# Pair each URL with the Date2 of the same row so every day is labelled with
# its own date exactly once, independent of the DataFrame's index labels.
for url, date_label in zip(url_list, df["Date2"]):
    # A timeout keeps the script from hanging forever on an unresponsive
    # server; raise_for_status surfaces HTTP errors instead of failing later
    # with an opaque JSON/KeyError.
    response = requests.get(url, headers=request_headers, timeout=30)
    response.raise_for_status()

    # Parse the body once; the first result set carries both the column
    # names and the data rows.
    result_set = response.json()['resultSets'][0]
    stats_df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])
    daily_frames.append(stats_df.assign(Date2=date_label))

# pd.concat raises on an empty list, so fall back to an empty frame when the
# input file lists no dates.
if daily_frames:
    lineup_df = pd.concat(daily_frames, ignore_index=True)
else:
    lineup_df = pd.DataFrame()
lineup_df.to_csv("Stats.csv")