我有一个Python脚本,用来从网站抓取并整理NHL球员数据,这些值被放入一个DataFrame中。我还写了一个函数,它接收队名和球员姓名列表,返回这条锋线上球员的工资总额。我想把DataFrame中的球员姓名(F1、F2、F3)传给该函数的参数(names),并把结果保存到Excel工作表的TotalSalary列中。
我尝试过用iloc把这些值传入函数,但没有弄明白该怎么做。
from bs4 import BeautifulSoup
import requests
import pandas as pd
import colorama
import crayons
import datetime
import xlsxwriter
import nhl_player_salary as nps
def _configure_pandas_display():
    """Apply the pandas display/mode options used when printing frames."""
    options = {
        'display': {
            'max_columns': None,
            'max_colwidth': 200,
            'expand_frame_repr': False,  # Don't wrap to multiple pages
            'max_rows': 20,
            'max_seq_items': 50,  # Max length of printed sequence
            'precision': 4,
            'show_dimensions': False,
            'colheader_justify': 'left',
        },
        'mode': {
            'chained_assignment': None,  # Controls SettingWithCopyWarning
        },
    }
    for category, option in options.items():
        for op, value in option.items():
            pd.set_option(f'{category}.{op}', value)  # Python 3.6+


def playerProductionData():
    """Scrape forward-line production stats and save them to an Excel file.

    Downloads the line-production table from leftwinglock.com, loads the
    first 14 cells of every data row into a DataFrame, converts
    numeric-looking cells to numbers via ``convert_fill`` and writes the
    frame to ``player_line_production_data.xlsx`` (sheet
    ``Player Line Production Data``) in the current working directory.

    Returns:
        The string ``'Run Successful'`` once the workbook has been written.

    Raises:
        requests.RequestException: if the page cannot be fetched within the
            5 second timeout.
    """
    # Make coloured terminal output work on Windows machines.
    colorama.init()

    # The display options are global pandas state, so they are only changed
    # when this file is executed as a script.
    if __name__ == '__main__':
        _configure_pandas_display()

    # Set a browser-like User-Agent header for the scrape. The literal is
    # built from adjacent strings so it stays a single-line header value.
    headers = {
        "User-Agent": (
            'Mozilla/5.0 (Windows NT 6.3; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/59.0.3071.115 Safari/537.36'
        )
    }
    page_link = 'https://www.leftwinglock.com/line-production/index.php?type=3'
    page_response = requests.get(
        page_link, headers=headers, allow_redirects=False, timeout=5
    )
    page_content = BeautifulSoup(page_response.content, "html.parser")

    # The first <tr> holds the column headers; every later row is data.
    data_rows = page_content.findAll('tr')[1:]
    player_data = [
        [td.getText() for td in row.findAll('td', limit=14)]
        for row in data_rows
    ]

    df = pd.DataFrame(
        player_data,
        columns=['Team', 'F1', 'F2', 'F3', 'GF', 'GA', 'GF%', 'SATF', 'SAT%',
                 'USATF', 'USAT%', 'SH%', 'SV%', 'SHSV%'],
    )

    # Placeholder column for the line's combined salary.
    # TODO: fill it row by row once team codes and player names are confirmed
    # to match the salary list, e.g.
    #   df.apply(lambda row: nps.getPlayerSalary(
    #       row['Team'], [row['F1'], row['F2'], row['F3']]), axis=1)
    df['TotalSalary'] = 0

    # convert_fill returns a new frame, so the result must be kept.
    df = convert_fill(df)
    df['SATF'] = df['SATF'].astype(int)
    df['GF'] = df['GF'].astype(int)

    # The context manager saves and closes the workbook on exit
    # (ExcelWriter.save() no longer exists in pandas 2.x).
    with pd.ExcelWriter('player_line_production_data.xlsx',
                        engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Player Line Production Data')

    return 'Run Successful'
def _to_number(value):
    """Return *value* parsed as a number, or unchanged if it is not numeric."""
    try:
        return pd.to_numeric(value)
    except (ValueError, TypeError):
        return value


def convert_fill(df):
    """Return a copy of *df* with numeric-looking cells converted to numbers.

    Each cell is parsed on its own: cells that ``pd.to_numeric`` accepts
    become numbers and everything else is left as it was. Missing values
    (present in the input or produced by the conversion) are then replaced
    with 0. The input frame is not modified, and the row and column order
    are kept.

    Args:
        df: DataFrame to convert.

    Returns:
        A new DataFrame with the converted values.
    """
    # Cell-by-cell conversion keeps text columns such as team or player
    # names intact while numeric strings become real numbers. Filling is
    # done on the frame itself so that pre-existing gaps are filled too.
    converted = df.apply(lambda column: column.map(_to_number))
    return converted.fillna(0)
# Run the scrape only when this file is executed as a script, so that
# importing it does not trigger a network request or write the workbook.
if __name__ == '__main__':
    print(playerProductionData())
import pandas as pd
from pandas import DataFrame
def getPlayerSalary(teamAbbrv, names):
    """Return the combined DraftKings salary of the named players on a team.

    Reads ``DKSalaries.csv`` from the current working directory and sums the
    ``Salary`` of every row whose ``TeamAbbrev`` contains *teamAbbrv* and
    whose ``Name`` contains at least one entry of *names*. Name matching is
    a case-insensitive literal substring test, so ``"MCDAVID"`` matches
    ``"Connor McDavid"``. Each salary row is counted at most once, even
    when several entries of *names* match it.

    Args:
        teamAbbrv: Team abbreviation to look for, e.g. ``'NSH'``.
        names: Iterable of player names (or name fragments).

    Returns:
        The sum of the matching salaries; 0 when *names* is empty or
        nothing matches.
    """
    names = list(names)
    if not names:
        return 0

    # Get the most recent DraftKings salary list.
    columns = ['Position', 'Name', 'Salary', 'AvgPointsPerGame', 'TeamAbbrev']
    salaries = pd.DataFrame(pd.read_csv('DKSalaries.csv'), columns=columns)

    # regex=False treats the text literally; na=False makes empty cells
    # count as "no match" instead of poisoning the boolean mask.
    on_team = salaries['TeamAbbrev'].str.contains(
        teamAbbrv, regex=False, na=False
    )

    # OR the per-name masks together so a row matched by several names
    # (e.g. two line-mates sharing a surname) is only summed once.
    name_matches = pd.Series(False, index=salaries.index)
    for name in names:
        name_matches |= salaries['Name'].str.contains(
            name, case=False, regex=False, na=False
        )

    return salaries.loc[on_team & name_matches, 'Salary'].sum()
# Demo call: run it only when this file is executed directly, so that
# importing the module does not read the CSV and print as a side effect.
if __name__ == '__main__':
    print(getPlayerSalary('NSH', ["ARVIDSSON", "JOHANSEN", "FORSBERG"]))
答案 0 :(得分:0)
一旦建立了DataFrame,您就可以查询单个球员以通过以下方式获取薪水:
df[df['Name'] == 'Bob']['Salary'].sum()
您可能已经意识到,问题在于姓名不保证唯一,而且在您的示例中也没有用它们建立索引。因此,上面的代码会把所有球队中名叫“Bob”的球员都选出来,并把他们的薪水加在一起。
从您的帖子看来,您想要的是按球队汇总的总额,只需使用pandas的groupby()函数按球队分组并求和:
# Group the salary rows by team and total the 'Salary' column of each group.
# NOTE(review): assumes df is the salary frame with a 'TeamAbbrev' column.
df.groupby('TeamAbbrev')['Salary'].sum()
上面的代码按球队对df进行分组,然后对每组的“Salary”列求和。