我的网络抓取工具无法获取“赔率”值,并且不确定出了什么问题。对于每条信息,我使用 try/except 来查看元素是否可用。我不确定获取赔率值有什么问题。感谢您的帮助
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
url = 'https://www.ncaagamesim.com/college-basketball-predictions.asp'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table')
# Get column names
headers = table.find_all('th')
cols = [x.text for x in headers]
# Get all rows in table body
table_rows = table.find_all('tr')
rows = []
# Grab the text of each td, and put into a rows list
for each in table_rows[1:]:
odd_avail = True
data = each.find_all('td')
time = data[0].text.strip()
# Get matchup and odds
try:
matchup, odds = data[1].text.strip().split('\xa0')
odd_margin = float(odds.split('by')[-1].strip())
except:
matchup = data[1].text.strip()
odd_margin = '-'
odd_avail = False
# Get favored team
try:
odd_team_win = data[1].find_all('img')[-1]['title']
except:
odd_team_win = '-'
odd_avail = False
# Get simulation winner
try:
sim_team_win = data[2].find('img')['title']
except:
sim_team_win = '-'
odd_avail = False
awayTeam = matchup.split('@')[0].strip()
homeTeam = matchup.split('@')[1].strip()
# Get simulation margin
try:
sim_margin = float(re.findall("\d+\.\d+", data[2].text)[-1])
except:
sim_margin = '-'
odd_avail = False
# If all variables available, determine odds, simulation margin points, and optimal bet
if odd_avail == True:
if odd_team_win == sim_team_win:
diff = abs(sim_margin - odd_margin)
if sim_margin > odd_margin:
bet = odd_team_win
else:
if odd_team_win == homeTeam:
bet = awayTeam
else:
bet = homeTeam
else:
diff = odd_margin + sim_margin
bet = sim_team_win
else:
diff = -1
bet = '-'
# Create table
row = {cols[0]: time, 'Matchup': matchup, 'Odds Winner': odd_team_win, 'Odds': odd_margin,
'Simulation Winner': sim_team_win, 'Simulation Margin': sim_margin, 'Diff': diff, 'Bet' : bet}
rows.append(row)
df = pd.DataFrame(rows)
df = df.sort_values(by = ['Diff'], ascending = False)
print (df.to_string())
# df.to_csv('odds.csv', index=False)
当我运行此代码时,一切正常并获得所有其他值,但表中的所有赔率值都是“-”。
答案 0 :(得分:1)
我在代码中添加了一些东西,以说明
至于赔率未显示。检查 csv 文件以查看它是否存在。如果是,可能只是您需要在 pycharm 中更改的偏好(可能只是切断一些字符串)
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
url = 'https://www.ncaagamesim.com/college-basketball-predictions.asp'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table')
# Get column names
headers = table.find_all('th')
cols = [x.text for x in headers]
# Get all rows in table body
table_rows = table.find_all('tr')
rows = []
# Grab the text of each td, and put into a rows list
for each in table_rows[1:]:
odd_avail = True
data = each.find_all('td')
time = data[0].text.strip()
# Get matchup and odds
try:
matchup, odds = data[1].text.strip().split('\xa0')
odd_margin = float(odds.split('by')[-1].strip())
except:
matchup = data[1].text.strip()
if 'Even' in matchup:
matchup, odds = data[1].text.strip().split('\xa0')
odd_margin = 0
else:
odd_margin = '-'
odd_avail = False
awayTeam = matchup.split('@')[0].strip()
homeTeam = matchup.split('@')[1].strip()
# Get favored team
try:
odd_team_win = data[1].find_all('img')[-1]['title']
except:
odd_team_win = '-'
odd_avail = False
# Get simulation winner
try:
sim_team_win = data[2].find('img')['title']
except:
if 'wins' in data[2].text:
sim_team_win = data[2].text.split('wins')[0].strip()
else:
sim_team_win = '-'
odd_avail = False
# Get simulation margin
try:
sim_margin = float(re.findall("\d+\.\d+", data[2].text)[-1])
except:
sim_margin = '-'
odd_avail = False
# If all variables available, determine odds and simulation margin points
if odd_avail == True:
if odd_team_win == sim_team_win:
diff = abs(sim_margin - odd_margin)
else:
diff = odd_margin + sim_margin
else:
diff = '-'
# Create table
row = {cols[0]: time, 'Away Team': awayTeam, 'Home Team':homeTeam, 'Odds Winner': odd_team_win, 'Odds': odd_margin,
'Simulation Winner': sim_team_win, 'Simulation Margin': sim_margin, 'Diff': diff}
rows.append(row)
df = pd.DataFrame(rows)
print (df.to_string())
# df.to_csv('odds.csv', index=False)