我想从这个网页(ESPN 的 NBA 比分页面)抓取主队在比赛第一节的得分,但我的代码没有返回任何内容。以下是我目前写的代码,任何反馈都将不胜感激。
from bs4 import BeautifulSoup
import urllib
import urllib.request
import pandas as pd
# Download the scoreboard page and collect the text of the second <td> of
# every table row into a one-column DataFrame named 'first_quarter'.
# NOTE(review): if the scoreboard is rendered client-side, the downloaded
# HTML may contain no matching <tr>/<td> rows and the frame will be empty.
page = urllib.request.urlopen('http://espn.go.com/nba/scoreboard').read()
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(page, 'html.parser')
first_quarter = []
for row in soup.find_all('tr'):
    col = row.find_all('td')
    # Rows with fewer than two <td> cells (e.g. header rows built from <th>)
    # have no second column; skip them instead of raising IndexError.
    if len(col) < 2:
        continue
    # get_text() also handles cells with nested markup, where `.string`
    # would be None and `.strip()` would fail.
    first_quarter.append(col[1].get_text(strip=True))
# Put the collected values under the heading 'first_quarter'.
columns = {'first_quarter': first_quarter}
df = pd.DataFrame(columns)
df
答案 0 :(得分:1)
页面内容是动态加载的;不过只要稍作解析,就可以从页面源码内嵌的脚本中提取数据:
import re
import json
from pprint import pprint as pp
# `requests` is used below but was missing from the imports above.
import requests

# Fetch the raw scoreboard HTML.
r = requests.get("http://espn.go.com/nba/scoreboard").content
# The scoreboard data is embedded in an inline <script> as an assignment of
# the form `window.espn.scoreboardData = {...};`, possibly followed by other
# statements. The dots are escaped so they match literally.
script_tag = BeautifulSoup(r, "html.parser").find(
    "script", string=re.compile(r"window\.espn\.scoreboardData")
)
if script_tag is None:
    raise ValueError("no <script> containing window.espn.scoreboardData found")
# Everything after the first "=" starts with the JSON object literal.
scr = script_tag.text.split("=", 1)[1].rstrip(";")
# raw_decode parses only the leading JSON value and ignores whatever follows.
# Cutting at the first ";" instead fails when the assignment is the only
# statement and truncates the JSON if a string value contains ";".
js = json.JSONDecoder().raw_decode(scr.lstrip())[0]
# Competitors (one dict per team) of the first competition of the first event.
data = js["events"][0]["competitions"][0]["competitors"]
pp(data)
这会给你一个字典(dict)列表,每支球队一个字典,其中包含该队的全部比赛信息:篮板、胜负记录、得分等。列表中有两支球队,每个字典的 homeAway 字段标明它是主队(home)还是客队(away),所以我们可以把它解包:
# Select each side by its 'homeAway' field instead of by list position:
# the order of the competitors list is not a reliable indicator of which
# team is home and which is away.
_by_side = {team["homeAway"]: team for team in data}
away, home = _by_side["away"], _by_side["home"]
打印出我们得到的字典:
{u'homeAway': u'away',
u'id': u'5',
u'leaders': [{u'abbreviation': u'Pts',
u'displayName': u'Points',
u'leaders': [{u'athlete': {u'displayName': u'LeBron James',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/1966.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/1966'}],
u'position': {u'abbreviation': u'SF'},
u'shortName': u'L. James',
u'team': {u'id': u'5'}},
u'displayValue': u'27',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'points'},
{u'abbreviation': u'Reb',
u'displayName': u'Rebounds',
u'leaders': [{u'athlete': {u'displayName': u'Kevin Love',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/3449.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/3449'}],
u'position': {u'abbreviation': u'PF'},
u'shortName': u'K. Love',
u'team': {u'id': u'5'}},
u'displayValue': u'14',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'rebounds'},
{u'abbreviation': u'Ast',
u'displayName': u'Assists',
u'leaders': [{u'athlete': {u'displayName': u'LeBron James',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/1966.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/1966'}],
u'position': {u'abbreviation': u'SF'},
u'shortName': u'L. James',
u'team': {u'id': u'5'}},
u'displayValue': u'11',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'assists'},
{u'abbreviation': u'RAT',
u'displayName': u'Rating',
u'leaders': [{u'athlete': {u'displayName': u'LeBron James',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/1966.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/1966'}],
u'position': {u'abbreviation': u'SF'},
u'shortName': u'L. James',
u'team': {u'id': u'5'}},
u'displayValue': u'27 PTS, 11 REB, 11 AST, 2 STL, 3 BLK',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'rating'}],
u'linescores': [{u'value': 23},
{u'value': 19},
{u'value': 33},
{u'value': 18}],
u'records': [{u'abbreviation': u'Total',
u'name': u'Total',
u'summary': u'57-25',
u'type': u'total'},
{u'name': u'Home', u'summary': u'33-8', u'type': u'home'},
{u'name': u'Road', u'summary': u'24-17', u'type': u'road'}],
u'score': u'93',
u'statistics': [{u'abbreviation': u'REB',
u'displayValue': u'48',
u'name': u'rebounds'},
{u'abbreviation': u'RPG',
u'displayValue': u'48.0',
u'name': u'avgRebounds'},
{u'abbreviation': u'AST',
u'displayValue': u'17',
u'name': u'assists'},
{u'abbreviation': u'FGA',
u'displayValue': u'82',
u'name': u'fieldGoalsAttempted'},
{u'abbreviation': u'FGM',
u'displayValue': u'33',
u'name': u'fieldGoalsMade'},
{u'abbreviation': u'FG%',
u'displayValue': u'40.2',
u'name': u'fieldGoalPct'},
{u'abbreviation': u'FT%',
u'displayValue': u'84.0',
u'name': u'freeThrowPct'},
{u'abbreviation': u'FTA',
u'displayValue': u'25',
u'name': u'freeThrowsAttempted'},
{u'abbreviation': u'FTM',
u'displayValue': u'21',
u'name': u'freeThrowsMade'},
{u'abbreviation': u'PTS',
u'displayValue': u'93',
u'name': u'points'},
{u'abbreviation': u'3P%',
u'displayValue': u'24.0',
u'name': u'threePointPct'},
{u'abbreviation': u'3PA',
u'displayValue': u'25',
u'name': u'threePointFieldGoalsAttempted'},
{u'abbreviation': u'3PM',
u'displayValue': u'6',
u'name': u'threePointFieldGoalsMade'},
{u'abbreviation': u'PPG',
u'displayValue': u'93.0',
u'name': u'avgPoints'},
{u'abbreviation': u'APG',
u'displayValue': u'17.0',
u'name': u'avgAssists'},
{u'abbreviation': u'3P%',
u'displayValue': u'24.0',
u'name': u'threePointFieldGoalPct'}],
u'team': {u'abbreviation': u'CLE',
u'color': u'061642',
u'displayName': u'Cleveland Cavaliers',
u'id': u'5',
u'isActive': True,
u'links': [{u'href': u'http://espn.go.com/nba/team/_/name/cle',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'clubhouse', u'desktop', u'team'],
u'text': u'Clubhouse'},
{u'href': u'sportscenter://x-callback-url/showClubhouse?uid=s:40~l:46~t:5',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'clubhouse',
u'sportscenter',
u'app',
u'team'],
u'text': u'Clubhouse'},
{u'href': u'http://espn.go.com/nba/team/roster/_/name/cle',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'roster', u'desktop', u'team'],
u'text': u'Roster'},
{u'href': u'http://espn.go.com/nba/team/stats/_/name/cle',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'stats', u'desktop', u'team'],
u'text': u'Statistics'},
{u'href': u'http://espn.go.com/nba/team/schedule/_/name/cle',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'schedule', u'desktop', u'team'],
u'text': u'Schedule'},
{u'href': u'http://espn.go.com/nba/team/photos/_/name/cle',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'photos', u'desktop', u'team'],
u'text': u'photos'},
{u'href': u'http://espn.go.com/nba/team/stadium/_/name/cle',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'stadium', u'desktop', u'team'],
u'text': u'Stadium'},
{u'href': u'http://www.dickssportinggoods.com/category/index.jsp?categoryId=15528656',
u'isExternal': True,
u'isPremium': False,
u'rel': [u'shop', u'desktop', u'team'],
u'text': u'Shop'},
{u'href': u'sportscenter://x-callback-url/showClubhouse?uid=s:40~l:46~t:5&section=scores',
u'isExternal': False,
u'isPremium': False,
u'rel': [u'scores',
u'sportscenter',
u'app',
u'team'],
u'text': u'Scores'},
{u'href': u'http://insider.espn.go.com/nbadraft/results/team/_/team/cle',
u'isExternal': False,
u'isPremium': True,
u'rel': [u'draftpicks', u'desktop', u'team'],
u'text': u'Draft Picks'}],
u'location': u'Cleveland',
u'logo': u'http://a.espncdn.com/i/teamlogos/nba/500/scoreboard/cle.png',
u'name': u'Cavaliers',
u'shortDisplayName': u'Cavaliers',
u'uid': u's:40~l:46~t:5',
u'venue': {u'id': u'3417'}},
u'type': u'team',
u'uid': u's:40~l:46~t:5',
u'winner': True}
然后,您可以使用dict的键获取所需的所有信息:
In [49]: js = json.loads(scr[:scr.index(";")])
In [50]: data = js["events"][0]["competitions"][0]["competitors"]
In [51]: away, home = data
In [52]: away[u'score'], home["score"]
Out[52]: (u'89', u'93')
In [53]: away["linescores"], home["linescores"]
Out[53]:
([{u'value': 22}, {u'value': 27}, {u'value': 27}, {u'value': 13}],
[{u'value': 23}, {u'value': 19}, {u'value': 33}, {u'value': 18}])
In [54]: away["statistics"]
Out[54]:
[{u'abbreviation': u'REB', u'displayValue': u'39', u'name': u'rebounds'},
{u'abbreviation': u'RPG', u'displayValue': u'39.0', u'name': u'avgRebounds'},
{u'abbreviation': u'AST', u'displayValue': u'22', u'name': u'assists'},
{u'abbreviation': u'FGA',
u'displayValue': u'83',
u'name': u'fieldGoalsAttempted'},
{u'abbreviation': u'FGM', u'displayValue': u'32', u'name': u'fieldGoalsMade'},
{u'abbreviation': u'FG%', u'displayValue': u'38.6', u'name': u'fieldGoalPct'},
{u'abbreviation': u'FT%', u'displayValue': u'76.9', u'name': u'freeThrowPct'},
{u'abbreviation': u'FTA',
u'displayValue': u'13',
u'name': u'freeThrowsAttempted'},
{u'abbreviation': u'FTM', u'displayValue': u'10', u'name': u'freeThrowsMade'},
{u'abbreviation': u'PTS', u'displayValue': u'89', u'name': u'points'},
{u'abbreviation': u'3P%',
u'displayValue': u'36.6',
u'name': u'threePointPct'},
{u'abbreviation': u'3PA',
u'displayValue': u'41',
u'name': u'threePointFieldGoalsAttempted'},
{u'abbreviation': u'3PM',
u'displayValue': u'15',
u'name': u'threePointFieldGoalsMade'},
{u'abbreviation': u'PPG', u'displayValue': u'89.0', u'name': u'avgPoints'},
{u'abbreviation': u'APG', u'displayValue': u'22.0', u'name': u'avgAssists'},
{u'abbreviation': u'3P%',
u'displayValue': u'36.6',
u'name': u'threePointFieldGoalPct'}]
获得最佳表现者:
In [57]: away["leaders"]
Out[57]:
[{u'abbreviation': u'Pts',
u'displayName': u'Points',
u'leaders': [{u'athlete': {u'displayName': u'Draymond Green',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/6589.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/6589'}],
u'position': {u'abbreviation': u'PF'},
u'shortName': u'D. Green',
u'team': {u'id': u'9'}},
u'displayValue': u'32',
u'team': {u'id': u'9'},
u'value': None}],
u'name': u'points'},
{u'abbreviation': u'Reb',
u'displayName': u'Rebounds',
u'leaders': [{u'athlete': {u'displayName': u'Draymond Green',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/6589.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/6589'}],
u'position': {u'abbreviation': u'PF'},
u'shortName': u'D. Green',
u'team': {u'id': u'9'}},
u'displayValue': u'15',
u'team': {u'id': u'9'},
u'value': None}],
u'name': u'rebounds'},
{u'abbreviation': u'Ast',
u'displayName': u'Assists',
u'leaders': [{u'athlete': {u'displayName': u'Draymond Green',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/6589.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/6589'}],
u'position': {u'abbreviation': u'PF'},
u'shortName': u'D. Green',
u'team': {u'id': u'9'}},
u'displayValue': u'9',
u'team': {u'id': u'9'},
u'value': None}],
u'name': u'assists'},
{u'abbreviation': u'RAT',
u'displayName': u'Rating',
u'leaders': [{u'athlete': {u'displayName': u'Draymond Green',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/6589.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/6589'}],
u'position': {u'abbreviation': u'PF'},
u'shortName': u'D. Green',
u'team': {u'id': u'9'}},
u'displayValue': u'32 PTS, 15 REB, 9 AST, 2 STL',
u'team': {u'id': u'9'},
u'value': None}],
u'name': u'rating'}]
In [58]: home["leaders"]
Out[58]:
[{u'abbreviation': u'Pts',
u'displayName': u'Points',
u'leaders': [{u'athlete': {u'displayName': u'LeBron James',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/1966.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/1966'}],
u'position': {u'abbreviation': u'SF'},
u'shortName': u'L. James',
u'team': {u'id': u'5'}},
u'displayValue': u'27',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'points'},
{u'abbreviation': u'Reb',
u'displayName': u'Rebounds',
u'leaders': [{u'athlete': {u'displayName': u'Kevin Love',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/3449.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/3449'}],
u'position': {u'abbreviation': u'PF'},
u'shortName': u'K. Love',
u'team': {u'id': u'5'}},
u'displayValue': u'14',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'rebounds'},
{u'abbreviation': u'Ast',
u'displayName': u'Assists',
u'leaders': [{u'athlete': {u'displayName': u'LeBron James',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/1966.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/1966'}],
u'position': {u'abbreviation': u'SF'},
u'shortName': u'L. James',
u'team': {u'id': u'5'}},
u'displayValue': u'11',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'assists'},
{u'abbreviation': u'RAT',
u'displayName': u'Rating',
u'leaders': [{u'athlete': {u'displayName': u'LeBron James',
u'headshot': u'http://a.espncdn.com/i/headshots/nba/players/full/1966.png',
u'links': [{u'href': u'http://espn.go.com/nba/player/_/id/1966'}],
u'position': {u'abbreviation': u'SF'},
u'shortName': u'L. James',
u'team': {u'id': u'5'}},
u'displayValue': u'27 PTS, 11 REB, 11 AST, 2 STL, 3 BLK',
u'team': {u'id': u'5'},
u'value': None}],
u'name': u'rating'}]