我试图从 http://www.bbc.co.uk/sport/football/league-one/results 上每场比赛（fixture）的下拉列表中提取进球时间。
但我在抓取到的页面中搜索时找不到这些数据——有人知道为什么吗？
import requests
from bs4 import BeautifulSoup
# Load the results index page and dump its parsed structure.
r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
# Name the parser explicitly: a bare BeautifulSoup(r.content) guesses one
# (html.parser vs lxml) and emits a warning on modern bs4 versions.
soup = BeautifulSoup(r.content, "html.parser")
print(soup.prettify())
# Save Teams: text of every anchor on the page.
for link in soup.find_all("a"):
    print(link.text)
# Save Results: <abbr> elements carry the score markup on this page.
for link in soup.find_all("abbr"):
    print(link.text)
FF
答案 0（得分：0）
这种做法请求量很大（更不用说因为要逐一加载每个比赛页面而很慢），而且对方最终可能会因请求过多而屏蔽你，但这是我能看到的唯一办法。我所做的就是遍历结果页，取出与 Results 按钮相关联的 href，加载该页面并解析它，从中提取比分信息。
import requests
from bs4 import BeautifulSoup
def parse_page(data):
    """Parse one BBC match-report page and print teams, scores and scorers.

    data: HTML text of a match-report page (e.g. ``requests.get(url).text``).
    Prints to stdout; returns None. Skips pages without a match-overview
    section instead of raising AttributeError.
    """
    subsoup = BeautifulSoup(data, "html.parser")
    matchoverview = subsoup.find('div', attrs={'id': 'match-overview'})
    if matchoverview is None:
        # Layout changed or this is not a report page - skip, don't crash.
        return
    print('--------------')
    # Home side: the first 'team-match-details' block inside the overview.
    # Hoist the lookup once instead of re-walking the tree per field.
    home = matchoverview.find('div', attrs={'class': 'team-match-details'})
    homeTeam = home.findNext('span').findNext('a').text
    homeScore = home.findNext('span').findNext('span').text
    homeGoalScorers = ["Home Goal Scorers:"]
    # U+2032 (prime) marks the minute on the BBC page; normalise to "'".
    for goals in home.findNext('p').find_all('span'):
        homeGoalScorers.append(goals.text.replace(u'\u2032', "'"))
    homeGoals = "\n".join(homeGoalScorers)
    # Away side lives in its own 'away-team' container.
    away = matchoverview.find('div', attrs={'id': 'away-team'}).find(
        'div', attrs={'class': 'team-match-details'})
    awayTeam = away.findNext('span').findNext('a').text
    awayScore = away.findNext('span').findNext('span').text
    awayGoalScorers = ["Away Goal Scorers:"]
    for goals in away.findNext('p').find_all('span'):
        awayGoalScorers.append(goals.text.replace(u'\u2032', "'"))
    awayGoals = "\n".join(awayGoalScorers)
    print('{0} {1} - {2} {3}'.format(homeTeam, homeScore, awayTeam, awayScore))
    print(homeGoals)
    print(awayGoals)
def all_league_results():
    """Fetch the league results index and parse every linked match report.

    Issues one HTTP request per match (slow, request-heavy); each report
    is printed via parse_page. Returns None.
    """
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content, "html.parser")
    # Each match report is linked from an <a class="report"> element;
    # its href is site-relative, so prepend the host.
    for link in soup.find_all("a", attrs={'class': 'report'}):
        full_link = 'http://www.bbc.com' + link['href']
        subr = requests.get(full_link)
        parse_page(subr.text)
def specific_game_results(url):
    """Fetch a single match-report page and print its parsed details."""
    response = requests.get(url)
    parse_page(response.text)
if __name__ == '__main__':
    # Guard the entry point so importing this module does not fire
    # network requests as a side effect.
    # Get a specific game's results.
    specific_game_results('http://www.bbc.co.uk/sport/0/football/32460049')
    # Get all current league results (slow: one request per match).
    all_league_results()