我写了以下代码:
从django.core.management.base导入BaseCommand,CommandError
从sbbetting.models导入团队,联赛,赛程,国家
汇入要求
从bs4导入BeautifulSoup
从django.core.exceptions导入ObjectDoesNotExist
从datetime导入datetime,timedelta
pool = gevent.pool.Pool()
class Command(BaseCommand):
def get_page(self, url, use_headers=None):
headers = {
"Host": "d.flashscore.com",
}
if(use_headers):
response = requests.get(url, headers=headers)
else:
response = requests.get(url)
return BeautifulSoup(response.content, features="lxml")
def find_ids(self, input):
return_value = []
for index in range (0, len(input)):
if(input[index:index + 3] == "AA÷"):
return_value.append(input[index+3:index+11])
return return_value
def create_related_fixtures(self, fixture, related_fixtures):
for match in related_fixtures[0:10]:
gevent.sleep(10)
match_code = match.get('onclick')[17:25]
base_url = "https://www.flashscore.com/match/" + match_code
summary_url = "https://d.flashscore.com/x/feed/d_su_" + match_code + "_en_1"
fixture_data = self.get_page(base_url)
summary_data = self.get_page(summary_url, True)
teams = fixture_data.find_all('div', {'class': 'side-images-row'})
home_id = teams[0].find('a').get('onclick').split('/')[3].split("'")[0]
away_id = teams[1].find('a').get('onclick').split('/')[3].split("'")[0]
home_name = teams[0].find('img').get('alt').split(" (")[0]
away_name = teams[1].find('img').get('alt').split(" (")[0]
country_name = str(fixture_data.find('div', {'class': 'fleft'}).find_all('span')[1].text.split(":")[0]).lower().title()
league_name = fixture_data.find('div', {'class': 'fleft'}).find_all('span')[1].text.split(":")[1].split(" -")[0].replace(" ", "", 1)
league_url = "https://flashscore.com" + fixture_data.find('div', {'class': 'fleft'}).find('a').get('onclick').split("'")[1].split("'")[0]
league_data = self.get_page(league_url)
season = league_data.find('div', {'class': 'tournament-season'}).text
country = Country.create(country_name)
league = League.create(league_name, season, country)
home = Team.create(home_name, league, home_id)
away = Team.create(away_name, league, away_id)
fh_goals_home = 0
fh_goals_away = 0
sh_goals_home = 0
sh_goals_away = 0
all_fields_populated = True
if(len(summary_data.find_all('div', {'class': 'detailMS__incidentsHeader'})) > 1):
fh_goals_home = int(summary_data.find_all('div', {'class': 'detailMS__incidentsHeader'})[0].find('span', {'class': 'p1_home'}).text)
fh_goals_away = int(summary_data.find_all('div', {'class': 'detailMS__incidentsHeader'})[0].find('span', {'class': 'p1_away'}).text)
sh_goals_home = int(summary_data.find_all('div', {'class': 'detailMS__incidentsHeader'})[1].find('span', {'class': 'p2_home'}).text)
sh_goals_away = int(summary_data.find_all('div', {'class': 'detailMS__incidentsHeader'})[1].find('span', {'class': 'p2_away'}).text)
total_goals_home = fh_goals_home + sh_goals_home
total_goals_away = fh_goals_away + sh_goals_away
else:
match_result = fixture_data.find('div', {'id': "event_detail_current_result"}).find_all('span', {'class': 'scoreboard'})
total_goals_home = int(match_result[0].text)
total_goals_away = int(match_result[1].text)
all_fields_populated = False
total_goals = total_goals_home + total_goals_away
def analyse_schedule(self, fixture):
gevent.sleep(10)
base_url = "https://flashscore.com/match/{}".format(fixture)
fixture_data = self.get_page(base_url)
date = datetime.utcfromtimestamp(int(fixture_data.find_all('script')[9].text.split('= ')[8].split(";")[0])) + timedelta(hours=2)
country_name = str(fixture_data.find('div', {'class': 'fleft'}).find_all('span')[1].text.split(":")[0]).lower().title()
teams = fixture_data.find_all('div', {'class': 'side-images-row'})
home_id = teams[0].find('a').get('onclick').split('/')[3].split("'")[0]
away_id = teams[1].find('a').get('onclick').split('/')[3].split("'")[0]
league_name = fixture_data.find('div', {'class': 'fleft'}).find_all('span')[1].text.split(":")[1].split(" -")[0].replace(" ", "", 1)
league_url = "https://flashscore.com" + fixture_data.find('div', {'class': 'fleft'}).find('a').get('onclick').split("'")[1].split("'")[0]
league_data = self.get_page(league_url)
season = league_data.find('div', {'class': 'tournament-season'}).text
country = Country.create(name=country_name)
league = League.create(name=league_name, season=season, country=country)
home_name = teams[0].find('img').get('alt').split(" (")[0]
away_name = teams[1].find('img').get('alt').split(" (")[0]
home = Team.create(name=home_name, league=league, flashscore_id=home_id)
away = Team.create(name=away_name, league=league, flashscore_id=away_id)
new_fixture = Fixture(home=home, away=away, date=date, league=league, flashscore_id=fixture)
new_fixture.save()
h2h_url = "https://d.flashscore.com/x/feed/d_hh_" + fixture + "_en_1"
h2h_data = self.get_page(h2h_url, True)
h2h_list = h2h_data.find_all('div', {'class': 'h2h-wrapper'})
home_overal = h2h_list[0]
away_overal = h2h_list[1]
h2h = h2h_list[2]
home_matches = home_overal.find('table').find('tbody').find_all('tr')
away_matches = away_overal.find('table').find('tbody').find_all('tr')
home_home_matches = h2h_data.find('div', {'id': 'tab-h2h-home'}).find('table').find('tbody').find_all('tr')
away_away_matches = h2h_data.find('div', {'id': 'tab-h2h-away'}).find('table').find('tbody').find_all('tr')
if(len(home_matches) > 10 and len(away_matches) > 10):
home_overal_details_process = gevent.spawn(self.create_related_fixtures, new_fixture, home_matches)
away_overal_details_process = gevent.spawn(self.create_related_fixtures, new_fixture, away_matches)
if(len(home_home_matches) > 10 and len(away_away_matches) > 10):
home_home_details_process = gevent.spawn(self.create_related_fixtures, new_fixture, home_home_matches)
away_away_details_process = gevent.spawn(self.create_related_fixtures, new_fixture, away_away_matches)
def handle(self, *args, **options):
schedule_data = self.get_page("https://d.flashscore.com/x/feed/f_1_3_2_en_1", True)
schedule_fixtures = self.find_ids(schedule_data.text)
list(pool.imap_unordered(self.analyse_schedule, schedule_fixtures))
self.stdout.write(self.style.SUCCESS('Successfully run command'))
但是当我运行它时,我时不时地得到以下错误:
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
我最初尝试通过在每个循环中添加gevent.sleep(3)
来解决此问题,但这似乎不起作用。然后,我尝试升值gevent.sleep
的值,但这似乎也不起作用。猴子补丁是在manage.py(我使用Django)中完成的
有人可以帮我避免这个问题吗?