import re

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
# Module-level Chrome driver shared by every function below.
# NOTE(review): `executable_path` is deprecated in Selenium 4 (use a Service
# object instead); '--ignore-ssl-errors' / '--ssl-protocol' look like
# PhantomJS service args, not chromedriver ones — confirm they have any
# effect when passed to chromedriver.
driver = webdriver.Chrome(executable_path=r'chromedriver.exe', service_args = ['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
def GetOdds(url, max_matches=4):
    """Open one results page and click through its first ``max_matches`` rows.

    Each click navigates into a match detail page; the browser then goes
    back via JS history.  Row elements are re-located on every iteration
    because the back-navigation rebuilds the DOM and would otherwise leave
    stale element references.

    Parameters
    ----------
    url : str
        Results-page URL to load (pages differ only in the '#/page/N/' hash).
    max_matches : int, optional
        How many match rows to visit on this page (default 4, matching the
        original hard-coded loop bound).
    """
    driver.get(url)
    # driver.get() on a URL that differs only in its fragment does not force
    # a full reload, so explicitly wait until the match rows of the *new*
    # page are present.  Without this, the first click could land on the
    # previous page's DOM — the cause of the "first element skipped" bug.
    WebDriverWait(driver, 10).until(
        lambda d: d.find_elements_by_class_name("table-participant"))
    for i in range(max_matches):
        try:
            # Re-locate every pass: going back invalidates old references.
            matches = driver.find_elements_by_class_name("table-participant")
            if i >= len(matches):
                break  # this page has fewer rows than requested
            matches[i].click()
            driver.implicitly_wait(10)
            driver.execute_script("window.history.go(-1)")
        except WebDriverException:
            # Best-effort: skip rows that fail to click (overlays, stale
            # DOM, timing) instead of aborting the whole page.
            continue
def GetPagination():
    """Read the total page count from the pagination widget, then scrape
    every results page via :func:`GetOdds`.

    The last ``<a>`` inside the ``#pagination`` div carries the final page
    number in its ``x-page`` attribute.  Iteration runs 1..pagination
    inclusive — the original ``range(1, pagination)`` silently dropped the
    last page.
    """
    soup = BeautifulSoup(driver.page_source, 'lxml')
    last_link = soup.find('div', {'id': 'pagination'}).find_all('a')[-1]
    pagination = int(last_link['x-page'].strip())
    # +1 so the final page is included (range's end is exclusive).
    for page in range(1, pagination + 1):
        try:
            url = ('https://www.oddsportal.com/basketball/usa/'
                   'nba-2016-2017/results/#/page/%d/' % page)
            GetOdds(url)
        except WebDriverException:
            # Stop paging on the first hard browser failure, mirroring the
            # original best-effort behaviour.
            break
# Entry point: load page 1 of the NBA 2016-2017 results so the pagination
# widget is present, then walk every results page it reports.
driver.get('https://www.oddsportal.com/basketball/usa/nba-2016-2017/results/#/page/1/')
GetPagination()
我的 Selenium 代码在进入下一页后，意外地跳过了
`matches = driver.find_elements_by_class_name("table-participant")`
所返回列表中的第一个元素。
为简单起见，我把 for 循环的迭代次数限制为每页仅 4 个匹配项。