我正试图获取英超联赛的历史比赛结果。虽然请求返回的 HTML 中包含了所有结果,但 Beautiful Soup 的 find_all 仅返回 200 个结果(应该有 463 个)。有什么办法可以解决这个问题吗?
非常感谢
马特
import requests
from bs4 import BeautifulSoup
# Results page for the 1992-93 Premier League season.
url = "https://www.skysports.com/premier-league-results/1992-93"
# Download the page body as raw bytes.
url_content = requests.get(url).content
url_bs = BeautifulSoup(url_content, 'html.parser')
# Collect every element whose class attribute matches "fixres__item".
match_list = url_bs.find_all(attrs={"class": "fixres__item"})
print(len(match_list))
答案 0 :(得分:2)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Run Chrome without opening a visible browser window.
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    driver.get('https://www.skysports.com/premier-league-results/1992-93')
    # Wait up to 20 seconds for the 'plus-more__text' element to be present.
    # until() returns the located element (or raises TimeoutException), so
    # no separate lookup or truthiness check is needed afterwards.
    show_more = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'plus-more__text'))
    )
    print('Found')
    # Scroll the element into view and click it through JavaScript.
    driver.execute_script("arguments[0].scrollIntoView();", show_more)
    driver.execute_script("arguments[0].click();", show_more)
    # Snapshot the DOM after the click so the parser sees the updated page.
    html = driver.page_source
finally:
    # Always shut the browser down, even if the wait or the click failed.
    driver.quit()

soup = BeautifulSoup(html, 'lxml')
links = soup.find_all('div', class_='fixres__item')
print(len(links))