我正在尝试抓取下面这个网站页面的数据。但是,只要单击复选框,脚本就无法正常工作。在此之前,可以看到在未点击复选框时我只检测到 24 个元素,这是正确的抓取结果。
单击复选框后,页面上会出现更多元素,但脚本却无法正常抓取,如下所示。它为什么会这样?我原以为 Selenium 会抓取它能看到的内容,但在这种情况下它并没有这样做……
# Scrape soccer team names, odds and match links from palmerbet.com.
# NOTE(review): assumes `webdriver`, `wait` (WebDriverWait), `Keys`,
# `TimeoutException`, `os`, `time`, `csv` and `shuffle` are imported earlier
# in the file — confirm against the full source.
driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()

# Start each run with a clean output file; ignore "file not found".
try:
    os.remove('vtg121.csv')
except OSError:
    pass

driver.get('https://www.palmerbet.com/sports/soccer')

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

time.sleep(1)

# Open the tournament filter panel so the checkboxes become clickable.
# ("filter_labe" is a deliberate partial class-name match.)
clickMe = wait(driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, '//*[contains(@class,"filter_labe")]')))
clickMe.click()

# One checkbox per tournament; visit them in random order.
options = driver.find_elements_by_xpath('//*[contains(@class,"filter_labe")]')
indexes = list(range(len(options)))
shuffle(indexes)

for index in indexes:
    # Wait for this tournament's checkbox, scroll it into view, click it.
    # (The original waited twice on the same locator; once is enough.)
    clickMe1 = wait(driver, 10).until(EC.element_to_be_clickable(
        (By.XPATH, '(//ul[@id="tournaments"]//li//input)[%s]' % str(index + 1))))
    driver.find_element_by_tag_name('body').send_keys(Keys.UP)
    driver.execute_script("return arguments[0].scrollIntoView();", clickMe1)
    clickMe1.click()

    # Give the page time to render the matches for the selected tournament.
    time.sleep(5)

    # Team names (first cell of each match row).
    langs3_text = [lang.text for lang in
                   driver.find_elements_by_xpath('//*[@id="mta_row"]/td[1]')]

    # Team odds (second cell's bet value).
    langs_text = [lang.text for lang in driver.find_elements_by_css_selector(
        "#mta_row .mpm_teams_cell_click:nth-child(2) .mpm_teams_bet_val")]

    # Match links.
    try:
        clickMe = wait(driver, 15).until(EC.element_to_be_clickable(
            (By.XPATH,
             "//*[@class='match-pop-market']//a[contains(@href, '/sports/soccer/')]")))
        # BUG FIX: the original clicked `clickMe1` (the tournament checkbox)
        # here, toggling the filter back off and losing the extra elements.
        # Click the freshly located link element instead.
        clickMe.click()
    except TimeoutException:
        print("No link was found")

    elem_href = [elem.get_attribute("href")
                 for elem in driver.find_elements_by_css_selector(
                     '.match-pop-market a[href*="/sports/soccer/"]')]

    print(("NEW LINE BREAK"))

    # Append this tournament's (name, odds, link) rows to the CSV.
    with open('vtg121.csv', 'a', newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for row in zip(langs3_text, langs_text, elem_href):
            writer.writerow(row)
            print(row)
答案 0（得分:1）
您可以使用以下代码获取球队名称,并在此基础上补充更多代码:
# Faster approach: read the tournament ids from the filter checkboxes, then
# fetch each round's matches from the site's JSON endpoint directly instead
# of clicking through the UI.
from selenium import webdriver
import json
import time

driver = webdriver.Chrome()
driver.get("https://www.palmerbet.com/sports/soccer")

# Let the page render the filter list before reading the checkbox values.
time.sleep(5)
values = [elem.get_attribute("value")
          for elem in driver.find_elements_by_css_selector(
              "li.sport-grp-filter.filter_item input")]

for val in values:
    # The endpoint returns JSON, which Chrome renders inside a <pre> element.
    driver.get("https://www.palmerbet.com/sports/getroundmatches/socc/" + val)
    json_data = driver.find_element_by_tag_name("pre").text
    data = json.loads(json_data)
    for item in data["m"]:
        # "mta"/"mtb" appear to be the two competing teams and "cn" the
        # club name — presumably; verify against the endpoint's payload.
        print (item["mta"]["cn"], item["mtb"]["cn"])