This is more a question about understanding (and soothing my frustration) than about how to fix it, but as the title says: why does loading a URL/page in Selenium (Python in my case) so often fail and throw a NoSuchElementException? I understand that, just as in normal browsing, a web page sometimes fails to load. But I find that 25%-50% of my attempts to load a URL/page do not succeed within a 30-second timeout, so I have to retry up to 10 times, increasing the delay between attempts, before the URL/page finally loads.
Any help in understanding this would be much appreciated.
Thanks in advance for any explanation.
Example code
I am currently trying https://www.carsales.com.au
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import mysql.connector
import time
import datetime
from pyvirtualdisplay import Display
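# Chrome needs a display; run one in a virtual framebuffer so the script works on a headless server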
display = Display(visible=0, size=(1920, 1080))
display.start()
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-setuid-sandbox")
driver = webdriver.Chrome(chrome_options=chrome_options)
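# Fetch the saved searches to scrape (connection details and query redacted)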
con = mysql.connector.connect(*****)
cursor = con.cursor()
sql_user_searches = "****"
cursor.execute(sql_user_searches)
searches = cursor.fetchall()
for z in searches:
    offset = 0
    # The first page needs no offset query parameter
    url = "https://www.carsales.com.au/cars/{0}/{1}/".format(z[2], z[4])
    sleep_time = 5
    num_retries = 100
    error = 0
    for loopingcow in range(0, num_retries):
        try:
            error = 0
            driver.get(url)
            time.sleep(sleep_time)
            # Probe for the results container; this throws NoSuchElementException when the page has not loaded
            driver.find_element_by_xpath("""//*[@class="result-set-container "]""").get_attribute("outerHTML")
            print("success")
        except NoSuchElementException:
            print("error")
            error = 1
        if error == 1:
            time.sleep(sleep_time)  # wait before trying to fetch the data again
            sleep_time += 1  # Implement your backoff algorithm here i.e. exponential backoff
        else:
            break
    # Read the total page count out of the pagination text (second whitespace-separated token)
    total_pagination = driver.find_elements_by_xpath("""//div[@class="tabbed-pagination"]/div[@class="pagination-container"]/div[@class="pagination-container"]/div[@class="pagination"]/p""")[0].text
    number_of_pages_split = total_pagination.split(" ")
    number_of_pages = int(number_of_pages_split[1])
    page = 0
    while page < number_of_pages:
        offset = page * 12  # 12 listings per page
        url = "https://www.carsales.com.au/cars/{0}/{1}/?offset={2}".format(z[2], z[4], offset)
        print(url)
        sleep_time = 5
        num_retries = 100
        error = 0
        for loopyloop in range(0, num_retries):
            try:
                error = 0
                driver.get(url)
                time.sleep(sleep_time)
                driver.find_element_by_xpath("""//*[@class="result-set-container "]""").get_attribute("outerHTML")
                print("success")
            except NoSuchElementException:
                print("error")
                error = 1
            if error == 1:
                time.sleep(sleep_time)  # wait before trying to fetch the data again
                sleep_time += 1  # Implement your backoff algorithm here i.e. exponential backoff
            else:
                break
        rows = driver.find_elements_by_xpath("""//div[contains(@class,"listing-item")]""")
        for row in rows:
            # ".//" keeps the XPath scoped to this row instead of searching the whole document
            title = row.find_element_by_xpath(""".//div[contains(@class,"title ")]/a/h2""").text
            query = """****""".format(*****)
            cursor.execute(query)
            con.commit()
        page = page + 1
cursor.close()
con.close()
driver.quit()
display.popen.kill()
print("success")
Second example code, with a 30-second timeout
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import mysql.connector
import time
from pyvirtualdisplay import Display
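# As in the first script, run Chrome against a virtual framebuffer display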
display = Display(visible=0, size=(1920, 1080))
display.start()
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-setuid-sandbox")
driver = webdriver.Chrome(chrome_options=chrome_options)
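# Today's day and month, used below to decide which dates still need fetching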
date = int(time.strftime("%d"))
month = int(time.strftime("%m"))
con = mysql.connector.connect(*****)
cursor = con.cursor()
for z in range(11, 13):
    # Days-in-month lookup; end_date is exclusive in range(start_date, end_date) below
    # (only z == 11 and z == 12 are reachable with the current range)
    if z == 9:
        end_date = 31
    elif z == 10:
        end_date = 32
    elif z == 11:
        end_date = 31
    elif z == 12:
        end_date = 32
    elif z == 8:
        end_date = 32
    start_date = 1
    # Work out which day to start from, relative to today
    if z == month and (end_date - date) < 5:
        start_date = end_date
    elif z == (month + 1) and (end_date - date) < 5:
        start_date = start_date + 4 - (end_date - date)
    elif z > month:
        start_date = 1
    else:
        start_date = date
    print(z)
    print(start_date)
    print(end_date)
    for x in range(start_date, end_date):
        time.sleep(2)
        x_url = str(x).zfill(2)
        z_url = str(z).zfill(2)
        date_str = x_url + "-" + z_url  # renamed from "date" so it does not shadow the integer day-of-month above
        url = "https://www.tiket.com/pesawat/cari?d=DPS&a=JKT&date=2017-{1}-{0}&adult=2&child=0&infant=0".format(x_url, z_url)
        print(url)
        driver.get(url)
        time.sleep(30)
        last_height = driver.execute_script("return document.body.scrollHeight")
        print(last_height)
        w = 0
        # This loop runs at most once; it only guards against a zero-height (blank) page
        while w < last_height:
            print("Success")
            w = last_height
            try:
                time.sleep(30)
                print(driver.find_element_by_xpath("""//*[@id="tbody_depart"]""").get_attribute("outerHTML"))
                rows = driver.find_elements_by_xpath("""//tr[contains(@id,"flight")]""")
                for row in rows:
                    airline = row.get_attribute("data-airlinesname")
                    price = row.get_attribute("data-price")
                    departure = row.get_attribute("data-depart")
                    arrival = row.get_attribute("data-arrival")
                    baggage = row.get_attribute("data-baggage")
                    stops = row.get_attribute("data-stoptext")
                    query = """****""".format(******)
                    print(query)
                    cursor.execute(query)
                    con.commit()
            except:  # on any failure, reload the page once and scrape again
                driver.get(url)
                time.sleep(30)
                print(driver.find_element_by_xpath("""//*[@id="tbody_depart"]""").get_attribute("outerHTML"))
                rows = driver.find_elements_by_xpath("""//tr[contains(@id,"flight")]""")
                for row in rows:
                    airline = row.get_attribute("data-airlinesname")
                    price = row.get_attribute("data-price")
                    departure = row.get_attribute("data-depart")
                    arrival = row.get_attribute("data-arrival")
                    baggage = row.get_attribute("data-baggage")
                    stops = row.get_attribute("data-stoptext")
                    query = """*****""".format(*****)
                    print(query)
                    cursor.execute(query)
                    con.commit()
cursor.close()
con.close()
driver.close()
display.popen.kill()
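The same applies here: WebDriverWait is imported but unused, and every flat time.sleep(30) makes each page cost a full 30 seconds even when it renders quickly. A sketch of the alternative, assuming #tbody_depart only appears once the results have rendered (my assumption, not verified against the site):

from selenium.common.exceptions import TimeoutException

try:
    # Wait up to 30 s, but continue as soon as the departures table exists
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.ID, "tbody_depart")))
    rows = driver.find_elements_by_xpath("""//tr[contains(@id,"flight")]""")
except TimeoutException:
    rows = []  # the table never appeared; treat this date as having no results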