我正在尝试用 Selenium 自动翻页浏览亚马逊上 "novels"(小说)的搜索结果列表。脚本有时能正常工作,有时不能,我找不出代码中的错误。有一次它运行良好,在 20 页中翻过了 13 页;但之后再运行就不正常了。到目前为止,它还从未完整地翻完全部 20 页。
from selenium import webdriver
from time import sleep
from bs4 import BeautifulSoup
class App:
    """Drive Chrome through the Amazon.in "novels" search result pages.

    Instantiating the class runs everything: ``__init__`` starts the browser,
    loads the first results page, calls ``scroll_down`` and then closes the
    browser window.
    """

    # NOTE(review): '\I' and '\c' in the two path literals below are not
    # recognised escape sequences, so the backslashes stay literal; raw
    # strings would state that intent explicitly.
    def __init__(self,path='F:\Imaging'):
        """Start Chrome, open the first results page and page through it.

        ``path`` is stored on the instance; no code in this class reads it.
        """
        self.path=path
        self.driver = webdriver.Chrome('F:\chromedriver')
        self.driver.get('https://www.amazon.in/s/ref=sr_pg_1?rh=i%3Aaps%2Ck%3Anovels&keywords=novels&ie=UTF8&qid=1510727563')
        sleep(1)
        self.scroll_down()
        # Not reached when scroll_down() raises: there is no try/finally here.
        self.driver.close()

    def scroll_down(self):
        """Scroll down and click the "Next Page" link 18 times in total.

        One click happens before the loop and 17 more inside it
        (``range(2,19)``); the loop prints the current URL before each of
        its clicks.
        """
        # Scroll to a fixed vertical offset, then pause for a fixed time.
        self.driver.execute_script("window.scrollTo(0,5500);")
        sleep(1)
        load_more = self.driver.find_element_by_xpath('//span[@class="pagnRA"]/a[@title="Next Page"]')
        load_more.click()
        sleep(2)
        # The loop variable is never used; the range only sets the repeat count.
        for value in range(2,19):
            print(self.driver.current_url)
            sleep(3)
            self.driver.execute_script("window.scrollTo(0,5500);")
            sleep(2)
            # The element is looked up and clicked immediately, without
            # waiting for it to become clickable.
            load_more = self.driver.find_element_by_xpath('//span[@class="pagnRA"]/a[@title="Next Page"]')
            # NOTE(review): the traceback quoted after this script reports a
            # WebDriverException from load_more.click() in scroll_down
            # ("is not clickable at point ... Other element would receive the
            # click") after three URLs were printed - presumably this call.
            load_more.click()
            sleep(3)
# Script entry point: constructing App runs the whole browser session,
# because App.__init__ performs the navigation itself.
if __name__=='__main__':
    app=App()
我得到的这段代码的输出是:
C:\Users\Akhil\AppData\Local\Programs\Python\Python36-32\python.exe C:/Users/Akhil/Scrape/amazon.py
https://www.amazon.in/s/ref=sr_pg_2/257-8503487-3570721?rh=i%3Aaps%2Ck%3Anovels&page=2&keywords=novels&ie=UTF8&qid=1510744188
https://www.amazon.in/s/ref=sr_pg_3?rh=i%3Aaps%2Ck%3Anovels&page=3&keywords=novels&ie=UTF8&qid=1510744197
https://www.amazon.in/s/ref=sr_pg_4?rh=i%3Aaps%2Ck%3Anovels&page=4&keywords=novels&ie=UTF8&qid=1510744204
Traceback (most recent call last):
File "C:/Users/Akhil/Scrape/amazon.py", line 31, in <module>
app=App()
File "C:/Users/Akhil/Scrape/amazon.py", line 11, in __init__
self.scroll_down()
File "C:/Users/Akhil/Scrape/amazon.py", line 26, in scroll_down
load_more.click()
File "C:\Users\Akhil\AppData\Local\Programs\Python\Python36-32\lib\site-packages\selenium\webdriver\remote\webelement.py", line 80, in click
self._execute(Command.CLICK_ELEMENT)
File "C:\Users\Akhil\AppData\Local\Programs\Python\Python36-32\lib\site-packages\selenium\webdriver\remote\webelement.py", line 501, in _execute
return self._parent.execute(command, params)
File "C:\Users\Akhil\AppData\Local\Programs\Python\Python36-32\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 308, in execute
self.error_handler.check_response(response)
File "C:\Users\Akhil\AppData\Local\Programs\Python\Python36-32\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <a title="Next Page" id="pagnNextLink" class="pagnNext" href="/gp/search/ref=sr_pg_5?rh=i%3Aaps%2Ck%3Anovels&page=5&keywords=novels&ie=UTF8&qid=1510744210">...</a> is not clickable at point (809, 8). Other element would receive the click: <a href="/gp/prime/ref=nav_prime_try_btn/257-8503487-3570721" class="nav-a nav-a-2" data-ux-mouseover="true" id="nav-link-prime" tabindex="26">...</a>
(Session info: chrome=62.0.3202.94)
(Driver info: chromedriver=2.33.506120 (e3e53437346286c0bc2d2dc9aa4915ba81d9023f),platform=Windows NT 10.0.15063 x86_64)
Process finished with exit code 1
如何解决这个问题?
答案 0 :(得分:0)
这个错误的意思是:点击时 Next Page 元素不可见或不可点击。您可以先显式等待该元素出现并变为可点击(例如使用 WebDriverWait)再点击,或者把 .click()
放在 try / except 块中,以便检测它何时失败。
可能的原因有:目标网站确实已经没有下一页了(您已经翻完了所有页面);页面仍在加载;或者"下一页"链接的格式已经改变。
答案 1 :(得分:0)
请尝试以下代码:
# Wait up to `timeout` seconds for the "Next Page" link to become clickable.
# The XPath uses single quotes internally so that it can live inside a
# double-quoted Python string (nesting double quotes is a syntax error).
load_more = ui.WebDriverWait(driver, timeout).until(
    EC.element_to_be_clickable(
        (By.XPATH, "//span[@class='pagnRA']/a[@title='Next Page']")
    )
)
# Bring the link into the viewport before clicking it.
# NOTE(review): scrollIntoView(true) aligns the element with the top of the
# viewport; if a page header overlaps that area the native click may still be
# intercepted - confirm against the target page.
driver.execute_script("arguments[0].scrollIntoView(true);", load_more)
load_more.click()
其中 timeout 是等待元素变为可点击的最长时间(以秒为单位)。
此外,请在脚本开头添加以下导入:
from selenium.webdriver.support import ui
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
答案 2 :(得分:0)
我在 @RatmirAsanov 给出的答案基础上稍作修改,最终得到了正确的结果。
请参阅下面的代码,它可以依次翻过所有页面。
from selenium import webdriver
from time import sleep
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
class App:
    """Page through the Amazon.in "novels" search results in Chrome.

    Creating an instance runs the whole session: it starts Chrome, opens the
    first results page, follows the "Next Page" link repeatedly via
    ``scroll_down`` and always shuts the browser down afterwards.
    """

    # Locator of the pagination link that leads to the next results page.
    NEXT_PAGE_XPATH = "//span[@class='pagnRA']/a[@title='Next Page']"

    def __init__(self, path=r'F:\Imaging'):
        """Start Chrome, load the first results page and page through it.

        Args:
            path: Stored as ``self.path``; nothing in this class reads it.
        """
        self.path = path
        # Raw strings keep the Windows backslashes literal without relying on
        # '\c' / '\I' being unrecognised escape sequences.
        self.driver = webdriver.Chrome(r'F:\chromedriver')
        try:
            self.driver.get('https://www.amazon.in/s/ref=sr_pg_1?rh=i%3Aaps%2Ck%3Anovels&keywords=novels&ie=UTF8&qid=1510727563')
            sleep(1)
            self.scroll_down()
        finally:
            # Shut the browser down even when paging fails part-way, so a
            # failed run does not leave a Chrome session behind.
            self.driver.quit()

    def scroll_down(self, clicks=18, timeout=10):
        """Follow the "Next Page" link up to ``clicks`` times.

        Every click uses the same explicit wait plus JavaScript click, so the
        first page is handled exactly like the later ones.

        Args:
            clicks: Maximum number of "Next Page" clicks. The default of 18
                equals the original script's one initial click plus 17 loop
                iterations.
            timeout: Seconds to wait for the link to become clickable before
                concluding that there is no further page.

        Returns:
            The number of "Next Page" clicks actually performed.
        """
        # Imported here so the block does not depend on an extra file-level
        # import line.
        from selenium.common.exceptions import TimeoutException

        advanced = 0
        for _ in range(clicks):
            self.driver.execute_script("window.scrollTo(0,5500);")
            try:
                load_more = WebDriverWait(self.driver, timeout).until(
                    EC.element_to_be_clickable((By.XPATH, self.NEXT_PAGE_XPATH))
                )
            except TimeoutException:
                # No clickable link within the timeout: stop instead of
                # crashing the whole run.
                print('No clickable "Next Page" link found; stopping.')
                break
            # Click through JavaScript: a native click can be intercepted by
            # another element covering the link.
            self.driver.execute_script("arguments[0].click();", load_more)
            # Give the next page time to replace the current one; otherwise
            # the next iteration could find the old link again.
            sleep(3)
            advanced += 1
            print(self.driver.current_url)
        return advanced
# Entry point: building an App instance runs the complete paging session.
if __name__ == "__main__":
    APP = App()