我想逐个访问一系列网页:访问一页,返回,再访问下一页,依此类推。使用下面的代码,我可以访问第一页并返回上一页,但无法访问下一页,而是得到陈旧元素引用(stale element reference)错误。我仔细阅读了有关此错误的回答,但仍然不知道应该如何修改我的代码……
以下是我的代码
# Question's script: open the UCSB presidency index, enter the 2016 election
# section, then try to click through every "campaign" link and every speech
# link below it, going back after each visit.
# NOTE(review): `webdriver` and `time` are used below but their imports are
# not shown in this snippet.
"""Get the browser (a "driver")."""
# find the path with 'which chromedriver'
path_to_chromedriver = ('/Users/xxxx/Desktop/chromedriver')
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
url = "http://www.presidency.ucsb.edu/index_docs.php"
browser.get(url)
#time.sleep(600)
# 2008 Presidential Election
pe_2008 = browser.find_element_by_xpath(
    "/html/body/table/tbody/tr[2]/td/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/ul/li[22]/ul/li[5]/a")
# 2012 Presidential Election
pe_2012 = browser.find_element_by_xpath(
    "/html/body/table/tbody/tr[2]/td/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/ul/li[22]/ul/li[6]/a")
# 2016 Presidential Election
pe_2016 = browser.find_element_by_xpath("/html/body/table/tbody/tr[2]/td/table/tbody/tr/td[2]/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/ul/li[22]/ul/li[7]/a")
# Only the 2016 link is clicked; pe_2008 and pe_2012 are located but not used
# in this snippet.
pe_2016.click()
# Located once, before any further navigation happens.
links = browser.find_elements_by_partial_link_text('campaign')
for i in range(len(links)):
    # This is the statement the traceback below points at: `links` was
    # collected before the click()/back() navigation, and it raises
    # StaleElementReferenceException when reused here.
    links[i].click()
    time.sleep(5)
    # Every anchor on the current page that has an href attribute.
    elems = browser.find_elements_by_xpath("//a[@href]")
    linkls = []
    for elem in elems:
        address = elem.get_attribute("href")
        # Keep only anchors whose href contains the /ws/ URL prefix.
        if 'http://www.presidency.ucsb.edu/ws/' in address:
            # Stores the WebElement itself, not the URL string.
            linkls.append(elem)
    try:
        k = len(linkls)
        for j in range(k):
            # The elements in `linkls` were located before the first
            # click()/back() round trip of this inner loop.
            linkls[j].click()
            time.sleep(5)
            browser.back()
            time.sleep(5)
    except Exception as e:
        # Any exception from the inner loop is printed and the outer loop
        # then continues with the next `links[i]`.
        print(e)
Video Transcript: Presidential Exploratory Committee Announcement
Message: stale element reference: element is not attached to the page document
(Session info: chrome=63.0.3239.132)
(Driver info: chromedriver=2.35.528157 (4429ca2590d6988c0745c24c8858745aaaec01ef),platform=Mac OS X 10.12.6 x86_64)
---------------------------------------------------------------------------
StaleElementReferenceException Traceback (most recent call last)
<ipython-input-255-edb48028a541> in <module>()
1 for i in range(len(links)):
----> 2 links[i].click()
3 time.sleep(5)
4 elems = browser.find_elements_by_xpath("//a[@href]")
5 linkls = []
/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py in click(self)
78 def click(self):
79 """Clicks the element."""
---> 80 self._execute(Command.CLICK_ELEMENT)
81
82 def submit(self):
/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py in _execute(self, command, params)
626 params = {}
627 params['id'] = self._id
--> 628 return self._parent.execute(command, params)
629
630 def find_element(self, by=By.ID, value=None):
/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
310 response = self.command_executor.execute(driver_command, params)
311 if response:
--> 312 self.error_handler.check_response(response)
313 response['value'] = self._unwrap_value(
314 response.get('value', None))
/Users/misun/anaconda/lib/python3.5/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
235 elif exception_class == UnexpectedAlertPresentException and 'alert' in value:
236 raise exception_class(message, screen, stacktrace, value['alert'].get('text'))
--> 237 raise exception_class(message, screen, stacktrace)
238
239 def _value_or_default(self, obj, key, default):
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=63.0.3239.132)
(Driver info: chromedriver=2.35.528157 (4429ca2590d6988c0745c24c8858745aaaec01ef),platform=Mac OS X 10.12.6 x86_64)
答案 0 :(得分:0)
当您转到新页面时,上一页中的所有元素都将被销毁/变为陈旧。每次返回起始页面时都必须重新获取元素列表。
最好的解决方案可能是:先找到所有元素,遍历它们只是为了取出各自的URL;然后遍历这个URL列表(例如用 browser.get(url) 逐个打开),而不是遍历元素列表。
答案 1 :(得分:-1)
1)请参阅我在另一篇帖子(another post)中的回答,以便更好地理解 StaleElementReferenceException 产生的原因。
2)我对您的代码进行了一些更改,您可以尝试一下:
# Visit every candidate's speech list and every speech on it, re-locating the
# link elements after each navigation so no stale WebElement is ever reused.
# Assumes `browser` is already on the index page and `time` is imported, as
# in the question's script.

# 2012 Presidential Election (located for reference only; not clicked)
pe_2012 = browser.find_element_by_link_text("2012 Election")
# 2016 Presidential Election
pe_2016 = browser.find_element_by_link_text("2016 Election")
pe_2016.click()

# find_elements_* (plural) returns a list. The singular find_element_* returns
# one WebElement, which supports neither len() nor indexing.
# NOTE(review): the question matched these links with
# find_elements_by_partial_link_text('campaign'); confirm that the exact link
# text on the page really is "campaign speeches".
candidates = browser.find_elements_by_link_text("campaign speeches")

# Iterate by index on purpose: `candidates` is rebound to a freshly located
# list at the end of every pass, so only the position stays meaningful.
for i in range(len(candidates)):
    if i >= len(candidates):
        # The re-located list came back shorter than the first one.
        break
    candidates[i].click()
    time.sleep(5)

    speeches = browser.find_elements_by_css_selector("td.listdate > a")
    speech_count = len(speeches)

    # Begin: click each speech link of one candidate
    try:
        for j in range(speech_count):
            # click on one speech link
            speeches[j].click()
            time.sleep(5)
            # back to the candidate's speech list page
            browser.back()
            time.sleep(5)
            # find speeches again to avoid StaleElementReferenceException
            speeches = browser.find_elements_by_css_selector("td.listdate > a")
    except Exception as e:
        # Best effort: report the problem and move on to the next candidate.
        print(e)
    # End: click each speech link of one candidate

    # back to the candidates list page to enter the next candidate's list
    browser.back()
    time.sleep(5)
    # find candidates again to avoid StaleElementReferenceException
    candidates = browser.find_elements_by_link_text("campaign speeches")