我想在此网站https://nhqrnet.ahrq.gov/inhqrdr/data/submit上提交所有可能组合的表格,并下载所有excel文件。我的代码成功运行了一次迭代,但是一旦到达第二次迭代,它就会返回下面提到的错误。
我的代码:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
import time
browser = webdriver.Chrome(executable_path="C:/Users/IN027/chromedriver.exe")
browser.get('https://nhqrnet.ahrq.gov/inhqrdr/data/submit')
dropdown = Select(browser.find_element_by_name("stateName"))
list=[]
for element in browser.find_element_by_id('stateName').find_elements_by_tag_name('option'):
list.append(element.get_attribute('value'))
list=[z for z in list if z!='']
for i in list:
dropdown.select_by_value(i)
# time.sleep(3)
dropdown1 = Select(browser.find_element_by_name("subjectAreaId"))
list3=[]
for element1 in browser.find_element_by_id('subjectAreas').find_elements_by_tag_name('option'):
list3.append(element1.get_attribute('value'))
list3= [m for m in list3 if m!='']
for z in list3:
dropdown1.select_by_value(z)
time.sleep(2)
dropdown2 = Select(browser.find_element_by_name("topicId"))
a = []
for element2 in browser.find_element_by_id('topics').find_elements_by_tag_name('option'):
a.append(element2.get_attribute('value'))
a = [x for x in a if x!='']
for d in a:
time.sleep(2)
dropdown2.select_by_value(d)
dropdown3= Select(browser.find_element_by_name("subMeasureId"))
list2=[]
print('check1')
for element3 in browser.find_element_by_id('measures').find_elements_by_tag_name('option'):
#time.sleep(2)
print(element3.text)
# print(element3.get_attribute('value'))
time.sleep(2)
list2.append(element3.get_attribute('value'))
list2= [y for y in list2 if y!='']
print(len(list2))
for b in list2:
print('check2')
time.sleep(2)
dropdown3.select_by_value(b)
#browser.implicitly_wait(10)
#browser.find_element_by_xpath("//*[@id='filterByCategory']").click
time.sleep(0.5)
browser.find_elements_by_css_selector("input[type='radio'][value='byTotal']")[0].click()
time.sleep(0.5)
#browser.implicitly_wait(10)
#browser.find_element_by_xpath("//*[@id='query']/form/input[2]").click
browser.find_elements_by_css_selector("input[type='submit'][value='Get Data']")[0].click()
time.sleep(4)
browser.find_element_by_id('table').find_elements_by_tag_name('a')[0].click()
time.sleep(2)
browser.find_element_by_id('formTab').find_elements_by_tag_name('a')[0].click()
错误消息如下:
Traceback (most recent call last):
File "C:/Users/IN027/web_scraping/sel_scraper.py", line 60, in <module>
list2.append(element3.get_attribute('value'))
File "C:\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 143, in get_attribute
resp = self._execute(Command.GET_ELEMENT_ATTRIBUTE, {'name': name})
File "C:\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 633, in _execute
return self._parent.execute(command, params)
File "C:\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "C:\Python36\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=72.0.3626.109)
(Driver info: chromedriver=71.0.3578.137 (86ee722808adfe9e3c92e6e8ea746ade08423c7e),platform=Windows NT 10.0.16299 x86_64)