您好我正在尝试从以下页面中删除一些信息: http://verify.sos.ga.gov/verification/
我的代码如下:
import sys
reload(sys)
sys.setdefaultencoding('utf8')
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import time
import csv
url = 'http://verify.sos.ga.gov/verification/'
def init_Selenium():
global driver
driver = webdriver.Chrome("/Users/rodrigopeniche/Downloads/chromedriver")
driver.get(url)
def select_profession():
select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
options = select.options
for index in range(1, len(options) - 1):
select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
select.select_by_index(index)
select_license_type()
def select_license_type():
select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
options = select.options
for index in range(1, len(options) - 1):
select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
select.select_by_index(index)
search_button = driver.find_element_by_id('sch_button')
driver.execute_script('arguments[0].click();', search_button)
scrap_licenses_results()
def scrap_licenses_results():
table_rows = driver.find_elements_by_tag_name('tr')
for index, row in enumerate(table_rows):
if index < 9:
continue
else:
attributes = row.find_elements_by_xpath('td')
try:
name = attributes[0].text
license_number = attributes[1].text
profession = attributes[2].text
license_type = attributes[3].text
status = attributes[4].text
address = attributes[5].text
license_details_page_link = attributes[0].find_element_by_id('datagrid_results__ctl3_name').get_attribute('href')
driver.get(license_details_page_link)
data_rows = driver.find_elements_by_class_name('rdata')
issued_date = data_rows[len(data_rows) - 3].text
expiration_date = data_rows[len(data_rows) - 2].text
last_renewal_day = data_rows[len(data_rows) - 1].text
print name, license_number, profession, license_type, status, address, issued_date, expiration_date, last_renewal_day
driver.back()
except:
pass
init_Selenium()
select_profession()
当我执行脚本时,它适用于第一次迭代,但在第二次迭代中失败。引发错误的确切位置在scrap_licenses_results()函数的attributes = row.find_elements_by_xpath('td')
行中。
任何帮助将不胜感激
答案 0 :(得分:1)
staleElementReferenceException是由循环迭代之前收集的行列表引起的。最初,您创建了一个名为table_rows的所有行的列表。
table_rows = driver.find_elements_by_tag_name('tr')
现在处于循环中,在第一次迭代期间,您的第一个行元素是新鲜的,并且可以由驱动程序找到。在第一次迭代结束时,您正在执行driver.back()
,您的页面更改/刷新HTML DOM。以前收集的所有引用现在都丢失了。 table_rows列表中的所有行现在都是陈旧的。因此,在第二次迭代中,您将面临此类异常。
您必须在循环中移动查找行操作,以便每次在目标应用程序上找到新的引用时。伪造的代码应该做这样的事情。
total_rows = driver.find_elements_by_tag_name('tr').length()
for i in total_rows
driver.find_element_by_xpath('//tr[i]')
.. further code..