我正试图抓住这个网站" https://www.realtruck.com/clazzio-leather-seat-covers/"。它包含6个下拉菜单,我试图通过迭代来收集所有选项的年份,品牌,型号,子模型,SKU数据。基本上是一个巨大的巨型循环。前4个下降没有问题,但最后两个是棘手的。在每次点击时,我们会重新隐藏"重新隐藏"并且我的对象在第一次迭代后变得陈旧。以下包含我的代码:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import StaleElementReferenceException
import time
outFile = r'filepath.csv'
outHandler = open(outFile, 'w')
chrome_driver = r'\FMinerProjects\chromedriver.exe'
driver = webdriver.Chrome(chrome_driver)
driver.maximize_window()
driver.get("https://www.realtruck.com/clazzio-leather-seat-covers/")
driver.implicitly_wait(15)
driver.find_element_by_xpath("""//*[@id="newsletterPopupClosex"]""").click()
select_year = driver.find_element_by_xpath("""//[@id="d_ye"]/option[1]""").text
select_make = driver.find_element_by_xpath("""//*[@id="d_ma"]/option[1]""").text
select_model = driver.find_element_by_xpath("""//*[@id="d_mo"]/option[1]""").text
dropdowns = driver.find_element_by_xpath("""//*[@id="d_ye"]""")
dropoptions = dropdowns.find_elements_by_tag_name("option")
for values in dropoptions:
if values.text != select_year:
year = values.click()
time.sleep(3)
print values.text
makes = driver.find_element_by_xpath("""//*[@id="d_ma"]""")
make = makes.find_elements_by_tag_name("option")
for each in make:
if each.text != select_make:
each.click()
time.sleep(3)
print each.text
models = driver.find_element_by_xpath("""//*[@id="d_mo"]""")
model = models.find_elements_by_tag_name("option")
for sub in model:
if sub.text != select_model:
sub.click()
print sub.text
time.sleep(3)
bodies = driver.find_element_by_xpath("""//*[@id="d_bo"]""")
body = bodies.find_elements_by_tag_name("option")
for cab in body:
select_body = driver.find_element_by_xpath("""//*[@id="d_bo"]/option[1]""").text
if cab.text != select_body:
cab.click()
time.sleep(4)
urls = driver.current_url
link = '"' + urls + '"'
print link
driver.get(urls)
driver.implicitly_wait(5)
driver.find_element_by_xpath("""//*[@id="choose_options"]""").click()
time.sleep(2)
这是我的问题开始的地方。由于动态页面javascript在最后两次下拉列表每次点击后隐藏元素,以下元素在第二次迭代后变得陈旧
element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element)
lines = element.find_elements_by_tag_name("li")
for levels in lines:
select_submodel = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]/ul/li[1]""").text
if levels.text != select_submodel:
levels.click()
time.sleep(5)
sku = driver.find_element_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[5]/div[2]/a""").text
if sku:
print sku
element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element)
lines = element.find_elements_by_tag_name("li")
continue
else:
element_2 = driver.find_element_by_xpath("""//*[@id="group_189148_d_a1"]/div[2]""")
driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element_2)
last = element_2.find_elements_by_tag_name("li")
for end in last:
select_color = driver.find_element_by_xpath("""//*[@id="group_189148_d_a1"]/div[2]/ul/li[1]""").text
if end.text != select_color:
end.click()
time.sleep(2)
crazy = driver.find_element_by_xpath("""//*[@id="attribute_splitter"]/div""").text
print crazy
sku = driver.find_elements_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[5]/div[2]/a""")
price = driver.find_elements_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[2]/div[2]/span""")
continue
我正在尝试从多个页面上的多个下拉列表中抓取数据。我的问题是我在第二次迭代后不断获得过时的元素引用异常,因为元素会重新隐藏。我假设第一页中的变量也会变得陈旧。请帮忙。
以下内容包含我收到的错误报告。它发生在第62行" element = driver.find_element_by_xpath(""" // * [@ id =" group_189148_d_an"] / div [2]&# 34;"")"因为每次点击后webelement都会被隐藏,因此持有它们的对象会变得陈旧。先前的错误报告报告了第72行" if levels.text!= select_submodel:"在第二次迭代后变得陈旧。我尝试在每次迭代之后取消隐藏元素,就像其他一些表格所暗示的那样,但这确实起作用。
Traceback (most recent call last):
File "C:\Python27\Lib\site-packages\pythonwin\pywin\framework\scriptutils.py", line 326, in RunScript
exec codeObject in __main__.__dict__
File "C:\Users\marketing-x1-carbon\Documents\August2017_Files\IncompleteWebscrappers\real_truck_oct10.py", line 62, in <module>
element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 365, in find_element_by_xpath
return self.find_element(by=By.XPATH, value=xpath)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 843, in find_element
'value': value})['value']
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 308, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="group_189148_d_an"]/div[2]"}