我目前正在使用Selenium进行简单的刮擦作业,但是遇到了问题。该代码在执行的随机点抛出StaleElementReferenceException,同时以其他方式返回良好的数据(但从未完全完成)
我一直在尝试硒的各种等待,但无济于事。
如何改善urlList上的密钥循环?
代码:
import time
from selenium import webdriver
# Specifying incognito mode as you launch your browser[OPTIONAL]
option = webdriver.ChromeOptions()
option.add_argument('window-size=1600,1200')
option.add_argument("--headless")
# Create new Instance of Chrome in incognito mode
browser = webdriver.Chrome(executable_path='/Users/mikael/Documents/StockScraper/scraper/chromedriver', chrome_options=option)
browser.implicitly_wait(2)
# Inital login
browser.get("https://www.tradingview.com/chart/LW1VvtHB/")
browser.find_element_by_xpath(".//div[contains(@class,'menu')]/div[contains(@class,'button')]").click()
browser.find_element_by_xpath(".//div[contains(@class,'menuBox')]/div[2]").click()
browser.find_element_by_xpath('.//*[@id="signin-form"]/div[1]/div[1]/input').send_keys('*****')
browser.find_element_by_xpath('//*[@id="signin-form"]/div[2]/div[1]/div[1]/input').send_keys('*****')
browser.find_element_by_xpath('//*[@id="signin-form"]/div[3]/div[2]/button').click()
time.sleep(2)
# Populate URL-list
urlList = []
RowCount = 0
with open('urls.txt', 'r') as f:
for i in f:
urlList.append(i)
# print(', '.join(urlList))
data = []
# Check and append
print('Name' + ' | ' + 'Ticker' + ' | ' + 'Latest Base' + ' | ' + 'Latest Close' + ' | ' + 'Base Broken')
start_time = time.time()
for i in (urlList):
baseBroken = 'No'
browser.get(i)
time.sleep(2)
if browser.find_elements_by_xpath(".//div[contains(@class,'wizard-tooltip-btn skip')]"):
browser.find_element_by_xpath(".//div[contains(@class,'wizard-tooltip-btn skip')]").click()
title_element = browser.find_element_by_class_name('pane-legend-title__description')
if browser.find_elements_by_xpath(".//div[contains(@class,'widgetbar-pages hidden')]"):
browser.find_element_by_xpath(".//div[@data-name='data-window']").click()
ticker = browser.find_element_by_class_name('chart-data-window-header')
tickerClean = ticker.text.split(',')[0]
latestClose = browser.find_element_by_xpath('/html/body/div[1]/div[3]/div/div[1]/div[1]/div[3]/div/div[2]/div[1]/div[1]/div[2]/div[4]/div[2]')
latestBase = browser.find_element_by_xpath('/html/body/div[1]/div[3]/div/div[1]/div[1]/div[3]/div/div[2]/div[1]/div[3]/div[2]/div[9]/div[2]/span')
latestCloseValue = float(latestClose.text.strip())
latestBaseValue = float(latestBase.text.strip())
if latestCloseValue <= latestBaseValue:
baseBroken = 'Yes'
data.append(title_element.text + ',' + tickerClean + ',' + format(latestBaseValue) + ',' + format(latestCloseValue) + ',' + baseBroken)
print(title_element.text + ' | ' + tickerClean + ' | ' + format(latestBaseValue) + ' | ' + format(latestCloseValue) + ' | ' + baseBroken)
elapsed_time = time.time() - start_time
print(elapsed_time)
# Save to file
filename = 'data.txt'
fileout = open(filename, 'w')
for item in data:
fileout.write("%s\n" % item)
fileout.close()
print('Done!')
# Quit Selenium
browser.close()
browser.quit()
错误示例:
Iridonia:StockScraper mikael$ python3 scraper3.py
Name | Ticker | Latest Base | Latest Close | Base Broken
A1M PHARMA AB | A1M | 4.445 | 4.77 | No
AAK AB | AAK | 145.66 | 147.9 | No
ABB LTD | ABB | 203.4 | 214.8 | No
ACADEMEDIA AB | ACAD | 49.05 | 49.7 | No
ACANDO AB SER. B | ACAN_B | 32.25 | 36.4 | No
ACCONEER AB | ACCON | 23.6 | 24.0 | No
ACTIVE BIOTECH AB | ACTI | 3.89 | 3.78 | Yes
ADDNODE GROUP AB SER. B | ANOD_B | 100.0 | 102.0 | No
ADDTECH AB SER. B | ADDT_B | 201.0 | 202.0 | No
Traceback (most recent call last):
File "scraper3.py", line 65, in <module>
data.append(title_element.text + ',' + tickerClean + ',' + format(latestBaseValue) + ',' + format(latestCloseValue) + ',' + baseBroken)
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 76, in text
return self._execute(Command.GET_ELEMENT_TEXT)['value']
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 628, in _execute
return self._parent.execute(command, params)
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 320, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=68.0.3440.106)
(Driver info: chromedriver=2.41.578706 (5f725d1b4f0a4acbf5259df887244095596231db),platform=Mac OS X 10.12.6 x86_64)
要加载的示例网址:
https://www.tradingview.com/chart/?symbol=OMXSTO:A1M
https://www.tradingview.com/chart/?symbol=OMXSTO:AAK
https://www.tradingview.com/chart/?symbol=OMXSTO:ABB
https://www.tradingview.com/chart/?symbol=OMXSTO:ACAD
https://www.tradingview.com/chart/?symbol=OMXSTO:ACAN_B
https://www.tradingview.com/chart/?symbol=OMXSTO:ACCON
答案 0 :(得分:0)
最重要的改进将是学习如何找到简单稳定的定位器。
示例:/html/body/div[1]/div[3]/div/div[1]/div[1]/div[3]/div/div[2]/div[1]/div[1]/div[2]/div[4]/div[2]
可以是.box.with-actions:nth-child(1) .chart-data-window-item span
CSS选择器。
此处已改进但代码被截断,请在其中添加必需的部分:
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
browser.implicitly_wait(10)
browser.find_element_by_css_selector('div[class*="menu"] > div[class*="button"]').click()
browser.find_element_by_xpath("//div[.='Sign In']").click()
browser.find_element_by_css_selector('form#signin-form input[name="username"]').send_keys('*****')
browser.find_element_by_css_selector('form#signin-form input[name="password"]').send_keys('*****')
browser.find_element_by_css_selector('form#signin-form button[type="submit"]').click()
for url in (urls):
browser.get(url)
try:
browser.find_element_by_css_selector('div.wizard-tooltip-btn.skip"').click()
except:
pass
dataWindow = browser.find_element_by_css_selector('div[data-name="data-window"]')
if "isActive" not in dataWindow.get_attribute("class"):
dataWindow.click();
latestClose = WebDriverWait(driver, timeout=10, poll_frequency=0.01, (StaleElementReferenceException))
.until(EC.visibility_of_element_located(By.CSS_SELECTOR, '.box.with-actions:nth-child(1) .chart-data-window-item span'))
.text.strip()
print(latestClose)