我正试图从TripAdvisor搜索酒店的评论并将其写在文本文件中。到目前为止,代码表现良好,除了事实上它偶尔会在我编写文本文件的行上抛出StaleElementException。这是我的代码:
for num in range(page_count):
try:
if num != 0:
try:
nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.nav.next.rndBtn.ui_button.primary.taLnk")))
#nxt = driver.find_element_by_css_selector("a.nav.next.rndBtn.ui_button.primary.taLnk")
nxt.click()
driver.implicitly_wait(5)
except NoSuchElementException:
driver.refresh()
#driver.implicitly_wait(5)
nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a.nav.next.rndBtn.ui_button.primary.taLnk")))
nxt.click()
driver.implicitly_wait(5)
try:
more = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
#more = driver.find_element_by_css_selector("span.taLnk.ulBlueLinks")
more.click()
time.sleep(1)
except TimeoutException:
print("There is no 'more' button on page %d" % (num+1))
except WebDriverException:
nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
nxt.click()
driver.implicitly_wait(5)
review_result = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'entry')))
with open('New_Review.txt', 'a') as fid:
for review in review_result:
fid.write(unidecode(review.text))
fid.write(sep)
fid.write(line_break)
print ("processing done till page number %d" % (num+1))
except StaleElementReferenceException:
driver.refresh()
driver.implicitly_wait(5)
try:
more = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
#more = driver.find_element_by_css_selector("span.taLnk.ulBlueLinks")
more.click()
except TimeoutException:
print("There is no 'more' button on page %d" % (num+1))
except WebDriverException:
nxt = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")))
nxt.click()
driver.implicitly_wait(5)
review_result = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'entry')))
#print (review_result[1].text)
with open('New_Review.csv', 'a') as fid:
writer = csv.writer(fid, delimiter = ',', line_break = '\n')
for review in review_result:
fid.write(unidecode(review.text))
fid.write(sep)
fid.write(line_break)
print ("processing done till page number %d" % (num+1))
这是错误:
StaleElementReferenceException:陈旧元素引用:元素是 未附加到页面文档
追溯给出了这一行:
fid.write(unidecode(review.text))
我已经尝试处理异常,但它不适合我,我很难弄清楚到底哪里错了。任何帮助表示赞赏!
答案 0 :(得分:0)
尝试创建辅助方法,例如
def get_text(locator):
staled = True
while staled:
try:
return WebDriverWait(driver, 10).until(EC.presence_of_element(locator)).text
except StaleElementReferenceException:
`log something or limit retry to certain times`
然后改变你获得文本的方式
review_result = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located())
num_of_review - review_result.length
with open('New_Review.txt', 'a') as fid:
for index in range(1, num_of_review):
review_text = get_text((By.XPATH, "//*[@class='entry'][index]"))
fid.write(unidecode(review_text))
fid.write(sep)
fid.write(line_break)