我想使用Python3和Selenium在公共信息站点上自动进行搜索。在此站点中,有必要输入一个人的名字,然后选择为该名字选择的拼写(不带重音符号或名称变体),访问包含找到的诉讼列表的页面,并且在此列表中您可以访问该页面每种情况。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
import re
将要搜索的名称
name = 'JOSE ROBERTO ARRUDA'
创建路径,搜索开始链接和空列表以存储信息
firefoxPath="/home/abraji/Documentos/Code/geckodriver"
link = 'https://ww2.stj.jus.br/processo/pesquisa/?aplicacao=processos.ea'
processos = []
致电司机并进入第一个搜索页面
driver = webdriver.Firefox(executable_path=firefoxPath)
driver.get(link)
定位光标,填充并单击
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idParteNome'))).click()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="idParteNome"]').send_keys(name)
time.sleep(6)
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarFormularioExtendido'))).click()
标记所有拼写搜索的可能性
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoMarcarTodos'))).click()
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarMarcados'))).click()
time.sleep(1)
检查有多少页数据-在“用于范围”中使用
capta = driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]').text
print(capta)
paginas = int(re.search(r'\d+', capta).group(0))
paginas = int(paginas) + 1
print(paginas)
捕获例程
for acumula in range(1, paginas):
# Fill the field with the page number and press enter
driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input').send_keys(acumula)
driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input').send_keys(Keys.RETURN)
time.sleep(2)
# Captures the number of processes found on the current page - qt
qt = driver.find_element_by_xpath('//*[@id="idDivBlocoMensagem"]/div/b').text
qt = int(qt) + 2
print(qt)
# Iterate from found number of processes
for item in range(2, qt):
# Find the XPATH of each process link - start at number 2
vez = '//*[@id="idBlocoInternoLinhasProcesso"]/div[' + str(item) + ']/span[1]/span[1]/span[1]/span[2]/a'
# Access the direct link and click
time.sleep(2)
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, vez))).click()
time.sleep(1)
# Run tests to get data
try:
num_unico = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[6]/span[2]/a').text
except NoSuchElementException:
num_unico = "sem_numero_unico"
try:
nome_proc = driver.find_element_by_xpath('//*[@id="idSpanClasseDescricao"]').text
except NoSuchElementException:
nome_proc = "sem_nome_encontrado"
try:
data_autu = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[5]/span[2]').text
except NoSuchElementException:
data_autu = "sem_data_encontrada"
# Fills dictionary and list
dicionario = {"num_unico": num_unico,
"nome_proc": nome_proc,
"data_autu": data_autu
}
processos.append(dicionario)
# Return a page to click on next process
driver.execute_script("window.history.go(-1)")
# Close driver
driver.quit()
在点击了每个进程的信息的直接链接约30次后,我遇到了这个错误:
---------------------------------------------------------------------------
TimeoutException Traceback (most recent call last)
<ipython-input-10-a901a514bd82> in <module>
16
17 time.sleep(2)
---> 18 WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, vez))).click()
19 time.sleep(1)
20
~/Documentos/Code/publique_se/lib/python3.6/site-packages/selenium/webdriver/support/wait.py in until(self, method, message)
78 if time.time() > end_time:
79 break
---> 80 raise TimeoutException(message, screen, stacktrace)
81
82 def until_not(self, method, message=''):
TimeoutException: Message:
显然网站变慢了,脚本显示错误,因为找不到信息,对吧?
请问,在硒遍历多页的网站上,有没有办法避免这种错误?
网站按顺序感知点击量时,网站本身会放慢速度吗?
答案 0 :(得分:2)
您正在使用线程等待,这不是一个好方法,并且在获取时会导致错误
您正在使用time.sleep(2)
而不是使用硒的显式等待
示例:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
element = WebDriverWait(driver, 20).until(
EC.element_to_be_clickable((By.XPATH, "myXpath")))
element.click();
注意:您需要将时间从20
更改为根据应用程序观察到的时间