所以,经过大量的尝试,我放弃了搜索和研究。
我有一个网页,可以查询所有员工的姓名、电话、邮箱和用户ID。查询方式是向服务器提交至少4个字符的请求,字符取自全部26个ASCII字母加数字0-9。我已经能用Python中的Selenium做到这一点......但它需要20天才能完成——请参阅代码。
from selenium import webdriver
import csv
# Characters used to build the search prefixes: the 26 lowercase ASCII
# letters followed by the digits 1-9 and 0. The letter 'w' was missing from
# the original list, so every query containing it was silently skipped.
alphanum = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
]
def _query_cells(driver, text):
    """Submit *text* through the search form and return the result cells.

    Types *text* into the field named 'nome', clicks the button named 'B1',
    then collects the stripped text of every <td> found inside elements of
    class 'dados'. Returns an empty list when no such element is present.
    """
    # Look the field up once; the original located it twice per query.
    name_field = driver.find_element_by_name('nome')
    name_field.clear()
    name_field.send_keys(text)
    driver.find_element_by_name('B1').click()

    cells = []
    # A single lookup serves both the "any results?" check and the iteration.
    for table_data in driver.find_elements_by_class_name('dados'):
        for cell_data in table_data.find_elements_by_tag_name('td'):
            cells.append(cell_data.text.strip())

    # Step back in the browser history before the next query
    # (presumably this returns to the search form - confirm against the site).
    driver.back()
    return cells


# Scrape every three-character combination of `alphanum` and write the
# distinct, non-empty results to output.csv as rows of [index, cell list].
list_base = dict()
# Records already stored, kept as tuples so membership tests are O(1).
seen_records = set()

driver = webdriver.Firefox()
try:
    driver.get('http://brnet.intra.corpintra.net/Quem/pessoas2/Default.asp')
    for first_chr in alphanum:
        for second_chr in alphanum:
            for third_chr in alphanum:
                data_str = _query_cells(
                    driver, first_chr + second_chr + third_chr)
                record_key = tuple(data_str)
                # Skip queries with no results and results already stored.
                # The original flag was overwritten on every comparison and
                # inverted, so only the last cell/record pair decided.
                # NOTE(review): all cells from one query form one record;
                # whether each 'dados' element should be its own record
                # depends on the page markup - confirm.
                if not data_str or record_key in seen_records:
                    continue
                seen_records.add(record_key)
                list_base[len(list_base)] = data_str

    # The context manager guarantees the file is flushed and closed.
    with open("output.csv", "w") as csv_file:
        w = csv.writer(csv_file)
        for key, value in list_base.items():
            w.writerow([key, value])
finally:
    # Always shut the browser down, even if a lookup or the write fails.
    driver.quit()
有没有办法减少时间?