我正在使用Selenium从同一域中的多个网页打印一些项目,但是我认为我没有正确实现多重处理,因为没有它,它以大约相同的速度运行。感谢您帮助加快以下代码的速度:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from multiprocessing import Process
chromedriver = 'C:\\Users\\cookie2\\Downloads\\chromedriver_win32\\chromedriver.exe'
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=1200x600')
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=options)
numbers = list(range(1, 187722))
def capture(x):
for x in numbers:
j=str(x)
browser.get('http://database.globalreporting.org/organizations/'+j)
try:
company = WebDriverWait(browser, 1).until(EC.visibility_of_all_elements_located((By.XPATH,"//body/div/section/div/div/div/h1")))
country = browser.find_elements_by_xpath("//body/div/section/div[2]/div/ul/li[5]")
ticker = browser.find_elements_by_xpath("//body/div/section/div[2]/div/ul/li[10]")
except Exception:
continue
print(company[0].text)
print('http://database.globalreporting.org/organizations/' + j)
print(country[0].text)
print(ticker[0].text)
if __name__ == '__main__':
p1 = Process(target=capture, args=('x',))
p1.start()
p1.join()