硒加速多处理

时间:2019-03-18 17:42:05

标签: python selenium multiprocessing

我正在使用Selenium从同一域中的多个网页打印一些项目,但是我认为我没有正确实现多重处理,因为没有它,它以大约相同的速度运行。感谢您帮助加快以下代码的速度:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from multiprocessing import Process

chromedriver = 'C:\\Users\\cookie2\\Downloads\\chromedriver_win32\\chromedriver.exe'

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=1200x600')
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")

browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=options)
numbers = list(range(1, 187722))
def capture(x):

    for x in numbers:
        j=str(x)
        browser.get('http://database.globalreporting.org/organizations/'+j)
        try:
            company = WebDriverWait(browser, 1).until(EC.visibility_of_all_elements_located((By.XPATH,"//body/div/section/div/div/div/h1")))
            country = browser.find_elements_by_xpath("//body/div/section/div[2]/div/ul/li[5]")
            ticker = browser.find_elements_by_xpath("//body/div/section/div[2]/div/ul/li[10]")

        except Exception:
            continue

        print(company[0].text)
        print('http://database.globalreporting.org/organizations/' + j)
        print(country[0].text)
        print(ticker[0].text)

if __name__ == '__main__':

    p1 = Process(target=capture, args=('x',))
    p1.start()
    p1.join()

0 个答案:

没有答案