无法建立新的连接错误

时间:2019-05-06 16:11:48

标签: python selenium web-scraping urllib apscheduler

我正在开发一个Web抓取程序,遇到了一些问题。

程序运行时会引发 MaxRetryError 错误:

raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1', port=58826): Max retries exceeded with url: /session/c6ef075beb50a6e44c0bb16555679dcd/window/current/size (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10b9849b0>: Failed to establish a new connection: [Errno 61] Connection refused'))

为此,我在 GitHub 上搜索了这个问题,但找到的解决方法在我的程序中不起作用: https://github.com/timgrossmann/InstaPy/issues/3311

from apscheduler.schedulers.blocking import BlockingScheduler
import random
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import HardwareType, OperatingSystem

def randomUserAgent():
    """Return a random user agent restricted to mobile iOS/Android devices."""
    rotator = UserAgent(
        operating_systems=[
            OperatingSystem.IOS.value,
            OperatingSystem.ANDROID.value,
        ],
        hardware_types=[HardwareType.MOBILE.value],
    )
    return rotator.get_random_user_agent()

def mainBody(times):
    """Load the target page ``times`` times using the module-level ``driver``.

    NOTE(review): ``driver`` is the single session created in the
    ``__main__`` block, and it is quit at the end of every iteration, so from
    the second iteration on the calls go to an already-closed session. This
    looks like the source of the reported MaxRetryError ("Connection refused"
    on ``/window/current/size``) -- confirm.
    """
    for i in range(times):
        n = 0  # never read afterwards
        randomUrl = 'https://www.thermofisher.com/'
        driver.set_window_size(375, 677)
        driver.get(randomUrl)
        driver.delete_all_cookies()
        driver.quit()  # ends the shared session inside the loop


if __name__=='__main__':
    # Script entry point: build one Chrome session and schedule mainBody runs.
    Times = [5]  # visits per scheduled run, indexed in step with Timer
    Timer = ['2019-5-6 23:06:30']  # run date passed to the 'date' trigger
    ua = randomUserAgent()
    opts = Options()
    # The user-agent value is passed as a bare Chrome argument.
    # NOTE(review): Chrome presumably expects 'user-agent=<value>' -- confirm.
    opts.add_argument(ua)
    # One WebDriver session, created once and used by every mainBody call.
    driver = webdriver.Chrome('/chromedriver',options=opts)
    # NOTE(review): Times and Timer hold one entry each while this loop indexes
    # 0..9, so i = 1 would raise IndexError if reached -- verify intent.
    for i in range(10):
        scheduler = BlockingScheduler()
        times = int(Times[i])
        scheduler.add_job(mainBody, 'date', run_date=Timer[i], args=[times])
        scheduler.start()

1 个答案:

答案 0 :(得分:0)

问题似乎在于多次循环共用同一个驱动程序会话:第一次循环结束时已经调用了 driver.quit(),之后的循环再使用这个驱动程序就会连接失败。让每次循环创建自己的驱动程序,就解决了我遇到的这个错误。

from apscheduler.schedulers.blocking import BlockingScheduler
import random
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import HardwareType, OperatingSystem

def randomUserAgent():
    """Pick a random user agent for a mobile device running iOS or Android."""
    mobile_platforms = [
        platform.value
        for platform in (OperatingSystem.IOS, OperatingSystem.ANDROID)
    ]
    mobile_hardware = [HardwareType.MOBILE.value]
    return UserAgent(
        operating_systems=mobile_platforms,
        hardware_types=mobile_hardware,
    ).get_random_user_agent()

def mainBody(times):
    """Visit the target page ``times`` times, one fresh Chrome session per visit.

    Every visit starts its own WebDriver and quits it afterwards, so no
    iteration talks to a session that has already been shut down.

    Relies on the module-level ``opts`` (Chrome ``Options``) being assigned
    before this function is called.

    Args:
        times: Number of page visits to perform.
    """
    randomUrl = 'https://www.thermofisher.com/'
    for _ in range(times):
        driver = webdriver.Chrome('/chromedriver', options=opts)
        try:
            driver.set_window_size(375, 677)
            driver.get(randomUrl)
            driver.delete_all_cookies()
        finally:
            # Always shut the browser down, even when a step above raises,
            # so a failed visit does not leave a chromedriver process behind.
            driver.quit()


if __name__ == '__main__':
    # Script entry point: configure Chrome and schedule the mainBody runs.

    # Number of page visits for each scheduled run; paired by position with
    # the run dates in Timer.
    Times = [5]
    Timer = ['2019-5-6 23:06:30']

    ua = randomUserAgent()
    opts = Options()
    # Chrome takes the user agent as the value of its ``user-agent`` switch;
    # passing the bare string would not be applied as the user agent.
    opts.add_argument('user-agent=' + ua)

    # BlockingScheduler.start() blocks the calling thread, so every job has to
    # be registered on a single scheduler before it is started. Pairing the two
    # lists with zip() also avoids indexing past their end.
    scheduler = BlockingScheduler()
    for times, run_date in zip(Times, Timer):
        scheduler.add_job(mainBody, 'date', run_date=run_date, args=[int(times)])
    scheduler.start()