我正在开发一个Web抓取程序,遇到了一些问题。
并引发MaxRetry错误:
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1', port=58826): Max retries exceeded with url: /session/c6ef075beb50a6e44c0bb16555679dcd/window/current/size (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10b9849b0>: Failed to establish a new connection: [Errno 61] Connection refused'))
为此,我在Github上对其进行了搜索,但是在我的程序中却无法使用。 https://github.com/timgrossmann/InstaPy/issues/3311
from apscheduler.schedulers.blocking import BlockingScheduler
import random
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import HardwareType, OperatingSystem
def randomUserAgent():
operating_systems = [OperatingSystem.IOS.value, OperatingSystem.ANDROID.value]
hardware_types = [HardwareType.MOBILE.value]
user_agent_rotator = UserAgent(operating_systems=operating_systems,hardware_types=hardware_types)
ua = user_agent_rotator.get_random_user_agent()
return ua
def mainBody(times):
for i in range(times):
n = 0
randomUrl = 'https://www.thermofisher.com/'
driver.set_window_size(375, 677)
driver.get(randomUrl)
driver.delete_all_cookies()
driver.quit()
if __name__=='__main__':
Times = [5]
Timer = ['2019-5-6 23:06:30']
ua = randomUserAgent()
opts = Options()
opts.add_argument(ua)
driver = webdriver.Chrome('/chromedriver',options=opts)
for i in range(10):
scheduler = BlockingScheduler()
times = int(Times[i])
scheduler.add_job(mainBody, 'date', run_date=Timer[i], args=[times])
scheduler.start()
答案 0 :(得分:0)
问题似乎出在驱动程序尝试共享会话上。让每个循环创建自己的驱动程序可以为我解决错误。
from apscheduler.schedulers.blocking import BlockingScheduler
import random
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import HardwareType, OperatingSystem
def randomUserAgent():
operating_systems = [OperatingSystem.IOS.value, OperatingSystem.ANDROID.value]
hardware_types = [HardwareType.MOBILE.value]
user_agent_rotator = UserAgent(operating_systems=operating_systems,hardware_types=hardware_types)
ua = user_agent_rotator.get_random_user_agent()
return ua
def mainBody(times):
for i in range(times):
n = 0
randomUrl = 'https://www.thermofisher.com/'
driver = webdriver.Chrome('/chromedriver',options=opts)
driver.set_window_size(375, 677)
driver.get(randomUrl)
driver.delete_all_cookies()
driver.quit()
if __name__=='__main__':
Times = [5]
Timer = ['2019-5-6 23:06:30']
ua = randomUserAgent()
opts = Options()
opts.add_argument(ua)
for i in range(10):
scheduler = BlockingScheduler()
times = int(Times[i])
scheduler.add_job(mainBody, 'date', run_date=Timer[i], args=[times])
scheduler.start()