在尝试了多种方案之后,我改用了 grequests,但一直遇到下面这个错误:
HTTPSConnectionPool(host='', port=443): Max retries exceeded with url: someurl (Caused by SSLError(SSLError("bad handshake: SysCallError(104, 'ECONNRESET')")))
import grequests
import json
import time
import random
def generate_random_proxy():
    """Return a requests-style ``proxies`` mapping tied to a fresh session id.

    A new random session id is embedded in the proxy username on every
    call, so each call yields a distinct proxy URL instead of the same
    constant string.
    NOTE(review): presumably the provider assigns a different exit IP per
    session id -- confirm against the proxy provider's documentation.

    The same proxy URL is registered for both ``http`` and ``https``.
    requests chooses a proxy by the scheme of the *target* URL, so a
    mapping that only has an ``http`` entry leaves HTTPS requests
    unproxied.

    Returns:
        dict: ``{'http': proxy_url, 'https': proxy_url}``.
    """
    session_id = random.random()
    # NOTE(review): the username/password parts look redacted in this file
    # (empty password) -- fill in the real zone credentials before use.
    proxy_url = (
        f'http://zone-static-session-{session_id}:'
        '@zproxy.lum-superproxy.io:22225'
    )
    return {
        'http': proxy_url,
        'https': proxy_url,
    }
class Test:
    """Send a batch of proxied GET requests concurrently via grequests."""

    # Browser-like headers attached to every outgoing request.
    _HEADERS = {
        'Connection': 'keep-alive',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Language': 'en-US,en;q=0.9',
        'Content-Type': 'application/json',
    }

    def __init__(self):
        self.test = ''

    def exception(self, request, exception):
        """Report a failed request; used as the grequests exception handler.

        Returns ``None`` implicitly, so the failed request's slot in the
        list returned by ``grequests.map`` is ``None``.
        """
        print("Problem: {}: {}".format(request.url, exception))

    def do_something(self, response, *args, **kwargs):
        """Print a single response.

        Has the signature of a requests response hook; it is not wired up
        by ``main`` (it could be passed as
        ``hooks={'response': self.do_something}``).
        """
        print("each response", response)

    def main(self, size=30):
        """Build one GET request per payload and run them concurrently.

        Reads the first 50 entries of ``lastNamesWithProxy.json`` (each
        entry must provide a ``lastName`` key), creates one request per
        entry with its own proxy mapping, and executes them with
        ``grequests.map``.

        Args:
            size: Maximum number of requests in flight at once
                (forwarded to ``grequests.map``). Defaults to 30.

        Returns:
            list: One item per request, in request order. Items are
            ``None`` for requests that failed (see ``exception``).
        """
        # Context manager so the file handle is closed deterministically.
        with open('lastNamesWithProxy.json', encoding='utf-8') as source:
            payloads = json.load(source)[:50]

        pending = [
            grequests.get(
                'someurl',
                # Copy so no request can ever share mutable header state.
                headers=dict(self._HEADERS),
                proxies=generate_random_proxy(),
                stream=False,
                params=(
                    ('Name', payload['lastName']),
                    ('type', 'P'),
                ),
            )
            for payload in payloads
        ]
        return grequests.map(
            pending,
            exception_handler=self.exception,
            size=size,
        )

    def collate_responses(self, results):
        """Return the body text of every successful response.

        ``None`` entries (failed requests) are skipped rather than raising
        ``AttributeError``.
        """
        return [response.text for response in results if response is not None]
# Run the batch once and report the wall-clock time it took.
test = Test()
start_time = time.time()
results = test.main()
duration = time.time() - start_time
# grequests.map leaves None in the slot of every request that failed, so
# count only real responses instead of printing a hard-coded number.
print(f"Downloaded {sum(r is not None for r in results)} in {duration} seconds")
我的理解是,如果每个请求都使用新的IP(我通过generate_random_proxy()为每个请求获取一个新的代理),服务器会把它当作新用户并放行,因此我怀疑错误与grequests的并发参数(size=30)有关:
如果把它设为1,一切正常。但是我希望并发数能够尽可能大,因为我需要处理大约2万个网址。