我正在使用multithreathing从API导入数据,
def importdata(data, auth_token):
# # --- Get data from Keros API --
hed = {'Authorization': 'Bearer ' + auth_token, 'Accept': 'application/json'}
urlApi = 'http://...&offset=0&limit=1'
responsedata = requests.get(urlApi, data=data, headers=hed, verify=False)
if responsedata.ok:
num_of_records = int(math.ceil(responsedata.json()['total']))
value_limit = 249 # Number of records per page.
num_of_pages = num_of_records / value_limit
print num_of_records
print num_of_pages
pages = [i for i in range(0, num_of_pages-1)]
#pages = [i for i in range(0, 3)]
datarALL = []
with ThreadPoolExecutor(max_workers=num_of_pages) as executor:
futh = [(executor.submit(getdata, page, hed, value_limit)) for page in pages]
for data in as_completed(futh):
datarALL = datarALL + data.result()
return datarALL
else:
return None
def getdata(page,hed,limit):
is_valid = True
value_offset = page * limit
value_limit = limit #limit of records allowed per page
datarALL = []
url = 'http://...&offset={0}&limit={1}'.format(value_offset,value_limit)
responsedata = requests.get(url, data=data, headers=hed, verify=False)
if responsedata.status_code == 200: #200 for successful call
responsedata = responsedata.text
jsondata = json.loads(responsedata)
if "results" in jsondata:
if jsondata["results"]:
datarALL = datarALL + jsondata["results"]
print "page {} finished".format(page)
return datarALL
设置后:
pages = [i for i in range(0, 3)]
有效,没有问题。
但是当我尝试
pages = [i for i in range(0, num_of_pages-1)]
它会产生此错误:
page 317 finished
page 240 finished
page 15 finished
page 12 finished
page 350 finished
page 16 finished
page 288 finished
page 18 finished
page 17 finished
Traceback (most recent call last):
File "/home/ubuntu/scripts/import.py", line 84, in importdata
datarALL = datarALL + data.result()
File "/usr/local/lib/python2.7/dist-packages/concurrent/futures/_base.py", line 455, in result
return self.__get_result()
File "/usr/local/lib/python2.7/dist-packages/concurrent/futures/thread.py", line 63, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/ubuntu/scripts/import.py", line 54, in getdata
responsedata = requests.get(url, data=data, headers=hed, verify=False)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 512, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 662, in send
r.content
File "/usr/local/lib/python2.7/dist-packages/requests/models.py", line 827, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "/usr/local/lib/python2.7/dist-packages/requests/models.py", line 752, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ("Connection broken: error(104, 'Connection reset by peer')", error(104, 'Connection reset by peer'))
我不确定为什么会发生这种情况以及为什么它会使我的连接断开? 知道为什么它不起作用吗?
有没有一种方法可以真正了解损坏的细节?例如哪个特定的url
导致了问题等?
答案 0 :(得分:1)
requests.exceptions.ChunkedEncodingError :(“连接断开:错误(104,'对等方重置连接')”,错误(104,'对等方重置连接))
该日志很清楚,脚本和服务器之间的连接已断开。原因是Connection reset by peer
。如果您不明白这意味着什么,this question
我建议您将代码包装到try...catch块中,然后优雅地重试,记录或结束脚本。