I have the following code:
from suds.client import Client
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import cpu_count
from pandas import DataFrame
from sqlalchemy import create_engine

class Br:
    def __init__(self, TOKEN, Br_WSDL):
        self.TOKEN = TOKEN
        self.Br_WSDL = Br_WSDL

    def _get_bornto_data(self, page_num):
        self.bApi = Client(self.Br_WSDL)
        session_id = self.bApi.service.login(self.TOKEN)
        session_header = self.bApi.factory.create("sessionHeader")
        filter_type = self.bApi.factory.create("filterType")
        self.contact_filter = self.bApi.factory.create("contactFilter")
        session_header.sessionId = session_id
        self.bApi.set_options(soapheaders=session_header)
        self.contact_filter.listId.append(contacts_list_id)
        self.contact_filter.type = filter_type.AND
        return self.bApi.service.readContacts(self.contact_filter, False, [], page_num)

    def get_Br_content(self, page_num):
        full_results = []
        for res in self._get_bornto_data(page_num):
            if "'" not in res["email"] and '"' not in res["email"]:
                full_results.append(dict(res))
        return full_results
NUM_WORKERS = cpu_count()
...
bro = Br(TOKEN=TOKEN, Br_WSDL=Br_WSDL)
while True:
    results = []
    pages = [i for i in range(c, c + NUM_WORKERS)]
    with ThreadPoolExecutor(NUM_WORKERS) as executor:
        futures = [executor.submit(bro.get_Br_content, page) for page in pages]
        for future in as_completed(futures):
            results.extend(future.result())
    if len(results) < 1:
        break
    print("Get batch {0} with {1} results".format(c, len(results)))
    df = DataFrame(results)
    df.to_sql(...)
    print("Pages {0} to {1} was insert".format(c, c + NUM_WORKERS))
    c += NUM_WORKERS
This code connects to a SOAP API, fetches records, and inserts them into a MySQL database. It usually runs fine, but sometimes it fails with a timeout:
Pages 184 to 188 was insert
Get batch 188 with 20000 results
Pages 188 to 192 was insert
Traceback (most recent call last):
  ...
    return self.bApi.service.readContacts(self.contact_filter, False, [], page_num)
  File "/usr/local/lib/python2.7/dist-packages/suds/client.py", line 521, in __call__
    return client.invoke(args, kwargs)
  ...
  File "/usr/lib/python2.7/httplib.py", line 453, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python2.7/httplib.py", line 409, in _read_status
    line = self.fp.readline(_MAXLINE + 1)
  File "/usr/lib/python2.7/socket.py", line 480, in readline
    data = self._sock.recv(self._rbufsize)
  File "/usr/lib/python2.7/ssl.py", line 756, in recv
    return self.read(buflen)
  File "/usr/lib/python2.7/ssl.py", line 643, in read
    v = self._sslobj.read(len)
ssl.SSLError: ('The read operation timed out',)
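
One thing I have considered is raising the client-side read timeout. As far as I can tell, suds accepts a timeout option (in seconds, defaulting to 90) when constructing the Client, so a first step might look like the line below; the 300-second value is an arbitrary guess on my part, not a tested number:

from suds.client import Client

# Guessed value: give slow readContacts pages more time before httplib gives up.
bApi = Client(Br_WSDL, timeout=300)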
How can I avoid this? The code will soon be replaced by another script, but until then I need it to keep working. Is there a way to catch the exception and retry just the problematic page, without throwing away the whole run?
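
Something like the sketch below is roughly what I have in mind: a small wrapper (fetch_page and its parameters are names I made up) that retries a single failing page a few times before giving up, submitted to the pool in place of bro.get_Br_content. I have not tested it against this API:

import time
from ssl import SSLError

def fetch_page(page, max_retries=3, delay=5):
    # Retry one page on a read timeout instead of killing the whole batch.
    for attempt in range(max_retries):
        try:
            return bro.get_Br_content(page)
        except SSLError:
            if attempt == max_retries - 1:
                raise  # still failing after the last attempt, so give up
            time.sleep(delay)

# ...and in the main loop:
# futures = [executor.submit(fetch_page, page) for page in pages]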
I am using Python 2.7 (we will be upgrading to Python 3 soon).