我从 S3 获取 JSON 数据,但大约有 50% 的情况会遇到 SSL 错误。每个 S3 存储桶包含 500 个 JSON 文件,我用 dask bag 来读取它们:
import dask.bag as db
text = db.read_text("s3://path/to/bucket/*.json")
text = text.map(json.loads).compute()
然后我遍历得到的列表,处理其中的每个元素。运行时出现的错误回溯如下:
Traceback (most recent call last):
File "parse.py", line 69, in <module>
parse_bag(text.map(json.loads).compute(), args.flightUUID, pickle.load(open(args.inPickle, "rb")), args.outDumpPath.rstrip("/"))
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/base.py", line 86, in compute
return compute(self, **kwargs)[0]
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/base.py", line 179, in compute
results = get(dsk, keys, **kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/multiprocessing.py", line 83, in get
queue=queue, get_id=_process_get_id, **kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 484, in get_async
raise(remote_exception(res, tb))
dask.async.SSLError: [SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1754)
Traceback
---------
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 267, in execute_task
result = _execute_task(task, data)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 248, in _execute_task
args2 = [_execute_task(a, cache) for a in args]
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 248, in _execute_task
args2 = [_execute_task(a, cache) for a in args]
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 248, in _execute_task
args2 = [_execute_task(a, cache) for a in args]
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 245, in _execute_task
return [_execute_task(a, cache) for a in arg]
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 248, in _execute_task
args2 = [_execute_task(a, cache) for a in args]
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/async.py", line 249, in _execute_task
return func(*args2)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/dask/bytes/s3.py", line 145, in s3_open_file
return s3.open(s3_path, mode='rb')
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/s3fs/core.py", line 212, in open
return S3File(self, path, mode, block_size=block_size)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/s3fs/core.py", line 680, in __init__
self.size = self.info()['Size']
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/s3fs/core.py", line 686, in info
return self.s3.info(self.path)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/s3fs/core.py", line 303, in info
files = self._lsdir(parent, refresh=refresh)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/s3fs/core.py", line 226, in _lsdir
for i in it:
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/paginate.py", line 102, in __iter__
response = self._make_request(current_kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/paginate.py", line 174, in _make_request
return self._method(**current_kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/client.py", line 278, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/client.py", line 561, in _make_api_call
operation_model, request_dict)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/endpoint.py", line 117, in make_request
return self._send_request(request_dict, operation_model)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/endpoint.py", line 146, in _send_request
success_response, exception):
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/endpoint.py", line 219, in _needs_retry
caught_exception=caught_exception, request_dict=request_dict)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/hooks.py", line 227, in emit
return self._emit(event_name, kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/hooks.py", line 210, in _emit
response = handler(**kwargs)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/retryhandler.py", line 183, in __call__
if self._checker(attempts, response, caught_exception):
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/retryhandler.py", line 251, in __call__
caught_exception)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/retryhandler.py", line 266, in _should_retry
return self._checker(attempt_number, response, caught_exception)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/retryhandler.py", line 314, in __call__
caught_exception)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/retryhandler.py", line 223, in __call__
attempt_number, caught_exception)
File "/Users/hmanjunatha/anaconda/lib/python2.7/site-packages/botocore/retryhandler.py", line 356, in _check_caught_exception
raise caught_exception