我正在本地计算机上的Jupyter Notebook中执行分析,并从S3中读取数据。当我关闭笔记本并打开另一个笔记本以读取另一个文件时,出现以下错误:
ProtocolError: ("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
由于问题似乎出在现有连接上,我参照 this thread 尝试了等待以及关闭现有连接。
但 Boto3 似乎没有针对 s3client.get_object() 的 .close()
或与之等效的方法(请参见下面的代码)。
我在启动时的第一次连接不会产生此错误。
出现该错误之后,如果我把计算机完全关机再重新开机,错误就不再出现。
但如果只是重新启动计算机,错误仍然存在。
如何在不重新启动计算机的情况下关闭连接?
import pandas as pd
import boto3
import boto3.session
from botocore.client import Config
# Client configuration: connect/read timeouts and a cap of 5 retry attempts.
config = Config(
    read_timeout=1000,
    connect_timeout=500,
    retries={'max_attempts': 5},
)

# Pull the credentials resolved by the default boto3 session so they can be
# passed to the client explicitly.
cred = boto3.Session().get_credentials()
ACCESS_KEY, SECRET_KEY, SESSION_TOKEN = (
    cred.access_key,
    cred.secret_key,
    cred.token,
)

# Build the S3 client from the explicit credentials and the config above.
s3client = boto3.client(
    's3',
    config=config,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
    aws_session_token=SESSION_TOKEN,
)

# Fetch the object and let pandas parse the CSV directly from the response
# body; this is the read_csv call the traceback below points at.
response = s3client.get_object(Key='mydata.csv', Bucket='mydatabucket')
df = pd.read_csv(response['Body'])
下面是我得到的回溯(traceback)和错误信息;
我原本期望得到的是一个存储在 df 中的 pandas DataFrame:
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in _error_catcher(self)
359 try:
--> 360 yield
361
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in read(self, amt, decode_content, cache_content)
441 cache_content = False
--> 442 data = self._fp.read(amt)
443 if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
C:\ProgramData\Anaconda3\lib\http\client.py in read(self, amt)
446 b = bytearray(amt)
--> 447 n = self.readinto(b)
448 return memoryview(b)[:n].tobytes()
C:\ProgramData\Anaconda3\lib\http\client.py in readinto(self, b)
490 # (for example, reading in 1k chunks)
--> 491 n = self.fp.readinto(b)
492 if not n and b:
C:\ProgramData\Anaconda3\lib\socket.py in readinto(self, b)
588 try:
--> 589 return self._sock.recv_into(b)
590 except timeout:
C:\ProgramData\Anaconda3\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1051 self.__class__)
-> 1052 return self.read(nbytes, buffer)
1053 else:
C:\ProgramData\Anaconda3\lib\ssl.py in read(self, len, buffer)
910 if buffer is not None:
--> 911 return self._sslobj.read(len, buffer)
912 else:
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
<ipython-input-5-4d25be33c7b8> in <module>
1 response = s3client.get_object(Bucket='mydatabucket', Key='mydata.csv')
----> 2 audit = pd.read_csv(response['Body'])
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
700 skip_blank_lines=skip_blank_lines)
701
--> 702 return _read(filepath_or_buffer, kwds)
703
704 parser_f.__name__ = name
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
433
434 try:
--> 435 data = parser.read(nrows)
436 finally:
437 parser.close()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
1137 def read(self, nrows=None):
1138 nrows = _validate_integer('nrows', nrows)
-> 1139 ret = self._engine.read(nrows)
1140
1141 # May alter columns / col_dict
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
1993 def read(self, nrows=None):
1994 try:
-> 1995 data = self._reader.read(nrows)
1996 except StopIteration:
1997 if self._first_chunk:
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
pandas/_libs/parsers.pyx in pandas._libs.parsers.raise_parser_error()
C:\ProgramData\Anaconda3\lib\site-packages\botocore\response.py in read(self, amt)
76 """
77 try:
---> 78 chunk = self._raw_stream.read(amt)
79 except URLLib3ReadTimeoutError as e:
80 # TODO: the url will be None as urllib3 isn't setting it yet
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in read(self, amt, decode_content, cache_content)
457 # raised during streaming, so all calls with incorrect
458 # Content-Length are caught.
--> 459 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
460
461 if data:
C:\ProgramData\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
128 value = type()
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
132 # Suppress StopIteration *unless* it's the same exception that
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in _error_catcher(self)
376 except (HTTPException, SocketError) as e:
377 # This includes IncompleteRead.
--> 378 raise ProtocolError('Connection broken: %r' % e, e)
379
380 # If no exception is thrown, we should avoid cleaning up
ProtocolError: ("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
答案 0 :(得分:0)
没有必要先关闭或删除客户端、之后再重新打开它。每次对AWS的调用都是针对该服务终结点的一次独立API请求,并不会维持长期连接。
因此,您可以使用同一个客户端访问多个文件,而不必担心关闭和重新打开与S3的连接。