无法访问Amazon SageMaker上已部署的Estimator(端点)

时间:2019-12-18 19:07:43

标签: python amazon-sagemaker

我正在按照这篇教程(this tutorial)中所述的步骤进行操作:

从步骤5a开始,在该步骤中,我们部署已训练好的模型

xgb_predictor = xgb.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')

到步骤5b,我尝试用留出的测试数据来测试端点

test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values #load the data into an array
xgb_predictor.content_type = 'text/csv' # set the data type for an inference
xgb_predictor.serializer = csv_serializer # set the serializer type
predictions = xgb_predictor.predict(test_data_array).decode('utf-8')

我收到一个连接被关闭的错误,而且错误信息似乎很笼统,看不出具体原因:

ConnectionResetError                      Traceback (most recent call last)
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    353         else:
--> 354             conn.request(method, url, **httplib_request_kw)
    355 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/awsrequest.py in _send_request(self, method, url, body, headers, *args, **kwargs)
    124         rval = super(AWSConnection, self)._send_request(
--> 125             method, url, body, headers, *args, **kwargs)
    126         self._expect_header_set = False

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/awsrequest.py in _send_output(self, message_body, *args, **kwargs)
    151             message_body = None
--> 152         self.send(msg)
    153         if self._expect_header_set:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/awsrequest.py in send(self, str)
    235             return
--> 236         return super(AWSConnection, self).send(str)
    237 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in send(self, data)
    985         try:
--> 986             self.sock.sendall(data)
    987         except TypeError:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/ssl.py in sendall(self, data, flags)
    971                 while count < amount:
--> 972                     v = self.send(byte_view[count:])
    973                     count += v

~/anaconda3/envs/tensorflow_p36/lib/python3.6/ssl.py in send(self, data, flags)
    940                     self.__class__)
--> 941             return self._sslobj.write(data)
    942         else:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/ssl.py in write(self, data)
    641         """
--> 642         return self._sslobj.write(data)
    643 

ConnectionResetError: [Errno 104] Connection reset by peer

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/httpsession.py in send(self, request)
    262                 decode_content=False,
--> 263                 chunked=self._chunked(request.headers),
    264             )

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    637             retries = retries.increment(method, url, error=e, _pool=self,
--> 638                                         _stacktrace=sys.exc_info()[2])
    639             retries.sleep()

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    342             # Disabled, indicate to re-raise the error.
--> 343             raise six.reraise(type(error), error, _stacktrace)
    344 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
--> 685             raise value.with_traceback(tb)
    686         raise value

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    353         else:
--> 354             conn.request(method, url, **httplib_request_kw)
    355 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/awsrequest.py in _send_request(self, method, url, body, headers, *args, **kwargs)
    124         rval = super(AWSConnection, self)._send_request(
--> 125             method, url, body, headers, *args, **kwargs)
    126         self._expect_header_set = False

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/awsrequest.py in _send_output(self, message_body, *args, **kwargs)
    151             message_body = None
--> 152         self.send(msg)
    153         if self._expect_header_set:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/awsrequest.py in send(self, str)
    235             return
--> 236         return super(AWSConnection, self).send(str)
    237 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/http/client.py in send(self, data)
    985         try:
--> 986             self.sock.sendall(data)
    987         except TypeError:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/ssl.py in sendall(self, data, flags)
    971                 while count < amount:
--> 972                     v = self.send(byte_view[count:])
    973                     count += v

~/anaconda3/envs/tensorflow_p36/lib/python3.6/ssl.py in send(self, data, flags)
    940                     self.__class__)
--> 941             return self._sslobj.write(data)
    942         else:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/ssl.py in write(self, data)
    641         """
--> 642         return self._sslobj.write(data)
    643 

ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

During handling of the above exception, another exception occurred:

ConnectionClosedError                     Traceback (most recent call last)
<ipython-input-54-866c1007c96f> in <module>()
      2 xgb_predictor.content_type = 'text/csv' # set the data type for an inference
      3 xgb_predictor.serializer = csv_serializer # set the serializer type
----> 4 predictions = xgb_predictor.predict(test_data_array).decode('utf-8') # predict!
      5 predictions_array = np.fromstring(predictions[1:], sep=',') # and turn the prediction into an array
      6 print(predictions_array.shape)

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/sagemaker/predictor.py in predict(self, data, initial_args)
    105 
    106         request_args = self._create_request_args(data, initial_args)
--> 107         response = self.sagemaker_session.sagemaker_runtime_client.invoke_endpoint(**request_args)
    108         return self._handle_response(response)
    109 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
    355                     "%s() only accepts keyword arguments." % py_operation_name)
    356             # The "self" in this scope is referring to the BaseClient.
--> 357             return self._make_api_call(operation_name, kwargs)
    358 
    359         _api_call.__name__ = str(py_operation_name)

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
    646         else:
    647             http, parsed_response = self._make_request(
--> 648                 operation_model, request_dict, request_context)
    649 
    650         self.meta.events.emit(

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/client.py in _make_request(self, operation_model, request_dict, request_context)
    665     def _make_request(self, operation_model, request_dict, request_context):
    666         try:
--> 667             return self._endpoint.make_request(operation_model, request_dict)
    668         except Exception as e:
    669             self.meta.events.emit(

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/endpoint.py in make_request(self, operation_model, request_dict)
    100         logger.debug("Making request for %s with params: %s",
    101                      operation_model, request_dict)
--> 102         return self._send_request(request_dict, operation_model)
    103 
    104     def create_request(self, params, operation_model=None):

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/endpoint.py in _send_request(self, request_dict, operation_model)
    135             request, operation_model, context)
    136         while self._needs_retry(attempts, operation_model, request_dict,
--> 137                                 success_response, exception):
    138             attempts += 1
    139             # If there is a stream associated with the request, we need

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/endpoint.py in _needs_retry(self, attempts, operation_model, request_dict, response, caught_exception)
    229             event_name, response=response, endpoint=self,
    230             operation=operation_model, attempts=attempts,
--> 231             caught_exception=caught_exception, request_dict=request_dict)
    232         handler_response = first_non_none_response(responses)
    233         if handler_response is None:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
    354     def emit(self, event_name, **kwargs):
    355         aliased_event_name = self._alias_event_name(event_name)
--> 356         return self._emitter.emit(aliased_event_name, **kwargs)
    357 
    358     def emit_until_response(self, event_name, **kwargs):

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
    226                  handlers.
    227         """
--> 228         return self._emit(event_name, kwargs)
    229 
    230     def emit_until_response(self, event_name, **kwargs):

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/hooks.py in _emit(self, event_name, kwargs, stop_on_response)
    209         for handler in handlers_to_call:
    210             logger.debug('Event %s: calling handler %s', event_name, handler)
--> 211             response = handler(**kwargs)
    212             responses.append((handler, response))
    213             if stop_on_response and response is not None:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/retryhandler.py in __call__(self, attempts, response, caught_exception, **kwargs)
    181 
    182         """
--> 183         if self._checker(attempts, response, caught_exception):
    184             result = self._action(attempts=attempts)
    185             logger.debug("Retry needed, action of: %s", result)

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/retryhandler.py in __call__(self, attempt_number, response, caught_exception)
    249     def __call__(self, attempt_number, response, caught_exception):
    250         should_retry = self._should_retry(attempt_number, response,
--> 251                                           caught_exception)
    252         if should_retry:
    253             if attempt_number >= self._max_attempts:

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/retryhandler.py in _should_retry(self, attempt_number, response, caught_exception)
    275             # If we've exceeded the max attempts we just let the exception
    276             # propogate if one has occurred.
--> 277             return self._checker(attempt_number, response, caught_exception)
    278 
    279 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/retryhandler.py in __call__(self, attempt_number, response, caught_exception)
    315         for checker in self._checkers:
    316             checker_response = checker(attempt_number, response,
--> 317                                        caught_exception)
    318             if checker_response:
    319                 return checker_response

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/retryhandler.py in __call__(self, attempt_number, response, caught_exception)
    221         elif caught_exception is not None:
    222             return self._check_caught_exception(
--> 223                 attempt_number, caught_exception)
    224         else:
    225             raise ValueError("Both response and caught_exception are None.")

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/retryhandler.py in _check_caught_exception(self, attempt_number, caught_exception)
    357         # the MaxAttemptsDecorator is not interested in retrying the exception
    358         # then this exception just propogates out past the retry code.
--> 359         raise caught_exception

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/endpoint.py in _do_get_response(self, request, operation_model)
    198             http_response = first_non_none_response(responses)
    199             if http_response is None:
--> 200                 http_response = self._send(request)
    201         except HTTPClientError as e:
    202             return (None, e)

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/endpoint.py in _send(self, request)
    242 
    243     def _send(self, request):
--> 244         return self.http_session.send(request)
    245 
    246 

~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/botocore/httpsession.py in send(self, request)
    292                 error=e,
    293                 request=request,
--> 294                 endpoint_url=request.url
    295             )
    296         except Exception as e:

ConnectionClosedError: Connection was closed before we received a valid response from endpoint URL: "https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/xgboost-2019-12-18-17-33-47-561/invocations".

据我所知,端点已经成功部署,我也没有发现任何与连接相关的问题。请问有什么调试方面的建议吗?

0 个答案:

没有答案