我想为scikit创建一个端点,以在sagemaker中学习逻辑回归模型。我有scikit在名为scikitlogistic.py
的文件中学习代码。以下代码包含在scikitlogistic.py
import subprocess as sb
import pandas as pd
import numpy as np
import pickle,json
import sys
def install(package):
sb.call([sys.executable, "-m", "pip", "install", package])
install('s3fs')
if __name__ =='__main__':
train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train') #train_data
outputlocation = 's3://{}/{}/{}'.format(bucket, prefix, 'logistic_scikit')
os.environ["Train"]=train_data
os.environ["SM_MODEL_DIR"]='s3://<bucket>/<prefix>/model_dir'
os.environ["SM_OUTPUT_DIR"]=outputlocation
parser = argparse.ArgumentParser()
# hyperparameters sent by the client are passed as command-line arguments to the script.
parser.add_argument('--solver', type=str, default='liblinear')
# Data, model, and output directories
parser.add_argument('--output_data_dir', type=str, default=os.environ.get('SM_OUTPUT_DIR'))
parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
parser.add_argument('--train', type=str, default=os.environ.get('Train'))
args, _ = parser.parse_known_args()
df = pd.read_csv(args.train)#writing my data into pandas data frame
y = df.Class
X = df.drop('Class', axis=1)
solver = args.solver
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(solver=solver).fit(X_train, y_train)
#creating model.joblib folder
s3 = boto3.resource('s3')
object = s3.Object(<bucket>,"<prefix>/model_dir/model.joblib")
object.put(Body=b'abc')
from sklearn.externals import joblib
if __name__=='__main__':
joblib.dump(lr, '/home/ec2-user/SageMaker/<notebookinstancename>/model.joblib')
在我的jupyter笔记本中,我编写了以下代码以在scikit logistic上训练我的模型
role = <role>
region = boto3.Session().region_name
bucket = <bucket>
prefix = <prefix>
train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train')
train_channel = sagemaker.session.s3_input(train_data, content_type='text/csv')
output_path = 's3://{}/{}/{}'.format(bucket, prefix,'output_data_dir')
from sagemaker.sklearn.estimator import SKLearn
sklearn = SKLearn(
entry_point='scikitlogistic.py',
train_instance_type="ml.c4.xlarge",
role=role, train_instance_count=1,
sagemaker_session=sagemaker.Session(),output_path=output_path,
hyperparameters={'solver':'liblinear'})
现在我适合它
sklearn.fit({'train': train_channel})
当我尝试拟合模型时,它会在ValueError: Invalid file path or buffer object type: <class 'NoneType'>
中的df = pd.read_csv(args.train)
处抛出scikitlogistic.py
。
2019-12-04 12:31:17 Starting - Starting the training job...
2019-12-04 12:31:18 Starting - Launching requested ML instances......
2019-12-04 12:32:25 Starting - Preparing the instances for training...
2019-12-04 12:33:11 Downloading - Downloading input data...
2019-12-04 12:33:41 Training - Downloading the training image..
2019-12-04 12:34:16 Uploading - Uploading generated training model
2019-12-04 12:34:16 Failed - Training job failed
2019-12-04 12:34:01,194 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training
2019-12-04 12:34:01,196 sagemaker-containers INFO No GPUs detected (normal if no gpus installed)
2019-12-04 12:34:01,206 sagemaker_sklearn_container.training INFO Invoking user training script.
2019-12-04 12:34:03,100 sagemaker-containers INFO Module scikitlogistic does not provide a setup.py.
Generating setup.py
2019-12-04 12:34:03,101 sagemaker-containers INFO Generating setup.cfg
2019-12-04 12:34:03,101 sagemaker-containers INFO Generating MANIFEST.in
2019-12-04 12:34:03,101 sagemaker-containers INFO Installing module with the following command:
/miniconda3/bin/python -m pip install .
Processing /opt/ml/code
Building wheels for collected packages: scikitlogistic
Building wheel for scikitlogistic (setup.py): started
Building wheel for scikitlogistic (setup.py): finished with status 'done'
Created wheel for scikitlogistic: filename=scikitlogistic-1.0.0-py2.py3-none-any.whl size=7186 sha256=3a209b33ea1fba4843ad74565d16764ebc3c18b5c0b68ea4e7aa0fe4e31960c0
Stored in directory: /tmp/pip-ephem-wheel-cache-rh9bcece/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3
Successfully built scikitlogistic
Installing collected packages: scikitlogistic
Successfully installed scikitlogistic-1.0.0
2019-12-04 12:34:04,479 sagemaker-containers INFO No GPUs detected (normal if no gpus installed)
2019-12-04 12:34:04,490 sagemaker-containers INFO Invoking user script
Training Env:
{
"additional_framework_parameters": {},
"channel_input_dirs": {
"train": "/opt/ml/input/data/train"
},
"current_host": "algo-1",
"framework_module": "sagemaker_sklearn_container.training:main",
"hosts": [
"algo-1"
],
"hyperparameters": {
"solver": "liblinear"
},
"input_config_dir": "/opt/ml/input/config",
"input_data_config": {
"train": {
"TrainingInputMode": "File",
"S3DistributionType": "FullyReplicated",
"RecordWrapperType": "None"
}
},
"input_dir": "/opt/ml/input",
"is_master": true,
"job_name": "sagemaker-scikit-learn-2019-12-04-12-31-17-192",
"log_level": 20,
"master_hostname": "algo-1",
"model_dir": "/opt/ml/model",
"module_dir": "s3://sagemaker2222/sagemaker-scikit-learn-2019-12-04-12-31-17-192/source/sourcedir.tar.gz",
"module_name": "scikitlogistic",
"network_interface_name": "eth0",
"num_cpus": 4,
"num_gpus": 0,
"output_data_dir": "/opt/ml/output/data",
"output_dir": "/opt/ml/output",
"output_intermediate_dir": "/opt/ml/output/intermediate",
"resource_config": {
"current_host": "algo-1",
"hosts": [
"algo-1"
],
"network_interface_name": "eth0"
},
"user_entry_point": "scikitlogistic.py"
}
Environment variables:
SM_HOSTS=["algo-1"]
SM_NETWORK_INTERFACE_NAME=eth0
SM_HPS={"solver":"liblinear"}
SM_USER_ENTRY_POINT=scikitlogistic.py
SM_FRAMEWORK_PARAMS={}
SM_RESOURCE_CONFIG={"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"}
SM_INPUT_DATA_CONFIG={"train":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"}}
SM_OUTPUT_DATA_DIR=/opt/ml/output/data
SM_CHANNELS=["train"]
SM_CURRENT_HOST=algo-1
SM_MODULE_NAME=scikitlogistic
SM_LOG_LEVEL=20
SM_FRAMEWORK_MODULE=sagemaker_sklearn_container.training:main
SM_INPUT_DIR=/opt/ml/input
SM_INPUT_CONFIG_DIR=/opt/ml/input/config
SM_OUTPUT_DIR=/opt/ml/output
SM_NUM_CPUS=4
SM_NUM_GPUS=0
SM_MODEL_DIR=/opt/ml/model
SM_MODULE_DIR=s3://sagemaker2222/sagemaker-scikit-learn-2019-12-04-12-31-17-192/source/sourcedir.tar.gz
SM_TRAINING_ENV={"additional_framework_parameters":{},"channel_input_dirs":{"train":"/opt/ml/input/data/train"},"current_host":"algo-1","framework_module":"sagemaker_sklearn_container.training:main","hosts":["algo-1"],"hyperparameters":{"solver":"liblinear"},"input_config_dir":"/opt/ml/input/config","input_data_config":{"train":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"}},"input_dir":"/opt/ml/input","is_master":true,"job_name":"sagemaker-scikit-learn-2019-12-04-12-31-17-192","log_level":20,"master_hostname":"algo-1","model_dir":"/opt/ml/model","module_dir":"s3://sagemaker2222/sagemaker-scikit-learn-2019-12-04-12-31-17-192/source/sourcedir.tar.gz","module_name":"scikitlogistic","network_interface_name":"eth0","num_cpus":4,"num_gpus":0,"output_data_dir":"/opt/ml/output/data","output_dir":"/opt/ml/output","output_intermediate_dir":"/opt/ml/output/intermediate","resource_config":{"current_host":"algo-1","hosts":["algo-1"],"network_interface_name":"eth0"},"user_entry_point":"scikitlogistic.py"}
SM_USER_ARGS=["--solver","liblinear"]
SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate
SM_CHANNEL_TRAIN=/opt/ml/input/data/train
SM_HP_SOLVER=liblinear
PYTHONPATH=/miniconda3/bin:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages
Invoking script with the following command:
/miniconda3/bin/python -m scikitlogistic --solver liblinear
Collecting s3fs
Downloading https://files.pythonhosted.org/packages/72/5c/ec84c7ec49fde2c3b0d885ecae4504fa40fc77fef7684e9f2939c50f9b94/s3fs-0.4.0-py3-none-any.whl
Requirement already satisfied: boto3>=1.9.91 in /miniconda3/lib/python3.7/site-packages (from s3fs) (1.10.6)
Collecting fsspec>=0.6.0
Downloading https://files.pythonhosted.org/packages/04/1e/6108c48f2d4ad9ef1a6bff01fb58245c009f37b2bd0505ec6d0f55cc326d/fsspec-0.6.1-py3-none-any.whl (62kB)
Requirement already satisfied: botocore>=1.12.91 in /miniconda3/lib/python3.7/site-packages (from s3fs) (1.13.6)
Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /miniconda3/lib/python3.7/site-packages (from boto3>=1.9.91->s3fs) (0.2.1)
Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /miniconda3/lib/python3.7/site-packages (from boto3>=1.9.91->s3fs) (0.9.4)
Requirement already satisfied: python-dateutil<3.0.0,>=2.1; python_version >= "2.7" in /miniconda3/lib/python3.7/site-packages (from botocore>=1.12.91->s3fs) (2.8.0)
Requirement already satisfied: docutils<0.16,>=0.10 in /miniconda3/lib/python3.7/site-packages (from botocore>=1.12.91->s3fs) (0.15.2)
Requirement already satisfied: urllib3<1.26,>=1.20; python_version >= "3.4" in /miniconda3/lib/python3.7/site-packages (from botocore>=1.12.91->s3fs) (1.24.2)
Requirement already satisfied: six>=1.5 in /miniconda3/lib/python3.7/site-packages (from python-dateutil<3.0.0,>=2.1; python_version >= "2.7"->botocore>=1.12.91->s3fs) (1.12.0)
Installing collected packages: fsspec, s3fs
Successfully installed fsspec-0.6.1 s3fs-0.4.0
Traceback (most recent call last):
File "/miniconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/miniconda3/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/opt/ml/code/scikitlogistic.py", line 101, in <module>
df = pd.read_csv(args.train)
File "/miniconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "/miniconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 440, in _read
filepath_or_buffer, encoding, compression
File "/miniconda3/lib/python3.7/site-packages/pandas/io/common.py", line 224, in get_filepath_or_buffer
raise ValueError(msg.format(_type=type(filepath_or_buffer)))
ValueError: Invalid file path or buffer object type: <class 'NoneType'>
2019-12-04 12:34:06,008 sagemaker-containers ERROR ExecuteUserScriptError:
Command "/miniconda3/bin/python -m scikitlogistic --solver liblinear"
---------------------------------------------------------------------------
UnexpectedStatusException Traceback (most recent call last)
<ipython-input-66-bfad3082f107> in <module>()
----> 1 sklearn.fit({'train': train_data})
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/estimator.py in fit(self, inputs, wait, logs, job_name)
339 self.latest_training_job = _TrainingJob.start_new(self, inputs)
340 if wait:
--> 341 self.latest_training_job.wait(logs=logs)
342
343 def _compilation_job_name(self):
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/estimator.py in wait(self, logs)
902 """
903 if logs:
--> 904 self.sagemaker_session.logs_for_job(self.job_name, wait=True)
905 else:
906 self.sagemaker_session.wait_for_job(self.job_name)
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/session.py in logs_for_job(self, job_name, wait, poll)
1513
1514 if wait:
-> 1515 self._check_job_status(job_name, description, "TrainingJobStatus")
1516 if dot:
1517 print()
~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/session.py in _check_job_status(self, job, desc, status_key_name)
1154 ),
1155 allowed_statuses=["Completed", "Stopped"],
-> 1156 actual_status=status,
1157 )
1158
UnexpectedStatusException: Error for Training job sagemaker-scikit-learn-2019-12-04-12-31-17-192: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/miniconda3/bin/python -m scikitlogistic --solver liblinear"
我确定文件路径有效。我不确定是什么引起了错误。有人可以告诉我我在scikitlogistic.py
中可能犯了什么错误吗?另外,除了AWS文档外,我也找不到在sagemaker中部署scikit学习模型的良好资源。有人可以引导我获得一些好的资源吗?
答案 0 :(得分:0)
S3通道在环境变量中以SM_CHANNEL_{channel name}
的形式创建-请参见documentation here
所以我建议您尝试将os.environ.get('Train')
替换为os.environ.get('SM_CHANNEL_TRAIN')