我已经在GCP的AI平台上创建了一个模型。我正在使用按照这篇教程完成的自定义预测例程。
该模型在本地可以按我的预期运行,即我可以发送一个普通的输入数组,由该类负责预处理并给出预测。但是在部署模型之后,我只会收到“未知错误”;而相同的代码在笔记本上运行时没有出现任何错误,因此我无法进行调试。
我的preprocess.py像这样:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion, make_pipeline
import numpy as np
import pandas as pd
import xgboost as xgb
class PositionalSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only the columns at the configured positions."""

    def __init__(self, positions):
        """Store the column positions to select.

        Args:
            positions: Column index (or indices) used to index the second
                axis of the input in ``transform``.
        """
        self.positions = positions

    def fit(self, X, y=None):
        """Do nothing and return ``self``; this transformer is stateless."""
        return self

    def transform(self, X):
        """Return the selected columns of ``X`` as a NumPy array.

        ``X`` is converted with ``np.array`` first, so nested lists are
        accepted as long as they form an array with at least two axes.
        """
        table = np.array(X)
        selected = table[:, self.positions]
        return selected
class StripString(BaseEstimator, TransformerMixin):
    """Transformer that strips surrounding whitespace from every element."""

    def fit(self, X, y=None):
        """Do nothing and return ``self``; this transformer is stateless."""
        return self

    def transform(self, X):
        """Apply ``str.strip`` element-wise and return the resulting array.

        The input is converted with ``np.array`` before the element-wise
        call, so its elements are expected to be strings.
        """
        cells = np.array(X)
        strip_each = np.vectorize(str.strip)
        return strip_each(cells)
class SimpleOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode categorical columns using a per-column vocabulary.

    ``self._values`` holds one ``{category: column_index}`` dict per input
    column. It is pre-populated with a fixed vocabulary, so ``fit`` only
    learns a vocabulary when that list is empty.
    """

    def __init__(self):
        # One mapping per categorical column, in the order the columns are
        # passed to transform().
        self._values = [
            {
                'Local-gov': 2, 'Private': 4, 'State-gov': 7,
                'Without-pay': 8, 'Federal-gov': 1, 'Self-emp-not-inc': 6,
                'Never-worked': 3, '?': 0, 'Self-emp-inc': 5,
            },
            {
                'Bachelors': 9, 'Assoc-voc': 8, '9th': 6, 'Doctorate': 10,
                'HS-grad': 11, '12th': 2, '5th-6th': 4, '1st-4th': 3,
                '10th': 0, 'Preschool': 13, '7th-8th': 5, 'Masters': 12,
                '11th': 1, 'Assoc-acdm': 7, 'Some-college': 15,
                'Prof-school': 14,
            },
            {
                'Divorced': 0, 'Separated': 5, 'Married-AF-spouse': 1,
                'Married-civ-spouse': 2, 'Never-married': 4,
                'Married-spouse-absent': 3, 'Widowed': 6,
            },
            {
                'Husband': 0, 'Not-in-family': 1, 'Wife': 5,
                'Other-relative': 2, 'Unmarried': 4, 'Own-child': 3,
            },
        ]

    def fit(self, X, y=None):
        """Learn the vocabulary from ``X`` if none is present yet.

        Args:
            X: 2-D array-like of categorical values.
            y: Ignored. Accepted so the transformer can be fitted inside a
                pipeline that is given labels, matching the other
                transformers in this module.

        Returns:
            ``self``.
        """
        if not self._values:  # during training only
            X = np.array(X)
            for c in range(X.shape[1]):
                column = X[:, c]
                self._values.append(
                    {v: i for i, v in enumerate(np.unique(column))}
                )
        return self

    def transform(self, X):
        """Return the one-hot encoding of every column, concatenated.

        Values that are not in a column's vocabulary produce an all-zero
        row for that column.

        Args:
            X: 2-D array-like with one column per vocabulary entry in
                ``self._values``.

        Returns:
            An ``int8`` array with ``len(X)`` rows and one output column per
            known category across all input columns.
        """
        X = np.array(X)
        matrices = []
        for c in range(X.shape[1]):
            vocabulary = self._values[c]
            matrix = np.zeros(
                shape=(X.shape[0], len(vocabulary)), dtype=np.int8
            )
            for i, x in enumerate(X[:, c]):
                index = vocabulary.get(x)
                if index is not None:
                    matrix[i, index] = 1
            matrices.append(matrix)
        return np.concatenate(matrices, axis=1)
class preProcessing(object):
    """Build and run the feature pipeline for categorical and numeric columns.

    Categorical columns are selected, stripped and one-hot encoded; numeric
    columns are selected and standard-scaled. The two results are
    concatenated with the categorical features first.
    """

    def __init__(self, cats, nums):
        """Store the column positions.

        Args:
            cats: Positions of the categorical columns.
            nums: Positions of the numeric columns.
        """
        self.cat = cats
        self.num = nums
        # Most recently fitted pipeline, kept so it can be reused without
        # refitting (see runPipeline(fit=False)).
        self._pipeline = None

    def _build_pipeline(self):
        """Return a new, unfitted FeatureUnion of both branches."""
        categorical = make_pipeline(
            PositionalSelector(self.cat),
            StripString(),
            SimpleOneHotEncoder()
        )
        numerical = make_pipeline(
            PositionalSelector(self.num),
            StandardScaler()
        )
        # Labels now match the branch they name; the branch order (and so
        # the output column order) is unchanged.
        return FeatureUnion([
            ('categoricals', categorical),
            ('numericals', numerical),
        ])

    def runPipeline(self, data, fit=True):
        """Transform ``data`` into the model's feature matrix.

        Args:
            data: 2-D array-like of raw instances.
            fit: When true (the default, and the original behaviour), a new
                pipeline is fitted on ``data`` before transforming it, and
                is remembered on the instance. When false, the remembered
                pipeline is applied without refitting, so the scaler uses
                the statistics of the data it was last fitted on rather
                than those of ``data``.

        Returns:
            The transformed feature matrix.

        Raises:
            ValueError: If ``fit`` is false and no pipeline has been fitted.
        """
        if fit:
            pipeline = self._build_pipeline()
            features = pipeline.fit_transform(data)
            self._pipeline = pipeline
            return features

        # getattr: instances unpickled from before _pipeline existed have no
        # such attribute.
        pipeline = getattr(self, '_pipeline', None)
        if pipeline is None:
            raise ValueError(
                'runPipeline(fit=False) requires a previous call with fit=True'
            )
        return pipeline.transform(data)
我的predictor.py如下:
import os
import pickle
import numpy as np
from sklearn.datasets import load_iris
from sklearn.externals import joblib
class MyPredictor(object):
    """Prediction routine that preprocesses instances and queries a model."""

    def __init__(self, model, preprocessor):
        """Store the model and preprocessor used by ``predict``.

        Args:
            model: Object exposing ``predict`` and ``predict_proba``.
            preprocessor: Object exposing ``runPipeline(data)``.
        """
        self._model = model
        self._preprocessor = preprocessor
        self._cats = [1, 3, 5, 7]
        self._nums = [0, 12]
        # Indexed by the class number the model returns.
        self._class_names = [True, False]

    def predict(self, instances, **kwargs):
        """Return predictions for ``instances``.

        Args:
            instances: Either the list of instances itself, or a dict
                holding that list under the ``'instances'`` key (the full
                request body). Indexing a plain list with ``'instances'``
                raises ``TypeError``, so both shapes are accepted here.
            **kwargs: If ``probabilities`` is truthy, class probabilities
                are returned instead of class names.

        Returns:
            A list of class probabilities per instance when
            ``probabilities`` is set, otherwise a list of entries from
            ``self._class_names``.
        """
        if isinstance(instances, dict):
            instances = instances['instances']
        inputs = np.array(instances)
        # NOTE(review): confirm whether runPipeline refits its scaler on this
        # request batch; if it does, scaling depends on the batch contents.
        preprocessed_inputs = self._preprocessor.runPipeline(inputs)
        if kwargs.get('probabilities'):
            probabilities = self._model.predict_proba(preprocessed_inputs)
            return probabilities.tolist()

        print(preprocessed_inputs)
        outputs = self._model.predict(preprocessed_inputs)
        return [self._class_names[class_num] for class_num in outputs]

    @classmethod
    def from_path(cls, model_dir):
        """Load ``model.joblib`` and ``preprocessor.joblib`` from ``model_dir``.

        Args:
            model_dir: Directory containing both joblib files.

        Returns:
            A new instance built from the loaded model and preprocessor.
        """
        model_path = os.path.join(model_dir, 'model.joblib')
        model = joblib.load(model_path)
        preprocessor_path = os.path.join(model_dir, 'preprocessor.joblib')
        preprocessor = joblib.load(preprocessor_path)
        return cls(model, preprocessor)
我以以下形式发送预测请求:
{"instances": [[39.0, "State-gov", 77516.0, "Bachelors", 13.0, "Never-married", "Adm-clerical", "Not-in-family", "White", "Male", 2174.0, 0.0, 40.0, "United-States"]]}
在笔记本中返回True / False,但是使用UI或API时,我只会得到未知错误。