尝试向模型发送预测请求时出现“未知错误”。在笔记本中进行预测时有效

时间:2019-08-05 08:21:44

标签: python scikit-learn google-cloud-platform pipeline xgboost

我已经在GCP的AI平台上创建了一个模型。我使用的是按照这篇教程完成的自定义预测例程(custom prediction routine)。

该模型可以按我的预期运行,即我可以发送一个普通的输入数组,由该类负责预处理并给出预测。但是部署模型后,我只会收到“未知错误”;而同样的代码在笔记本上运行时没有任何错误,所以我无法调试。

我的preprocess.py像这样:

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion, make_pipeline
import numpy as np
import pandas as pd
import xgboost as xgb

class PositionalSelector(BaseEstimator, TransformerMixin):
  """Transformer that keeps only the columns found at given positions.

  `positions` is used directly as the column index of a 2-D NumPy array,
  so it may be anything NumPy accepts in that position (e.g. a list of
  integer column indices).
  """

  def __init__(self, positions):
    self.positions = positions

  def fit(self, X, y=None):
    """Nothing is learned; return the transformer unchanged."""
    return self

  def transform(self, X):
    """Return the selected columns of `X` as a NumPy array."""
    table = np.array(X)
    selected = table[:, self.positions]
    return selected


class StripString(BaseEstimator, TransformerMixin):
  """Transformer that strips surrounding whitespace from every element."""

  def fit(self, X, y=None):
    """Nothing is learned; return the transformer unchanged."""
    return self

  def transform(self, X):
    """Apply `str.strip` element-wise and return the resulting array."""
    cells = np.array(X)
    # np.vectorize lifts the plain-Python str.strip to work on whole arrays.
    return np.vectorize(str.strip)(cells)


class SimpleOneHotEncoder(BaseEstimator, TransformerMixin):
  """One-hot encode categorical columns from a per-column vocabulary.

  `self._values` holds one dict per input column, mapping each known
  category string to its position inside that column's one-hot block.
  The constructor ships a hard-coded vocabulary for four columns, so with
  the current constructor `fit` never learns anything; it only builds a
  vocabulary when `self._values` is empty.
  """

  def __init__(self):
    # Hard-coded vocabularies, one dict per categorical column, in column order.
    self._values = [{'Local-gov': 2, 'Private': 4, 'State-gov': 7, 'Without-pay': 8, 'Federal-gov': 1, 'Self-emp-not-inc': 6, 'Never-worked': 3, '?': 0, 'Self-emp-inc': 5}, {'Bachelors': 9, 'Assoc-voc': 8, '9th': 6, 'Doctorate': 10, 'HS-grad': 11, '12th': 2, '5th-6th': 4, '1st-4th': 3, '10th': 0, 'Preschool': 13, '7th-8th': 5, 'Masters': 12, '11th': 1, 'Assoc-acdm': 7, 'Some-college': 15, 'Prof-school': 14}, {'Divorced': 0, 'Separated': 5, 'Married-AF-spouse': 1, 'Married-civ-spouse': 2, 'Never-married': 4, 'Married-spouse-absent': 3, 'Widowed': 6}, {'Husband': 0, 'Not-in-family': 1, 'Wife': 5, 'Other-relative': 2, 'Unmarried': 4, 'Own-child': 3}]

  def fit(self, X, y=None):
    """Learn the vocabularies from `X` if none are present yet.

    Args:
      X: 2-D array-like of category strings, one column per feature.
      y: Ignored. Accepted so the transformer can sit inside a pipeline
        that is fitted with a target, like the other transformers here.

    Returns:
      self
    """
    if not self._values: # during training only
      # Convert first so plain nested lists work, as they do in transform().
      X = np.array(X)
      for c in range(X.shape[1]):
        column = X[:, c]
        values = {v: i for i, v in enumerate(np.unique(column))}
        self._values.append(values)

    return self

  def transform(self, X):
    """Return the column-wise concatenation of the one-hot blocks.

    Each input column `c` becomes an int8 block with
    `len(self._values[c])` columns. A value that is missing from the
    column's vocabulary leaves its row of that block all zeros.
    """
    X = np.array(X)
    matrices = []
    for c in range(X.shape[1]):
      vocabulary = self._values[c]
      column = X[:, c]
      matrix = np.zeros(shape=(len(column), len(vocabulary)), dtype=np.int8)
      for i, x in enumerate(column):
        if x in vocabulary:
          matrix[i, vocabulary[x]] = 1
      matrices.append(matrix)
    return np.concatenate(matrices, axis=1)


class preProcessing(object):
  """Builds and runs the feature pipeline for categorical and numeric columns.

  Attributes are plain column-position selectors:
    cat: positions of the categorical columns.
    num: positions of the numeric columns.
  """

  def __init__(self,cats,nums):
    self.cat = cats
    self.num = nums

  def runPipeline(self,data):
    """Transform `data` into the model's feature matrix.

    The output has the one-hot encoded categorical columns first, followed
    by the standardized numeric columns.

    NOTE(review): a fresh pipeline is built and fitted on `data` on every
    call (`fit_transform`), so the StandardScaler statistics come from the
    rows passed in, not from the training set. Confirm this is intended
    for prediction-time use.
    """
    categorical_pipeline = make_pipeline(
        PositionalSelector(self.cat),
        StripString(),
        SimpleOneHotEncoder()
    )

    numerical_pipeline = make_pipeline(
        PositionalSelector(self.num),
        StandardScaler()
    )

    # The labels now match the pipelines they name (they were swapped).
    # Order is unchanged, so the output column layout stays the same.
    pipeline = FeatureUnion([
        ('categoricals', categorical_pipeline),
        ('numericals', numerical_pipeline),
    ])

    train_features = pipeline.fit_transform(data)
    return train_features

而predictor.py如下:

import os
import pickle

import numpy as np
from sklearn.datasets import load_iris
from sklearn.externals import joblib

class MyPredictor(object):
  """Custom prediction routine pairing a model with its preprocessor.

  The preprocessor must expose `runPipeline(inputs)`, and the model must
  expose `predict` and `predict_proba`.
  """

  def __init__(self, model, preprocessor):
    self._model = model
    self._preprocessor = preprocessor
    self._cats = [1, 3, 5, 7]
    self._nums = [0, 12]
    # Indexed by the class number returned by the model's predict().
    self._class_names = [True,False]

  def predict(self, instances, **kwargs):
    """Preprocess `instances` and return the model's predictions.

    Args:
      instances: Either the list of instances itself, or a request-style
        dict holding that list under the 'instances' key. Per the AI
        Platform Predictor interface the deployed service passes the bare
        list, so indexing it with 'instances' unconditionally fails there
        even though a notebook call with the full dict works.
      **kwargs: If 'probabilities' is truthy, class probabilities are
        returned instead of class names.

    Returns:
      A list of probability rows, or a list of entries from
      `self._class_names`, one per instance.
    """
    if isinstance(instances, dict):
      instances = instances['instances']
    inputs = np.array(instances)
    preprocessed_inputs = self._preprocessor.runPipeline(inputs)
    if kwargs.get('probabilities'):
      probabilities = self._model.predict_proba(preprocessed_inputs)
      return probabilities.tolist()

    # Debug output kept from the original implementation.
    print(preprocessed_inputs)
    outputs = self._model.predict(preprocessed_inputs)
    return [self._class_names[class_num] for class_num in outputs]

  @classmethod
  def from_path(cls, model_dir):
    """Build a predictor from the artifacts stored in `model_dir`.

    Loads 'model.joblib' and 'preprocessor.joblib' from that directory.
    NOTE: joblib.load unpickles its input, so only load trusted artifacts.
    """
    model_path = os.path.join(model_dir, 'model.joblib')
    model = joblib.load(model_path)

    preprocessor_path = os.path.join(model_dir, 'preprocessor.joblib')
    preprocessor = joblib.load(preprocessor_path)

    return cls(model, preprocessor)

我以以下形式发送预测请求:

{"instances": [[39.0, "State-gov", 77516.0, "Bachelors", 13.0, "Never-married", "Adm-clerical", "Not-in-family", "White", "Male", 2174.0, 0.0, 40.0, "United-States"]]}

在笔记本中返回True / False,但是使用UI或API时,我只会得到未知错误。

0 个答案:

没有答案