sklearn FeatureUnion不能使用cross_val_score吗?

时间:2016-11-09 13:25:33

标签: python scikit-learn

我正在玩FeatureUnionPipeline,但却遇到了这个用例,

# simply return a column in a Pandas DataFrame
class ItemSelector(BaseEstimator, TransformerMixin):
  def __init__(self, col):
    self.col = col

  def fit(self, x, y=None):
    return self

  def transform(self, x):
    return x[self.col]  # return a column


# convert categorical features into one-hot encoding format
class CategoricalEncoder(BaseEstimator, TransformerMixin):
  def __init__(self):
    self.lb = LabelBinarizer()

  def fit(self, x, y=None):
    self.lb.fit(x)
    return self

  def transform(self, x):
    rez = self.lb.transform(x)
    return rez


# This dummy one just combines the above 2 transformers into one for convenience reasons
class DummyEncoder(BaseEstimator, TransformerMixin):
  def __init__(self, col):
    # here is a feature union inside which a Pipeline used,
    # first, select a column, then one-hot encode the column
    self.union = FeatureUnion([('one', Pipeline([('select', ItemSelector(col)), ('encode', CategoricalEncoder())]))])

  def fit(self, x, y=None):
    self.union.fit(x)
    return self

  def transform(self, x):
    return self.union.transform(x)

# alright, here is the testing code
df = pd.DataFrame(data={'Y': [1,2,1,2,1], 'X': ['a','b','a','b','c']})
pipe_conf = [
      ('union', FeatureUnion([('union_0', DummyEncoder('X'))])),
      ('clf', LogisticRegression())
    ]
pipe = Pipeline(pipe_conf)

# 1) call fit and predict explicitly, it works
pipe.fit(df, df['Y'])
pipe.predict(df)

# 2) via sklearn cross validation, error, lots of error which I'm just not able to understand at all
cross_val_score(pipe, df, df['Y'], cv=2)

我上面代码中的任何错误?请给我一个提示。

错误如下:

Traceback (most recent call last):
  File "a.py", line 65, in <module>
    cross_val_score(pipe, df, df['Y'], cv=2)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 140, in cross_val_score
    for train, test in cv.split(X, y, groups))
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 238, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 268, in fit
    Xt, fit_params = self._fit(X, y, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 234, in _fit
    Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 737, in fit_transform
    for name, trans, weight in self._iter())
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 580, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/base.py", line 497, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
  File "a.py", line 38, in fit
    self.union.fit(x)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 712, in fit
    for _, trans, _ in self._iter())
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 566, in _fit_one_transformer
    return transformer.fit(X, y)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 268, in fit
    Xt, fit_params = self._fit(X, y, **fit_params)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 234, in _fit
    Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
  File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/base.py", line 494, in fit_transform
    return self.fit(X, **fit_params).transform(X)
  File "a.py", line 19, in transform
    return x[self.col]
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py", line 1992, in __getitem__
    return self._getitem_column(key)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py", line 1999, in _getitem_column
    return self._get_item_cache(key)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/generic.py", line 1345, in _get_item_cache
    values = self._data.get(item)
  File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/internals.py", line 3234, in get
    raise ValueError("cannot label index with a null key")
ValueError: cannot label index with a null key

0 个答案:

没有答案