我正在玩FeatureUnion
和Pipeline
,但却遇到了这个用例,
# simply return a column in a Pandas DataFrame
class ItemSelector(BaseEstimator, TransformerMixin):
def __init__(self, col):
self.col = col
def fit(self, x, y=None):
return self
def transform(self, x):
return x[self.col] # return a column
# convert categorical features into one-hot encoding format
class CategoricalEncoder(BaseEstimator, TransformerMixin):
def __init__(self):
self.lb = LabelBinarizer()
def fit(self, x, y=None):
self.lb.fit(x)
return self
def transform(self, x):
rez = self.lb.transform(x)
return rez
# This dummy one just combines the above 2 transformers into one for convenience reasons
class DummyEncoder(BaseEstimator, TransformerMixin):
def __init__(self, col):
# here is a feature union inside which a Pipeline used,
# first, select a column, then one-hot encode the column
self.union = FeatureUnion([('one', Pipeline([('select', ItemSelector(col)), ('encode', CategoricalEncoder())]))])
def fit(self, x, y=None):
self.union.fit(x)
return self
def transform(self, x):
return self.union.transform(x)
# alright, here is the testing code
df = pd.DataFrame(data={'Y': [1,2,1,2,1], 'X': ['a','b','a','b','c']})
pipe_conf = [
('union', FeatureUnion([('union_0', DummyEncoder('X'))])),
('clf', LogisticRegression())
]
pipe = Pipeline(pipe_conf)
# 1) call fit and predict explicitly, it works
pipe.fit(df, df['Y'])
pipe.predict(df)
# 2) via sklearn cross validation, error, lots of error which I'm just not able to understand at all
cross_val_score(pipe, df, df['Y'], cv=2)
我上面代码中的任何错误?请给我一个提示。
错误如下:
Traceback (most recent call last):
File "a.py", line 65, in <module>
cross_val_score(pipe, df, df['Y'], cv=2)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 140, in cross_val_score
for train, test in cv.split(X, y, groups))
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
self.results = batch()
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 238, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 268, in fit
Xt, fit_params = self._fit(X, y, **fit_params)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 234, in _fit
Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 737, in fit_transform
for name, trans, weight in self._iter())
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
self.results = batch()
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 580, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/base.py", line 497, in fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "a.py", line 38, in fit
self.union.fit(x)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 712, in fit
for _, trans, _ in self._iter())
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
self.results = batch()
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 566, in _fit_one_transformer
return transformer.fit(X, y)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 268, in fit
Xt, fit_params = self._fit(X, y, **fit_params)
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py", line 234, in _fit
Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
File "/Users/home/miniconda3/lib/python3.5/site-packages/sklearn/base.py", line 494, in fit_transform
return self.fit(X, **fit_params).transform(X)
File "a.py", line 19, in transform
return x[self.col]
File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py", line 1992, in __getitem__
return self._getitem_column(key)
File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py", line 1999, in _getitem_column
return self._get_item_cache(key)
File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/generic.py", line 1345, in _get_item_cache
values = self._data.get(item)
File "/Users/home/miniconda3/lib/python3.5/site-packages/pandas/core/internals.py", line 3234, in get
raise ValueError("cannot label index with a null key")
ValueError: cannot label index with a null key