我正在尝试创建一个简单的管道(Pipeline),将分类数据转换为独热(one-hot)向量。不幸的是它失败了,因为出于某种原因,数据需要事先用 ravel() 展平成一维数组。
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer
# Create a class to select numerical or categorical columns
# since Scikit-Learn doesn't handle DataFrames yet
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Transformer that extracts the configured columns of ``X`` as raw values.

    ``attribute_names`` is stored unchanged at construction time and is used
    as the index key into ``X`` when :meth:`transform` is called.
    """

    def __init__(self, attribute_names):
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Learn nothing; return ``self`` unchanged."""
        return self

    def transform(self, X):
        """Index ``X`` by ``attribute_names`` and return the result's ``.values``."""
        selected = X[self.attribute_names]
        return selected.values
# Categorical pipeline: select the categorical columns, flatten them,
# label-encode the values, then one-hot encode the labels.
cat_pipeline = Pipeline([
('selector', DataFrameSelector(cat_attributes)),
# NOTE(review): `ravel` is not defined or imported in the visible code; this
# step is a placeholder for the transformer the question is asking for.
('rave', ravel()), # <- this step needs to ravel the data before it goes into LabelEncoder()
# NOTE(review): the traceback below reports a step's fit_transform() receiving
# one positional argument too many; presumably that step is LabelEncoder,
# whose fit_transform may accept only a single array -- confirm.
('cat_encoder_label', LabelEncoder()),
('cat_encoder_hot', OneHotEncoder())
])
我输入的数据如下所示:
X.head()
1 Geuzenveld - Slotermeer
2 Westerpark
3 Noord-Oost
4 IJburg - Zeeburgereiland
5 De Aker - Nieuw Sloten
X.type()
numpy.ndarray
Xt = cat_pipeline.fit_transform(X)
- FAILS
我该如何在管道中实现这个 ravel 步骤?
编辑:以下是完整的错误信息:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-173-4876cd8a7bee> in <module>()
17 #data['neighbourhood_cleansed'].values.ravel().shape, data['neighbourhood_cleansed'].values.shape
18
---> 19 c = cat_pipeline.fit_transform(data)
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
288 """
289 last_step = self._final_estimator
--> 290 Xt, fit_params = self._fit(X, y, **fit_params)
291 if hasattr(last_step, 'fit_transform'):
292 return last_step.fit_transform(Xt, y, **fit_params)
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params)
220 Xt, fitted_transformer = fit_transform_one_cached(
221 cloned_transformer, None, Xt, y,
--> 222 **fit_params_steps[name])
223 # Replace the transformer of the step with the fitted
224 # transformer. This is necessary when loading the transformer
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\lib\site-packages\sklearn\externals\joblib\memory.py in __call__(self, *args, **kwargs)
360
361 def __call__(self, *args, **kwargs):
--> 362 return self.func(*args, **kwargs)
363
364 def call_and_shelve(self, *args, **kwargs):
~\AppData\Local\Continuum\Anaconda3\envs\tensorflow\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, weight, X, y, **fit_params)
587 **fit_params):
588 if hasattr(transformer, 'fit_transform'):
--> 589 res = transformer.fit_transform(X, y, **fit_params)
590 else:
591 res = transformer.fit(X, y, **fit_params).transform(X)
TypeError: fit_transform() takes 2 positional arguments but 3 were given