我试图用自己的类编写复杂的管道,但出现此错误:
TypeError: fit_transform() takes 2 positional arguments but 3 were given
我尝试了类似问题中给出的解决方案,即使用自定义的 LabelBinarizer,但该错误仍未解决。
class NewLabelBinarizer(LabelBinarizer):
    """Binarizer whose methods tolerate an extra, ignored ``y`` argument.

    Each method accepts ``(X, y=None)`` and forwards only ``X`` to the
    parent class, so callers that always pass two positional arguments
    do not fail on the parent's narrower signatures.
    """

    def fit(self, X, y=None):
        """Fit the parent binarizer on ``X``; ``y`` is accepted and ignored."""
        parent = super()
        return parent.fit(X)

    def transform(self, X, y=None):
        """Transform ``X`` with the parent binarizer; ``y`` is ignored."""
        parent = super()
        return parent.transform(X)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` via the parent, then transform ``X``.

        ``transform`` is invoked on whatever object the parent's ``fit``
        returns, so a subclass override of ``transform`` is honoured.
        """
        fitted = super().fit(X)
        return fitted.transform(X)
class LabelPreprocessing(NewLabelBinarizer, TransformerMixin):
    """Label-cleaning transformer layered on ``NewLabelBinarizer``.

    ``fit`` delegates to the parent class; ``transform`` tokenizes each
    label into runs of three or more ASCII letters.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the underlying binarizer with the given arguments."""
        # The original body was ``pass``, which skipped the parent
        # initialiser and left the instance without the attributes the
        # parent class sets up there.
        super().__init__(*args, **kwargs)

    def fit(self, y, X=None):
        """Fit the parent binarizer on ``y`` and return ``self``.

        ``X`` is accepted for signature compatibility and ignored.
        """
        super().fit(y)
        return self

    def transform(self, y, X=None):
        """Return the tokenized labels as an array.

        Assumes ``y`` is a pandas Series of strings (the ``.str`` accessor
        and ``.values`` are used) -- TODO confirm against the caller.
        ``X`` is accepted and ignored.
        """
        # Keep only alphabetic tokens that are at least three letters long.
        y = y.str.findall(r'([a-zA-Z]{3,})')
        # NOTE(review): this replacement runs after ``findall``, so the
        # elements are presumably lists rather than strings at this point;
        # verify that it still removes "film" as intended.
        y = y.replace(regex=r'(film)', value=' ')
        return y.values
class PlotPreprocessing(BaseEstimator, TransformerMixin):
    """Stateless text normaliser for plot descriptions.

    ``transform`` lower-cases each entry, replaces a fixed set of
    punctuation characters with spaces and collapses whitespace runs.
    """

    def __init__(self, *args, **kwargs):
        """Compile the patterns used for cleaning; arguments are ignored."""
        # Raw strings keep ``\[``, ``\]`` and ``\|`` from being parsed as
        # (invalid) string escapes; the compiled patterns are unchanged.
        self.REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')
        # Compiled here but not applied by ``transform`` below.
        self.BAD_SYMBOLS_RE = re.compile(r'[^0-9a-z #+_]')

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return the cleaned texts as an array.

        Assumes ``X`` is a pandas Series of strings (``.str``, ``.map``
        and ``.values`` are used) -- TODO confirm against the caller.
        ``y`` is accepted and ignored.
        """
        X = X.str.lower()
        # Punctuation listed in REPLACE_BY_SPACE_RE becomes a single space.
        X = X.map(lambda x: self.REPLACE_BY_SPACE_RE.sub(" ", x))
        # Collapse any run of whitespace into one space.
        X = X.map(lambda x: re.sub(r'\s+', " ", x))
        return X.values
# Feature preparation (label branch + plot branch), followed by TF-IDF
# vectorisation and a linear regression model.
pipeline = Pipeline([
    (
        'text_preparation',
        FeatureUnion([
            ('label', Pipeline([
                ('labelPreprocessing', LabelPreprocessing()),
                # NOTE(review): a pipeline calls fit_transform(Xt, y) on its
                # last step; MultiLabelBinarizer.fit_transform presumably
                # accepts a single data argument, which matches the reported
                # TypeError -- confirm against the scikit-learn docs.
                ('mlb', MultiLabelBinarizer()),
            ])),
            ('plot', PlotPreprocessing()),
        ]),
    ),
    (
        'tfidf_vectorizer',
        TfidfVectorizer(
            ngram_range=(1, 2),
            max_df=0.9,
            min_df=5,
            token_pattern=r'(\S+)',
        ),
    ),
    ('model', LinearRegression()),
])

# train_test_split returns the splits grouped per input array, i.e.
# (plot_train, plot_test, label_train, label_test). The original unpacking
# order bound the test features to ``train_y``.
train_X, test_X, train_y, test_y = train_test_split(plot, label)

pipeline.fit(train_X, train_y)
y_pred = pipeline.predict(test_X)
然后我得到
TypeError Traceback (most recent call last)
<ipython-input-27-a8a60de025fd> in <module>()
17 train_X, train_y, test_X, test_y = train_test_split(plot, label)
18
---> 19 pipeline.fit(train_X, train_y)
20 y_pred = pipeline.predict( test_X )
14 frames
/usr/local/lib/python3.6/dist-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
391 return Xt
392 if hasattr(last_step, 'fit_transform'):
--> 393 return last_step.fit_transform(Xt, y, **fit_params)
394 else:
395 return last_step.fit(Xt, y, **fit_params).transform(Xt)
TypeError: fit_transform() takes 2 positional arguments but 3 were given
我还尝试在 fit/predict 方法的参数中添加 **fit_params。
答案 0 :(得分:0)
我认为问题出在 MultiLabelBinarizer 的 fit_transform 方法签名上。尝试将其替换为:
class MLBinarizer(MultiLabelBinarizer):
    """MultiLabelBinarizer whose ``fit_transform`` accepts ``(X, y=None)``.

    The extra ``y`` parameter lets the binarizer be called with two
    positional arguments; only ``X`` is forwarded to the parent class.
    """

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its binarized form; ``y`` is ignored."""
        # Zero-argument super() starts the lookup after MLBinarizer, so the
        # parent's own fit_transform runs. The original
        # ``super(MultiLabelBinarizer, self)`` started after the parent and
        # skipped that implementation.
        return super().fit_transform(X)
请注意,fit 和 fit_transform 方法都以 X 作为第一个参数。因此,我建议像这样重写您的 LabelPreprocessing:
class LabelPreprocessing(NewLabelBinarizer, TransformerMixin):
    """Label-cleaning transformer layered on ``NewLabelBinarizer``.

    ``fit`` takes the data as its first argument ``X`` and delegates to
    the parent class; ``transform`` tokenizes each label into runs of
    three or more ASCII letters.
    """

    def fit(self, X, y=None):
        """Fit the parent binarizer on ``X`` and return ``self``.

        ``y`` is accepted for signature compatibility and ignored.
        """
        parent = super()
        parent.fit(X)
        return self

    def transform(self, y, X=None):
        """Return the tokenized labels as an array.

        Assumes ``y`` is a pandas Series of strings (the ``.str`` accessor
        and ``.values`` are used) -- TODO confirm against the caller.
        ``X`` is accepted and ignored.
        """
        # Extract alphabetic tokens of three or more letters, then apply
        # the "film" regex replacement to the resulting Series.
        tokens = y.str.findall(r'([a-zA-Z]{3,})')
        return tokens.replace(regex=r'(film)', value=' ').values