致电pipeline.fit_transform(X_train, y_train)
时出现以下错误。
AttributeError:'numpy.ndarray'对象没有属性'fit'
管道中的各个变压器都可以正常工作,但是当我在管道中将它们组合在一起时,会出现错误。
X, y = training_data.drop('Response', axis=1), training_data['Response']
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
class preprocess(TransformerMixin, BaseEstimator):
def __init__():
self.X = None
def fit(self, X, y=None):
self.X = X
self.PI2 = 'Product_Info_2'
self.PI2_categories = list(training_data[self.PI2].unique())
return self
def transform(self, X, y=None):
Xt = X.copy()
Xt = pd.concat([Xt, pd.get_dummies(Xt[self.PI2])], axis=1).drop(self.PI2, axis=1)
Xt.drop('Id', axis=1, inplace=True)
Xt.fillna(value=0, inplace=True)
return np.array(Xt)
class apply_NB(TransformerMixin, BaseEstimator):
def __init__(self):
self.gridCV = None
self.params = {"var_smoothing": [x*10**(-9) for x in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 1, 1.5, 2, 2.5, 3, 3.5,
4, 4.5, 5]]}
self.best_params = None
def fit(self, X, y):
self.gridCV = GridSearchCV(GaussianNB(), self.params, verbose=10, n_jobs=-1)
self.gridCV.fit(X, y)
self.best_params = self.gridCV.best_params_
return self
def transform(self, X, y=None):
Xt = self.gridCV.predict(X)
return Xt
nb_pipeline = Pipeline([('preprocess', preprocess),
('fit_NB', apply_NB())])
nb_pipeline.fit_transform(X_train, y_train)
当我尝试最后一行时,我会得到:
AttributeError:'numpy.ndarray'对象没有属性'fit'
答案 0 :(得分:1)
您忘记将self
放在预处理的第一个初始化中
class preprocess(TransformerMixin, BaseEstimator):
def __init__(self):
self.X = None
然后您也必须为applyNB初始化该类。
nb_pipeline = [('preprocess', preprocess()),
('fit_NB', apply_NB())]
进行这些更改后似乎可以为m工作!