全部, 尝试在pipleline中使用KNN imputer时遇到问题。我的工作流程如下所示。
我已经将数值变量和类别变量分开,并构建了一个如下所示的pipleline
numeric_transformer = Pipeline(steps=[
('imputer', KNN(k=3)),
('scaler', StandardScaler())])
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='most_frequent', fill_value='missing')),
('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, num_attr),
('cat', categorical_transformer, cat_attr)])
我想使用KNN imputer来估算数字列中的缺失值。
我进行了逻辑回归
clf_logreg = Pipeline(steps=[('preprocessor', preprocessor),
('classifier', LogisticRegression())])
clf_logreg.fit(X_train, Y_train)
上面的代码块工作正常,但是当我尝试预测X_train时,出现以下错误。请帮帮我。谢谢
train_pred_logreg = clf_logreg.predict(X_train)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-121-f17e49913947> in <module>
1 #train_pred_logreg = clf_logreg.predict(X_train)
----> 2 test_pred_logreg = clf_logreg.predict(X_test)
3
4 print(confusion_matrix(y_true=Y_train, y_pred = train_pred_logreg))
5
/opt/conda/lib/python3.6/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/opt/conda/lib/python3.6/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
419 Xt = X
420 for _, name, transform in self._iter(with_final=False):
--> 421 Xt = transform.transform(Xt)
422 return self.steps[-1][-1].predict(Xt, **predict_params)
423
/opt/conda/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in transform(self, X)
537 'remainder keyword')
538
--> 539 Xs = self._fit_transform(X, None, _transform_one, fitted=True)
540 self._validate_output(Xs)
541
/opt/conda/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
418 message=self._log_message(name, idx, len(transformers)))
419 for idx, (name, trans, column, weight) in enumerate(
--> 420 self._iter(fitted=fitted, replace_strings=True), 1))
421 except ValueError as e:
422 if "Expected 2D array, got 1D array instead" in str(e):
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/opt/conda/lib/python3.6/site-packages/sklearn/pipeline.py in _transform_one(transformer, X, y, weight, **fit_params)
693
694 def _transform_one(transformer, X, y, weight, **fit_params):
--> 695 res = transformer.transform(X)
696 # if we have a weight for this transformer, multiply output
697 if weight is None:
/opt/conda/lib/python3.6/site-packages/sklearn/pipeline.py in _transform(self, X)
538 Xt = X
539 for _, _, transform in self._iter():
--> 540 Xt = transform.transform(Xt)
541 return Xt
542
/opt/conda/lib/python3.6/site-packages/fancyimpute/solver.py in transform(self, X, y)
223 "doesn't support inductive mode. Only %s.fit_transform is "
224 "supported at this time." % (
--> 225 self.__class__.__name__, self.__class__.__name__))
ValueError: KNN.transform not implemented! This imputation algorithm likely doesn't support inductive mode. Only KNN.fit_transform is supported at this time.
当我尝试使用错误消息中所示的fit_transform时,出现以下错误
clf_logreg.fit_transform(X_train, Y_train)
AttributeError: 'LogisticRegression' object has no attribute 'transform'
答案 0 :(得分:0)
尝试clf_logreg.preprocessor.fit_transform(X_train, Y_train)
测试KNN是否有效。根据逻辑回归,它没有转换方法,因为它是分类方法。