我正在尝试使用 GridSearchCV 为回归算法寻找最佳参数。在本示例中我使用 KNN 回归器:管道单独拟合时运行正常,但是当我把它放入 GridSearchCV 后,就会抛出错误 "ValueError: Input contains NaN"。
我尝试了许多其他算法并更改了步骤,但是仍然存在相同的问题。
而且我确信使用管道'PIPE_FULL_FLOW'转换数据后,DataFrame中没有空值。
# Preprocessing pipeline: missing-value imputation followed by encoding.
PIPE_FULL_FLOW = Pipeline([
    ('FEATUNION_MISSING_IMPUTATION', FEATUNION_MISSING_IMPUTATION),
    ('FEATUNION_STEP3', FEATUNION_STEP3),
])
# Per-column null counts of the transformed full dataset
# (the author reports that every count is zero).
PIPE_FULL_FLOW.transform(DF_FULL_DATA).isnull().sum()
# Build a second pipeline from the same preprocessing steps, with a KNN
# regressor added as the final estimator. The steps list is a new list, but
# the step objects themselves are shared with PIPE_FULL_FLOW.
PIPE_FULL_FLOW_KNN = Pipeline(
    PIPE_FULL_FLOW.steps + [('PREDICT_KNN', KNeighborsRegressor())]
)
# Fitting this pipeline directly on the full data works (no GridSearchCV):
#PIPE_FULL_FLOW_KNN.fit(DF_FULL_DATA,DF_FULL_DATA[['SalePrice']])
# Candidate neighbour counts 3..10 for the 'PREDICT_KNN' step.
DICT_GRID_KNN = {'PREDICT_KNN__n_neighbors': list(range(3, 11))}
GRID_KNN = GridSearchCV(
    estimator=PIPE_FULL_FLOW_KNN,
    param_grid=DICT_GRID_KNN,
    cv=5,
    error_score='raise',  # surface the underlying exception instead of scoring NaN
)
# Fitting the grid search is the call that raises "Input contains NaN".
# NOTE(review): the traceback shows the error coming from MyOneHotEncoder.fit
# while a CV training fold (a row subset) is being fitted. Presumably a custom
# transformer rebuilds/concatenates DataFrames with a reset index, which would
# misalign rows on a subset and introduce NaN -- confirm against those classes.
GRID_KNN.fit(DF_FULL_DATA, DF_FULL_DATA[['SalePrice']])
完整的错误消息:
ValueError Traceback (most recent call last)
<ipython-input-66-9ca843c67586> in <module>
----> 1 GRID_KNN.fit(DF_FULL_DATA,DF_FULL_DATA[['SalePrice']])
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
263 This estimator
264 """
--> 265 Xt, fit_params = self._fit(X, y, **fit_params)
266 if self._final_estimator is not None:
267 self._final_estimator.fit(Xt, y, **fit_params)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params)
228 Xt, fitted_transformer = fit_transform_one_cached(
229 cloned_transformer, Xt, y, None,
--> 230 **fit_params_steps[name])
231 # Replace the transformer of the step with the fitted
232 # transformer. This is necessary when loading the transformer
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\memory.py in __call__(self, *args, **kwargs)
340
341 def __call__(self, *args, **kwargs):
--> 342 return self.func(*args, **kwargs)
343
344 def call_and_shelve(self, *args, **kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, **fit_params)
612 def _fit_transform_one(transformer, X, y, weight, **fit_params):
613 if hasattr(transformer, 'fit_transform'):
--> 614 res = transformer.fit_transform(X, y, **fit_params)
615 else:
616 res = transformer.fit(X, y, **fit_params).transform(X)
<ipython-input-5-ec6c2a2a481e> in fit_transform(self, X, y, **fit_params)
14 weight=weight,
15 **fit_params)
---> 16 for name, trans, weight in self._iter())
17
18 if not result:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
918 self._iterating = self._original_iterator is not None
919
--> 920 while self.dispatch_one_batch(iterator):
921 pass
922
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, **fit_params)
612 def _fit_transform_one(transformer, X, y, weight, **fit_params):
613 if hasattr(transformer, 'fit_transform'):
--> 614 res = transformer.fit_transform(X, y, **fit_params)
615 else:
616 res = transformer.fit(X, y, **fit_params).transform(X)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
298 Xt, fit_params = self._fit(X, y, **fit_params)
299 if hasattr(last_step, 'fit_transform'):
--> 300 return last_step.fit_transform(Xt, y, **fit_params)
301 elif last_step is None:
302 return Xt
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
514 self._categorical_features, copy=True)
515 else:
--> 516 return self.fit(X).transform(X)
517
518 def _legacy_transform(self, X):
<ipython-input-7-87152cbc4d01> in fit(self, X, y, sep)
6
7 def fit(self, X, y=None,sep='_'):
----> 8 super(MyOneHotEncoder,self).fit(X)
9
10 self.LIST_FEATURES_CATEGORIES = []
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
425 return self
426 else:
--> 427 self._fit(X, handle_unknown=self.handle_unknown)
428 return self
429
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
59
60 def _fit(self, X, handle_unknown='error'):
---> 61 X = self._check_X(X)
62
63 n_samples, n_features = X.shape
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
54 if not _get_config()['assume_finite']:
55 if _object_dtype_isnan(X).any():
---> 56 raise ValueError("Input contains NaN")
57
58 return X
ValueError: Input contains NaN