Question

我正在尝试使用model_selection.GridSearchCV方法为我的套索（Lasso）回归模型找到最佳的alpha超参数：

def run_optimization():
    """Grid-search ``linear_model__alpha`` on ``pipeline.price_pipe``.

    Loads the CSV named by ``config.TRAINING_DATA_FILE``, keeps only the
    training side of a split with ``test_size=0.1`` and ``random_state=42``,
    applies ``np.log`` to the target, and fits a 5-fold ``GridSearchCV``
    over the pipeline. The sorted parameter names of the pipeline are printed
    just before fitting. Nothing is returned.
    """
    frame = pd.read_csv(config.TRAINING_DATA_FILE)
    features = frame[config.FEATURES]
    target = frame[config.TARGET]

    # The held-out portions (positions 1 and 3 of the result) are discarded.
    parts = train_test_split(features, target, test_size=0.1, random_state=42)
    X_train, y_train = parts[0], parts[2]

    # The search is fitted against the log of the target.
    log_target = np.log(y_train)

    # Single-candidate grid for the step registered as "linear_model".
    alpha_grid = {"linear_model__alpha": [1.0]}
    search = GridSearchCV(
        estimator=pipeline.price_pipe,
        param_grid=alpha_grid,
        cv=5,
        verbose=10,
    )

    # Show every tunable parameter name exposed by the pipeline.
    available_params = sorted(pipeline.price_pipe.get_params().keys())
    print(available_params)

    search.fit(X_train[config.FEATURES], log_target)

我可以在pipeline.price_pipe的参数列表中看到linear_model__alpha，并且params_space中超参数的取值也在可接受的范围内，但是运行该函数时出现以下错误：

['categorical_encoder', 'categorical_encoder__variables', 'categorical_imputer', 
'categorical_imputer__variables', 'drop_features', 'drop_features__variables_to_drop', 
'linear_model', 'linear_model__alpha', 'linear_model__copy_X', 'linear_model__fit_intercept', 
'linear_model__max_iter', 'linear_model__normalize', 'linear_model__positive', 
'linear_model__precompute', 'linear_model__random_state', 'linear_model__selection', 
'linear_model__tol', 'linear_model__warm_start', 'log_transformer', 'log_transformer__variables', 
'memory', 'numerical_imputer', 'numerical_imputer__variables', 'rare_label_encoder', 
'rare_label_encoder__tol', 'rare_label_encoder__variables', 'scaler', 'scaler__copy', 
'scaler__feature_range', 'steps', 'temporal_variable', 'temporal_variable__reference_variable', 
'temporal_variable__variables', 'verbose']
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] linear_model__alpha=1.0 .........................................
C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py:552: 
FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters 
will be set to nan. Details:
Traceback (most recent call last):
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 330, in 
fit
Xt = self._fit(X, y, **fit_params_steps)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 296, in 
_fit
**fit_params_steps[name])
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\joblib\memory.py", line 352, in 
__call__
return self.func(*args, **kwargs)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 740, in  
_fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\base.py", line 693, in 
fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "E:\Machine_Learning_Deployment\preprocessors.py", line 69, in transform
X[feature] = X[self.reference_variables] - X[feature]
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None

FitFailedWarning)
[CV] ............... linear_model__alpha=1.0, score=nan, total=   0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total=   0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total=   0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total=   0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total=   0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s finished
Traceback (most recent call last):
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File ".\optimization.py", line 39, in <module>
run_optimization()
File ".\optimization.py", line 33, in run_optimization
model.fit(X_train, y_train)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\utils\validation.py", line 
 72, in inner_f
 return f(**kwargs)
 File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_search.py", 
line 765, in fit
self.best_estimator_.fit(X, y, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 330, in 
fit
Xt = self._fit(X, y, **fit_params_steps)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 296, in 
_fit
**fit_params_steps[name])
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\joblib\memory.py", line 352, in 
__call__
return self.func(*args, **kwargs)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 740, in 
_fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\base.py", line 693, in 
fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "E:\Machine_Learning_Deployment\preprocessors.py", line 69, in transform
X[feature] = X[self.reference_variables] - X[feature]
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in 
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None

我无法确切指出管道在GridSearch期间中断的原因（我也尝试过拟合RandomSearchCV，出现同样的错误）。但是，我可以使用pipeline.price.fit(X_train[config.FEATURES], y_train)在相同的数据集上成功拟合管道。

感谢您为使该工具正常工作提供的任何帮助。

sklearn GridSearchCV管道FitFailedWarning：估算器拟合失败（Estimator fit failed）

0 个答案: