我正在尝试使用 model_selection.GridSearchCV 方法为我的套索(Lasso)回归模型找到最佳的 alpha 超参数:
def run_optimization():
    """Grid-search the ``linear_model__alpha`` parameter of the price pipeline.

    Reads the training CSV named by ``config.TRAINING_DATA_FILE``, keeps the
    training part of a 90/10 split (``random_state=42``), replaces the target
    with its natural log, and fits a 5-fold ``GridSearchCV`` around
    ``pipeline.price_pipe``. The sorted parameter names of the pipeline are
    printed before fitting so the grid keys can be checked against them.

    Returns:
        None. The fitted search object is not returned.
    """
    frame = pd.read_csv(config.TRAINING_DATA_FILE)
    features = frame[config.FEATURES]
    target = frame[config.TARGET]

    # Only the training portion of the split is used; the hold-out is discarded.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.1, random_state=42
    )
    y_train = np.log(y_train)

    # Single-candidate grid: alpha of the pipeline step named "linear_model".
    params_space = {"linear_model__alpha": [1.0]}
    model = GridSearchCV(
        estimator=pipeline.price_pipe,
        param_grid=params_space,
        verbose=10,
        cv=5,
    )

    available_params = pipeline.price_pipe.get_params().keys()
    print(sorted(available_params))

    model.fit(X_train[config.FEATURES], y_train)
我可以在 pipeline.price_pipe 的参数列表中看到 linear_model__alpha,并且 params_space 中超参数的取值也在可接受的范围内,但是运行该函数时出现以下错误:
['categorical_encoder', 'categorical_encoder__variables', 'categorical_imputer',
'categorical_imputer__variables', 'drop_features', 'drop_features__variables_to_drop',
'linear_model', 'linear_model__alpha', 'linear_model__copy_X', 'linear_model__fit_intercept',
'linear_model__max_iter', 'linear_model__normalize', 'linear_model__positive',
'linear_model__precompute', 'linear_model__random_state', 'linear_model__selection',
'linear_model__tol', 'linear_model__warm_start', 'log_transformer', 'log_transformer__variables',
'memory', 'numerical_imputer', 'numerical_imputer__variables', 'rare_label_encoder',
'rare_label_encoder__tol', 'rare_label_encoder__variables', 'scaler', 'scaler__copy',
'scaler__feature_range', 'steps', 'temporal_variable', 'temporal_variable__reference_variable',
'temporal_variable__variables', 'verbose']
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] linear_model__alpha=1.0 .........................................
C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py:552:
FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters
will be set to nan. Details:
Traceback (most recent call last):
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-
packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 330, in
fit
Xt = self._fit(X, y, **fit_params_steps)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 296, in
_fit
**fit_params_steps[name])
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\joblib\memory.py", line 352, in
__call__
return self.func(*args, **kwargs)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 740, in
_fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\base.py", line 693, in
fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "E:\Machine_Learning_Deployment\preprocessors.py", line 69, in transform
X[feature] = X[self.reference_variables] - X[feature]
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None
FitFailedWarning)
[CV] ............... linear_model__alpha=1.0, score=nan, total= 0.0s
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total= 0.0s
[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total= 0.0s
[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 0.0s remaining: 0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total= 0.0s
[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 0.0s remaining: 0.0s
[CV] linear_model__alpha=1.0 .........................................
[CV] ............... linear_model__alpha=1.0, score=nan, total= 0.0s
[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 0.0s remaining: 0.0s
[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 0.0s finished
Traceback (most recent call last):
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File ".\optimization.py", line 39, in <module>
run_optimization()
File ".\optimization.py", line 33, in run_optimization
model.fit(X_train, y_train)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\utils\validation.py", line
72, in inner_f
return f(**kwargs)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_search.py",
line 765, in fit
self.best_estimator_.fit(X, y, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 330, in
fit
Xt = self._fit(X, y, **fit_params_steps)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 296, in _
fit
**fit_params_steps[name])
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\joblib\memory.py", line 352, in
__call__
return self.func(*args, **kwargs)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\pipeline.py", line 740, in
_fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "C:\Users\MY\AppData\Roaming\Python\Python37\site-packages\sklearn\base.py", line 693, in
fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "E:\Machine_Learning_Deployment\preprocessors.py", line 69, in transform
X[feature] = X[self.reference_variables] - X[feature]
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\MY\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None
我无法确切指出管道在 GridSearch 期间出错的原因(我也尝试过使用 RandomizedSearchCV 进行拟合,出现同样的错误)。而直接使用 pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)
却可以将管道成功拟合到相同的数据集上。
感谢您为解决此问题提供的任何帮助。