我正在尝试使用Grid Search为我的SVM找到最佳的超参数。通过以下方式进行操作:
from sklearn.model_selection import GridSearchCV
param_grid = {'coef0': [10, 5, 0.5, 0.001], 'C': [100, 50, 1, 0.001]}
poly_svm_search = SVC(kernel="poly", degree="2")
grid_search = GridSearchCV(poly_svm_search, param_grid, cv=5, scoring='f1')
grid_search.fit(train_data, train_labels)
我收到此错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-dadf5782618c> in <module>
8
----> 9 grid_search.fit(train_data, train_labels)
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
981 # remaining jobs.
982 self._iterating = False
--> 983 if self.dispatch_one_batch(iterator):
984 self._iterating = self._original_iterator is not None
985
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
823 return False
824 else:
--> 825 self._dispatch(tasks)
826 return True
827
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
780 with self._lock:
781 job_idx = len(self._jobs)
--> 782 job = self._backend.apply_async(batch, callback=cb)
783 # A job can complete so quickly than its callback is
784 # called before we get here, causing self._jobs to
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
543 # Don't delay the application, to avoid keeping the input
544 # arguments in memory
--> 545 self.results = batch()
546
547 def get(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
259 with parallel_backend(self._backend):
260 return [func(*args, **kwargs)
--> 261 for func, args, kwargs in self.items]
262
263 def __len__(self):
~/.local/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
210
211 seed = rnd.randint(np.iinfo('i').max)
--> 212 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
213 # see comment on the other call to np.iinfo in this file
214
~/.local/lib/python3.6/site-packages/sklearn/svm/base.py in _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
291 sample_weight, self.nu, self.cache_size, self.epsilon,
292 int(self.shrinking), int(self.probability), self.max_iter,
--> 293 random_seed)
294
295 self._warn_from_fit_status()
sklearn/svm/libsvm_sparse.pyx in sklearn.svm.libsvm_sparse.libsvm_sparse_train()
TypeError: an integer is required
我的train_labels
变量包含一个布尔值列表,因此我有一个二进制分类问题。 train_data
是<class'scipy.sparse.csr.csr_matrix'>
,基本上包含所有scaled
和One-Hot encoded
功能。
我做错了什么?对于我来说,很难找到问题所在。谢谢您提前提供的帮助;)。
答案 0 :(得分:0)
使用此行初始化SVC时:
poly_svm_search = SVC(kernel="poly", degree="2")
您正在为degree
param提供一个字符串,这是因为它周围的逗号反了。但是according to the documentation,degree
将整数作为值。
学位:int,可选(默认= 3)多项式内核的学位 功能(“ poly”)。被所有其他内核忽略。
所以您需要这样做:
poly_svm_search = SVC(kernel="poly", degree=2)
请注意,我在这里没有使用反引号。