我正在尝试进行网格搜索以优化参数,我的代码是:
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
parameters = [{'kernel':['rbf'], 'gamma' :[1e-2, 1e-3, 1e-4 ,1e-5],
'C': [1, 10, 100, 1000]},
{'kernel': ['poly'], 'C': [1, 10, 100, 1000], 'degree':[1,2,3,4]}]
clf = GridSearchCV (SVC(C=1), parameters, cv=5, scoring='f1_macro')
clf.fit(X_train, y_train)
我的X_train, y_train
是浮点数,它们是:
x_train = [[3.30049159],[2.25226244],[1.44078451] ...,[5.63927925],[5.431458],[4.35674369]]
y_train = [[0.2681013],[0.03454225],[0.02062136]...,[0.21827915],[0.28768273,[0.27969417]]
我认为错误可能是我使用的是浮点数,如果是这种情况,也许只有整数可以传递到分类器中,如何解决?我的完整回溯错误消息是:
ValueError Traceback (most recent call last)
<ipython-input-51-fb016a0a90cc> in <module>()
11
12 clf = GridSearchCV (SVC(C=1), parameters, cv=5, scoring='f1_macro')
---> 13 clf.fit(X_train, y_train)
~/anaconda3_501/lib/python3.6/site-packages/sklearn/grid_search.py in fit(self, X, y)
836
837 """
--> 838 return self._fit(X, y, ParameterGrid(self.param_grid))
839
840
~/anaconda3_501/lib/python3.6/site-packages/sklearn/grid_search.py in _fit(self, X, y, parameter_iterable)
572 self.fit_params, return_parameters=True,
573 error_score=self.error_score)
--> 574 for parameters in parameter_iterable
575 for train, test in cv)
576
~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~/anaconda3_501/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~/anaconda3_501/lib/python3.6/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score)
1673 estimator.fit(X_train, **fit_params)
1674 else:
-> 1675 estimator.fit(X_train, y_train, **fit_params)
1676
1677 except Exception as e:
~/anaconda3_501/lib/python3.6/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
148
149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
--> 150 y = self._validate_targets(y)
151
152 sample_weight = np.asarray([]
~/anaconda3_501/lib/python3.6/site-packages/sklearn/svm/base.py in _validate_targets(self, y)
498 def _validate_targets(self, y):
499 y_ = column_or_1d(y, warn=True)
--> 500 check_classification_targets(y)
501 cls, y = np.unique(y_, return_inverse=True)
502 self.class_weight_ = compute_class_weight(self.class_weight, cls, y_)
~/anaconda3_501/lib/python3.6/site-packages/sklearn/utils/multiclass.py in check_classification_targets(y)
170 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
171 'multilabel-indicator', 'multilabel-sequences']:
--> 172 raise ValueError("Unknown label type: %r" % y_type)
173
174
ValueError: Unknown label type: 'continuous'
对此将有所帮助。
答案 0 :(得分:2)
您正在使用分类器。您只能对二进制或分类变量进行分类。如果要使用支持向量但要预测数值,则应使用支持向量回归。 否则,您将必须将y值分类。
答案 1 :(得分:2)
这是回归问题,而不是分类问题。模型试图做的是-使X适应Y定义的类(连续的)。 SVC
分类器未知。使用SVR
from sklearn.svm import SVR
from sklearn.grid_search import GridSearchCV
X_train = [[3.30049159], [2.25226244], [1.44078451]]
#1. Y should be 1d array of dimensions (n_samples,)
y_train = [0.2681013, 0.03454225, 0.02062136]
#Grid Search
parameters = [{'kernel': ['rbf'], 'gamma': [1e-2, 1e-3, 1e-4, 1e-5],
'C': [1, 10, 100, 1000]},
{'kernel': ['poly'], 'C': [1, 10, 100, 1000], 'degree': [1, 2, 3, 4]}]
#2. Type of regressor
reg = SVR(C=1)
#3. Regression evaluation cannot be done using f1_macro, so updated NMSE
clf = GridSearchCV(reg, parameters, cv=5, scoring='neg_mean_squared_error')
clf.fit(X_train, y_train)