我正在尝试使用C支持向量分类对电影评论进行情感分析。以下代码返回错误
ValueError:估算器 BaggingClassifier 的参数 C 无效(Invalid parameter C for estimator BaggingClassifier)。
据我所知,错误出现在执行网格搜索的时候。我尝试列出可用的参数,但使用其中的参数名时仍然遇到同样的错误。代码运行时,在出错之前似乎会打印出一些作业(job)的信息;在我看来,有些尝试可以运行,而另一些则不行。我到处都查过,虽然有一些类似的问题已经得到解答,但我无法将它们的解决方案应用到我的问题上。我遗漏了什么?
# Tokenizer that keeps runs of ASCII letters/digits and drops everything else.
token = RegexpTokenizer(r'[a-zA-Z0-9]+')
# English stop-word list handed to the vectorizer below.
en_stopwords = set(stopwords.words("english"))
# TF-IDF features over lowercased unigrams, using the custom tokenizer and
# the stop-word set defined above.
tf = TfidfVectorizer(lowercase=True,
tokenizer=token.tokenize,
analyzer='word',
stop_words=en_stopwords,
ngram_range=(1, 1))
# text_tf = tf.fit_transform(data['Phrase'])
# Hold out 30% of the rows for the final score; the vectorizer itself is
# fitted later as the first pipeline step, on the training folds only.
X_train, X_test, y_train, y_test = train_test_split(data['Phrase'], data['Sentiment'], test_size=0.3, random_state=1)
# 3-fold stratified, shuffled cross-validation with a fixed seed.
kfolds = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
np.random.seed(1)
n_estimators=10
# Bagging ensemble of n_estimators linear SVCs; each member draws a
# 1/n_estimators fraction of the training samples (max_samples=0.1 here).
svc = BaggingClassifier(SVC(probability=True, kernel="linear", class_weight="balanced"),
max_samples=1.0 / n_estimators, n_estimators=n_estimators)
# The step names ('tf', 'svc') are the prefixes used to address nested
# parameters in param_grid keys.
pipeline_svm = Pipeline([('tf', tf), ('svc', svc)])
# NOTE(review): 'svc__C' asks for a parameter named C on the 'svc' step
# itself, and that step is the BaggingClassifier, not the SVC it wraps.
# This is the key rejected with "Invalid parameter C for estimator
# BaggingClassifier" when GridSearchCV calls set_params on the pipeline.
grid_svc = GridSearchCV(pipeline_svm,
param_grid={'svc__C': [1,10]},
cv=kfolds,
scoring='roc_auc',
verbose=50,
n_jobs=-1)
# Run the search on the training split, then score the refitted best
# pipeline on the held-out split.
grid_svc.fit(X_train, y_train)
grid_svc.score(X_test, y_test)
sklearn.externals.joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\process_executor.py", line 418, in _process_worker
r = call_item()
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\process_executor.py", line 272, in __call__
return self.fn(*self.args, **self.kwargs)
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 567, in __call__
return self.func(*args, **kwargs)
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 225, in __call__
for func, args, kwargs in self.items]
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 225, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 514, in _fit_and_score
estimator.set_params(**parameters)
File "C:\Anaconda3\lib\site-packages\sklearn\pipeline.py", line 147, in set_params
self._set_params('steps', **kwargs)
File "C:\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py", line 52, in _set_params
super(_BaseComposition, self).set_params(**params)
File "C:\Anaconda3\lib\site-packages\sklearn\base.py", line 222, in set_params
valid_params[key].set_params(**sub_params)
File "C:\Anaconda3\lib\site-packages\sklearn\base.py", line 213, in set_params
(key, self))
ValueError: Invalid parameter C for estimator BaggingClassifier(base_estimator=SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
kernel='linear', max_iter=-1, probability=True, random_state=None,
shrinking=True, tol=0.001, verbose=False),
bootstrap=True, bootstrap_features=False, max_features=1.0,
max_samples=0.1, n_estimators=10, n_jobs=None, oob_score=False,
random_state=None, verbose=0, warm_start=False). Check the list of available parameters with `estimator.get_params().keys()`.
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-3-5e5bd8e0d8dd>", line 1, in <module>
grid_svm.fit(X_train, y_train)
File "C:\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 722, in fit
self._run_search(evaluate_candidates)
File "C:\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 1191, in _run_search
evaluate_candidates(ParameterGrid(self.param_grid))
File "C:\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 711, in evaluate_candidates
cv.split(X, y, groups)))
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 930, in __call__
self.retrieve()
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py", line 833, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py", line 521, in wrap_future_result
return future.result(timeout=timeout)
File "C:\Anaconda3\lib\concurrent\futures\_base.py", line 432, in result
return self.__get_result()
File "C:\Anaconda3\lib\concurrent\futures\_base.py", line 384, in __get_result
raise self._exception
ValueError: Invalid parameter C for estimator BaggingClassifier(base_estimator=SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
kernel='linear', max_iter=-1, probability=True, random_state=None,
shrinking=True, tol=0.001, verbose=False),
bootstrap=True, bootstrap_features=False, max_features=1.0,
max_samples=0.1, n_estimators=10, n_jobs=None, oob_score=False,
random_state=None, verbose=0, warm_start=False). Check the list of available parameters with `estimator.get_params().keys()`.
答案 0 :(得分:1)
`svc__C` 不起作用;尽管文档中没有说明,但您应该将 `param_grid` 的定义更改为:
param_grid={'base_estimator__C': [1,10]}
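注意:由于这里的 BaggingClassifier 位于 Pipeline 中名为 `svc` 的步骤内,传给 GridSearchCV 的完整键还需要带上步骤名前缀,即 `'svc__base_estimator__C'`(在 scikit-learn 1.2 及以后的版本中,`base_estimator` 参数已更名为 `estimator`)。
有关更多详细信息,请参见 “Tuning parameters of the classifier used by BaggingClassifier” 和 “Grid search on parameters inside the parameters of a BaggingClassifier”。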