我无法在sklearn.model_selection.learning_curve中自定义评分。我有一个SVR作为估计器,也有一个回归器,但是估计器应该是一个分类器,我需要实现如何将连续值转换为类。 我遵循了以下文档:https://scikit-learn.org/stable/modules/model_evaluation.html和https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.learning_curve.html#sklearn.model_selection.learning_curve
我正在使用scikit-learn-0.22和python 3.7。
这是我的代码:
def scorer(y_true, y_pred):
closest = [ y_true[i] if abs(y_true[i] - y_) <= 1.0 else y_true.flat[np.abs(y_true - y_).argmin()] for i, y_ in enumerate(y_pred)]
return accuracy_score(y_true, closest)
train_sizes, train_scores, test_scores, fit_times, _ = \
learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs,
train_sizes=train_sizes,
return_times=True, scoring=make_scorer(scorer))
我遇到了以下错误: AttributeError:“系列”对象没有属性“扁平”
<ipython-input-10-bc8ce2a8f15e> in plot_learning_curve(estimator, title, X, y, scoring, axes, ylim, cv, n_jobs, train_sizes)
90 learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs,
91 train_sizes=train_sizes,
---> 92 return_times=True, scoring=scoring)
93 train_scores_mean = np.mean(train_scores, axis=1)
94 train_scores_std = np.std(train_scores, axis=1)
~/miniconda3/envs/dtscience/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in learning_curve(estimator, X, y, groups, train_sizes, cv, scoring, exploit_incremental_learning, n_jobs, pre_dispatch, verbose, shuffle, random_state, error_score, return_times)
1265 parameters=None, fit_params=None, return_train_score=True,
1266 error_score=error_score, return_times=return_times)
-> 1267 for train, test in train_test_proportions)
1268 out = np.array(out)
1269 n_cv_folds = out.shape[0] // n_unique_ticks
~/miniconda3/envs/dtscience/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1015
1016 with self._backend.retrieval_context():
-> 1017 self.retrieve()
1018 # Make sure that we get a last message telling us we are done
1019 elapsed_time = time.time() - self._start_time
~/miniconda3/envs/dtscience/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self)
907 try:
908 if getattr(self._backend, 'supports_timeout', False):
--> 909 self._output.extend(job.get(timeout=self.timeout))
910 else:
911 self._output.extend(job.get())
~/miniconda3/envs/dtscience/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
560 AsyncResults.get from multiprocessing."""
561 try:
--> 562 return future.result(timeout=timeout)
563 except LokyTimeoutError:
564 raise TimeoutError()
~/miniconda3/envs/dtscience/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
~/miniconda3/envs/dtscience/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
AttributeError: 'Series' object has no attribute 'flat'
答案 0 :(得分:0)
所以我们在y
中看不到plot_learning_curve
的值,但是从y
熊猫提供了DataFrame
行的某个地方,或者它是一个独立的{ {1}}。在Series
函数的代码中,您具有以下内容:
scorer
因为此y_true.flat[np.abs(y_true - y_).argmin()]
必须是一个小人物y
。由于ndarray
是y
,因此您需要执行以下操作:
Series
或者确保在其他脚本中按以下方式调用绘图学习曲线:
learning_curve(estimator, X, y.values, cv=cv, n_jobs=n_jobs,
train_sizes=train_sizes,
return_times=True, scoring=make_scorer(scorer))