I'm trying to get an F1 score working for a multi-class classification problem, but I keep running into:

ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets
Processing the target variable:

print(train_ohe.shape, dummy_y_train.shape)
# (43266, 189) (43266, 5)

These are the shapes of my X_train (train_ohe) and y_train (dummy_y_train) data.
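For context, a minimal sketch of how a one-hot target like dummy_y_train is typically produced; this is an assumption, since my preprocessing code isn't shown here, and y_train_raw is a hypothetical name for the original label column:

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# hypothetical: encode the raw class labels to integers 0..4,
# then expand them into a one-hot indicator matrix
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(y_train_raw)  # y_train_raw: raw labels (assumed)
dummy_y_train = to_categorical(encoded_y)       # one-hot matrix of shape (43266, 5)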
from keras import backend as K

def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
This is my custom F1 score metric.
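Note that this metric is only a batch-wise, micro-averaged approximation of F1. As a sanity check, the same computation can be reproduced with plain NumPy on a toy batch (a sketch, assuming one-hot targets and probability outputs thresholded at 0.5):

import numpy as np

def f1_numpy(y_true, y_pred, eps=1e-7):
    # mirrors the Keras f1 above: micro-averaged over the whole batch
    y_pred = np.clip(np.round(y_pred), 0, 1)
    tp = np.sum(y_true * y_pred)
    precision = tp / (np.sum(y_pred) + eps)
    recall = tp / (np.sum(y_true) + eps)
    return 2 * precision * recall / (precision + recall + eps)

y_true = np.array([[1, 0, 0], [0, 1, 0]])
y_pred = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
print(f1_numpy(y_true, y_pred))  # 1.0 for this toy batch

This should agree with sklearn's f1_score(..., average='micro') on the same binarized arrays.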
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold, cross_val_score

# baseline model
def baseline_model():
    model = Sequential()
    model.add(Dense(8, input_dim=189, activation='relu'))
    model.add(Dense(5, activation='softmax'))
    # compile
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=[f1])
    return model

estimators = []
estimators.append(('mlp', KerasClassifier(build_fn=baseline_model, epochs=1, batch_size=16, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, shuffle=True, random_state=876)
results = cross_val_score(pipeline, train_ohe, dummy_y_train, cv=kfold, scoring='f1')
This is the model architecture and the cross-validation setup.
Epoch 1/1
38939/38939 [==============================] - 3s 76us/step - loss: 15.9096 - f1: 0.0128
4327/4327 [==============================] - 1s 150us/step
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-79-ef9e0f41eb14> in <module>
15 kfold = KFold(n_splits=10, shuffle=True, random_state=876)
16 # results = cross_val_score(pipeline, train_ohe, dummy_y_train, cv=kfold, scoring='f1')
---> 17 results = cross_val_score(pipeline, train_ohe, dummy_y_train, cv=kfold, scoring='precision')
18
19
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
400 fit_params=fit_params,
401 pre_dispatch=pre_dispatch,
--> 402 error_score=error_score)
403 return cv_results['test_score']
404
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
238 return_times=True, return_estimator=return_estimator,
239 error_score=error_score)
--> 240 for train, test in cv.split(X, y, groups))
241
242 zipped_scores = list(zip(*scores))
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
566 fit_time = time.time() - start_time
567 # _score will return dict if is_multimetric is True
--> 568 test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
569 score_time = time.time() - start_time - fit_time
570 if return_train_score:
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _score(estimator, X_test, y_test, scorer, is_multimetric)
603 """
604 if is_multimetric:
--> 605 return _multimetric_score(estimator, X_test, y_test, scorer)
606 else:
607 if y_test is None:
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _multimetric_score(estimator, X_test, y_test, scorers)
633 score = scorer(estimator, X_test)
634 else:
--> 635 score = scorer(estimator, X_test, y_test)
636
637 if hasattr(score, 'item'):
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/scorer.py in __call__(self, estimator, X, y_true, sample_weight)
96 else:
97 return self._sign * self._score_func(y_true, y_pred,
---> 98 **self._kwargs)
99
100
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/classification.py in precision_score(y_true, y_pred, labels, pos_label, average, sample_weight)
1267 average=average,
1268 warn_for=('precision',),
-> 1269 sample_weight=sample_weight)
1270 return p
1271
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight)
1029 raise ValueError("beta should be >0 in the F-beta score")
1030
-> 1031 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
1032 check_consistent_length(y_true, y_pred, sample_weight)
1033 present_labels = unique_labels(y_true, y_pred)
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/classification.py in _check_targets(y_true, y_pred)
79 if len(y_type) > 1:
80 raise ValueError("Classification metrics can't handle a mix of {0} "
---> 81 "and {1} targets".format(type_true, type_pred))
82
83 # We can't have more than one value on y_type => The set is no more needed
ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets
This is the error I hit when using the F1 metric (the traceback above shows the same failure with scoring='precision', which I also tried).
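The mix of target types is easy to reproduce outside Keras: sklearn sees the one-hot dummy_y_train as 'multilabel-indicator', while KerasClassifier.predict returns plain class labels. A minimal sketch with made-up toy data that triggers the same failure in _check_targets:

import numpy as np
from sklearn.metrics import f1_score

y_true = np.eye(5)[[0, 1, 0]]  # one-hot rows -> type 'multilabel-indicator'
y_pred = np.array([0, 1, 1])   # label predictions -> type 'binary'
f1_score(y_true, y_pred)
# ValueError: Classification metrics can't handle a mix of
# multilabel-indicator and binary targets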
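One workaround I'm considering (a sketch, not verified): give cross_val_score a custom scorer that collapses the one-hot targets back to class indices before scoring, here with macro-averaged F1. f1_from_onehot is a hypothetical helper name:

import numpy as np
from sklearn.metrics import f1_score, make_scorer

def f1_from_onehot(y_true, y_pred):
    # collapse one-hot targets to class indices so they match the
    # label predictions returned by KerasClassifier.predict
    if y_true.ndim > 1:
        y_true = np.argmax(y_true, axis=1)
    return f1_score(y_true, y_pred, average='macro')

results = cross_val_score(pipeline, train_ohe, dummy_y_train,
                          cv=kfold, scoring=make_scorer(f1_from_onehot))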