具有交叉验证的神经网络模型的多个指标

时间:2019-07-27 10:47:39

标签: python neural-network cross-validation metrics

我正在尝试通过交叉验证获得LSTM模型的F1、精确率(precision)和召回率(recall)。

我知道如何显示准确率(accuracy),但是当我尝试使用cross_validate显示其他指标时,会遇到许多不同的错误。

我的代码如下:

def nn_model():
    """Build and compile the LSTM classifier that is cross-validated below.

    The stack is an embedding layer, one LSTM layer with dropout, and a
    two-unit sigmoid output trained with binary cross-entropy.

    Returns:
        The compiled Keras ``Sequential`` model, reporting accuracy.
    """
    layers = [
        Embedding(20000, 100, input_length=49),
        LSTM(100, dropout=0.2, recurrent_dropout=0.2),
        Dense(2, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Wrap the Keras model builder so scikit-learn can use it as an estimator.
classifier = KerasClassifier(build_fn=nn_model, batch_size=10,nb_epoch=10)

# Scorers evaluated on every fold; the dict keys name the metrics.
scoring = {'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

# `X_train`, `y_train` and `skf` are defined outside this snippet.
# The traceback quoted below points at this call.
results = cross_validate(classifier, X_train, y_train, cv=skf, scoring = scoring)

# NOTE(review): these lookups index `results` with the `f1_score` function
# object and with the bare names `precision` / `recall`, which this snippet
# never defines. The corrected code further down uses string keys such as
# 'test_f1_score' instead.
print("F1 score SVM: %0.2f (+/- %0.2f)" % (np.mean(results[f1_score]), np.std(results[f1_score])))

print("precision score SVM: %0.2f (+/- %0.2f)" % (np.mean(results[precision]), np.std(results[precision])))
print("recall macro SVM: %0.2f (+/- %0.2f)" % (np.mean(results[recall]), np.std(results[recall])))

我得到的错误如下:

  

Epoch 1/1
1086/1086 [=============================] - 18s 17ms/step - loss: 0.6014 - acc: 0.7035
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
      6            'f1_score' : make_scorer(f1_score)}
      7 
----> 8 results = cross_validate(classifier, X_train, y_train, cv=skf, scoring = scoring)
      9 
     10 print("F1 score SVM: %0.2f (+/- %0.2f)" % (np.mean(results[f1_score]), np.std(results[f1_score])))

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/model_selection/_validation.py在cross_validate中(估算器,X,y,组,得分,简历,n_jobs,   详细,fit_params,pre_dispatch,return_train_score,   return_estimator,error_score)       229 return_times = True,return_estimator = return_estimator,       230 error_score = error_score)   -> 231用于火车,在cv.split(X,y,groups)中测试       232       233 zipped_scores = list(zip(* scores))

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/parallel.py   在通话中(自身,可迭代)       919#个剩余工作。       920 self._iterating = False   -> 921,如果self.dispatch_one_batch(迭代器):       922 self._iterating = self._original_iterator不是None       923

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/parallel.py   在dispatch_one_batch中(自己,迭代器)       757返回False       第758章   -> 759 self._dispatch(任务)       760返回真       761

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/parallel.py   在_dispatch中(自己,批量)       714具有self._lock:       第715章   -> 716作业= self._backend.apply_async(batch,callback = cb)       717#一项工作完成得比其回调要快       718#在我们到达这里之前被呼叫,导致self._jobs发生

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/_parallel_backends.py   在apply_async(self,func,callback)中       180 def apply_async(self,func,callback = None):       181“”“计划要运行的功能”“”   -> 182结果= InstantResult(func)       183,如果回调:       184回调(结果)

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/_parallel_backends.py   在初始化中(自己,批量)       547#不要延迟应用程序,以避免保持输入       第548章   -> 549 self.results = batch()       550       551 def get(self):

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/parallel.py   在通话中(自己)       223 with parallel_backend(self._backend,n_jobs = self._n_jobs):       224 return [func(* args,** kwargs)   -> self.items中的func,args,kwarg 225]       226       227 def len (自己):

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/joblib/parallel.py   在(.0)中       223 with parallel_backend(self._backend,n_jobs = self._n_jobs):       224 return [func(* args,** kwargs)   -> self.items中的func,args,kwarg 225]       226       227 def len (自己):

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator,X,y,scorer,train,test,verbose,   参数,fit_params,return_train_score,return_parameters,   return_n_test_samples,return_times,return_estimator,error_score)       552 = time.time()-start_time       553#_score如果is_multimetric为True将返回dict   -> 554个test_scores = _score(estimator,X_test,y_test,scorer,is_multimetric)       555 score_time = time.time()-start_time-fit_time       556,如果return_train_score:

     _score中的

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/model_selection/_validation.py(估算器,X_test,y_test,计分器,is_multimetric)       595“”“       第596章   -> 597 return _multimetric_score(estimator,X_test,y_test,scorer)       第598章       599,如果y_test为None:

     _multimetric_score中的

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/model_selection/_validation.py(估算器,X_test,y_test,计分器)       625得分=得分手(estimator,X_test)       626其他:   -> 627得分=得分手(估算器,X_test,y_test)       628       629如果hasattr(score,'item'):

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/scorer.py   在通话中(自身,估算器,X,y_true,sample_weight)        其他95条:        96 return self._sign * self._score_func(y_true,y_pred,   ---> 97 ** self._kwargs)        98        99

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py   在precision_score(y_true,y_pred,标签,pos_label,平均值,   sample_weight)1567
  平均=平均1568
  warn_for =('precision',),   -> 1569 sample_weight = sample_weight)1570返回p 1571

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py   在precision_recall_fscore_support(y_true,y_pred,beta,标签,   pos_label,平均值,warn_for,sample_weight)提高1413   ValueError(“ beta在F-beta分数中应> 0”)1414标签   = _check_set_wise_labels(y_true,y_pred,平均值,标签,   -> 1415 pos_label)1416 1417#计算tp_sum,pred_sum,true_sum ###

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py   在_check_set_wise_labels中(y_true,y_pred,平均值,标签,pos_label)   第1237章1238   -> 1239 y_type,y_true,y_pred = _check_targets(y_true,y_pred)1240 present_labels = unique_labels(y_true,y_pred)1241如果   平均值=='二进制':

     

/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py in _check_targets(y_true, y_pred)
     79     if len(y_type) > 1:
     80         raise ValueError("Classification metrics can't handle a mix of {0} "
---> 81                          "and {1} targets".format(type_true, type_pred))
     82 
     83     # We can't have more than one value on y_type => The set is no more needed

     

ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets

我做错了什么?

1 个答案:

答案 0 :(得分:1)

您的代码中存在的问题

  1. 您不能使用独热(one-hot)编码的标签(link)。请使用原始标签。对于原始标签,可以使用sparse_categorical_crossentropy损失。
  2. cross_validate以test_<指标名>为键返回分数(例如test_f1_score)。若需要训练集上的分数,请设置return_train_score=True。

更正的代码

def nn_model():
    """Build and compile a small LSTM classifier for raw integer labels.

    The stack is an embedding layer, one LSTM layer with dropout, and a
    two-unit output layer. The model is compiled with
    ``sparse_categorical_crossentropy`` so the targets can stay as plain
    class indices instead of one-hot vectors.

    Returns:
        The compiled Keras ``Sequential`` model, reporting accuracy.
    """
    model_lstm1 = Sequential()
    model_lstm1.add(Embedding(200, 100, input_length=10))
    model_lstm1.add(LSTM(10, dropout=0.2, recurrent_dropout=0.2))
    # softmax (not sigmoid) so the two outputs form a probability
    # distribution over the classes, which is what the sparse categorical
    # cross-entropy loss expects.
    model_lstm1.add(Dense(2, activation='softmax'))
    model_lstm1.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model_lstm1

# Wrap the Keras model builder so scikit-learn can use it as an estimator.
# NOTE(review): `nb_epoch` looks like the legacy spelling of `epochs`; the
# sample output logs "Epoch 1/1", so it appears to be ignored here. Confirm,
# and switch to `epochs=10` if ten epochs are intended.
classifier = KerasClassifier(build_fn=nn_model, batch_size=10,nb_epoch=10)

# Scorers evaluated on every fold; cross_validate reports each one under
# the key 'test_<name>' (and 'train_<name>' when train scores are requested).
scoring = {'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

# Random demo data: 1000 rows of 10 integers in [0, 100) as inputs and raw
# integer labels in {0, 1} as targets (no one-hot encoding).
# Uses np.random.randint directly; the earlier `np.random.np.random` spelling
# is not a public NumPy path.
results = cross_validate(classifier, np.random.randint(0,100,(1000,10)), 
                         np.random.randint(0,2,1000), scoring = scoring, cv=3, return_train_score=True)

print("F1 score SVM: %0.2f (+/- %0.2f)" % (np.mean(results['test_f1_score']), np.std(results['test_f1_score'])))
print("precision score SVM: %0.2f (+/- %0.2f)" % (np.mean(results['test_precision']), np.std(results['test_precision'])))
print("recall macro SVM: %0.2f (+/- %0.2f)" % (np.mean(results['test_recall']), np.std(results['test_recall'])))

输出

Epoch 1/1
666/666 [==============================] - 5s 7ms/step - loss: 0.6932 - acc: 0.5075
Epoch 1/1
667/667 [==============================] - 5s 7ms/step - loss: 0.6929 - acc: 0.5127
Epoch 1/1
667/667 [==============================] - 5s 7ms/step - loss: 0.6934 - acc: 0.5007
F1 score SVM: 0.10 (+/- 0.09)
precision score SVM: 0.43 (+/- 0.07)
recall macro SVM: 0.06 (+/- 0.06)

您可能会得到

  

UndefinedMetricWarning:....

该警告出现在最初的几个epoch中(数据较少时),您可以忽略。这是因为分类器把所有数据都归入了同一个类别,而没有任何数据被归入另一个类别。