Python for循环仅将最后一个结果附加到列表/DataFrame(以鸢尾花数据集为例)

时间:2019-11-28 11:00:25

标签: python for-loop scikit-learn append

我希望把所用每个模型的结果都追加到列表data中,但是函数calc只把最后一个模型的结果追加到了列表中。我确信问题其实很简单,只是我没有看出来!

    from sklearn import datasets
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.model_selection import train_test_split

    import sklearn.metrics as metrics
    import matplotlib.pyplot as plt

    # Candidate models; passed to calc() together with class_names.
    classifiers = [
        LogisticRegression(solver='liblinear', penalty='l2', C=200),
        LogisticRegression(penalty='l2', C=1),
        DecisionTreeClassifier(),
        BernoulliNB(),
    ]

    # Display names, one per entry in `classifiers` (same order). The second
    # name is written as a single literal: two adjacent string literals are
    # concatenated with no separator, which would drop the space before
    # "Regularized".
    class_names = [
        'Logistic Regression',
        'Logistic Regression Regularized',
        'CART',
        'Naive Bayes (Bernoulli)',
    ]

    # import some data to play with
    iris = datasets.load_iris()
    Xdata = iris.data[:, :2]  # we only take the first two features
    ydata = iris.target

    def calc (classifier_names, classifier_models, Xdata, ydata):
        """Score classifiers on a scaled 50/50 train/test split.

        Args:
            classifier_names: labels, paired positionally with
                ``classifier_models`` via ``zip`` and also used as the index
                of the returned DataFrame.
            classifier_models: estimator objects exposing ``fit``, ``score``
                and ``predict``.
            Xdata: feature matrix handed to ``train_test_split``.
            ydata: labels handed to ``train_test_split`` (also used for
                stratification).

        Returns:
            A DataFrame with columns ``ROC_AUC``, ``Accuracy`` and
            ``Brier_Score``.

        NOTE(review): the ``return`` statement is indented inside the ``for``
        loop, so the function returns during the first iteration and the
        remaining classifiers are never fitted. This is the behaviour the
        question is about.
        NOTE(review): ``StandardScaler`` and ``pd`` are used here but are not
        imported in the snippet shown.
        """

        # Stratified split, half of the rows held out; fixed seed for repeatability.
        X_train, X_test, y_train, y_test = \
    train_test_split(Xdata, ydata,test_size = 0.50, stratify=ydata,
                  random_state = 42)

        # Fit the scaler on the training rows only, then apply it to both splits.
        X_scaler = StandardScaler()
        X_train = X_scaler.fit_transform(X_train)
        X_test = X_scaler.transform(X_test)

        # One (ROC_AUC, Accuracy, Brier_Score) tuple is appended per classifier.
        data=[]
        for name, clf in zip(classifier_names, classifier_models):

                clf.fit(X_train, y_train)
                # `score` is assigned but never read afterwards.
                score = clf.score(X_test, y_test)
                y_pred = clf.predict(X_test)
                # plot_ROC_AUC also draws the ROC curve as a side effect.
                ROC_AUC =  plot_ROC_AUC(clf, X_test, y_test)
                Accuracy = metrics.accuracy_score(y_test, y_pred)
                Brier_Score = metrics.brier_score_loss(y_test, y_pred)
                data.append((ROC_AUC, 
                         Accuracy, 
                         Brier_Score))
                # The DataFrame is rebuilt from `data` on every pass through the loop.
                cols = ['ROC_AUC', 'Accuracy', 'Brier_Score']
                result = pd.DataFrame(data, columns = cols, index=classifier_names)

                # Still inside the loop body: this exits calc on the first iteration.
                return result

    # Run calc on the classifier list and the two iris features; `output` holds
    # the DataFrame it returns.
    output = calc(class_names, classifiers, Xdata, ydata)  

output
                                 ROC_AUC  Accuracy  Brier_Score
Logistic Regression              0.925517  0.855072     0.144928
Logistic Regression Regularized  0.925517  0.855072     0.144928
CART                             0.925517  0.855072     0.144928
Naive Bayes (Bernoulli)          0.925517  0.855072     0.144928    
#want this to change here


#function within the calc function

    def plot_ROC_AUC(fit_model, X_test, y_test):
        """Draw the ROC curve of a fitted model and return the area under it.

        The scores are taken from column 1 of ``fit_model.predict_proba``.
        The curve is shown with matplotlib next to a dashed diagonal
        reference line.

        Args:
            fit_model: an already-fitted estimator exposing ``predict_proba``.
            X_test: features to score.
            y_test: true labels for ``X_test``.

        Returns:
            The AUC computed by ``metrics.auc`` from the ROC curve points.
        """
        # Column 1 of the probability matrix is used as the ranking score.
        scores = fit_model.predict_proba(X_test)[:, 1]
        false_pos_rate, true_pos_rate, _thresholds = metrics.roc_curve(y_test, scores)
        area = metrics.auc(false_pos_rate, true_pos_rate)

        # Plot the curve, then the diagonal reference line, then the axis setup.
        legend_text = 'AUC = %0.2f' % area
        plt.title('Receiver Operating Characteristic')
        plt.plot(false_pos_rate, true_pos_rate, 'b', label=legend_text)
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.show()

        return area

1 个答案:

答案 0 :(得分:-1)

我不确定您要尝试的具体内容,但我在这里看到了一个问题

 def calc (classifier_names, classifier_models, X, y):
        # Quoted from the question to show where the problem is.
        # NOTE(review): the parameters X and y are never used; the body reads
        # the names Xdata / ydata instead.

        X_train, X_test, y_train, y_test = \
    train_test_split(Xdata, ydata,test_size = 0.50, stratify=ydata,
                  random_state = 42)

        X_scaler = StandardScaler()
        X_train = X_scaler.fit_transform(X_train)
        X_test = X_scaler.transform(X_test)

        data=[]
        for name, clf in zip(classifier_names, classifier_models):

                clf.fit(X_train, y_train)
                score = clf.score(X_test, y_test)
                y_pred = clf.predict(X_test)
                ROC_AUC =  plot_ROC_AUC(clf, X_test, y_test)
                Accuracy = metrics.accuracy_score(y_test, y_pred)
                Brier_Score = metrics.brier_score_loss(y_test, y_pred)
                data.append((ROC_AUC, 
                         Accuracy, 
                         Brier_Score))
                # `result` is reassigned on every pass through the loop.
                cols = ['ROC_AUC', 'Accuracy', 'Brier_Score']
                result = pd.DataFrame(data, columns = cols, index=classifier_names)

                # The problem: this return is inside the for loop, so the
                # function exits during the first iteration.
                return result

或简化:

 def func(something, darkside):
     # Reduced form of the same pattern: the first iteration reaches `return`,
     # which exits func immediately, so later values of i are never visited.
     for i in range(some_int):
         return some_other_func(i)

这个循环只会执行一次迭代,因为return语句会直接退出整个函数。

我认为您应该做的是:把for循环每次迭代的结果汇总到某个DataFrame中,循环结束后再返回汇总结果。乍一看,我会说这只是一个缩进问题;但再往上看,我发现您还在每次循环中都覆盖了result,所以我会先从这里改起。

也许可以把循环移到函数之外?改成下面这样:

def func(something, darkside):
    # Handle exactly one (name, classifier) pair and hand its result back.
    return some_expression_of(something,darkside)

# Loop outside the function and keep every result instead of discarding it,
# so that all of them can be aggregated once the loop has finished.
results = []
for name, clf in zip(classifier_names, classifier_models):
    results.append(func(name, clf))