我希望将使用的每个模型的结果附加到列表data
中,但是函数calc
仅将最后一个模型的结果附加到列表中。我确信这真的很简单,我在这里不见了!
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
classifiers =[LogisticRegression(solver='liblinear', penalty='l2', C=200),
LogisticRegression(penalty='l2', C=1),
DecisionTreeClassifier(),
BernoulliNB()]
class_names = ['Logistic Regression', 'Logistic Regression'
'Regularized','CART', 'Naive Bayes (Bernoulli)']
# import some data to play with
iris = datasets.load_iris()
Xdata = iris.data[:, :2] # we only take the first two features
ydata = iris.target
def calc (classifier_names, classifier_models, Xdata, ydata):
X_train, X_test, y_train, y_test = \
train_test_split(Xdata, ydata,test_size = 0.50, stratify=ydata,
random_state = 42)
X_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
X_test = X_scaler.transform(X_test)
data=[]
for name, clf in zip(classifier_names, classifier_models):
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
y_pred = clf.predict(X_test)
ROC_AUC = plot_ROC_AUC(clf, X_test, y_test)
Accuracy = metrics.accuracy_score(y_test, y_pred)
Brier_Score = metrics.brier_score_loss(y_test, y_pred)
data.append((ROC_AUC,
Accuracy,
Brier_Score))
cols = ['ROC_AUC', 'Accuracy', 'Brier_Score']
result = pd.DataFrame(data, columns = cols, index=classifier_names)
return result
output = calc(class_names, classifiers, Xdata, ydata)
output
ROC_AUC Accuracy Brier_Score
Logistic Regression 0.925517 0.855072 0.144928
Logistic Regression Regularized 0.925517 0.855072 0.144928
CART 0.925517 0.855072 0.144928
Naive Bayes (Bernoulli) 0.925517 0.855072 0.144928
#want this to change here
#function within the calc function
def plot_ROC_AUC(fit_model, X_test, y_test):
probs=fit_model.predict_proba(X_test)
preds = probs[:,1]
fpr, tpr, threshold = metrics.roc_curve(y_test, preds)
roc_auc = metrics.auc(fpr, tpr)
#plot ROC
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
return roc_auc
答案 0 :(得分:-1)
我不确定您要尝试的具体内容,但我在这里看到了一个问题
def calc (classifier_names, classifier_models, X, y):
X_train, X_test, y_train, y_test = \
train_test_split(Xdata, ydata,test_size = 0.50, stratify=ydata,
random_state = 42)
X_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
X_test = X_scaler.transform(X_test)
data=[]
for name, clf in zip(classifier_names, classifier_models):
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
y_pred = clf.predict(X_test)
ROC_AUC = plot_ROC_AUC(clf, X_test, y_test)
Accuracy = metrics.accuracy_score(y_test, y_pred)
Brier_Score = metrics.brier_score_loss(y_test, y_pred)
data.append((ROC_AUC,
Accuracy,
Brier_Score))
cols = ['ROC_AUC', 'Accuracy', 'Brier_Score']
result = pd.DataFrame(data, columns = cols, index=classifier_names)
return result
或简化:
def func(something, darkside):
for i in range(some_int):
return some_other_func(i)
此循环仅需执行一个步骤,因为return
语句将脱离该函数。
我认为您应该尝试做的是在某些DataFrame中汇总for循环的结果,然后返回汇总。在这一点上,我可以说这是一个缩进问题,但是看起来更高,我看到您也在每个循环上覆盖了result
,所以我将从这里开始
也许将循环移到函数之外?而是这样做:
def func(something, darkside):
return some_expression_of(something,darkside)
for name, clf, in zip(classifer_names, classifier_models:
func(name,clf)