我传递了正确的参数,但我仍然收到以下错误:
Starting classification
Classification running ...
Traceback (most recent call last):
File "C:/Classify/classifier.py", line 95, in <module>
train_avg, test_avg, cms = train_model(X, y, "ceps", plot=True)
File "C:/Classify/classifier.py", line 25, in train_model
cv = ShuffleSplit(n=len(X), n_iterations=1, test_fraction=0.3, indices=True, random_state=0)
TypeError: __init__() got an unexpected keyword argument 'test_fraction'
Process finished with exit code 1
我的代码是:
def train_model(X, Y, name, plot=False):
    """Fit a logistic-regression classifier on shuffled splits and evaluate it.

    For each train/test split the classifier is fitted, its train and test
    error are recorded, a confusion matrix is computed, and a one-vs-rest
    ROC curve is computed for every label.  The classifier from the last
    split is saved to ``saved_model/model_ceps.pkl``.

    Args:
        X: Feature collection; indexed with the index arrays produced by
            ``ShuffleSplit`` (presumably a NumPy array -- confirm with caller).
        Y: Labels aligned with ``X``, indexed the same way.
        name: Short tag used as the prefix of the ROC plot descriptions.
        plot: When true, plot the ROC curve of the median-AUC split for
            every label via ``plot_roc_curves``.

    Returns:
        A tuple ``(mean train error, mean test error, confusion matrices)``
        where the confusion matrices are stacked with ``np.asarray``.
    """
    labels = np.unique(Y)

    # ShuffleSplit takes ``n_iter`` and ``test_size``; passing
    # ``test_fraction`` raises "TypeError: unexpected keyword argument".
    cv = ShuffleSplit(n=len(X), n_iter=1, test_size=0.3, indices=True, random_state=0)

    train_errors = []
    test_errors = []
    cms = []
    roc_scores = defaultdict(list)
    tprs = defaultdict(list)
    fprs = defaultdict(list)

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]

        clf = LogisticRegression()
        clf.fit(X_train, y_train)

        train_errors.append(1 - clf.score(X_train, y_train))
        test_errors.append(1 - clf.score(X_test, y_test))

        y_pred = clf.predict(X_test)
        cms.append(confusion_matrix(y_test, y_pred))

        # The class probabilities do not depend on the label being scored,
        # so compute them once per split rather than once per label.
        proba = clf.predict_proba(X_test)
        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            # NOTE(review): the label is used directly as a column index, so
            # this assumes labels are the integers 0..k-1 -- confirm.
            proba_label = proba[:, label]
            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)

    if plot:
        for label in labels:
            scores_to_sort = roc_scores[label]
            # Floor division keeps the index an int on Python 3 as well.
            median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]
            desc = "%s %s" % (name, genre_list[label])
            plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],
                            fprs[label][median], label='%s vs rest' % genre_list[label])

    # Save the trained model to disk (the classifier fitted on the last split).
    joblib.dump(clf, 'saved_model/model_ceps.pkl')

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
答案 0 :(得分:1)
参数 n_iterations 的正确名称是 n_iter;而回溯信息中报错的参数 test_fraction 的正确名称是 test_size。ShuffleSplit 的文档(documentation)中对此进行了解释。更改这两个参数的名称,错误将不复存在。
假设使用的是 scikit-learn 0.15,sklearn.cross_validation.ShuffleSplit
接受以下参数:
n
n_iter
test_size
train_size
indices
random_state
n_iterations