我收到了这个神秘的错误,我不确定如何解决它:
KeyError Traceback(最近一次调用最后一次)
<ipython-input-15-135347c898ae> in <module>() 24 for i, samples in enumerate([samples_1, samples_10, samples_100]): 25 print("Freak") ---> 26 results[clf_name][i] = train_predict(clf, samples, X_train, y_train, X_test, y_test) 27 28 # Run metrics visualization for the three supervised learning models chosen
<ipython-input-13-79ba3acd16d9> in train_predict(learner, sample_size, X_train, y_train, X_test, y_test) 35 truths = 0 36 for i in range(0,len(predictions_train)): ---> 37 truths += predictions_train[i] == y_test[i] 38 results['acc_train'] = truths / len(predictions_train) 39
~/miniconda3/lib/python3.6/site-packages/pandas/core/series.py in getitem(self, key) 621 key = com._apply_if_callable(key, self) 622 try: --> 623 result = self.index.get_value(self, key) 624 625 if not is_scalar(result):
~/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key) 2558 try: 2559 return self._engine.get_value(s, k, -> 2560
这是我的代码:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
# TODO: Initialize the three models
clf_A = KNeighborsClassifier()
clf_B = DecisionTreeClassifier()
clf_C = LogisticRegression()
# TODO: Calculate the number of samples for 1%, 10%, and 100% of the training data
# HINT: samples_100 is the entire training set i.e. len(y_train)
# HINT: samples_10 is 10% of samples_100
# HINT: samples_1 is 1% of samples_100
samples_100 = len(X_train)
samples_10 = len(X_train) * .1
samples_1 = len(X_train) * .01
# Collect results on the learners
results = {}
for clf in [clf_A, clf_B, clf_C]:
clf_name = clf.__class__.__name__
results[clf_name] = {}
for i, samples in enumerate([samples_1, samples_10, samples_100]):
print("Freak")
results[clf_name][i] = train_predict(clf, samples, X_train, y_train, X_test, y_test)
# Run metrics visualization for the three supervised learning models chosen
vs.evaluate(results, accuracy, fscore)
编辑:啊哈!我忘了,train_predict是我项目的一部分。这是代码:
def train_predict(learner, sample_size, X_train, y_train, X_test, y_test):
'''
inputs:
- learner: the learning algorithm to be trained and predicted on
- sample_size: the size of samples (number) to be drawn from training set
- X_train: features training set
- y_train: income training set
- X_test: features testing set
- y_test: income testing set
'''
results = {}
# TODO: Fit the learner to the training data using slicing with 'sample_size' using .fit(training_features[:], training_labels[:])
start = time() # Get start time
learner.fit(X_train[:],y_train[:])
end = time() # Get end time
# TODO: Calculate the training time
results['train_time'] = end - start
# TODO: Get the predictions on the test set(X_test),
# then get predictions on the first 300 training samples(X_train) using .predict()
start = time() # Get start time
predictions_test = learner.predict(X_test)
predictions_train = learner.predict(X_train)
end = time() # Get end time
# TODO: Calculate the total prediction time
results['pred_time'] = end - start
# TODO: Compute accuracy on the first 300 training samples which is y_train[:300]
# Calculate how many true predictions we made.
truths = np.sum(np.equals(predictions_train[:300],y_train[:300]))
# TODO: Compute accuracy on test set using accuracy_score()
truths = np.sum(np.equals(predictions_test[:300],y_test[:300]))
results['acc_test'] = truths / len(predictions_test)
# TODO: Compute F-score on the the first 300 training samples using fbeta_score()
TP = 0
TN = 0
for i in range(0,len(predictions_train)):
if (predictions_train[i] == 1 & y_train[i] == 1):
TP += 1
elif (predictions_train[i] == y_train[i]):
TN += 1
results['f_train'] = (1 + beta**2) * (TN * TP) / (beta**2 * TN + TP)
# TODO: Compute F-score on the test set which is y_test
TP = 0
TN = 0
for i in range(0,len(predictions_test)):
if (predictions_test[i] == 1 & y_test[i] == 1):
TP += 1
elif (predictions_test[i] == y_test[i]):
TN += 1
results['f_test'] = (1 + beta**2) * (TN * TP) / (beta**2 * TN + TP)
# Success
print ("Learner trained on " + str(len(predictions_train)) + " samples")
# Return the results
return results
无论如何,我认为问题在于这部分代码:
# Calculate how many true predictions we made.
truths = np.sum(np.equals(predictions_train[:300],y_train[:300]))
# TODO: Compute accuracy on test set using accuracy_score()
truths = np.sum(np.equals(predictions_test[:300],y_test[:300]))
但看起来我正在做这本书,所以我真的不确定如何继续。