我正在尝试创建自己的分类器,以下代码从理论上讲应该达到〜90%+的准确性,但是,它只能达到〜30%。 我意识到我可以只使用kneighborsclassifier,但是,我正在尝试编写自己的。 请帮助我确定这里出了什么问题。
以前是从库中导入分类器的。 ...这次,我们将编写自己的内容。
from scipy.spatial import distance
# a=trainingPoint & b=testingPoint
def euc(a,b):
return distance.euclidean(a,b)
# Implement a class for our classifier:
class ScrappyKNN():
def fit(self, X_train, y_train):
# Store the training data in this class
self.X_train = X_train
self.y_train = y_train
def predict(self, X_test):
predicitons = []
for row in X_test:
# Method to find the closest point
label = self.closest(row)
predictions.append(label)
return predictions
# Loop over all training points and keep track of nearest ones, so far
def closest(self, row):
#calc dist from train point to nearest test point
best_dist = euc(row, self.X_train[0])
# Keep track of index of closest point
best_index = 0
# Iterate over all other training points and update when you find a closer one
for i in range(1, len(self.X_train)):
dist = euc(row, self.X_train[i])
if dist < best.dist:
best_dist = dist
# use index to return the closest point
best_index = i
return self.y_train[best_index]
# Import a dataset
from sklearn import datasets
iris = datasets.load_iris()
# Feature--> f(x)=y <--Label
X = iris.data
y = iris.target
# Import the data, then partition into 1. _train and 2. _test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .5)
# Comment out our classifier and write our own...
# from sklearn.neighbors import KNeighborsClassifier
# Change our pipeline to use our new classifier
# my_classifier = KNeighborsClassifier()
my_classifier = ScrappyKNN()
# Use training data on classifier
my_classifier.fit(X_train, y_train)
# Use predict method to classify test data
# predictions = my_classifier.predict(X_test)
# print(predictions)
# To find accuracy, compare predicted labels to true values
from sklearn.metrics import accuracy_score
# In this case, accuracy should be ~%90
print(accuracy_score(y_test, predictions))