我正在编写一个取自here的KNN分类器,用于加速度计和陀螺仪数据的字符识别。但是,以下功能无法正常工作且预测没有发生。下面的代码中是否有任何错误?请指导我。
trainingset->用20个样本训练数据(10 = A,10 = B)。
testset->现场阅读被认可。
#-- KNN Classifier Functions ----------
def loaddataset():
global trainingset
with open('imudata.csv','rb') as csvfile:
lines = csv.reader(csvfile)
dataset = list(lines)
for x in range(len(dataset)):
trainingset.append(dataset[x])
def euclideandistance(instance1,instance2,length):
distance = 0
for x in range(length-1):
instance1[x] = float(instance1[x])
instance2[x] = float(instance2[x])
for x in range(length-1):
distance += pow((instance1[x]-instance2[x]),2)
return math.sqrt(distance)
def getneighbours(trainingset,testinstance,k):
distances = []
length = len(testinstance)-1
for x in range(len(trainingset)):
dist = euclideandistance(testinstance, trainingset[x],length)
#print(trainingset[x][-1],dist)
distances.append((trainingset[x],dist))
#print(distances)
distances.sort(key=operator.itemgetter(1))
#print(distances)
neighbours = []
print('k='+repr(k)+'length of distances='+repr(len(distances)))
for x in range(k):
neighbours.append(distances[x][0])
return neighbours
def getresponse(neighbours):
classvotes = {}
for x in range(len(neighbours)):
response = neighbours[x][-1]
if response in classvotes:
classvotes[response] += 1
else:
classvotes[response] = 1
sortedvotes = sorted(classvotes.iteritems(), key=operator.itemgetter(1), reverse=True)
return sortedvotes[0][0]
def getaccuracy(testset, predictions):
correct = 0
for x in range(len(testset)):
if testset[x][-1] is predictions[x]:
correct +=1
return ((correct/float(len(testset))) * 100.0)
#------- END of KNN Classifier Functions -------------
我的主要比较功能是
def compare():
loaddataset()
testset.append(testdata)
print 'Train set: '+ repr(len(trainingset))
print 'Test set: '+ repr(len(testset))
predictions=[]
k = len(trainingset)
for x in range(len(testset)):
neighbours = getneighbours(trainingset,testset[x],k)
result = getresponse(neighbours)
predictions.append(result)
print('>Predicted=' +repr(result)+', actual=' + repr(testset[x][-1]))
accuracy = getaccuracy(testset, predictions)
print('Accuracy: '+repr(accuracy)+'%')
我的输出是
Train set: 20
Test set: 1
k=20 length of distance=20
>Predicted='A', actual='B'
Accuracy: 0.0%
我的示例数据包:
-1.1945864763443935e-16,1.0000000000000031,0.81335962823925234,1.2678119727931405,4.6396523259663871,3,1.0000000000000013,108240.99999999988,328.99999999999966,4.3008487686466931e-16,1.000000000000002,0.73006871826334618,0.88693535629714804,4.3903300136708818,15,1.0000000000000011,108240.99999999977,328.99999999999932,1.990977460573989e-16,1.0000000000000009,0.8120281400849243,1.3556881217171162,4.2839744646260876,9,1.0000000000000004,108240.99999999994,328.99999999999983,-3.4217816017322454e-16,1.0000000000000009,0.7842111273340705,1.0882622268942712,4.4762484049613418,4,1.0000000000000004,108241.00000000038,329.00000000000114,2.6996304550155782e-18,1.000000000000004,0.76504908035654873,1.1890598964371606,4.2138613873737967,7,1.000000000000002,108241.0000000001,329.00000000000028,7.154020705791282e-17,1.0,0.83945423805187047,1.4309844267934049,3.7008217934312198,6,1.0,108240.99999999983,328.99999999999949,-0.66014932688009009,0.48967404184734276,0.083592048161537938,A
我来自硬件并且对KNN知之甚少,为什么我要求在我的代码中进行更正(如果有的话)。我添加了我的数据集here。
答案 0 :(得分:0)
我可以从您的数据中看出,样本数量非常少。特征可能会影响预测的准确性,并且样本数量需要非常高。你不能指望正确地预测一切,算法有自己的准确性。尝试使用任何其他众所周知的数据集(如iris)来检查此代码的正确性。或尝试使用scikit learn python中的内置knn分类器。