Python KeyError:1.0

时间:2016-07-01 13:31:48

标签: python-2.7 numpy pandas

我正在尝试运行此代码

from math import sqrt
import numpy as np
import warnings
from collections import Counter
import pandas as pd
import random

def k_nearest_neighbors(data,predict, k =3):
 if len(data) >= k:
  warnings.warn('K is set to a value less than total voting groups')
 distances = []
 for group in data:
  for features in data[group]:
   eucliden_distance = np.linalg.norm(np.array(features)-np.array(predict))
   distances.append([eucliden_distance,group])
 votes = [i[1] for i in sorted(distances)[:k]]
 print(Counter(votes).most_common(1))
 vote_result = Counter(votes).most_common(1)[0][0]
 return vote_result

df = pd.read_csv('bc2.txt')
df.replace('?',-99999,inplace=True)
df.drop(['id'],1,inplace = True)
full_data = df.astype(float).values.tolist()

random.shuffle(full_data)
test_size = 0.2
train_set = {2:[],4:[]}
test_set = {2:[],4:[]}
train_data = full_data[:-int(test_size*len(full_data))]
test_data = full_data[-int(test_size*len(full_data)):]


for i in train_data:
 train_set[i[-1]].append(i[:-1])

for i in train_data:
 test_set[i[-1]].append(i[:-1])

correct = 0
total = 0

for group in test_set:
 for data in test_set[group]:
  vote = k_nearest_neighbors(train_set,data, k=5)
  if group == vote:
   correct += 1
  total += 1

print ('Accuracy:',correct/total)

它出现了这个错误消息

File "ml8.py", line 38, in <module>
    train_set[i[-1]].append(i[:-1])
KeyError: 1.0

文件m18.py就是上面的代码文件

下面是txt文件的样本

id,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class
1000025,2,5,1,1,1,2,1,3,1,1
1002945,2,5,4,4,5,7,10,3,2,1
1015425,2,3,1,1,1,2,2,3,1,1
1016277,2,6,8,8,1,3,4,3,7,1
1017023,2,4,1,1,3,2,1,3,1,1
1017122,4,8,10,10,8,7,10,9,7,1
1018099,2,1,1,1,1,2,10,3,1,1
1018561,2,2,1,2,1,2,1,3,1,1
1033078,2,2,1,1,1,2,1,1,1,5
1033078,2,4,2,1,1,2,1,2,1,1
1035283,2,1,1,1,1,1,1,3,1,1
1036172,2,2,1,1,1,2,1,2,1,1
1041801,4,5,3,3,3,2,3,4,4,1

我正在使用2.7.11版本

1 个答案:

答案 0 :(得分:2)

您的train_set仅包含密钥24,而该示例中的类别为15

而不是使用

train_set = {2:[],4:[]}

defaultdict可能会更好运:

from collections import defaultdict
train_set = defaultdict(list)

这样,不存在的密钥将在首次访问时初始化为新的空列表。