我正在计算前 K 个检索结果的平均精度均值(mean average precision,mAP)。下面是我的代码,目前这一步计算的是 Recall@K(R@K)。
该代码从两个 CSV 文件中各读取一个列表,然后依次从其中一个列表中取出一个样本,计算它与另一个列表中所有样本的欧几里得距离,按距离排序,最后取前 K 个对象,检查该样本的标签是否出现在检索到的对象中。
import csv
from scipy.spatial import distance
from sklearn.utils import shuffle
from numpy import dot
from numpy.linalg import norm
from sklearn.preprocessing import StandardScaler
import numpy as np
from numpy import array
def parse_features_from_csv(csv_file):
    """Load feature vectors and their labels from a comma-delimited file.

    In every non-empty row, all columns except the last are converted to
    ``float`` and form the feature vector; the last column is kept as a
    string identifier. Empty rows (blank lines) are skipped.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A tuple ``(feat_lst, id_lst, row_lst)``. ``feat_lst`` holds one list
        of floats per row. ``id_lst`` and ``row_lst`` are two separate lists
        that both contain the last column of each row.

    Raises:
        ValueError: If a feature column cannot be converted to ``float``.
    """
    feat_lst = []
    id_lst = []
    row_lst = []
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open(csv_file, newline='') as fr:
        for row in csv.reader(fr, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; indexing row[-1]
                # on it would raise IndexError.
                continue
            identifier = row[-1]
            feat_lst.append([float(value) for value in row[:-1]])
            id_lst.append(identifier)
            row_lst.append(identifier)
    return feat_lst, id_lst, row_lst
def compute_distances(et_item, feat_lst, id_lst):
    """Pair every candidate's label with its Euclidean distance to a query.

    Args:
        et_item: Feature vector of the query sample.
        feat_lst: Sequence of candidate feature vectors.
        id_lst: Labels, indexed in parallel with ``feat_lst``.

    Returns:
        A list of ``(label, distance)`` tuples in the order of ``feat_lst``.
    """

    def _label_with_gap(position, candidate):
        # Distance is evaluated before the label lookup, as in the loop form.
        gap = distance.euclidean(candidate, et_item)
        return id_lst[position], gap

    return [
        _label_with_gap(position, candidate)
        for position, candidate in enumerate(feat_lst)
    ]
def main(feat_file="list_1.csv", test_file="list_2.csv", top_k=10):
    """Print how many queries find their own label among the top-k neighbours.

    Each sample of ``test_file`` is used as a query against every sample of
    ``feat_file``. Candidates are ranked by ascending Euclidean distance and
    the query counts as correct when its label equals the label of at least
    one of the ``top_k`` closest candidates.

    Args:
        feat_file: CSV file with the candidate (searched) samples.
        test_file: CSV file with the query samples.
        top_k: Number of nearest candidates inspected per query.

    Prints the two list sizes, then ``correct <hits>/<number of queries>``.
    """
    et_feat_lst, et_id_list, _ = parse_features_from_csv(test_file)
    feat_list, _, gallery_ids = parse_features_from_csv(feat_file)
    print(len(feat_list))
    print(len(et_feat_lst))

    correct = 0
    for gt, et_item in zip(et_id_list, et_feat_lst):
        distances = compute_distances(et_item, feat_list, gallery_ids)
        nearest = sorted(distances, key=lambda pair: pair[1])[:top_k]
        # A query scores at most once, however many of its neighbours match.
        if any(tar == gt for tar, _dist in nearest):
            correct += 1

    # The denominator is the number of queries. The last 0-based loop index
    # would be one too small, and undefined when there are no queries at all.
    print("correct", str(correct) + '/' + str(len(et_feat_lst)))
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()
有人可以告诉我如何使用 sklearn.metrics.average_precision_score 函数计算前 K 个检索到的对象的平均精度均值(mAP)吗?
我对它的两个参数 (y_true, y_scores) 感到困惑。如果有人可以解释这两个参数的含义,我将不胜感激。