在下面的代码中,如果我将函数 similarityForCorpus(documentList,query) 中的
sim.append(similarity(doc,query))
更改为 sim.append([similarity(doc,query),doc])
或 sim.append((similarity(doc,query),doc))
,则最后一条语句 print s
打印的值会发生微小的变化。我不知道为什么会发生这种情况,该如何纠正?
#python code to calculate similarity of two equal sized vectors
#global variables and libraries
import numpy as np
sim=[]
def similarity(document, query):
    """Return the cosine similarity of two equal-length vectors.

    Args:
        document: Sequence of numbers representing the document vector.
        query: Sequence of numbers representing the query vector; it must
            have the same length as ``document`` for the dot product.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms.

    Note:
        If either vector has zero norm the denominator is zero, so the
        result is not a finite number.
    """
    vectorA = np.array(document)
    vectorB = np.array(query)
    # cos(theta) = (A . B) / (|A| * |B|)
    norms = np.linalg.norm(vectorA) * np.linalg.norm(vectorB)
    return np.dot(vectorA, vectorB) / norms
def similarityForCorpus(documentList, query):
    """Append the similarity of every document to ``query`` onto ``sim``.

    Args:
        documentList: Iterable of document vectors.
        query: Query vector that each document is compared against.

    Side effects:
        Extends the module-level list ``sim`` with one score per document,
        in the order the documents are given. Nothing is returned.
    """
    # ``sim`` is the module-level accumulator; results are added to it in place.
    global sim
    for doc in documentList:
        sim.append(similarity(doc, query))
similarityForCorpus([[3, 4, 5], [0, 8, 10]], [3, 4, 5])
# Rank similarities from highest to lowest.
sim.sort(reverse=True)
# Print each similarity on its own line. With a single argument the
# parenthesised form prints the same text as the Python 2 ``print s``
# statement and is also valid on Python 3.
for s in sim:
    print(s)
答案 0 :(得分:1)
两种情况下存储的数字相同。唯一的区别是正在打印的内容。考虑一下:
>>> a=1.234567890123456789
>>> print a
1.23456789012
>>> print [a]
[1.2345678901234567]
在第一种情况下,print 相当于调用 str(a)
,它不会显示完整的精度。在第二种情况下,列表中的元素是用 repr(a)
显示的。
答案 1 :(得分:0)
问题在于你是以两种不同的方式查看同一个值。两次 print
调用的行为不同:其中一个打印的是浮点数,另一个打印的是一个元组(tuple)。
值完全相同,只是显示出来的形式并不是你想要的。请查看 this 链接,它会解释发生了什么。
另外,对于您的代码,请查看:
#global variables and libraries
import numpy as np
def similarity(document, query):
    """Compute the cosine similarity between ``document`` and ``query``.

    Both arguments are sequences of numbers of the same length. The result
    is their dot product divided by the product of their Euclidean norms.
    If either vector has zero norm the denominator is zero, so the result
    is not a finite number.
    """
    doc_vec = np.array(document)
    query_vec = np.array(query)
    dot_product = np.dot(doc_vec, query_vec)
    return dot_product / (np.linalg.norm(doc_vec) * np.linalg.norm(query_vec))
def similarityForCorpus(documentList, query):
    """Record ``[similarity, document]`` for every document in the corpus.

    Args:
        documentList: Iterable of document vectors.
        query: Query vector that each document is compared against.

    Side effects:
        Appends one two-element list ``[score, doc]`` per document to the
        module-level list ``sim``. Nothing is returned.
    """
    # ``sim`` is the module-level accumulator; results are added to it in place.
    global sim
    for doc in documentList:
        # Keep the document next to its score so it can be shown after sorting.
        sim.append([similarity(doc, query), doc])
sim = []
similarityForCorpus([[3, 4, 5], [0, 8, 10]], [3, 4, 5])
# Rank similarities from highest to lowest. Entries are [score, doc] lists,
# which compare element by element, so the score is the primary sort key.
sim.sort(reverse=True)
# Print each score with 16 decimal places, followed by its document, so the
# full floating-point precision is visible.
for s in sim:
    print('%0.16f %s' % (s[0], s[1]))
def similarityForCorpus(documentList, query):
    """Record the similarity score of every document in the corpus.

    Args:
        documentList: Iterable of document vectors.
        query: Query vector that each document is compared against.

    Side effects:
        Appends one score per document to the module-level list ``sim``,
        in input order. Nothing is returned.
    """
    # ``sim`` is the module-level accumulator; results are added to it in place.
    global sim
    for doc in documentList:
        sim.append(similarity(doc, query))
sim = []
similarityForCorpus([[3, 4, 5], [0, 8, 10]], [3, 4, 5])
# Rank similarities from highest to lowest.
sim.sort(reverse=True)
# Print each score with 16 decimal places so the output does not depend on
# the default float-to-string conversion.
for s in sim:
    print('%0.16f' % (s))
输出将是:
0.9999999999999999 [3, 4, 5]
0.9055385138137416 [0, 8, 10]
0.9999999999999999
0.9055385138137416