我肯定是忽略了什么,但为什么这两个输出不同?
scikit-learn
# scikit-learn: cosine *similarity* between the rows of two 2-D inputs (here one row each).
from sklearn.metrics.pairwise import cosine_similarity
cosine_similarity([[3,5,1]],[[1,2,3]])
### output `array([[0.72280632]])`
scipy
# SciPy: cosine *distance* between two 1-D vectors, i.e. 1 - cosine similarity.
from scipy.spatial.distance import cosine
cosine([3,5,1],[1,2,3])
### output 0.27719367767579906
为什么这两个结果不一样?按我的计算,无论分母使用 L1 范数还是 L2 范数,都无法让两者得到相同的值。
答案 0 :(得分:2)
它们计算的不是同一个量:一个是余弦相似度,另一个是余弦距离(注意 0.72280632 + 0.27719368 = 1)。
/// A base class for shapes that carry a name and a side count.
class NamedShape {
    /// Number of sides; defaults to 0 until something assigns a real value.
    var numberOfSides: Int = 0
    /// The shape's name, supplied at initialization.
    var name: String

    /// Creates a shape with the given name.
    /// - Parameter name: The value stored in `name`.
    init(name: String) {
        self.name = name
    }

    /// Returns a sentence reporting the current `numberOfSides`.
    func simpleDescription() -> String {
        return "A shape with \(numberOfSides) sides."
    }
}
13
/// A four-sided `NamedShape` described by the length of one side.
class Square: NamedShape {
    /// Length of each side.
    var sideLength: Double

    /// Creates a square.
    /// - Parameters:
    ///   - sideLength: Length of each side.
    ///   - name: Name forwarded to `NamedShape.init(name:)`.
    init(sideLength: Double, name: String) {
        // The subclass's own stored property must be set before super.init.
        self.sideLength = sideLength
        super.init(name: name)
        // Inherited properties may only be changed after super.init returns.
        numberOfSides = 4
    }

    /// Returns `sideLength` multiplied by itself.
    func area() -> Double {
        return sideLength * sideLength
    }

    /// Returns a sentence reporting `sideLength`, replacing the superclass text.
    override func simpleDescription() -> String {
        return "A square with sides of length \(sideLength)."
    }
}
31
/// A three-sided `NamedShape` whose sides all share one length.
class EquilateralTriangle: NamedShape {
    /// Length of each side.
    var sideLength: Double = 0.0

    /// Creates an equilateral triangle.
    /// - Parameters:
    ///   - sideLength: Length of each side.
    ///   - name: Name forwarded to `NamedShape.init(name:)`.
    init(sideLength: Double, name: String) {
        // The subclass's own stored property must be set before super.init.
        self.sideLength = sideLength
        super.init(name: name)
        // Inherited properties may only be changed after super.init returns.
        numberOfSides = 3
    }

    /// The perimeter, derived from `sideLength`.
    ///
    /// Reading returns `3.0 * sideLength`; assigning stores a third of the
    /// new value back into `sideLength`.
    var perimeter: Double {
        get {
            return 3.0 * sideLength
        }
        set {
            sideLength = newValue / 3.0
        }
    }

    /// Returns a sentence reporting `sideLength`, replacing the superclass text.
    override func simpleDescription() -> String {
        // Interpolation is written `\(` with no space; `\ (` is an invalid escape.
        return "An equilateral triangle with sides of length \(sideLength)."
    }
}
54
/// Holds a triangle and a square and copies the side length across whenever
/// either shape is replaced.
class TriangleAndSquare {
    /// The triangle. Just before a new triangle is assigned, its side length
    /// is copied onto the current `square`.
    var triangle: EquilateralTriangle {
        willSet {
            // Mutates the existing square instance; `square` itself is not
            // reassigned, so its own observer does not fire.
            square.sideLength = newValue.sideLength
        }
    }

    /// The square. Just before a new square is assigned, its side length
    /// is copied onto the current `triangle`.
    var square: Square {
        willSet {
            triangle.sideLength = newValue.sideLength
        }
    }

    /// Creates both shapes with the same side length and name.
    /// - Parameters:
    ///   - size: Side length given to both shapes.
    ///   - name: Name given to both shapes.
    init(size: Double, name: String) {
        // Property observers are not invoked for assignments made inside the
        // type's own initializer, so these two lines run no willSet code.
        square = Square(sideLength: size, name: name)
        triangle = EquilateralTriangle(sideLength: size, name: name)
    }
}
`sklearn.metrics.pairwise.cosine_similarity` 的文档字符串说:
计算 X 和 Y 中样本之间的余弦相似度。余弦相似度(又称余弦核)将相似度计算为 X 和 Y 的归一化点积:
$\mathrm{cosine\_similarity}(X, Y) = \langle X, Y \rangle / (\|X\| \cdot \|Y\|)$
`scipy.spatial.distance.cosine` 的文档字符串说:
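X 和 Y 之间的余弦距离定义为
$\mathrm{cosine\_distance}(X, Y) = 1 - \langle X, Y \rangle / (\|X\| \cdot \|Y\|)$,
其中 $\langle X, Y \rangle$ 是 X 和 Y 的点积,$\|\cdot\|$ 是 L2 范数。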
(我对文档字符串进行了一些更改,以使用相同的变量名和数学约定,以便于进行比较。)
基本上,您有 `scipy.spatial.distance.cosine(X, Y) = 1 - sklearn.metrics.pairwise.cosine_similarity(X, Y)`。