我是机器学习(ML)领域的初学者,想用 Python 自己实现 k-means 算法。但是我的算法总是趋于收敛,我想知道这是正常现象,还是我的代码有问题。下面是我的代码,非常感谢任何帮助。
# using Euclidean distance to determine the similarity
def getDistance(vector1, vector2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        vector1: First vector, as a NumPy array or any array-like
            (list, tuple) of numbers.
        vector2: Second vector, broadcast-compatible with ``vector1``.

    Returns:
        The L2 norm of ``vector1 - vector2``.
    """
    # Coerce first so plain lists/tuples work; ``list - list`` would raise.
    difference = np.asarray(vector1) - np.asarray(vector2)
    return np.linalg.norm(difference, ord=2)
def k_means(k, samples=None, rounds=10):
    """Cluster ``samples`` into ``k`` groups with a fixed number of rounds.

    Each round prints the current centers, assigns every sample to its
    nearest center (Euclidean distance), then moves each center to the
    mean of the samples assigned to it. A center that received no samples
    keeps its previous position.

    Args:
        k: Number of clusters. Clusters are labelled 'A', 'B', ... in order.
        samples: Array-like of shape (n_samples, n_features). When omitted,
            50 random 16-dimensional points drawn from ``np.random.random``
            are used.
        rounds: Number of assign/update rounds to run.

    Returns:
        A ``(centers, bags)`` tuple. ``centers`` maps each label to its
        final center; ``bags`` maps each label to the list of samples that
        were assigned to it in the last round.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` is negative
            or larger than the number of samples.
    """
    if samples is None:
        samples = np.random.random((50, 16))
    samples = np.asarray(samples, dtype=float)

    # Seed the centers with k distinct samples picked at random.
    labels = [chr(ord('A') + i) for i in range(k)]
    centers = dict(zip(labels, random.sample(list(samples), k)))

    bags = {label: [] for label in labels}
    for j in range(rounds):
        print("-------round {}----------".format(j+1))
        print("The center is: ")
        print(centers)

        # Assignment step: every bag exists up front, even if it stays empty.
        bags = {label: [] for label in labels}
        for feat in samples:
            nearest = None
            min_val = None
            for label, center in centers.items():
                distance = np.linalg.norm(center - feat, ord=2)
                # ``<=`` means that on equal distances the later center wins.
                if min_val is None or distance <= min_val:
                    nearest, min_val = label, distance
            if nearest is not None:
                bags[nearest].append(feat)

        # Update step: move each center to the mean of its members; an
        # empty bag leaves the center where it was.
        for label, members in bags.items():
            if members:
                centers[label] = np.mean(members, axis=0)
    print("-----------------")
    return centers, bags