Question

我是机器学习（ML）领域的初学者，想自己用 Python 实现 k-means 算法。但是我的算法总是很快就收敛，我想知道这是否正常，还是我的代码有问题。下面是我的代码，任何帮助都将不胜感激。

    # using Euclidean distance to determine the similarity
    def getDistance(vector1, vector2):
        """Return the 2-norm (``ord=2``) of ``vector1 - vector2``.

        For 1-D NumPy vectors this is the Euclidean distance between them,
        which is the similarity measure used for clustering here.
        """
        difference = vector1 - vector2
        return np.linalg.norm(difference, ord=2)

    def k_means(k, samples=None, rounds=10):
        """Cluster samples into ``k`` groups with the standard k-means loop.

        Each round assigns every sample to its nearest center (Euclidean
        distance) and then moves each center to the mean of its samples.
        The loop ends after ``rounds`` iterations, or earlier once a round
        leaves every center unchanged (the algorithm has converged, so any
        further round would reproduce exactly the same result).

        Args:
            k: Number of clusters; must satisfy 1 <= k <= number of samples.
            samples: Optional 2-D array-like of shape (n_samples, n_features).
                When omitted, 50 uniformly random 16-dimensional points are
                generated.
            rounds: Maximum number of assign/update iterations (default 10).

        Returns:
            A ``(centers, bags)`` tuple. ``centers`` maps the labels
            'A', 'B', ... to the final center vectors. ``bags`` maps the same
            labels to the list of samples assigned to each center in the last
            round that ran.

        Raises:
            ValueError: If ``samples`` is not 2-D or ``k`` is out of range.
        """
        if samples is None:
            samples = np.random.random((50, 16))
        samples = np.asarray(samples, dtype=float)
        if samples.ndim != 2:
            raise ValueError(
                "samples must be a 2-D array of shape (n_samples, n_features)")
        if not 1 <= k <= len(samples):
            raise ValueError("k must be between 1 and the number of samples")

        labels = [chr(ord('A') + i) for i in range(k)]
        # Initialise the centers with k distinct samples picked at random.
        centers = dict(zip(labels, random.sample(list(samples), k)))
        bags = {label: [] for label in labels}

        for j in range(rounds):
            print("-------round {}----------".format(j+1))
            print("The center is: ")
            print(centers)

            # Distance from every sample to every center in one shot:
            # result has shape (n_samples, k).
            center_matrix = np.stack([centers[label] for label in labels])
            offsets = samples[:, np.newaxis, :] - center_matrix[np.newaxis, :, :]
            distances = np.linalg.norm(offsets, axis=2)
            # Index of the closest center per sample (first one wins a tie).
            nearest = np.argmin(distances, axis=1)

            bags = {label: [] for label in labels}
            for feat, idx in zip(samples, nearest):
                bags[labels[idx]].append(feat)

            # update the center point in next round
            converged = True
            for label in labels:
                if not bags[label]:
                    # An empty cluster keeps its previous center.
                    continue
                new_center = np.mean(bags[label], axis=0)
                if not np.array_equal(new_center, centers[label]):
                    converged = False
                centers[label] = new_center
            print("-----------------")
            if converged:
                break
        return centers, bags

k-means 算法总是会立即收敛吗？

0 个答案: