Question

我一直在做家庭作业，以实现K-Means聚类算法从头开始。我正在聚集Iris Flower数据集。在程序开始时，将提示用户输入的数量他们想要的质心（簇）。然后程序创建一个范围选择从基于文件中的数据创建质心。

我遇到了'空切片'错误的问题，所以我决定尝试使用另一个while循环和条件语句来控制所有这些。基本上阻止我得到错误。它检查是否全部质心是好的，如果不是，则产生新的质心。但我发现在这个过程中会失去质心。发生了什么事？

Python代码：

import numpy as np
from pprint import pprint
import random
import sys

dataPointsFromFile = np.array(np.loadtxt('iris.txt', delimiter = ','))

NoOfCentroids = input('How Many Centrouds? ')

为质心创建范围

dataRange = ([])
dataRange.append(round(np.amin(dataPointsFromFile),1))
dataRange.append(round(np.amax(dataPointsFromFile),1))
dataRange = np.asarray(dataRange)

dataPoints = np.array(dataPointsFromFile)

centroids = 0

功能制作质心

def CentroidMaker(number):
    global centroids
    centroids = 0
    i=0
    randomCentroids = []
    templist = []
    while i<NoOfCentroids:
        for j in range(len(dataPointsFromFile[1,:])):
            cat = round(random.uniform(np.amin(dataPointsFromFile),np.amax(dataPointsFromFile)),1)
            templist.append(cat)
        randomCentroids.append(templist)
        templist = []
        i = i+1
    centroids = np.asarray(randomCentroids)
    return centroids

K-Means函数用于运行聚类算法 def kMeans（数据）：打印'在K意味着功能'

    ConvergenceCounter = 1
    keepGoing = True
    StillKeepGoing = True

#Check to make sure centroids were passed into the function
    print NoOfCentroids
    CentroidMaker(NoOfCentroids)
    print centroids

    StartingCentroids = np.copy(centroids)
    #print 'Starting Centroiuds:\n {}'.format(StartingCentroids)
    while keepGoing:


 #Where I think the problem resides  


        while StillKeepGoing:
            StartingCentroids = np.copy(centroids)
            #--------------Find The new means---------#
            t0 = StartingCentroids[None, :, :] - dataPoints[:, None, :]
            t1 = np.linalg.norm(t0, axis=-1)
            t2 = np.argmin(t1, axis=-1)
            #------Push the new means to a new array for comparison---------#
            CentroidMeans = []
            for x in range(len(StartingCentroids)):
                #if they are all true, get outta the loop!
                if np.all(t2==[x]):
                    CentroidMeans.append(np.mean(dataPoints[t2 == [x]], axis=0))
                    StillKeepGoing = False
                #If they are all not true, generate new ones!
            if np.any(t2!=[x]):
                CentroidMaker(NoOfCentroids)


        #--------Convert to a numpy array--------#
        NewMeans = np.asarray(CentroidMeans)
        #------Compare the New Means with the Starting Means------#
        if np.array_equal(NewMeans,StartingCentroids):
            print ('Convergence has been reached after {} moves'.format(ConvergenceCounter))
            print ('Starting Centroids:\n{}'.format(centroids))
            print ('Final Means:\n{}'.format(NewMeans))
            print ('Final Cluster assignments: {}'.format(t2))
            for x in xrange(len(StartingCentroids)):
                print ('Cluster {}:\n'.format(x)), dataPoints[t2 == [x]]
            for x in xrange(len(StartingCentroids)):
                print ('Size of Cluster {}:'.format(x)), len(dataPoints[t2 == [x]])
            keepGoing = False
        else:
            ConvergenceCounter  = ConvergenceCounter +1
            StartingCentroids =np.copy(NewMeans)

kMeans(dataPoints)

我的函数调用if语句搞砸了吗？

0 个答案: