我一直在做家庭作业,以实现K-Means聚类 算法从头开始。我正在聚集Iris Flower数据集。 在程序开始时,将提示用户输入的数量 他们想要的质心(簇)。然后程序创建一个范围 选择从基于文件中的数据创建质心。
我遇到了'空切片'错误的问题,所以我决定 尝试使用另一个while循环和条件语句来控制所有这些。 基本上阻止我得到错误。它检查是否全部 质心是好的,如果不是,则产生新的质心。但我发现在这个过程中会失去质心。发生了什么事?
Python代码:
import numpy as np
from pprint import pprint
import random
import sys
dataPointsFromFile = np.array(np.loadtxt('iris.txt', delimiter = ','))
NoOfCentroids = input('How Many Centrouds? ')
为质心创建范围
dataRange = ([])
dataRange.append(round(np.amin(dataPointsFromFile),1))
dataRange.append(round(np.amax(dataPointsFromFile),1))
dataRange = np.asarray(dataRange)
dataPoints = np.array(dataPointsFromFile)
centroids = 0
功能制作质心
def CentroidMaker(number):
global centroids
centroids = 0
i=0
randomCentroids = []
templist = []
while i<NoOfCentroids:
for j in range(len(dataPointsFromFile[1,:])):
cat = round(random.uniform(np.amin(dataPointsFromFile),np.amax(dataPointsFromFile)),1)
templist.append(cat)
randomCentroids.append(templist)
templist = []
i = i+1
centroids = np.asarray(randomCentroids)
return centroids
K-Means函数用于运行聚类算法 def kMeans(数据): 打印'在K意味着功能'
ConvergenceCounter = 1
keepGoing = True
StillKeepGoing = True
#Check to make sure centroids were passed into the function
print NoOfCentroids
CentroidMaker(NoOfCentroids)
print centroids
StartingCentroids = np.copy(centroids)
#print 'Starting Centroiuds:\n {}'.format(StartingCentroids)
while keepGoing:
#Where I think the problem resides
while StillKeepGoing:
StartingCentroids = np.copy(centroids)
#--------------Find The new means---------#
t0 = StartingCentroids[None, :, :] - dataPoints[:, None, :]
t1 = np.linalg.norm(t0, axis=-1)
t2 = np.argmin(t1, axis=-1)
#------Push the new means to a new array for comparison---------#
CentroidMeans = []
for x in range(len(StartingCentroids)):
#if they are all true, get outta the loop!
if np.all(t2==[x]):
CentroidMeans.append(np.mean(dataPoints[t2 == [x]], axis=0))
StillKeepGoing = False
#If they are all not true, generate new ones!
if np.any(t2!=[x]):
CentroidMaker(NoOfCentroids)
#--------Convert to a numpy array--------#
NewMeans = np.asarray(CentroidMeans)
#------Compare the New Means with the Starting Means------#
if np.array_equal(NewMeans,StartingCentroids):
print ('Convergence has been reached after {} moves'.format(ConvergenceCounter))
print ('Starting Centroids:\n{}'.format(centroids))
print ('Final Means:\n{}'.format(NewMeans))
print ('Final Cluster assignments: {}'.format(t2))
for x in xrange(len(StartingCentroids)):
print ('Cluster {}:\n'.format(x)), dataPoints[t2 == [x]]
for x in xrange(len(StartingCentroids)):
print ('Size of Cluster {}:'.format(x)), len(dataPoints[t2 == [x]])
keepGoing = False
else:
ConvergenceCounter = ConvergenceCounter +1
StartingCentroids =np.copy(NewMeans)
kMeans(dataPoints)