我需要检查是否存在只包含一个点的簇。如果存在,就需要计算该点与其他所有簇中每个点的欧氏距离,并把距离最小的那个点加入这个长度为 1 的簇,然后更新簇中心和标签。我已经算出了最小距离,但还没有找到办法把最小距离对应的点加入长度为 1 的簇,并在此之后更新中心和标签。我的代码是:
def ClusterIndicesComp(clustNum, labels_array):
    """Return the positions in ``labels_array`` whose label equals ``clustNum``.

    :param clustNum: Cluster label to look for
    :param labels_array: 1-D sequence of per-point cluster labels
    :return: Integer index array; empty (but still integer-typed) when no
        label matches, so the result can always be used to index an array
    """
    # Building np.array([...]) from an empty list yields float64, which NumPy
    # refuses to accept as an index array; flatnonzero always returns integers.
    return np.flatnonzero(np.asarray(labels_array) == clustNum)
def newsol(max_gen, population, data):
    """Cluster ``population`` once per generation and inspect singleton clusters.

    For every generation the cluster count, labels and centres are inserted
    into the module-level lists ``K1``, ``plab`` and ``pcenter``. Then, for
    each cluster that holds exactly one point, the Euclidean distance to every
    point of every cluster with more than two points is computed, and the
    closest point together with its distance is printed.

    :param max_gen: Number of generations (k-means runs) to perform
    :param population: Samples handed to ``Kmeans_clu`` for clustering
    :param data: Array indexed with the label positions to fetch the points
        (assumes its rows line up with ``population`` -- TODO confirm)
    :return: None
    """
    for gen in range(max_gen):
        cluster1 = 5
        K1.insert(gen, cluster1)
        print('value of K1', K1)
        u, label, t, l = Kmeans_clu(cluster1, population)
        plab.insert(gen, label)
        pcenter.insert(gen, u)

        # Number of points per cluster label (Counter is presumably
        # collections.Counter -- it is not imported in the visible code).
        sizes = Counter(l.labels_)
        singletons = [c for c, size in sizes.items() if size == 1]
        # Candidate clusters do not depend on the singleton, so build them once.
        larger = [c for c, size in sizes.items() if size > 2]

        for single in singletons:
            print("Value in NEW_SOL is of 1 length cluster\n", single)
            # The lone point of this cluster.
            plot1 = data[ClusterIndicesComp(single, l.labels_)]
            print("Values are in sol of plot1", plot1)

            # Track the closest point itself, not only its distance, so the
            # report below names the right point rather than the last visited.
            Smin = None
            nearest = None
            for big in larger:
                print("Value in NEW_SOL is of more than 2 length cluster\n", big)
                plot2 = data[ClusterIndicesComp(big, l.labels_)]
                for plotk in plot2:
                    S = np.linalg.norm(np.array(plot1) - np.array(plotk))
                    print("Distance between plot1 and plotk is", plot1, plotk, S)
                    if Smin is None or S < Smin:
                        Smin = S
                        nearest = plotk

            if Smin is None:
                # No cluster with more than two points: nothing to compare with.
                continue
            print("VAlues of Slist with min \n", nearest, Smin)
            # NOTE(review): the labels and centres stored in plab/pcenter are
            # not modified here; moving `nearest` into the singleton cluster
            # still has to be done by the caller.
这是我的 k-means 代码:
from sklearn.cluster import KMeans
import numpy as np
def Kmeans_clu(K, data):
    """
    Fit k-means on ``data`` with random initialisation, a single
    initialisation run and a single iteration.

    :param K: Number of clusters
    :param data: Samples to cluster, passed unchanged to ``KMeans.fit``
    :return: Tuple ``(cluster centres, labels, members, fitted KMeans model)``
        where ``members`` maps each cluster index to the positions of the
        samples assigned to it
    """
    model = KMeans(n_clusters=K, init='random', max_iter=1, n_init=1)
    model.fit(data)

    assignments = model.labels_
    centres = model.cluster_centers_

    # For every cluster index, collect the positions of the samples labelled
    # with it.
    members = {}
    for cluster_id in range(model.n_clusters):
        members[cluster_id] = np.flatnonzero(model.labels_ == cluster_id)

    return centres, assignments, members, model