更新通过 KMeans 获得的聚类中心和标签(scikit-learn)

时间:2018-06-08 09:05:38

标签: python scikit-learn k-means unsupervised-learning

我需要检查是否存在只关联了一个点的簇;如果存在,就需要计算该点与其他所有簇中每个点的欧氏距离,并把距离最小的那个点加入到这个长度为1的簇中。我已经计算出了最小距离,但还没有找到办法把最小距离对应的点加入长度为1的簇,并在此之后更新聚类中心和标签。我的代码如下:

def ClusterIndicesComp(clustNum, labels_array):
    """Return the positions in ``labels_array`` whose label equals ``clustNum``.

    :param clustNum: Cluster label to look for.
    :param labels_array: One-dimensional sequence or array of cluster labels.
    :return: Integer NumPy array of matching positions, in ascending order.
        The array is empty, but still integer-typed, when nothing matches,
        so it can always be used directly as an index.
    """
    # flatnonzero always yields an integer index array, even when empty,
    # whereas np.array([]) would default to float64.
    return np.flatnonzero(np.asarray(labels_array) == clustNum)

def newsol(max_gen, population, data):
    """Cluster ``population`` ``max_gen`` times and print singleton-cluster distances.

    Each generation runs ``Kmeans_clu`` with 5 clusters and records the
    cluster count, labels and centres in the module-level lists ``K1``,
    ``plab`` and ``pcenter``. Then, for every cluster that holds exactly one
    point, it prints the Euclidean distance from that point to every member
    of each cluster holding more than two points, followed by the smallest
    distance found for that cluster.

    :param max_gen: Number of generations (k-means runs) to perform.
    :param population: Samples passed to ``Kmeans_clu`` for clustering.
    :param data: Indexable collection the cluster members are read from.
        NOTE(review): assumed to be a NumPy array whose rows line up with
        the rows of ``population`` -- confirm against the caller.
    :return: None; results are only printed and stored in the global lists.
    """
    # The generation index has its own name so that no inner loop can
    # overwrite it.
    for gen in range(max_gen):
        cluster1 = 5
        K1.insert(gen, cluster1)
        print('value of K1', K1)
        u, label, _, l = Kmeans_clu(cluster1, population)
        plab.insert(gen, label)
        pcenter.insert(gen, u)

        # Number of points assigned to each cluster label.
        k2 = Counter(l.labels_)
        singleton_labels = np.array([lab for lab, size in k2.items() if size == 1])
        # Only clusters with more than two points are compared against;
        # this depends on k2 alone, so it is computed once per generation.
        large_labels = [lab for lab, size in k2.items() if size > 2]

        for lone_label in singleton_labels:
            print("Value in NEW_SOL is of 1 length cluster\n", lone_label)
            # Features of the single point belonging to this cluster.
            plot1 = data[ClusterIndicesComp(lone_label, l.labels_)]
            print("Values are in sol of plot1", plot1)

            for big_label in large_labels:
                print("Value in NEW_SOL is of more than 2 length cluster\n", big_label)
                # Features of every point belonging to the larger cluster.
                plot2 = data[ClusterIndicesComp(big_label, l.labels_)]

                distances = []
                for plotk in plot2:
                    # Euclidean distance between the lone point and this member.
                    S = np.linalg.norm(np.asarray(plot1) - np.asarray(plotk))
                    print("Distance between plot1 and plotk is", plot1, plotk, S)
                    distances.append(S)

                # Smallest distance to this cluster, taken once after the scan.
                Smin = min(distances)
                # NOTE(review): plotk is the last member examined, which is
                # not necessarily the member lying at distance Smin.
                print("VAlues of Slist with min  \n", plotk, Smin)

这是我的kmeans代码

from sklearn.cluster import KMeans
import numpy as np 

def Kmeans_clu(K, data):
    """Fit a k-means model to ``data`` and return its results.

    The model is configured with random initialisation, a single
    initialisation run (``n_init=1``) and ``max_iter=1``.

    :param K: Number of clusters to form.
    :param data: Samples to cluster, passed unchanged to ``KMeans.fit``.
    :return: Tuple ``(centres, labels, members, model)`` holding the cluster
        centres, the per-sample labels, a dict mapping each cluster id to the
        array of sample positions assigned to it, and the fitted ``KMeans``
        object itself.
    """
    model = KMeans(n_clusters=K, init='random', max_iter=1, n_init=1)
    model.fit(data)

    # For each cluster id, collect the positions of the samples labelled with it.
    members_by_cluster = {}
    for cluster_id in range(model.n_clusters):
        members_by_cluster[cluster_id] = np.where(model.labels_ == cluster_id)[0]

    return model.cluster_centers_, model.labels_, members_by_cluster, model

0 个答案:

没有答案