使用类中的方法时出现TypeError -

时间:2016-08-01 12:47:41

标签: python k-means

我有一个改进的kmeans算法(KPlusPlus),它基于类kmeans。 Detk是从KPlusPlus继承的另一个类。

KPlusPlus类的目标是找出找到kmeans质心的最佳种子(Source

Detk计算间隙统计量以找到最佳簇数。我从here

找到了此代码
# kmeans class 
class KMeans():
def __init__(self, K, X=None, N=0):
    self.K = K
    if X == None:
        if N == 0:
            raise Exception("If no data is provided, \
                             a parameter N (number of points) is needed")
        else:
            self.N = N
            self.X = self._init_board_gauss(N, K)
    else:
        self.X = X
        self.N = len(X)
    self.mu = None
    self.clusters = None
    self.method = None

def _init_board_gauss(self, N, k):
    n = float(N)/k
    X = []
    for i in range(k):
        c = (random.uniform(-1,1), random.uniform(-1,1))
        s = random.uniform(0.05,0.15)
        x = []
        while len(x) < n:
            a,b = np.array([np.random.normal(c[0],s),np.random.normal(c[1],s)])
            # Continue drawing points from the distribution in the range [-1,1]
            if abs(a) and abs(b)<1:
                x.append([a,b])
        X.extend(x)
    X = np.array(X)[:N]
    return X

def plot_board(self):
    X = self.X
    fig = plt.figure(figsize=(5,5))
    plt.xlim(-1,1)
    plt.ylim(-1,1)
    if self.mu and self.clusters:
        mu = self.mu
        clus = self.clusters
        K = self.K
        for m, clu in clus.items():
            cs = cm.spectral(1.*m/self.K)
            plt.plot(mu[m][0], mu[m][1], 'o', marker='*', \
                     markersize=12, color=cs)
            plt.plot(zip(*clus[m])[0], zip(*clus[m])[1], '.', \
                     markersize=8, color=cs, alpha=0.5)
    else:
        plt.plot(zip(*X)[0], zip(*X)[1], '.', alpha=0.5)
    if self.method == '++':
        tit = 'K-means++'
    else:
        tit = 'K-means with random initialization'
    pars = 'N=%s, K=%s' % (str(self.N), str(self.K))
    plt.title('\n'.join([pars, tit]), fontsize=16)
    plt.savefig('kpp_N%s_K%s.png' % (str(self.N), str(self.K)), \
                bbox_inches='tight', dpi=200)

def _cluster_points(self):
    mu = self.mu
    clusters  = {}
    for x in self.X:
        bestmukey = min([(i[0], np.linalg.norm(x-mu[i[0]])) \
                         for i in enumerate(mu)], key=lambda t:t[1])[0]
        try:
            clusters[bestmukey].append(x)
        except KeyError:
            clusters[bestmukey] = [x]
    self.clusters = clusters

def _reevaluate_centers(self):
    clusters = self.clusters
    newmu = []
    keys = sorted(self.clusters.keys())
    for k in keys:
        newmu.append(np.mean(clusters[k], axis = 0))
    self.mu = newmu

def _has_converged(self):
    K = len(self.oldmu)
    return(set([tuple(a) for a in self.mu]) == \
           set([tuple(a) for a in self.oldmu])\
           and len(set([tuple(a) for a in self.mu])) == K)

def find_centers(self,K, method='random'):
    self.method = method
    X = self.X
    K = self.K
    self.oldmu = random.sample(X, K)
    if method != '++':
        # Initialize to K random centers
        self.mu = random.sample(X, K)
    while not self._has_converged():
        self.oldmu = self.mu
        # Assign all points in X to clusters
        self._cluster_points()
        # Reevaluate centers
        self._reevaluate_centers()

KPlusPlus类继承kmeans以找到最佳种子

class KPlusPlus(KMeans):
def _dist_from_centers(self):
    cent = self.mu
    X = self.X
    D2 = np.array([min([np.linalg.norm(x-c)**2 for c in cent]) for x in X])
    self.D2 = D2

def _choose_next_center(self):
    self.probs = self.D2/self.D2.sum()
    self.cumprobs = self.probs.cumsum()
    r = random.random()
    ind = np.where(self.cumprobs >= r)[0][0]
    return(self.X[ind])

def init_centers(self,K):
    self.K = K
    self.mu = random.sample(self.X, 1)
    while len(self.mu) < self.K:
        self._dist_from_centers()
        self.mu.append(self._choose_next_center())

def plot_init_centers(self):
    X = self.X
    fig = plt.figure(figsize=(5,5))
    plt.xlim(-1,1)
    plt.ylim(-1,1)
    plt.plot(zip(*X)[0], zip(*X)[1], '.', alpha=0.5)
    plt.plot(zip(*self.mu)[0], zip(*self.mu)[1], 'ro')
    plt.savefig('kpp_init_N%s_K%s.png' % (str(self.N),str(self.K)), \
                bbox_inches='tight', dpi=200)

Detk类继承自KPlusPlus,根据差距统计信息找到有效的聚类数

class DetK(KPlusPlus):
def fK(self, thisk, Skm1=0):
    X = self.X
    Nd = len(X[0])
    a = lambda k, Nd: 1 - 3/(4*Nd) if k == 2 else a(k-1, Nd) + (1-a(k-1, Nd))/6
    self.find_centers(thisk, method='++')
    mu, clusters = self.mu, self.clusters
    Sk = sum([np.linalg.norm(mu[i]-c)**2 \
             for i in range(thisk) for c in clusters[i]])
    if thisk == 1:
        fs = 1
    elif Skm1 == 0:
        fs = 1
    else:
        fs = Sk/(a(thisk,Nd)*Skm1)
    return fs, Sk  

def _bounding_box(self):
    X = self.X
    xmin, xmax = min(X,key=lambda a:a[0])[0], max(X,key=lambda a:a[0])[0]
    ymin, ymax = min(X,key=lambda a:a[1])[1], max(X,key=lambda a:a[1])[1]
    return (xmin,xmax), (ymin,ymax)        

def gap(self, thisk):
    X = self.X
    (xmin,xmax), (ymin,ymax) = self._bounding_box()
    self.init_centers(thisk)
    self.find_centers(thisk, method='++')
    mu, clusters = self.mu, self.clusters
    Wk = np.log(sum([np.linalg.norm(mu[i]-c)**2/(2*len(c)) \
                for i in range(thisk) for c in clusters[i]]))
    # Create B reference datasets
    B = 10
    BWkbs = zeros(B)
    for i in range(B):
        Xb = []
        for n in range(len(X)):
            Xb.append([random.uniform(xmin,xmax), \
                      random.uniform(ymin,ymax)])
        Xb = np.array(Xb)
        kb = DetK(thisk, X=Xb)
        kb.init_centers(thisk)
        kb.find_centers(thisk, method='++')
        ms, cs = kb.mu, kb.clusters
        BWkbs[i] = np.log(sum([np.linalg.norm(ms[j]-c)**2/(2*len(c)) \
                          for j in range(thisk) for c in cs[j]]))
    Wkb = sum(BWkbs)/B
    sk = np.sqrt(sum((BWkbs-Wkb)**2)/float(B))*np.sqrt(1+1/B)
    return Wk, Wkb, sk

def run(self, maxk, which='both'):
    ks = range(1,maxk)
    fs = zeros(len(ks))
    Wks,Wkbs,sks = zeros(len(ks)+1),zeros(len(ks)+1),zeros(len(ks)+1)
    # Special case K=1
    self.init_centers(1)
    if which == 'f':
        fs[0], Sk = self.fK(1)
    elif which == 'gap':
        Wks[0], Wkbs[0], sks[0] = self.gap(1)
    else:
        fs[0], Sk = self.fK(1)
        Wks[0], Wkbs[0], sks[0] = self.gap(1)
    # Rest of Ks
    for k in ks[1:]:
        self.init_centers(k)
        if which == 'f':
            fs[k-1], Sk = self.fK(k, Skm1=Sk)
        elif which == 'gap':
            Wks[k-1], Wkbs[k-1], sks[k-1] = self.gap(k)
        else:
            fs[k-1], Sk = self.fK(k, Skm1=Sk)
            Wks[k-1], Wkbs[k-1], sks[k-1] = self.gap(k)
    if which == 'f':
        self.fs = fs
    elif which == 'gap':
        G = []
        for i in range(len(ks)):
            G.append((Wkbs-Wks)[i] - ((Wkbs-Wks)[i+1]-sks[i+1]))
        self.G = np.array(G)
    else:
        self.fs = fs
        G = []
        for i in range(len(ks)):
            G.append((Wkbs-Wks)[i] - ((Wkbs-Wks)[i+1]-sks[i+1]))
        self.G = np.array(G)

当我尝试在给定数量的点(locArray

上运行以下程序时
locArray = np.array(locArrayMaster[counter])

kmeanscluster = DetK(2, X = locArray)
kmeanscluster.run(5)
noClusters[counter] = np.where(kmeanscluster.fs == min(kmeanscluster.fs))[0][0]+ 1  

它返回以下错误

File "C:\Users\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)

File "C:\Users\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)

File "C:/Users/Documents/SUMOTraffic/kplusplus.py", line 355, in <module>
kmeanscluster.run(5)

File "C:/Users/Documents/SUMOTraffic/kplusplus.py", line 217, in run
Wks[0], Wkbs[0], sks[0] = self.gap(1)

File "C:/Users/Documents/SUMOTraffic/kplusplus.py", line 200, in gap
for j in range(thisk) for c in cs[j]]))

TypeError: 'NoneType' object has no attribute '__getitem__'

感谢您的帮助。

1 个答案:

答案 0 :(得分:0)

错误是由于当群集数量仅为1时,kmeans算法无法找到群集中心。因此,不会为此情况创建群集字典。因此,在类TRUE中添加了一行额外的代码,用于检查群集字典的类型是否为class DetK(KPlusPlus): def fK(self, thisk, Skm1=0): X = self.X Nd = len(X[0]) a = lambda k, Nd: 1 - 3/(4*Nd) if k == 2 else a(k-1, Nd) + (1-a(k-1, Nd))/6 self.find_centers(thisk, method='++') ,如果它返回 while type(self.clusters) is not dict: self.find_centers(thisk, method = '++') ,则会再次重新计算群集中心。

        mu, clusters = self.mu, self.clusters
        Sk = sum([np.linalg.norm(mu[i]-c)**2 \
                 for i in range(thisk) for c in clusters[i]])
        if thisk == 1:
            fs = 1
        elif Skm1 == 0:
            fs = 1
        else:
            fs = Sk/(a(thisk,Nd)*Skm1)
        return fs, Sk 
if ( $( "#myDiv" ).length ) {
    //id id ( "#myDiv" ) is exist this will perform
    $( "#myDiv" ).show();

}
    $( "#myDiv" ).length && $( "#myDiv" ).show();