RecursionError:使用kmeans时超出了最大递归深度

时间:2019-07-18 03:15:20

标签: python pandas numpy machine-learning

我的代码-

import xlrd,statistics,numpy

loc= r"Re.xlsx"

wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(0); 



x=[]
def get_col_index(u):
    for j in range(sheet.ncols):
        if sheet.cell_value(0, j)== u:
            return j

def get_model_list():
    p=[]
    f= get_col_index('bikeModel')
    print(f)
    for j in range(sheet.nrows):
        c= sheet.cell_value(j,f)
        if c not in p:
            if c!='':
                p.append(c)


    return p                    
def averag(x):
    bd= sum(x)
    ct= len(x)
    av= bd/ct
    return av

def k_means(x):
    serviceType_len= len(x)
    if len(x)<=2:
        print([numpy.average(x), numpy.std(x)])
        return [numpy.average(x), numpy.std(x)]    
    #arrange data list and stored in 'y'
    y=sorted(x)
    mini=min(y)
    maxi=max(y)
    ran=maxi-mini

    #get initial centroids c1, c2, c3, c4 in C
    q1=[]
    q2=[]
    q3=[]
    q4=[]
    for i in y:
        if i<= (ran/4+mini):
            q1.append(i)
        elif i<=(mini+ran/2):
             q2.append(i)
        elif i<=(mini+3*ran/4):
            q3.append(i)
        else:
            q4.append(i)

    Cd=[]        
    if len(q1)!=0:
        q1.sort()
        c1=[]
        Cd.append(q1[0])
    else:
        c1='NULL'
    if len(q2)!=0:
        q2.sort()
        c2=[]
        Cd.append(q2[0])
    else:

        c2='NULL'    
    if len(q3)!=0:
        q3.sort()
        c3=[]
        Cd.append(q3[0])
    else:

        c3='NULL'
    if len(q4)!=0:
        q4.sort()
        c4=[]
        Cd.append(q4[0])
    else:

        c4='NULL'


    print(Cd)
    cl=[]
    rty= cluster(y,Cd,cl)

    return rty

def cluster(y,Cd,cl):
    if len(y)<=2:
        return numpy.average(y)    
    else:
        ncl=[]
        ocl=cl
        ncl1=[]
        ncl2=[]
        ncl.append(ncl1)
        ncl.append(ncl2)

        if len(Cd)>2:

            ncl3=[]

            ncl.append(ncl3)
        if len(Cd)>3:

            ncl4=[]    
            ncl.append(ncl4)

        pc=0
        for i in y:
            print(Cd[0])
            diff=abs(i-Cd[0])

            for k in Cd:
                if abs(i-k)<diff:
                    diff= abs(i-k)
                    pc=Cd.index(k)
            ncl[pc].append(i)
        print("appended")
        if len(Cd)>=1:

            Cd[0]= averag(ncl[0])
            print(Cd[0])
        if len(Cd)>=2:    
            Cd[1]= averag(ncl2)
        if len(Cd)>2:
            Cd[2]= averag(ncl3)
            if len(Cd)>3:
                Cd[3]=averag(ncl4)
        if ocl==ncl:

            ncl.sort(key=len)
            ncl.reverse()
            ncl[0].sort()


            ncl[1].sort()

            if len(Cd)>2:
                ncl[2].sort()



            if len(Cd)>3:
                ncl[3].sort()

            ll=[ncl[0]]
            for we in ncl:
                if we!=ncl[0]:
                    if ((len(ncl[0])- len(we)))<20*len(y)/100:
                        ll.append(ncl[ncl.index(we)])

            finl=[]

            for h in ll:
                for b in h:
                    finl.append(b)
            finl.sort()

            countr=0
            if  numpy.average(finl) <= numpy.std(finl)  or numpy.std(finl)>= 0.50 * numpy.average(finl):
                print("kmeans called again")
                if countr<=5:
                    k_means(finl)
                    countr=countr+1

            else:
                print([numpy.average(finl),numpy.std(finl)] )    


        else:
            cluster(y,Cd,ncl)   


serviceTypeIndex= get_col_index('jobType')
totalAmountIndex= get_col_index('rate')
bikeModelIndex=get_col_index('bikeModel')
serviceTypeList= ['Paid Job','Warranty Repair','Free Service']           

gml= get_model_list()
n=len(gml)

gml.remove('bikeModel')

for k in gml:

    cost1=[]
    cost2=[];cost3=[];cost4=[];cost5=[];cost6=[];cost7=[];cost8=[]
    r=[cost1,cost2,cost3,cost4,cost5,cost6,cost7,cost8]

    for i in range(sheet.nrows):
        if sheet.cell_value(i,bikeModelIndex)==k:

            if sheet.cell_value(i,serviceTypeIndex) in serviceTypeList:

                ind= serviceTypeList.index(sheet.cell_value(i,serviceTypeIndex))

                c= sheet.cell_value(i,totalAmountIndex)

                r[ind].append(c)

    print( "Information of "+k+" :")
    print("")
    for h in serviceTypeList:
        index= serviceTypeList.index(h)

        z= r[index]
        if len(z)!=0 :
            print("Is the effective cost and standard deviation of "+h+"  "+ str(k_means(r[index])))

    print("")            

我正在尝试使用递归方法来实现k均值。我认为问题在于else条件簇(y,Cd,ncl)中的簇函数,直到达到深度为止,它会不断被调用。

回溯-

print("Is the effective cost and standard deviation of "+h+"  "+ str(k_means(r[index])))
  File "abcdef.py", line 92, in k_means
    rty= cluster(y,Cd,cl)
  File "abcdef.py", line 180, in cluster
    cluster(y,Cd,ncl)
  File "abcdef.py", line 180, in cluster
    cluster(y,Cd,ncl)
  File "abcdef.py", line 180, in cluster
    cluster(y,Cd,ncl)   

从回溯中我们可以看到,簇(y,Cd,ncl)再次被再次调用n。

0 个答案:

没有答案