我想知道压缩任何字符串的方法

时间:2015-08-05 19:12:23

标签: algorithm performance string-comparison substring

我想知道一种压缩字符串的方法。例如,给定字符串 ABCABCABC,可以发现子字符串 ABC 连续重复出现(frequently occurs),因此它应被压缩为 3ABC。如果给定的是 ABCABCBC 这样的字符串,那么 ABC 是一个重复出现的子字符串,BC 是另一个,因此它应被压缩为 2ABC1BC。如您所见,我只考虑相邻(adjacent)的重复子字符串。

1 个答案:

答案 0 :(得分:0)

# Sample input string that the script further down analyses.
s = "FLFLAFLAFLAF"


def check(str_check, str):
    """Return the index of the first occurrence of ``str_check`` in ``str``.

    Returns the sentinel value 9999 when ``str_check`` does not occur in
    ``str``.  Callers compare against 9999 to detect "not found", so the
    sentinel is part of this function's contract.

    The parameter name ``str`` shadows the builtin inside this function; it
    is kept unchanged so that existing callers continue to work.
    """
    try:
        return str.index(str_check)
    except ValueError:
        # ValueError is what ``index`` raises for "not found".  Any other
        # error (e.g. a non-string argument) propagates rather than being
        # reported as a missing substring.
        return 9999

def max_index(i,j,sub_str):
    """Recursively build a "<repeat count><substring>" encoding of ``sub_str``.

    Relies on the module-level lists ``sub_list`` (candidate substrings) and
    ``count`` (one number per entry of ``sub_list``); ``count`` is mutated in
    place by one of the branches below.

    ``i`` and ``j`` are used as slice bounds into ``count`` to pick the
    candidate with the largest value.  ``sub_str`` is the piece of text being
    encoded.

    Returns a string, or ``None`` (implicitly) when the guard
    ``i >= 0 and j <= len(count)`` is false.
    """
    if(i>=0 and j<=len(count)):
        try:
            # Largest value inside count[i:j]; note that count.index() then
            # returns the first position of that value in the WHOLE list,
            # which is not necessarily inside [i, j).
            j=count.index(max(count[i:j]))
        except Exception:
            # e.g. max() of an empty slice; -1 then selects the last entry.
            j=-1
        try:
            # Where the chosen candidate substring starts inside sub_str.
            sub_str_index=int(sub_str.index(sub_list[j]))
        except Exception:
            # Candidate not present in sub_str (or lookup failed).
            sub_str_index=-1
        temp=int(count[j])
        if len(sub_str)==0:
            # Nothing left to encode.
            return ""
        elif(count[j]==1 or int(count[j])*len(sub_list[j])>len(sub_str)):
            # The candidate is not repeated, or its repeated run would be
            # longer than sub_str: emit sub_str verbatim with a count of 1.
            # Side effect: zeroes len(sub_str) consecutive entries of the
            # global count list starting at j.
            # NOTE(review): j+i is not bounds-checked against len(count) -
            # confirm this cannot raise IndexError for the intended inputs.
            for i in range (len(sub_str)):
                count[j+i]=0
            return "1"+ sub_str
        else:
            #count[j]=0
            # Result = encoding of the text before the run
            #        + "<temp><candidate>"
            #        + encoding of the text after the run,
            # where the run is assumed to span temp * len(candidate)
            # characters starting at sub_str_index.
            # NOTE(review): the bounds passed to the recursive calls are
            # offsets into sub_str, but the callee uses them to slice count -
            # verify this is intended.  If sub_str_index is -1 here, the
            # slices below use -1 as an ordinary negative index.
            return max_index(0,sub_str_index,sub_str[0:sub_str_index]) + str(temp) + sub_list[j] + max_index(len(sub_str[0:sub_str_index])+(int(temp)*len(sub_list[j])),len(sub_str),sub_str[len(sub_str[0:sub_str_index])+(int(temp)*len(sub_list[j])): len(sub_str)])







# Sentinel that check() returns when the substring is absent.
_NOT_FOUND = 9999


def _count_adjacent_repeats(sub, text):
    """Return how many times ``sub`` repeats back-to-back in ``text``.

    Only the run that begins at the first occurrence of ``sub`` is measured;
    later, separate runs are ignored.  Returns 0 when ``sub`` is empty or
    does not occur in ``text``.
    """
    if not sub:
        return 0
    pos = check(sub, text)
    if pos == _NOT_FOUND:
        return 0
    repeats = 0
    # Step by len(sub) so that only aligned, adjacent copies are counted.
    # Stepping by one character would land on an overlapping occurrence of a
    # self-overlapping substring (e.g. "AA" in "AAAA") and end the run early.
    while text.startswith(sub, pos):
        repeats += 1
        pos += len(sub)
    return repeats


# Every distinct non-empty substring of s, in sorted order.
sub_list = sorted({
    s[i:i + j]
    for j in range(1, len(s) + 1)
    for i in range(len(s) - j + 1)
})
length = len(s)
length1 = len(sub_list)
# count[n] is the adjacent-repeat count of sub_list[n].  max_index() reads
# this list and also zeroes entries of it in place.
count = [_count_adjacent_repeats(sub, s) for sub in sub_list]


# Single-argument print(...) produces the same output on Python 2 and 3.
print("SUBLIST")
print(sub_list)
print("count")
# NOTE(review): this binds a second name to the SAME list; it is not a copy.
count1 = count
print(count1)
j = len(count)
i = 0
# An empty s gives an empty count list; 0 then keeps the loop from running
# instead of max() raising ValueError.
max_ele = max(count) if count else 0
while max_ele in count:
    max_in = count.index(max(count[i:j]))
    print(max_index(i, j, s))
    # NOTE(review): count1 and count are the same object, so this "restore"
    # has no effect.  Kept as-is because replacing it with a real copy would
    # change when the loop terminates - confirm the intended behaviour.
    count = count1
    # Retire the candidate just used so the loop makes progress.
    count[max_in] = 0