马尔可夫模型 - 随机词/乱码发生器

时间:2013-12-03 01:48:34

标签: python random markov-models

我的代码工作正常,直到产生随机字。有时它会产生单词/乱码,有时则不会(可能会经历无限循环)。然而,每当它确实产生单词/乱码时,它似乎不是那么“随机”。这些单词要么重复,要么大多数单词会产生接近相同的字符长度。

问题在于def genRandomWord

import random

def getTransitions(astring):            
    d = {}
    for i in range(len(astring)):
        if astring[i:i+2] in d:
            d[astring[i:i+2]] += 1
        else:
            d[astring[i:i+2]] = 1
    #h = tuple(d.items())    #gets the indexes of the dictionary
    #print(h[2][1])
    if ' ' in d:
        del d[' ']
    return d

def getFirstLetters(astring):
    d = []
    for i in astring:
        if i not in d:
            d.append(i)
    d.remove(' ')
    return d


def letterCount(astring):
    d = {}
    for i in astring:
        if i not in d.keys():
            d[i] = 1
        else:
            d[i] +=1
    d[' ']-= 1
    return d

def getProb(astring):
    d = {}
    h = tuple(getTransitions(astring).items())
    j = tuple(letterCount(astring).items())
    #print("h", h)
    #print()
    #print()
    #print("j", j)
    for i in h:
        for n in j:
            if i[0][0] == n[0]:
                d[i[0]] = i[1]/n[1]
    return d

def genFletter(astring):
    d = {}
    r = random.random()
    fl ='*'
    #print("r",r)
    a = getProb(astring)
    suma = 0
    count = -1
    for i in a:
        if i[0][0] == ' ':
            d[i[1]] = a[i]
    d = sorted(tuple(d.items()))
    #print(d)
    while suma < r:
        count += 1
        suma += d[count][1]
        fl = d[count][0]
        #print(suma)
    return fl

def genRandomWord(astring):
    h = getProb(The_List)
    htrans = tuple(getProb(The_List).keys())
    hprob = tuple(getProb(The_List).values())
    #print(hprob)
    z = genFletter(The_List)
    word = z
    #print(word)
    fletterprob = h[' '+z]
    r = random.random()
    while word[-1]!= ' ':
        index = 0
        suma = 0
        for i in range(len(htrans)):
            if htrans[i][0] == word[-1]:
                index = i
        suma += hprob[index]
        for j in range(len(hprob)):
            if suma >= r:
                word += htrans[index][1]
                break
            else:
                suma += hprob[index]
    return word









The_List = ' steam teams meets teems eat ate state tease test mast mates '

trans = getTransitions(The_List)
lcount = letterCount(The_List)
fletter = getFirstLetters(The_List)
transProb = getProb(The_List)


#Sorting
#print('LETTER TRANSITIONS'+'\n'+str(sorted(trans.items()))+'\n')
#print('LETTER COUNT'+'\n'+str(sorted(lcount.items()))+'\n')
#print('FIRST LETTERS'+'\n'+str(sorted(fletter))+'\n')
#print('TRANSITION PROBABILITIES'+'\n'+str(sorted(transProb.items()))+'\n')

print('LETTER TRANSITIONS'+'\n'+str(trans)+'\n')
print('LETTER COUNT'+'\n'+str(lcount)+'\n')
print('FIRST LETTERS'+'\n'+str(fletter)+'\n')
print('TRANSITION PROBABILITIES'+'\n'+str(transProb)+'\n')


#print(genFletter(The_List))
for i in range(10):
    print("'"+genRandomWord(The_List)+"'")

0 个答案:

没有答案