Question

我的维特比代码程序变得呈指数级。你可以帮我找到我可以改变的地方，让它成为动态的程序。我需要记住并仅使用之前的2个单词标记。

非常感谢。

from collections import defaultdict
import sys
import re
import feature_maker as fm 


bla = '' 

all_states = set()
#distirbuition over all of the corpus
POS_probability = fm.load_obj('probas')
POS_probability['START'] = 1.0

def cpd_tagwords(words, tag):
pattern = re.compile("\W")# to check for .,: etc.
if pattern.match(words) and tag == words:
    return 1
elif pattern.match(tag):
    return 0
for word in emle.split("\n"):
    if word.__contains__(words) and word.__contains__(tag):
        return word[word.index(":") + 2:]

#if we dont have data about the word with the tag,just retturn the probability
#to get the tag over all of the word in the corpus.
return POS_probability[tag]

def cpd_tags(early, prev, current):
lambda1 = 0
lambda3 = 0
lambda6 = 0

for word in qmle.split("\n"):
    word1 = word.split()
    if len(word1) > 0:
      if word1[0].__contains__(current): #for tuple of 1
        if len(word1) == 2:
            lambda1 = word[word.index("]:") + 3:]
        if len(word1) > 2 and word1[1].__contains__(prev): #for tuple of 2
            if len(word1) == 3:
                lambda3 = word[word.index("]:") + 3:]
            if len(word1) > 3 and word1[2].__contains__(early): #for tuple of 3
                if len(word1) == 4:
                    lambda6 = word[word.index("]:") + 3:]
return (0.6*float(lambda6)) + (0.3*float(lambda3)) + (0.1*float(lambda1))



#map: popular_copuler['POS'] = list of all pos that can come before it.
popular_copules = fm.load_obj('popular_copules')
# Viterbi Algo
def viterbi(sentence, tags1):

def findSet(index,tag):
    if tag == 'ALL':
        return tags1
    if index in range(1, len(sentence) + 1):
        possible_tags = set(popular_copules[tag])
        if possible_tags == set([]):

            return tags1

        return set(popular_copules[tag])

    elif index == 0 or index == -1:
        return {'START'}



# stores (word:tag) in this whole sentence
sentence_with_tag = defaultdict(str)

# inner function to commpute pi values--start
def pi_viterbi(k, u, v, sentence):#here is the start of the bad sequence
    prob = defaultdict(float)
    # initialization
    if k == 0 and u == 'START' and v == 'START':
        return (1., 'START')
    else:

        for w in findSet(k - 2,u):
            prev = pi_viterbi(k - 1, w, u, sentence)[0]
            # tuple((w,u,v))
            q = cpd_tags(w, u, v)**
            e = cpd_tagwords(sentence[k - 1].lower(), v)
            probability = float(prev) * q * float(e)
            prob[tuple((w, u))] = probability**
        #here is the end of the bad sequence

        max_tuple = max(prob.items(), key=lambda x: x[1])

        # print (max_tuple[1],max_tuple[0][0])
        return max_tuple[1], max_tuple[0][0]

# inner function to commpute pi values--end

sentence_with_tag = list()
backpointer = defaultdict(str)
tags = defaultdict(str)
k = len(sentence)
u_glob = ''
v_glob = ''
glob = 0.
for i in range(1, k + 1):
    prob = defaultdict(float)
    #for current word we check all the tags
    """ changed from for u in findSet(i - 1):"""
    for u in findSet(i ,'ALL'):

        #going backwards we call findset with u so it gives us only 
        # tags v that go togeter alot with u(this is purnnig)
        """ changed from for v in findSet(i)"""
        for v in findSet(i-1,u_glob):
            #siwtched u and v
            value, w = pi_viterbi(i, v, u, sentence)#the v recursion in the algorithm
            prob[tuple((i, u, v))] = value
            backpointer[tuple((i, u, v))] = w #bp from the algorithm
    max_tuple = max(prob.items(), key=lambda x: x[1])

    backpointer[tuple((i, max_tuple[0][1], max_tuple[0][-1]))] = max_tuple[0][1]  # bp (k,u,v)= tag w

    # sentence_with_tag.append(max_tuple[0][-1])
    u_glob = max_tuple[0][-2]
    v_glob = max_tuple[0][-1]
    glob = max_tuple[1]
    print ('Max', max_tuple)
tags[k - 1] = u_glob
tags[k] = v_glob

for i in range((k - 2), 0, -1):
    tag = backpointer[tuple(((i + 2), tags[i + 1], tags[i + 2]))]
    tags[i] = tag

tag_list = list()
for i in range(1, len(tags) + 1):
    tag_list.append(tags[i])

file = open(sys.argv[4], 'w')
file.truncate()
for word in tag_list:
    file.write(word)
# tag list as results
return tag_list

file=open(sys.argv[1],"r+")
fQ = open(sys.argv[2], 'r')
qmle = fQ.read()
fQ.close()
f = open("tags.txt",'r+')
tags = f.read()
f.close()
fe = open(sys.argv[3], 'r')
emle = fe.read()
distinct_tags = set()

# what is the list of all tags?
for word in tags.split():
distinct_tags.add(word)


sentence = []
sentence1 = []
sentence1 = file.read()
sentence = sentence1.split()
file.close()

file = open(sys.argv[4], 'w')
file.truncate()
viterbi(sentence, distinct_tags)

如何减少时间复杂度？

我的计划的时间复杂性问题

0 个答案: