我正在使用Python中的拉普拉斯平滑构建一个Bigram语言模型。我写了下面的类,但是当我尝试在得分函数中打印时它没有打印任何东西,因为该函数正在计算对数概率值,为什么呢?代码有什么问题? 我正在给出一个标记为输入的语料库。示例语料库:STOP要停止
class BiGramModel:
def __init__(self, corpus):
"""Initialize your data structures in the constructor."""
self.unigramCounts = {}
self.bigramCounts = {}
self.train(corpus)
def train(self, corpus):
for sentence in corpus.corpus:
previous_token = ""
for datum in sentence.data:
token = datum.word
if token in self.unigramCounts:
self.unigramCounts[token] = self.unigramCounts[token] + 1.0
else:
self.unigramCounts[token] = 1.0
if previous_token != "":
bigram = previous_token + " | " + token
if bigram in self.bigramCounts:
self.bigramCounts[bigram] = self.bigramCounts[bigram] + 1.0
else:
self.bigramCounts[bigram] = 1.0
previous_token = token
def score(self, sentence):
"It takes a list of strings as argument and returns the log-probability of the
sentence using the bigram language model."
score = 1.0
vocabulary = len(self.bigramCounts) + 0.0
previous_token = ""
for token in sentence:
unigram_find = self.unigramCounts[token] if token in self.unigramCounts else 0.0
bigram = previous_token + " | " + token
bigram_find = self.bigramCounts[bigram] if bigram in self.bigramCounts else 0.0
score += math.log(bigram_find + 1.0)
score -= math.log(unigram_find + vocabulary)
previous_token = token
return score
为什么不给我任何输出?