import math, functools

def splitPairs(word):
    # every way to split word into a non-empty head and a (possibly empty) tail
    return [(word[:i+1], word[i+1:]) for i in range(len(word))]

def segment(word):
    if not word: return []
    # recursively segment the tail of each split, then keep the best-scoring candidate
    allSegmentations = [[first] + segment(rest)
                        for (first, rest) in splitPairs(word)]
    return max(allSegmentations, key=wordSegFitness)
class OneGramDist(dict):
    def __init__(self):
        self.gramCount = 0
        # each line of the file is "word<TAB>count"
        for line in open('Norvig Word Library.txt'):
            (word, count) = line[:-1].split('\t')
            self[word] = int(count)
            self.gramCount += self[word]

    def __call__(self, word):
        if word in self:
            return float(self[word]) / self.gramCount
        else:
            # unseen words get a small non-zero probability
            return 1.0 / self.gramCount
singleWordProb = OneGramDist()
def wordSegFitness(words):
    # this is the line that raises: TypeError: reduce expected at least 2 arguments, got 1
    return functools.reduce(lambda x, y: x + y), (math.log10(singleWordProb(w)) for w in words)
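For context, splitPairs enumerates every head/tail split of a word, and segment is meant to pick the highest-scoring of those candidate segmentations. A quick illustration (the segment result is the intended output, assuming a suitable frequency file has been loaded; it is not something I can currently reproduce because of the error described below):

print(splitPairs("cat"))
# [('c', 'at'), ('ca', 't'), ('cat', '')]

# intended result once the frequency file is loaded:
# segment("helloworld")  ->  ['hello', 'world']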
I'm trying to improve the word segmentation of some text files. Some of the words in these files have been run together (for example 'howmuchdoesthecarcost' or 'helloworld'), and I'm running a naive Bayes procedure to split them apart. However, when I run something like segment("helloworld"), I get the following error:

TypeError: reduce expected at least 2 arguments, got 1

How can I change the arguments to reduce without losing the effectiveness of wordSegFitness()?
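For reference, this is how I understand functools.reduce is normally called, with the reducing function and the iterable passed to the same call (the probabilities below are made up, purely to show the call shape):

import functools, math

probs = [0.1, 0.02, 0.005]  # toy probabilities, illustrative only
total = functools.reduce(lambda x, y: x + y,
                         (math.log10(p) for p in probs))
print(total)  # approximately -5.0

What I can't see is how to restructure my return statement to match that shape without changing what wordSegFitness() is supposed to compute.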