Simplified Lesk 算法在尝试消除给定句子中某个单词的歧义时,执行以下操作:

- `context` ← 句子中除目标词之外的所有单词。
- `signature` ← 出现在目标词字典定义中的单词,加上用于说明该词用法的示例中出现的所有单词。
- 选择 `context` 与 `signature` 之间共同单词数最多的那个含义。

我的问题是:如果某个单词(例如 `time`)的两个不同含义具有相同的 count 值,是否有办法得到正确的含义?
对于句子“时间飞得像箭头”(英文原句应为 "Time flies like an arrow"),我的代码无法返回该单词的正确含义。
import string
from nltk.corpus import wordnet as wn
def printMeaning(valid, word, sense, maxCount):
    """Print a one-line report about the sense selected for *word*.

    Args:
        valid: Status tag selecting the message. ``"Found"`` reports the
            chosen sense and its overlap count, ``"Default"`` reports a
            fallback sense, ``"Standard"`` reports that the word was not
            disambiguated. Any other value prints nothing.
        word: The word being reported.
        sense: Definition text to show (ignored for ``"Standard"``).
        maxCount: Overlap count to show (only used for ``"Found"``).

    Returns:
        None. The report is written to standard output.
    """
    if valid == "Found":
        print(f"The word '{word}' has meaning :'{sense}' and count :{maxCount}")
    elif valid == "Default":
        # The closing quote after the sense was missing in the original message.
        print(
            f"The word '{word}' has meaning :'{sense}'. "
            "Default meaning selected since results were inconclusive"
        )
    elif valid == "Standard":
        print(f"The word '{word}' has no disambiguity")
# Words that are reported as-is and never used as disambiguation evidence.
_STOP_WORDS = frozenset(['a', 'an', 'the', 'this', 'that', 'and'])

# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def _lesk_tokens(text):
    """Return *text* as lowercase, punctuation-free, whitespace-split tokens."""
    return text.translate(_PUNCTUATION_TABLE).lower().split()


def findSense(sentence):
    """Report a Simplified Lesk sense for every word of *sentence*.

    For each non-stop word the context is every other non-stop word of the
    sentence, and each WordNet sense's signature is the words of that
    sense's definition plus the words of its usage examples. The sense whose
    signature shares the most word occurrences with the context is reported
    through ``printMeaning``.

    Ties are resolved in favour of the earliest synset returned by
    ``wn.synsets``; when no sense overlaps the context at all, the first
    synset is reported as the default (the original code treats the first
    synset as the most common meaning).

    Args:
        sentence: Whitespace-separated text to disambiguate.

    Returns:
        None. One line per word is printed via ``printMeaning``.
    """
    # Normalise once so the target word, the context and the signatures are
    # all compared in the same lowercase, punctuation-free form.
    tokens = _lesk_tokens(sentence)

    for position, word in enumerate(tokens):
        if word in _STOP_WORDS:
            printMeaning("Standard", word, "", 0)
            continue

        # Context: every other token (only this occurrence of the target is
        # dropped), minus stop words, which occur in almost every gloss and
        # would otherwise swamp the overlap count.
        context = [
            other
            for index, other in enumerate(tokens)
            if index != position and other not in _STOP_WORDS
        ]

        synsets = wn.synsets(word)
        if not synsets:  # word doesn't exist in our dictionary
            printMeaning("Default", word, "word not found in our dictionary", 0)
            continue

        best_sense = None
        max_count = 0
        for meaning in synsets:
            # Build a fresh signature for each sense; carrying example words
            # over from earlier senses would inflate later senses' counts.
            signature = _lesk_tokens(meaning.definition())
            for usage in meaning.examples():
                signature.extend(_lesk_tokens(usage))

            # How many times do the context words appear in the signature?
            count = sum(signature.count(w) for w in context)

            # Strict '>' keeps the earliest sense when counts are tied.
            if count > max_count:
                best_sense = meaning.definition()
                max_count = count

        if best_sense is None:
            # No overlap for any sense: fall back to the first listed sense.
            printMeaning("Default", word, synsets[0].definition(), 0)
        else:
            printMeaning("Found", word, best_sense, max_count)
if __name__ == '__main__':
    # Script entry point: read one sentence from standard input and print
    # the sense report for each of its words.
    findSense(input("Enter sentence: "))