import nltk
from nltk.corpus import cess_esp #corpus of spanish texts
# Keep only the corpus tokens whose tag (second element) starts with 'v',
# i.e. the verbs.
new_tagged = list(filter(lambda w: w[1][0] == 'v', cess_esp.tagged_words()))

# Lowercase every word form, keep its tag, and order the pairs alphabetically.
lowers = [(w[0].lower(), w[1]) for w in new_tagged]
lowers.sort()

# Endings of conjugated verbs (abridged list), ordered longest first so that
# the longest ending is tried before any shorter one. The sort is stable, so
# endings of equal length keep the order in which they are written here.
uniends = sorted(
    [
        'ar', 'er', 'as', 'ad', 'ed',
        'id', 'ase', 'an', 'en', 'es', '\xc3\xa9is', 'emos', 'o',
    ],
    key=len,
    reverse=True,
)
# Whole-word irregular forms: stripping an ending would leave nothing useful
# (e.g. 'ir' minus the ending 'ir' is the empty string), so their stem is
# looked up directly and ending-stripping is skipped.
_WHOLE_WORD_STEMS = {
    'ir': 'ir',
    's\xc3\xa9': 'sab',  # "se" with e-acute as a UTF-8 byte string (Python 2 str)
    's\xe9': 'sab',      # the same word as decoded text (Python 3 str)
}

# Irregular stems (what is left after the ending is removed) mapped to the
# regular stem of the same verb.
_IRREGULAR_STEMS = {
    'pued': 'pod',
    'pud': 'pod',
    'estuv': 'est',
    'cuent': 'cont',
    'tien': 'ten',
    'tuv': 'ten',
    'hiz': 'hac',
    'hag': 'hac',
    'dij': 'dec',
    'vist': 'v',
    'jueg': 'jug',
    'sup': 'sab',
    'veng': 'ven',
    'hub': 'hab',
    'h': 'hab',
}


def lem(list, endings=None):
    """Lemmatize a list of conjugated Spanish verbs.

    Exactly one stem is produced per input item, in input order.

    Args:
        list: iterable of indexable items whose element 0 is the word form
            (for example ``(word, tag)`` pairs). The name shadows the
            builtin but is kept so existing callers are unaffected.
        endings: optional iterable of verb endings to strip. When omitted,
            the module-level ``uniends`` list is used.

    Returns:
        A new list of stems. Words with no matching ending are returned
        unchanged. Assign the result to a name to use it afterwards, e.g.
        ``stems = lem(lowers)``; the internal list is local to this call.
    """
    if endings is None:
        endings = uniends
    # Try the longest ending first so a short ending can never win over a
    # longer one that also matches.
    ordered_endings = sorted(endings, key=len, reverse=True)

    lems = []
    for t in list:
        word = t[0]

        # Whole-word irregulars bypass ending-stripping entirely.
        if word in _WHOLE_WORD_STEMS:
            lems.append(_WHOLE_WORD_STEMS[word])
            continue

        stem = word  # fallback when no ending matches
        for end in ordered_endings:
            # Skip empty endings: word[:-0] would wipe out the whole word.
            if end and word.endswith(end):
                stem = word[:-len(end)]
                stem = _IRREGULAR_STEMS.get(stem, stem)
                break  # first (longest) match wins
        lems.append(stem)
    return lems
该函数返回一个列表 lems,但如果我在函数运行后尝试对 lems 做任何操作,就会收到错误:NameError: name 'lems' is not defined。我以为我已经在函数 lem(list) 的第二行定义了它,然后通过 append 向其中添加元素。怎样才能在对列表运行该函数之后,继续使用函数生成的这个列表?
答案 0 :(得分:0)
lems 只在函数 lem 内部定义;函数返回时,你没有把返回值赋给任何变量。一种做法是把 lems 声明为全局(global)变量,另一种做法是把 lem 的返回值赋给一个变量。
类似的东西:
lems = []
def lem(list): #lemmatize a list of conjugated spanish verbs
global lems
...
或
lems2 = lem(...)
我没有直接写成
lems = lem(...)
因为我不知道你代码的其他部分。