我编写了一个代码,用于计算字典中值的情绪,该字典的项来自语义标签解析器。字典项嵌套在多个列表中我写了这个递归代码(有些单词是用葡萄牙语写的)
def walk(lst):
sentimentos=[]
new_sentiment={}
#saida={}
for ele in lst:
if type(ele) is list:
walk(ele)
else:
if isinstance(ele,dict):
for k, v in ele.items():
sentimentos.append(calcula_sentimento(str(ele.values())))
print(sentimentos)
flat_sentimentos = [item for sublist in sentimentos for item in sublist]
new_sentiment = {i: [j,k] for i, j, k in zip(ele.keys(), ele.values(), flat_sentimentos) }
print(new_sentiment)
return new_sentiment
执行此函数时,输出有些不稳定,但通常输出为None。数据是这样的:
[[{'A0': 'Up to 140 asylum seekers', 'V': 'overpowered', 'A1':
'security guards', 'AM-LOC': 'at the tunnel s entrance', 'AM-TMP': 'as
trains passed by dangerously close'}, {'A0': 'Up to 140 asylum
seekers', 'V': 'swarmed on', 'A1': 'to the tracks', 'AM-TMP': 'as
trains passed by dangerously close'}, {'A1': 'trains', 'V': 'passed',
'A2': 'by dangerously close'}]]
Expeced output是精简版:
[{'A0': ['Up to 140 asylum seekers', 0.34], 'V':
['overpowered',-0.034], ...}
在这里改编自git hub的calcula_sentimentos函数代码:
def calcula_sentimento(doc):
sentences = nltk.sent_tokenize(doc)
stokens = [nltk.word_tokenize(sent) for sent in sentences]
taggedlist=[]
for stoken in stokens:
taggedlist.append(nltk.pos_tag(stoken))
wnl = nltk.WordNetLemmatizer()
score_list=[]
for idx,taggedsent in enumerate(taggedlist):
score_list.append([])
for idx2,t in enumerate(taggedsent):
newtag=''
lemmatized=wnl.lemmatize(t[0])
if t[1].startswith('NN'):
newtag='n'
elif t[1].startswith('JJ'):
newtag='a'
elif t[1].startswith('V'):
newtag='v'
elif t[1].startswith('R'):
newtag='r'
else:
newtag=''
if(newtag!=''):
synsets = list(swn.senti_synsets(lemmatized, newtag))
score=0
if(len(synsets)>0):
for syn in synsets:
score+=syn.pos_score()-syn.neg_score()
score_list[idx].append(score/len(synsets))
#print(score_list)
sentence_sentiment=[]
for score_sent in score_list:
try:
sentence_sentiment.append(sum([word_score for word_score in score_sent])/len(score_sent))
except ZeroDivisionError as err:
pass #print("Escore do sentimento para:"+doc[1])
return sentence_sentiment
为什么它不稳定并且没有发出完整的输出并且没有返回?