Question

我无法从我输入的句子中生成语义情绪。我用pos标记。我在yml文件中有一些单词，如happy，positive，sad等。我想在yml文件中添加每个单词的计数，并显示最大的计数作为心情。通过在函数def value_of（情绪）中分别返回+1和-1，我可以产生正极性，如果它是负的或正的，但我不想要那样。我想要的是我无法产生的情绪。请帮忙。这是整个代码：

from pprint import pprint
import nltk
import yaml
import sys
import os
import re
import json
import ast



class Splitter(object):

def __init__(self):
    self.nltk_splitter = nltk.data.load('tokenizers/punkt/english.pickle')
    self.nltk_tokenizer = nltk.tokenize.TreebankWordTokenizer()

def split(self, text):

    sentences = self.nltk_splitter.tokenize(text)
    tokenized_sentences = [self.nltk_tokenizer.tokenize(sent) for sent in sentences]
    return tokenized_sentences


class POSTagger(object):

def __init__(self):
    pass

def pos_tag(self, sentences):


    pos = [nltk.pos_tag(sentence) for sentence in sentences]
    #adapt format
    pos = [[(word, word, [postag]) for (word, postag) in sentence] for sentence in pos]
    return pos

class DictionaryTagger(object):

def __init__(self, dictionary_paths):
    files = [open(path, 'r') for path in dictionary_paths]
    dictionaries = [yaml.load(dict_file) for dict_file in files]
    map(lambda x: x.close(), files)
    self.dictionary = {}
    self.max_key_size = 0
    for curr_dict in dictionaries:
        for key in curr_dict:
            if key in self.dictionary:
                self.dictionary[key].extend(curr_dict[key])
            else:
                self.dictionary[key] = curr_dict[key]
                self.max_key_size = max(self.max_key_size, len(key))

def tag(self, postagged_sentences):
    return [self.tag_sentence(sentence) for sentence in postagged_sentences]

def tag_sentence(self, sentence, tag_with_lemmas=False):

    tag_sentence = []
    N = len(sentence)
    if self.max_key_size == 0:
        self.max_key_size = N
    i = 0
    while (i < N):
        j = min(i + self.max_key_size, N) #avoid overflow
        tagged = False
        while (j > i):
            expression_form = ' '.join([word[0] for word in sentence[i:j]]).lower()
            expression_lemma = ' '.join([word[1] for word in sentence[i:j]]).lower()
            if tag_with_lemmas:
                literal = expression_lemma
            else:
                literal = expression_form
            if literal in self.dictionary:
                #self.logger.debug("found: %s" % literal)
                is_single_token = j - i == 1
                original_position = i
                i = j
                taggings = [tag for tag in self.dictionary[literal]]

                tagged_expression = (expression_form, expression_lemma, taggings)
                if is_single_token: #if the tagged literal is a single token, conserve its previous taggings:
                    original_token_tagging = sentence[original_position][2]
                    tagged_expression[2].extend(original_token_tagging)
                tag_sentence.append(tagged_expression)
                tagged = True
            else:
                j = j - 1
        if not tagged:
            tag_sentence.append(sentence[i])
            i += 1
    return tag_sentence



def value_of(sentiment):
global count_happy
global count_positive
global count_negative
count_positive = 0
count_negative = 0
count_happy = 0
if sentiment == 'happy' : return count_happy+1

if sentiment == 'positive' : return count_positive+1

if sentiment == 'negative' : return count_negative+1

return 0



def sentence_score(sentence_tokens, previous_token, acum_score):
if not sentence_tokens:
    return acum_score
else:
    current_token = sentence_tokens[0]
    tags = current_token[2]
    token_score = sum([value_of(tag) for tag in tags])
    if previous_token is not None:
        previous_tags = previous_token[2]
        if 'inc' in previous_tags:
            token_score *= 2.0
        elif 'dec' in previous_tags:
            token_score /= 2.0
        elif 'inv' in previous_tags:
            token_score *= -1.0
    return sentence_score(sentence_tokens[1:], current_token, acum_score + token_score)


def sentiment_score(review):
return sum([sentence_score(sentence, None, 0.0) for sentence in review])

if __name__ == "__main__":
text = """This school is expensive. i am very excited. cool"""

splitter = Splitter()
postagger = POSTagger()
dicttagger = DictionaryTagger([ 'dicts/positive.yml', 'dicts/negative.yml', 
                                'dicts/inc.yml', 'dicts/dec.yml', 'dicts/inv.yml', 'dicts/happy.yml'])

splitted_sentences = splitter.split(text)
pprint(splitted_sentences)

pos_tagged_sentences = postagger.pos_tag(splitted_sentences)
pprint(pos_tagged_sentences)

dict_tagged_sentences = dicttagger.tag(pos_tagged_sentences)
pprint(dict_tagged_sentences)

print("analyzing sentiment...")
score = sentiment_score(dict_tagged_sentences)
print(score)
print(count_negative)
print(count_positive)
print(count_happy)
if count_happy > count_positive and count_happy>count_negative: print('Happy')
if count_positive> count_negative and count_positive>count_happy: print("positive")
if count_negative > count_positive and count_negative > count_happy: print("negative")
else: print ("Neutral")

如何从下面的代码生成语义情绪？

0 个答案: