文本的情感分析

时间:2019-03-20 05:45:05

标签: python sentiment-analysis naivebayes

我有一段使用朴素贝叶斯进行情感分析的代码,它由以下三个文件组成:
  1. classifier.py->包含一个训练朴素贝叶斯分类器的函数。
  2. preprocess.py->包含一些函数,用于分词以及对负面和正面的文本进行分类。
  3. main.py->创建一个界面以显示输出。

当我尝试执行它们时,有时会得到奇怪的输出,例如对正面的陈述给出负面的输出,对负面的陈述给出正面的输出。这是我的代码。


classifier.py

import random
import preprocess
import nltk

def get_classifier():
    """Train an NLTK Naive Bayes classifier and return it.

    The labelled samples returned by ``preprocess.get_data()`` are shuffled
    in place; the first 80% are used for training and the remaining 20% for
    an accuracy measurement, which is printed before the classifier is
    returned.
    """
    samples = preprocess.get_data()
    random.shuffle(samples)

    # 80/20 train/test cut on the shuffled samples.
    cut = int(0.8 * len(samples))
    training, held_out = samples[:cut], samples[cut:]

    model = nltk.NaiveBayesClassifier.train(training)
    score = nltk.classify.util.accuracy(model, held_out)

    print("Generated Classifier")
    print('-'*70)
    print("Accuracy: ", score)
    return model

preprocess.py

import nltk.classify
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stop_words = stopwords.words("english")

def create_word_features_pos(words):
    """Return one ``({word: True}, 'positive')`` sample per non-stop-word.

    Tokens found in the module-level ``stop_words`` list are skipped; every
    other token becomes its own single-feature training sample.
    """
    samples = []
    for token in words:
        if token in stop_words:
            continue
        samples.append(({token: True}, 'positive'))
    return samples


def create_word_features_neg(words):
    """Return one ``({word: True}, 'negative')`` sample per non-stop-word.

    Tokens found in the module-level ``stop_words`` list are skipped; every
    other token becomes its own single-feature training sample.
    """
    samples = []
    for token in words:
        if token in stop_words:
            continue
        samples.append(({token: True}, 'negative'))
    return samples


def create_word_features(words):
    """Build the feature dict used to classify a tokenized review.

    Args:
        words: iterable of tokens from the review (e.g. the output of
            ``process``).

    Returns:
        A dict mapping every token of ``words`` that is not an English stop
        word and that appears in ``positive-words.txt`` or
        ``negative-words.txt`` to ``True``.
    """
    # Fetch the stop-word list once; the original called
    # stopwords.words("english") again for every token.
    stop_set = set(stopwords.words("english"))
    useful_words = {word for word in words if word not in stop_set}

    pos_txt = get_tokenized_file(u"positive-words.txt")
    neg_txt = get_tokenized_file(u"negative-words.txt")

    # The training samples built by create_word_features_pos/_neg always use
    # the value True ({word: True}).  The original marked negative words as
    # False here, a value the classifier never saw during training, so the
    # presence of a negative word did not count as evidence for 'negative'.
    # Use True for every lexicon word so the features match the training data.
    features = {}
    for lexicon in (pos_txt, neg_txt):
        for word in lexicon:
            if word in useful_words:
                features[word] = True

    return features

def get_tokenized_file(file):
    """Read a text file and return its contents as a list of word tokens.

    Args:
        file: path of the file to read (opened in text mode).

    Returns:
        The list produced by ``word_tokenize`` on the whole file content.
    """
    # The context manager closes the handle; the original never closed it.
    with open(file, 'r') as handle:
        return word_tokenize(handle.read())

def get_data():
    """Load both word lists and return the combined labelled samples.

    Tokenizes ``negative-words.txt`` and ``positive-words.txt``, turns each
    into labelled samples, and returns the positive samples followed by the
    negative ones. A progress message is printed before each file is read.
    """
    print("Collecting Negative Words")
    negative_samples = create_word_features_neg(
        get_tokenized_file(u"negative-words.txt")
    )

    print("Collecting Positive Words")
    positive_samples = create_word_features_pos(
        get_tokenized_file(u"positive-words.txt")
    )

    return positive_samples + negative_samples

def process(data):
    """Tokenize ``data`` and return the tokens converted to lower case."""
    lowered = []
    for token in word_tokenize(data):
        lowered.append(token.lower())
    return lowered

main.py

  from preprocess import create_word_features, create_word_features_neg
from preprocess import create_word_features_pos, process
from classifier import get_classifier
import nltk.classify
from tkinter import *


# Build the window: a text box for the review on top, the result label and
# the "Analyse" button at the bottom.
print("Designing UI")
root = Tk()
root.wm_title('Sentiment Analysis Application')

top_frame = Frame(root)
top_frame.pack()

bottom_frame = Frame(root)
bottom_frame.pack(side=BOTTOM)

l1 = Label(top_frame, text='Enter a review:')
l1.pack(side=LEFT)

w = Text(top_frame, height=4 )
w.pack(side=LEFT)

# One label is created up front and reused for every result.  The original
# created and packed a new Label on each click, so old results piled up in
# the window.
l2 = Label(bottom_frame)
l2.pack()

print("UI COMPLETE")
clf = get_classifier()

def main_op():
    """Classify the text currently in the review box and display the label."""
    review_spirit = w.get('1.0',END)
    demo = process(review_spirit)

    demo1 = create_word_features(demo)
    demo2 = ('review is ' + clf.classify(demo1))
    # Overwrite the previous result instead of adding another widget.
    l2.config(text=demo2)

button = Button(bottom_frame, text='Analyse', command=main_op )
button.pack(side=BOTTOM)

root.mainloop()

我将在Anaconda Spyder中运行此代码。 请帮助我整理此代码以获得准确的输出。 谢谢。

0 个答案:

没有答案