我有一段使用朴素贝叶斯进行情感分析的代码，共包含三个文件：
classifier.py -> 包含朴素贝叶斯分类器的函数。
preprocess.py -> 包含一些用于分词以及对负面和正面文本进行分类的函数。
main.py -> 创建一个界面来显示输出。
当我运行它们时，有时会得到奇怪的结果：例如，正面的语句被判定为负面，而负面的语句被判定为正面。以下是我的代码。
classifier.py
import random
import preprocess
import nltk
def get_classifier():
    """Train and return an NLTK Naive Bayes classifier.

    Loads the labelled examples from ``preprocess.get_data()``, shuffles them
    in place, trains on the first 80% and measures accuracy on the remaining
    20%. The accuracy is printed to stdout before the classifier is returned.
    """
    samples = preprocess.get_data()
    random.shuffle(samples)

    # Everything before the cutoff is training data, the rest is held out.
    cutoff = int(len(samples) * 0.8)
    training, held_out = samples[:cutoff], samples[cutoff:]

    model = nltk.NaiveBayesClassifier.train(training)
    score = nltk.classify.util.accuracy(model, held_out)

    print("Generated Classifier")
    print('-' * 70)
    print("Accuracy: ", score)
    return model
preprocess.py
import nltk.classify
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# English stop words from the NLTK stopwords corpus, loaded once at import
# time; create_word_features_pos/neg use it to drop tokens before labelling.
stop_words = stopwords.words("english")
def create_word_features_pos(words):
    """Turn tokens into 'positive' training examples.

    Each token of ``words`` that is not in ``stop_words`` becomes one
    ``({token: True}, 'positive')`` pair; the pairs are returned as a list in
    the order the tokens were given.
    """
    labelled = []
    for token in words:
        if token in stop_words:
            continue
        labelled.append(({token: True}, 'positive'))
    return labelled
def create_word_features_neg(words):
    """Turn tokens into 'negative' training examples.

    Each token of ``words`` that is not in ``stop_words`` becomes one
    ``({token: True}, 'negative')`` pair; the pairs are returned as a list in
    the order the tokens were given.
    """
    labelled = []
    for token in words:
        if token in stop_words:
            continue
        labelled.append(({token: True}, 'negative'))
    return labelled
def create_word_features(words):
    """Build the feature dict used to classify one tokenized review.

    Keeps the tokens of ``words`` that are not English stop words and that
    appear in ``positive-words.txt`` or ``negative-words.txt``, and maps each
    of them to ``True``.

    The training examples built by ``create_word_features_pos`` and
    ``create_word_features_neg`` always have the shape ``{word: True}``.
    This function used to emit negative words as ``{word: False}``, a feature
    value that never occurs in the training data, so negative words in a
    review could not match what the classifier had learned for the
    'negative' label. Both lexicons now use ``True`` so the features line up
    with training.

    Args:
        words: iterable of token strings (the caller passes lower-cased
            tokens from ``process``).

    Returns:
        dict mapping every retained lexicon word to ``True``.
    """
    # Build the stop-word set once instead of re-reading the corpus list for
    # every token, and use sets so the membership tests below are O(1).
    english_stop_words = set(stopwords.words("english"))
    useful_words = {word for word in words if word not in english_stop_words}

    pos_txt = get_tokenized_file(u"positive-words.txt")
    neg_txt = get_tokenized_file(u"negative-words.txt")

    features = {word: True for word in pos_txt if word in useful_words}
    features.update({word: True for word in neg_txt if word in useful_words})
    return features
def get_tokenized_file(file):
    """Read a text file and return its contents split into word tokens.

    Args:
        file: path of the text file to read (opened in text mode with the
            platform default encoding, as before).

    Returns:
        The result of ``word_tokenize`` applied to the whole file content.
    """
    # The context manager closes the handle even if reading fails; the
    # previous one-liner left the file open until garbage collection.
    with open(file, 'r') as handle:
        text = handle.read()
    return word_tokenize(text)
def get_data():
    """Load both word lexicons and return all labelled training examples.

    Reads ``negative-words.txt`` and ``positive-words.txt``, converts their
    tokens into labelled examples, and returns the positive examples followed
    by the negative ones. A progress message is printed before each file is
    processed.
    """
    print("Collecting Negative Words")
    negatives = create_word_features_neg(
        get_tokenized_file(u"negative-words.txt")
    )

    print("Collecting Positive Words")
    positives = create_word_features_pos(
        get_tokenized_file(u"positive-words.txt")
    )

    return positives + negatives
def process(data):
    """Tokenize ``data`` with ``word_tokenize`` and lower-case every token."""
    tokens = word_tokenize(data)
    return list(map(str.lower, tokens))
main.py
from preprocess import create_word_features, create_word_features_neg
from preprocess import create_word_features_pos, process
from classifier import get_classifier
import nltk.classify
from tkinter import *
# Build the main window: a top frame holding the prompt label and the review
# text box, and a bottom frame that main_op and the Analyse button pack into.
print("Designing UI")
root = Tk()
root.wm_title('Sentiment Analysis Application')
top_frame = Frame(root)
top_frame.pack()
bottom_frame = Frame(root)
bottom_frame.pack(side=BOTTOM)
l1 = Label(top_frame, text='Enter a review:')
l1.pack(side=LEFT)
# Multi-line input widget; main_op reads the review text from it.
w = Text(top_frame, height=4 )
w.pack(side=LEFT)
print("UI COMPLETE")
# Train the classifier once at start-up, before the event loop starts;
# main_op reuses it for every review.
clf = get_classifier()
def main_op():
    """Classify the review currently in the text box and display the verdict.

    Reads the full content of the ``w`` text widget, tokenizes it with
    ``process``, converts the tokens to features with
    ``create_word_features`` and shows ``'review is <label>'`` in the bottom
    frame.
    """
    review_text = w.get('1.0', END)
    tokens = process(review_text)
    features = create_word_features(tokens)
    verdict = 'review is ' + clf.classify(features)

    # Reuse one result label across clicks. Creating and packing a new Label
    # on every call made old verdicts pile up in the window.
    result_label = getattr(main_op, 'result_label', None)
    if result_label is None:
        result_label = Label(bottom_frame)
        result_label.pack()
        main_op.result_label = result_label
    result_label.config(text=verdict)
# The Analyse button runs main_op on each click.
button = Button(bottom_frame, text='Analyse', command=main_op )
button.pack(side=BOTTOM)
# Start the Tk event loop.
root.mainloop()
我在 Anaconda Spyder 中运行这段代码。请帮我修正代码，使其能够输出准确的结果。谢谢。