您好,我正在尝试使用 Python 2.x 中的朴素贝叶斯(Naive Bayes)分类器进行情感分析。它从 txt 文件中读取评论,然后根据样本 txt 文件中的情感标注,将输出显示为正面或负面。我希望输出与输入的形式相同:例如,假设我有一个包含 1000 条原始评论的文本文件,我希望输出能显示其中每一条评论是正面还是负面。请帮忙。以下是我正在使用的代码:
import math
import string
def Naive_Bayes_Classifier(positive, negative, total_negative, total_positive, test_string):
    """Classify ``test_string`` as negative (0) or positive (1).

    Each whitespace-separated token is lower-cased and stripped of ASCII
    punctuation, then its per-class weight is multiplied into that class's
    score. The score is finally weighted by the class prior.

    Args:
        positive: dict mapping a normalised word to its weight in the
            positive class.
        negative: dict mapping a normalised word to its weight in the
            negative class.
        total_negative: number of negative training examples.
        total_positive: number of positive training examples.
        test_string: raw text to classify.

    Returns:
        0 when the negative class scores strictly higher, otherwise 1
        (ties go to the positive class).

    Side effects:
        Prints the normalised ``[negative, positive]`` scores.
    """
    tables = (negative, positive)
    scores = [1.0, 1.0]
    for raw_word in test_string.split():
        # Portable replacement for the Python 2-only
        # ``str.translate(None, string.punctuation)``.
        word = "".join(
            ch for ch in raw_word.lower() if ch not in string.punctuation
        ).strip()
        # A word absent from BOTH classes carries no evidence; multiplying
        # both scores by zero would only force a division by zero below.
        if word not in negative and word not in positive:
            continue
        for label, table in enumerate(tables):
            scores[label] *= table.get(word, 0.0)

    total_examples = total_negative + total_positive
    if total_examples:
        priors = [
            float(total_negative) / total_examples,
            float(total_positive) / total_examples,
        ]
        prob_values = [
            scores[0] * float(total_negative) / total_examples,
            scores[1] * float(total_positive) / total_examples,
        ]
    else:
        # No training data at all: treat both classes as equally likely.
        priors = [0.5, 0.5]
        prob_values = [scores[0] * 0.5, scores[1] * 0.5]

    total_prob = sum(prob_values)
    if total_prob:
        prob_values = [value / total_prob for value in prob_values]
    else:
        # Every class was zeroed out (conflicting one-class words); fall back
        # to the priors instead of raising ZeroDivisionError.
        prob_values = priors

    # Parenthesised single argument prints identically on Python 2 and 3.
    print(prob_values)
    return 0 if prob_values[0] > prob_values[1] else 1
if __name__ == '__main__':
    # Train from a text file holding one review per line, where the last
    # whitespace-separated token is the integer class label
    # (0 = negative, anything else = positive), then classify one review
    # typed by the user.
    vocabulary = {}
    positive = {}
    negative = {}
    TOTAL_WORDS = 0
    total_negative = 0
    total_positive = 0

    # Preprocessing of training set
    # ``with`` guarantees the training file is closed even if parsing fails.
    with open(r'C:/Users/documents/sample.txt') as sentiment:
        for line in sentiment:
            words = line.split()
            if not words:
                # Blank lines carry no label; skipping avoids an IndexError.
                continue
            y = int(words[-1].strip())
            if y == 0:
                total_negative += 1
                class_counts = negative
            else:
                total_positive += 1
                class_counts = positive
            # The final token is the label, not review text, so it must not
            # be counted as a word of the class it names.
            for raw_word in words[:-1]:
                # Portable replacement for the Python 2-only
                # ``str.translate(None, string.punctuation)``.
                word = "".join(
                    ch for ch in raw_word.lower()
                    if ch not in string.punctuation
                ).strip()
                # Purely numeric tokens are kept out of the vocabulary.
                if word in vocabulary:
                    vocabulary[word] += 1
                    TOTAL_WORDS += 1
                elif not word.isdigit():
                    vocabulary[word] = 1
                    TOTAL_WORDS += 1
                # Training
                class_counts[word] = class_counts.get(word, 0) + 1

    # Turn raw counts into relative weights. Class weights are occurrences
    # divided by the number of examples of that class.
    for word in vocabulary:
        vocabulary[word] = float(vocabulary[word]) / TOTAL_WORDS
    for word in positive:
        positive[word] = float(positive[word]) / total_positive
    for word in negative:
        negative[word] = float(negative[word]) / total_negative

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    test_string = read_line("Enter the review: \n")
    classifier = Naive_Bayes_Classifier(positive, negative, total_negative, total_positive, test_string)
    if classifier == 0:
        print("Negative review")
    else:
        print("Positive review")
答案 0 :(得分:1)
我已经查看了您在评论中发布的 GitHub 仓库。我尝试运行该项目,但遇到了一些错误。
无论如何,我已经检查了项目结构以及用于训练朴素贝叶斯算法的文件,我认为可以使用以下代码将结果数据写入 Excel 文件(即.xls):
import xlsxwriter

# Classify every line of the input file and collect (text, label) pairs.
# Relies on the trained ``positive``/``negative`` tables and class totals
# built by the training script, and on ``Naive_Bayes_Classifier``.
data_to_be_written = []
with open("test11.txt") as f:
    for line in f:
        text = line.rstrip("\n")
        if not text.strip():
            continue  # nothing to classify on a blank line
        classifier = Naive_Bayes_Classifier(positive, negative, total_negative, total_positive, text)
        # The classifier returns 0 for a negative review, 1 for a positive one.
        result = 'Negative' if classifier == 0 else 'Positive'
        data_to_be_written.append((text, result))

# Create a workbook and add a worksheet.
# NOTE(review): XlsxWriter produces the XLSX format; a '.xlsx' file name is
# presumably the safer choice here -- confirm against the XlsxWriter docs.
workbook = xlsxwriter.Workbook('test.xls')
worksheet = workbook.add_worksheet()

# Rows and columns are zero indexed: review text in column 0, label in column 1.
col = 0
for row, (text, result) in enumerate(data_to_be_written):
    worksheet.write(row, col, text)
    worksheet.write(row, col + 1, result)

workbook.close()
简而言之,对于包含待测试句子的文件中的每一行,我都会调用分类器,并准备一个稍后写入 Excel 文件的数据结构。
然后循环结构并编写xls文件。
为此,我使用了一个名为 xlsxwriter 的 Python 第三方包(site-package)。
正如我之前告诉过你的那样,我在运行项目时遇到了一些问题,所以这段代码也没有经过测试。它应该运作良好,无论如何,如果你遇到麻烦,请告诉我。
此致
答案 1 :(得分:0)
import xlsxwriter

# Classify every line of the input file once and keep (text, label) pairs.
# Relies on the trained ``positive``/``negative`` tables and class totals
# built by the training script, and on ``Naive_Bayes_Classifier``.
labelled = []
with open("test11.txt") as f:
    for line in f:
        text = line.rstrip("\n")
        if not text.strip():
            continue  # nothing to classify on a blank line
        classifier = Naive_Bayes_Classifier(positive, negative, total_negative, total_positive, text)
        # The classifier returns 0 for a negative review, 1 for a positive one.
        labelled.append((text, 'Negative' if classifier == 0 else 'Positive'))

# The input handle is read-only, so the labelled lines go to a separate file
# instead of being written back through ``f``.
with open("test11_labelled.txt", "w") as out:
    for text, label in labelled:
        out.write(text + '\t' + label + '\n')

# Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('test.xls')
worksheet = workbook.add_worksheet()

# Rows and columns are zero indexed: review text in column 0, label in column 1.
col = 0
for row, (text, label) in enumerate(labelled):
    worksheet.write(row, col, text)
    worksheet.write(row, col + 1, label)

workbook.close()