以下是对电影短评进行情感分类的代码。
# Build the labelled corpus and the vocabulary used as classifier features.
# `documents` collects (review_text, label) pairs; `all_words` collects the
# lower-cased tokens whose POS tag starts with an allowed letter.
documents = []
all_words = []
# Presumably selects adjectives (Penn Treebank tags JJ/JJR/JJS start with
# 'J') -- confirm against the tagset nltk.pos_tag is configured with.
allowed_words_types = ['J']
# Both corpora go through the same pipeline. Positives are processed first,
# then negatives, so the order of `documents` is the same as with two loops.
for corpus, label in ((short_pos, "pos"), (short_neg, "neg")):
    for p in corpus.split('\n'):
        documents.append((p, label))
        for token, tag in nltk.pos_tag(word_tokenize(p)):
            if tag[0] in allowed_words_types:
                all_words.append(token.lower())
all_words = nltk.FreqDist(all_words)
# Keep the 5000 most frequent words. Slicing list(all_words.keys()) would
# keep whichever 5000 keys come first in iteration order, regardless of
# how often they occur.
words_features = [word for word, _count in all_words.most_common(5000)]
def find_features(document):
    """Map a review to boolean presence features over ``words_features``.

    Args:
        document: Raw review text; it is tokenized with ``word_tokenize``.

    Returns:
        A dict with one key per entry of ``words_features``; the value is
        True when that word occurs among the document's tokens.
    """
    # The vocabulary is stored lower-cased, so tokens are lower-cased too;
    # otherwise capitalised occurrences (e.g. at sentence start) never match.
    # A set makes each of the per-feature membership tests O(1).
    tokens = {token.lower() for token in word_tokenize(document)}
    return {w: (w in tokens) for w in words_features}
# Convert every labelled review into a (feature dict, label) pair.
featuresets = [(find_features(text), label) for text, label in documents]
# `documents` lists all positives before all negatives, so shuffle in place
# before slicing to mix both labels across the split.
random.shuffle(featuresets)
print(len(featuresets))
# The first 100 examples train the model; everything after is held out.
training_set, testing_set = featuresets[:100], featuresets[100:]
classifier = nltk.NaiveBayesClassifier.train(training_set)
我目前只计算出了准确率,但不知道如何得到 ROC 曲线和混淆矩阵。如果有人能帮忙,将非常有帮助。谢谢。
# Report the NLTK Naive Bayes accuracy on the held-out set, as a percentage.
print(
    " Original Naive Bayes Algo accuracy percent : ",
    nltk.classify.accuracy(classifier, testing_set) * 100,
)
# Train a MultinomialNB model wrapped in SklearnClassifier on the same
# training data and report its accuracy the same way.
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
print(
    "MNB_classifier accuracy percent : ",
    nltk.classify.accuracy(MNB_classifier, testing_set) * 100,
)
# Combine both trained classifiers; VoteClassifier is defined outside this
# excerpt.
voted_classifier = VoteClassifier(classifier, MNB_classifier)
def sentiment(text):
    """Classify ``text`` with the module-level ``voted_classifier``.

    Args:
        text: Raw review text; it is converted with ``find_features``.

    Returns:
        A ``(label, confidence)`` tuple holding the results of
        ``voted_classifier.classify`` and ``voted_classifier.confidence``
        for the extracted features.
    """
    features = find_features(text)
    label = voted_classifier.classify(features)
    confidence = voted_classifier.confidence(features)
    return label, confidence
答案 0 :(得分:0)
您可以使用 sklearn 的 metrics(度量)模块获取混淆矩阵,其中 y_true 是测试集的真实标签,y_pred 是分类器对测试集的预测标签:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_true, y_pred)
更多信息 http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
同样,ROC 曲线可以使用 sklearn.metrics 中的 roc_curve 计算,详见:
http://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html