我正在尝试打印朴素贝叶斯分类器模型的各项指标,但所有打印行输出的都是 None。我使用以下代码来打印指标,但无法确定它为什么不返回我需要的指标值,非常感谢任何帮助!
import collections
from nltk.metrics.scores import (precision, recall, f_measure)
# Per-label index sets: refsets holds the items whose true label is the key,
# testsets holds the items the classifier assigned to the key.
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(train_set):
    refsets[label].add(i)
    observed = nb_classifier.classify(feats)
    testsets[observed].add(i)

# NOTE(review): per the NLTK docs, precision() yields None when the test set
# is empty, recall() yields None when the reference set is empty, and
# f_measure() yields None when either of those is None. An all-None report
# therefore suggests that no item is labelled or classified as exactly
# 'pos' / 'neg' -- verify the label strings actually used in train_set.
for label in ('pos', 'neg'):
    print(f'{label} precision:', precision(refsets[label], testsets[label]))
    print(f'{label} recall:', recall(refsets[label], testsets[label]))
    print(f'{label} F-measure:', f_measure(refsets[label], testsets[label]))
答案 0 :(得分:0)
import random
from collections import Counter, defaultdict

from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from nltk.metrics.scores import precision, recall, f_measure
# One (token list, category) pair per review file in the corpus, grouped by
# category in the order movie_reviews.categories() yields them.
documents = [
    (list(movie_reviews.words(file_id)), sentiment)
    for sentiment in movie_reviews.categories()
    for file_id in movie_reviews.fileids(sentiment)
]
# Frequency table of every lower-cased token in the corpus. Feeding a
# generator straight into Counter avoids building a throwaway list first.
all_words = Counter(w.lower() for w in movie_reviews.words())
def find_features(document, top_n=3000):
    """Build a bag-of-words presence feature dict for one document.

    Args:
        document: Iterable of tokens making up the document.
        top_n: Number of vocabulary words (keys of the module-level
            ``all_words``) to use as features.

    Returns:
        A dict mapping each selected vocabulary word to ``True`` if it occurs
        in ``document`` and ``False`` otherwise.
    """
    # NOTE(review): this takes the first ``top_n`` keys in all_words'
    # iteration order, not the ``top_n`` most frequent words
    # (``all_words.most_common(top_n)``) -- confirm which one is intended
    # before changing it, since the selection affects the reported scores.
    word_features = list(all_words.keys())[:top_n]
    # A set gives constant-time membership checks for the lookups below.
    words = set(document)
    return {w: (w in words) for w in word_features}
def train_test_split(documents, random_seed=0, split_on=0.95, top_n=3000):
    """Shuffle labelled documents and split them into train/test feature sets.

    Args:
        documents: Sequence of ``(tokens, category)`` pairs.
        random_seed: Seed for a private RNG, so the shuffle is reproducible
            and independent of the global ``random`` state.
        split_on: Fraction of the shuffled documents that goes into the
            training set; the remainder becomes the testing set.
        top_n: Passed through to ``find_features`` as its ``top_n`` argument.

    Returns:
        A ``(training_set, testing_set)`` tuple; each element is a list of
        ``(features, category)`` pairs.
    """
    # Shuffle a copy so the caller's list keeps its original order; the
    # resulting permutation is the same as shuffling the list in place.
    shuffled = list(documents)
    random.Random(random_seed).shuffle(shuffled)

    featuresets = [
        (find_features(rev, top_n), category) for rev, category in shuffled
    ]
    split_on_int = int(len(featuresets) * split_on)
    training_set = featuresets[:split_on_int]
    testing_set = featuresets[split_on_int:]
    return training_set, testing_set
# Build both feature sets using the function's defaults (seed 0, 95% training split).
training_set, testing_set = train_test_split(documents)
实际的分类器训练和评估:
# Train on the training split; everything below scores the held-out split.
nb = NaiveBayesClassifier.train(training_set)

# For each label, collect the indices of the test items predicted as that
# label (predictions) and of the test items truly carrying it (gold_labels).
predictions, gold_labels = defaultdict(set), defaultdict(set)
for i, (features, label) in enumerate(testing_set):
    predictions[nb.classify(features)].add(i)
    gold_labels[label].add(i)

# Only labels that were predicted at least once are reported here.
for label in predictions:
    print(label, 'Precision:', precision(gold_labels[label], predictions[label]))
    print(label, 'Recall:', recall(gold_labels[label], predictions[label]))
    print(label, 'F1-Score:', f_measure(gold_labels[label], predictions[label]))
    print()
[OUT]:
neg Precision: 0.803921568627451
neg Recall: 0.9534883720930233
neg F1-Score: 0.8723404255319148
pos Precision: 0.9591836734693877
pos Recall: 0.8245614035087719
pos F1-Score: 0.8867924528301887