为什么不打印NB分类器的指标? (nltk.metrics.scores)

时间:2018-03-25 01:36:19

标签: python-3.x nltk naivebayes

我正在尝试打印出我的朴素贝叶斯分类器模型的指标,但代码在所有打印行都返回 None(“无”)。我使用以下代码来打印指标,却无法确定为什么它没有返回我需要的指标值,任何帮助都将不胜感激!

import collections
from nltk.metrics.scores import (precision, recall, f_measure)

# Index sets: refsets maps gold label -> row indices, testsets maps
# predicted label -> row indices, so the nltk scorers can compare them.
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for idx, (feats, gold_label) in enumerate(train_set):
    refsets[gold_label].add(idx)
    testsets[nb_classifier.classify(feats)].add(idx)

# Report precision / recall / F per class.
for lbl in ('pos', 'neg'):
    print(lbl + ' precision:', precision(refsets[lbl], testsets[lbl]))
    print(lbl + ' recall:', recall(refsets[lbl], testsets[lbl]))
    print(lbl + ' F-measure:', f_measure(refsets[lbl], testsets[lbl]))

1 个答案:

答案 0 :(得分:0)

TL; DR

import random
from collections import Counter

from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.metrics.scores import precision, recall, f_measure

# Pair each movie review's token list with its sentiment category.
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Frequency table of every word in the corpus, lower-cased.
# Feeding Counter a generator avoids building (and then discarding)
# an intermediate list of every token.
all_words = Counter(w.lower() for w in movie_reviews.words())

def find_features(document, top_n=3000):
    """Return a bag-of-words feature dict for *document*.

    Keys are the ``top_n`` most frequent words of the corpus (taken from
    the module-level ``all_words`` Counter); each value is a boolean
    marking whether that word occurs in *document*.
    """
    # Bug fix: Counter.keys() iterates in first-insertion order, NOT by
    # frequency, so slicing it did not yield the most common words.
    # most_common() actually ranks by count.
    word_features = [w for w, _ in all_words.most_common(top_n)]
    words = set(document)
    return {w: (w in words) for w in word_features}

def train_test_split(documents, random_seed=0, split_on=0.95, top_n=3000):
    """Reproducibly shuffle *documents* and split into feature sets.

    Returns ``(training_set, testing_set)``: lists of
    ``(features_dict, label)`` pairs built via ``find_features``, split
    at the ``split_on`` fraction.
    """
    # Bug fix: shuffle a copy so the caller's list is not mutated
    # in place as a side effect.
    docs = list(documents)
    random.Random(random_seed).shuffle(docs)
    featuresets = [(find_features(rev, top_n), category) for (rev, category) in docs]
    cut = int(len(featuresets) * split_on)
    return featuresets[:cut], featuresets[cut:]

training_set, testing_set = train_test_split(documents)

实际的分类器训练和评估:

# Bug fix: defaultdict is used below but this snippet only imported
# Counter from collections, so it raised NameError as shown.
from collections import defaultdict

# Train the Naive Bayes model on the training portion.
nb = NaiveBayesClassifier.train(training_set)

# predictions: predicted label -> set of test-row indices.
# gold_labels: true label     -> set of test-row indices.
predictions, gold_labels = defaultdict(set), defaultdict(set)

for i, (features, label) in enumerate(testing_set):
    predictions[nb.classify(features)].add(i)
    gold_labels[label].add(i)

# nltk's scorers take (reference_set, test_set) and return None when a
# set is empty -- evaluating on the *test* split per label avoids that.
for label in predictions:
    print(label, 'Precision:', precision(gold_labels[label], predictions[label]))
    print(label, 'Recall:', recall(gold_labels[label], predictions[label]))
    print(label, 'F1-Score:', f_measure(gold_labels[label], predictions[label]))
    print()

[OUT]:

neg Precision: 0.803921568627451
neg Recall: 0.9534883720930233
neg F1-Score: 0.8723404255319148

pos Precision: 0.9591836734693877
pos Recall: 0.8245614035087719
pos F1-Score: 0.8867924528301887