执行以下代码时,我收到了错误 "AttributeError: 'GaussianNB' object has no attribute 'accuracy_score'"(GaussianNB 对象没有属性 accuracy_score):
# Train a Gaussian Naive Bayes classifier on the preprocessed email data
# and print its accuracy on the test split.
import sys
from time import time
# Make the helper module that lives in ../tools/ importable.
sys.path.append("../tools/")
from email_preprocess import preprocess
# preprocess() returns the train/test features and the train/test labels.
features_train, features_test, labels_train, labels_test = preprocess()
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(features_train, labels_train)
pred=clf.predict(features_test)
from sklearn.metrics import accuracy_score
# NOTE: this is the line that raises the reported AttributeError:
# accuracy_score was imported as a standalone function above, but here it is
# looked up as an attribute of the classifier object clf.
print clf.accuracy_score(pred, labels_test)
email_preprocess.py 的内容如下:
import pickle
import cPickle
import numpy
from sklearn import cross_validation
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectPercentile, f_classif
def preprocess(words_file = "../tools/word_data.pkl",
authors_file="../tools/email_authors.pkl"):
authors_file_handler = open(authors_file, "r")
authors = pickle.load(authors_file_handler)
authors_file_handler.close()
words_file_handler = open(words_file, "r")
word_data = cPickle.load(words_file_handler)
words_file_handler.close()
features_train, features_test, labels_train, labels_test =
cross_validation.train_test_split(word_data, authors,
test_size=0.5,random_state=42)
vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
stop_words='english')
features_train_transformed = vectorizer.fit_transform(features_train)
features_test_transformed = vectorizer.transform(features_test)
selector = SelectPercentile(f_classif, percentile=10)
selector.fit(features_train_transformed, labels_train)
features_train_transformed =
selector.transform(features_train_transformed).toarray()
features_test_transformed =
selector.transform(features_test_transformed).toarray()
print "no. of Chris training emails:", sum(labels_train)
print "no. of Sara training emails:", len(labels_train)-
sum(labels_train)
return features_train_transformed, features_test_transformed,
labels_train, labels_test
有人可以帮我吗?
答案 0 :(得分:0)
你有:
from sklearn.metrics import accuracy_score
# accuracy_score is looked up on the clf object here, which is what raises
# the AttributeError.
print clf.accuracy_score(pred, labels_test)
您需要删除前面的 clf. —— 它会让 Python 尝试在 GaussianNB 实例中查找 accuracy_score,而 accuracy_score 是从 sklearn.metrics 导入的独立函数,并不是分类器的方法。
尝试:
from sklearn.metrics import accuracy_score
# Call the imported function directly instead of as an attribute of clf.
# NOTE(review): the documented argument order is (y_true, y_pred); plain
# accuracy is symmetric, so the printed value is the same either way.
print accuracy_score(pred, labels_test)