我正在使用python 3.6.3。所有包都已成功导入。我使用的是32位版本。我收到了这个内存错误:
Traceback (most recent call last):
File "C:/Users/Lokesh/PycharmProjects/NLP/Training.py", line 103, in <module>
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set)) * 100)
File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\classify\util.py", line 87, in accuracy
results = classifier.classify_many([fs for (fs, l) in gold])
File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\classify\scikitlearn.py", line 85, in classify_many
X = self._vectorizer.transform(featuresets)
File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\feature_extraction\dict_vectorizer.py", line 291, in transform
return self._transform(X, fitting=False)
File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\feature_extraction\dict_vectorizer.py", line 171, in _transform
indices.append(vocab[f])
MemoryError
这是代码。使用 MNB 分类器时,错误发生在倒数第三行(即打印 MNB_classifier 准确率的那一行):
class VoteClassifier(ClassifierI):
def __init__(self, *classifiers):
    """Remember the classifiers whose votes will be combined.

    Args:
        *classifiers: Any number of classifier objects. They are kept,
            as the tuple received, on ``self._classifiers``.
    """
    self._classifiers = classifiers
def classify(self, features):
    """Return the label that ``mode`` picks from the classifiers' votes.

    Args:
        features: Passed unchanged to ``classify`` of every object in
            ``self._classifiers``.

    Returns:
        ``mode`` of the collected votes.
    """
    ballots = [voter.classify(features) for voter in self._classifiers]
    return mode(ballots)
def confidence(self, features):
    """Return the share of classifiers that voted for the winning label.

    Args:
        features: Passed unchanged to ``classify`` of every object in
            ``self._classifiers``.

    Returns:
        Number of votes equal to ``mode`` of all votes, divided by the
        total number of votes.
    """
    ballots = [voter.classify(features) for voter in self._classifiers]
    winner = mode(ballots)
    return ballots.count(winner) / len(ballots)
# Record every review with its label and collect the lower-cased words whose
# part-of-speech tag starts with one of ``allowed_word_types``.  Both corpora
# go through the same loop so their handling cannot drift apart.
for corpus, label in ((short_pos, "pos"), (short_neg, "neg")):
    for p in corpus.split('\n'):
        documents.append((p, label))
        words = word_tokenize(p)
        pos = nltk.pos_tag(words)
        all_words.extend(
            token.lower() for token, tag in pos if tag[0] in allowed_word_types
        )

all_words = nltk.FreqDist(all_words)

# Keep the 5000 most frequent words as the feature vocabulary.  Slicing
# ``keys()`` does not do that: FreqDist keys are not ordered by count, so the
# slice returned an arbitrary (on CPython 3.6: first-seen, i.e. mostly
# positive-review) subset of the words.
word_features = [word for word, _count in all_words.most_common(5000)]
def find_features(document):
    """Build the presence/absence feature dict for one document.

    Args:
        document: Text to featurize; it is tokenized with ``word_tokenize``.

    Returns:
        A dict with one key per entry of the module-level ``word_features``,
        whose value is ``True`` when that word is among the document's
        tokens and ``False`` otherwise.
    """
    # Build the token set once: each of the ``word_features`` lookups is then
    # a hash probe instead of a scan over the whole token list.
    present = set(word_tokenize(document))
    return {w: (w in present) for w in word_features}
def _chunked_accuracy(model, gold, chunk_size=1000):
    """Return the fraction of ``gold`` items that ``model`` labels correctly.

    Computes the same ratio as evaluating all of ``gold`` at once, but hands
    the model at most ``chunk_size`` feature dicts per ``classify_many`` call.
    The traceback for this script shows the MemoryError being raised while
    the whole test set was vectorized in a single call; chunking bounds the
    size of that intermediate allocation.

    Args:
        model: Object exposing ``classify_many(list_of_featuresets)``.
        gold: Sequence of ``(featureset, label)`` pairs.
        chunk_size: Maximum number of feature sets per call.

    Returns:
        correct / len(gold), or 0 when ``gold`` is empty.
    """
    if not gold:
        return 0
    correct = 0
    for start in range(0, len(gold), chunk_size):
        chunk = gold[start:start + chunk_size]
        guesses = model.classify_many([fs for fs, _label in chunk])
        correct += sum(
            guess == label for guess, (_fs, label) in zip(guesses, chunk)
        )
    return correct / len(gold)


featuresets = [(find_features(rev), category) for (rev, category) in documents]
random.shuffle(featuresets)
print(len(featuresets))

# The first 500 shuffled items train the models; everything else is held out.
testing_set = featuresets[500:]
training_set = featuresets[:500]

classifier = nltk.NaiveBayesClassifier.train(training_set)
print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
classifier.show_most_informative_features(15)

MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
# Evaluate in chunks so the vectorizer never has to transform the whole
# held-out set at once.
# NOTE(review): ``featuresets`` itself is still large; on a 32-bit interpreter
# a 64-bit Python may be needed as well -- confirm on the target machine.
print("MNB_classifier accuracy percent:", _chunked_accuracy(MNB_classifier, testing_set) * 100)