I am importing the scikit-learn package in Python 3.6.3 and I get a MemoryError

Asked: 2017-10-18 14:27:40

Tags: python-3.x scikit-learn

I am using Python 3.6.3, the 32-bit build. All packages import successfully, but when I run my script I get the MemoryError shown below.
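For reference, here is a minimal check (assuming a standard CPython install; not part of my actual script) to confirm which build is running, since pointer size determines how much memory a single process can address:

import struct, sys

# A 32-bit CPython build reports 4-byte pointers and sys.maxsize == 2**31 - 1.
print(struct.calcsize("P") * 8, "bit interpreter")
print(sys.maxsize)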

Traceback (most recent call last):
  File "C:/Users/Lokesh/PycharmProjects/NLP/Training.py", line 103, in <module>
    print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set)) * 100)
  File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\classify\util.py", line 87, in accuracy
    results = classifier.classify_many([fs for (fs, l) in gold])
  File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\nltk\classify\scikitlearn.py", line 85, in classify_many
    X = self._vectorizer.transform(featuresets)
  File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\feature_extraction\dict_vectorizer.py", line 291, in transform
    return self._transform(X, fitting=False)
  File "C:\Users\Lokesh\AppData\Local\Programs\Python\Python36-32\lib\site-packages\sklearn\feature_extraction\dict_vectorizer.py", line 171, in _transform
    indices.append(vocab[f])
MemoryError
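For context, nltk's SklearnClassifier wraps a scikit-learn DictVectorizer (those are the scikitlearn.py and dict_vectorizer.py frames above), and classify_many converts every featureset dict in the test set into one matrix in a single call; that is where the allocation fails. A minimal sketch of the same conversion, using hypothetical toy data rather than my featuresets:

from sklearn.feature_extraction import DictVectorizer

# Toy featuresets shaped like the output of find_features() below:
# one True/False entry per word feature.
toy = [{"good": True, "bad": False},
       {"good": False, "bad": True}]

vec = DictVectorizer(sparse=True)
X = vec.fit_transform(toy)   # builds the indices/values lists seen in the traceback
print(X.shape)               # (2, 2)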

Here is the code. The error occurs on the last line, where the MNB classifier is evaluated:
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)

    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)

        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf

for p in short_pos.split('\n'):
    documents.append((p, "pos"))
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())

for p in short_neg.split('\n'):
    documents.append((p, "neg"))
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())


all_words = nltk.FreqDist(all_words)

word_features = list(all_words.keys())[:5000]

def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)

    return features


featuresets = [(find_features(rev), category) for (rev, category) in documents]

random.shuffle(featuresets)
print(len(featuresets))

testing_set = featuresets[500:]
training_set = featuresets[:500]

classifier = nltk.NaiveBayesClassifier.train(training_set)
print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set)) * 100)
classifier.show_most_informative_features(15)

MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set)) * 100)

0 Answers:

There are no answers yet.