这是我第一次使用gensim包运行LDA模型,问题发生如下:
我昨晚训练并将lda模型保存到本地文件'lda.model',当我尝试使用run()函数在新的,看不见的文档上进行主题分发:
self.lda = models.LdaMulticore.load('./lda_model/lda.model')
print self.lda[self.corpus_tfidf]
然后发生错误!
AttributeError: 'LdaMulticore' object has no attribute 'minimum_probability'
以下是我的源代码,你能帮助我吗?
# -*- coding: utf-8 -*-
#@author: chenbjin
#@time: 2015-08-3
import jieba, os, logging
from base_func import *
from gensim import corpora, models, similarities
class LDAModel(object):
"""docstring for LDAModel"""
def __init__(self, train_data='./data/train_set.txt'):
super(LDAModel, self).__init__()
self.dic = None
self.corpus = None
self.tfidf_model = None
self.corpus_tfidf = None
self.lda_model = None
self.train_set = load_train_set(train_data)
self.test_set = None
def train(self):
self.dic = corpora.Dictionary(self.train_set)
self.corpus = [ self.dic.doc2bow(text) for text in self.train_set ]
self.tfidf_model = models.TfidfModel.load('./tfidf_model/tfidf.model')
#self.tfidf_model = models.TfidfModel(self.corpus)
#self.tfidf_model.save('./tfidf_model/tfidf.model')
self.corpus_tfidf = self.tfidf_model[self.corpus]
self.lda_model = models.LdaMulticore(self.corpus_tfidf, id2word = self.dic, num_topics = 50)
self.lda_model.save('./lda_model/lda.model')
def run(self, test_set='./data/test_set.txt'):
self.test_set = load_train_set(test_set)
self.dic = corpora.Dictionary(self.test_set)
self.corpus = [ self.dic.doc2bow(text) for text in self.test_set ]
self.tfidf_model = models.TfidfModel.load('./tfidf_model/tfidf.model')
self.corpus_tfidf = self.tfidf_model[self.corpus]
self.lda = models.LdaMulticore.load('./lda_model/lda.model')
print self.lda[self.corpus_tfidf]
def main():
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
lda = LDAModel(train_data=None)
lda.run()
if __name__ == '__main__':
main()