我尝试在应用程序中实例化类之后调用 get_tfidf 方法。
考虑以下脚本:
text2vec.py
class text2vec:
    """Turn a list of raw text documents into TF-IDF vectors.

    Construction immediately preprocesses ``doc_list`` and stores the
    results on the instance:

    * ``doc_list`` -- the documents exactly as passed in,
    * ``nlp`` -- the loaded spaCy pipeline,
    * ``docs`` -- one ``_lemmatize_doc`` result per input document,
    * ``docs_dict`` -- the dictionary built from ``docs``.

    NOTE(review): ``_lemmatize_doc`` and the imports for ``spacy``, ``np``,
    ``Dictionary``, ``TfidfModel`` and ``sparse2full`` are not part of this
    excerpt; the latter three are presumably gensim's -- confirm.
    """

    def __init__(self, doc_list):
        """Store ``doc_list`` and run preprocessing on it right away."""
        self.doc_list = doc_list
        pipeline, lemmatized, dictionary = self._preprocess(self.doc_list)
        self.nlp = pipeline
        self.docs = lemmatized
        self.docs_dict = dictionary

    def get_tfidf(self):
        """Return the TF-IDF vectors of the preprocessed documents.

        Each entry of ``self.docs`` is converted to bag-of-words with
        ``self.docs_dict``, weighted by a ``TfidfModel`` fitted on that same
        corpus, expanded with ``sparse2full`` to ``len(self.docs_dict)``
        and stacked with ``np.vstack`` (one row per document).
        """
        dictionary = self.docs_dict
        bow_corpus = [dictionary.doc2bow(tokens) for tokens in self.docs]
        tfidf_model = TfidfModel(bow_corpus, id2word=dictionary)
        weighted_corpus = tfidf_model[bow_corpus]
        dense_rows = [
            sparse2full(sparse_vec, len(dictionary))
            for sparse_vec in weighted_corpus
        ]
        return np.vstack(dense_rows)

    def _get_docs_dict(self, docs):
        """Build, filter and compact a ``Dictionary`` from ``docs``."""
        dictionary = Dictionary(docs)
        # CAREFUL: on a small corpus these thresholds may need tuning, or
        # the filter_extremes call can simply be removed.
        dictionary.filter_extremes(no_below=5, no_above=0.2)
        dictionary.compactify()
        return dictionary

    def _preprocess(self, doc_list):
        """Load the spaCy model, lemmatize every document, build the dictionary.

        Returns:
            A ``(nlp, docs, docs_dict)`` tuple: the loaded pipeline, the list
            of ``_lemmatize_doc`` results, and the dictionary built from it.
        """
        pipeline = spacy.load('en_core_web_sm')
        # Run each raw document through the pipeline, then lemmatize it.
        lemmatized = [
            self._lemmatize_doc(pipeline(raw_text)) for raw_text in doc_list
        ]
        dictionary = self._get_docs_dict(lemmatized)
        return pipeline, lemmatized, dictionary
main.py
# Driver script: build a text2vec instance and request its TF-IDF vectors.
# NOTE(review): in a notebook session, re-running `import text2vec` does not
# re-read an edited text2vec.py; if the file changed after the first import,
# the running class may be stale (see importlib.reload) -- worth confirming
# when an attribute set in __init__ is reported missing.
import text2vec
# NOTE(review): doc_list is not defined in this snippet -- presumably it is
# created earlier in the session; confirm.
t2v = text2vec.text2vec(doc_list)
# Compute the TF-IDF vectors for the documents passed to the constructor.
docs_tfidf = t2v.get_tfidf()
但是我得到这个错误:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-23-04f280ed20cf> in <module>()
1 # Use TFIDF
----> 2 docs_tfidf = t2v.get_tfidf()
~/SageMaker/sentences matching/text2vec.py in get_tfidf(self)
84 # Get TF-IDF vector for document list
85 def get_tfidf(self):
---> 86 docs_corpus = [self.docs_dict.doc2bow(doc) for doc in self.docs]
87 model_tfidf = TfidfModel(docs_corpus, id2word=self.docs_dict)
88 docs_tfidf = model_tfidf[docs_corpus]
AttributeError: 'text2vec' object has no attribute 'docs'
有什么想法吗?
谢谢