# Read each wiki file's full text so doc_complete holds document strings, not file handles
file0 = open('wiki0.txt', 'r')
content0 = file0.read()
file1 = open('wiki1.txt', 'r')
content1 = file1.read()
file2 = open('wiki2.txt', 'r')
content2 = file2.read()
file3 = open('wiki3.txt', 'r')
content3 = file3.read()
file4 = open('wiki4.txt', 'r')
content4 = file4.read()
doc_complete = [content0, content1, content2, content3, content4]
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
no_features = 1000  # cap on vocabulary size; adjust as needed
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2,
                                max_features=no_features, stop_words='english')
tf = tf_vectorizer.fit_transform(doc_complete)
tf_feature_names = tf_vectorizer.get_feature_names()
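As a quick sanity check (illustrative only, not part of the failing cell), I expected to be able to inspect the document-term matrix like this:

# Sketch: look at the matrix shape and a few vocabulary terms
print(tf.shape)               # (number of documents, number of terms kept)
print(tf_feature_names[:10])  # first few terms in the vocabulary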
from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer
NUM_TOPICS = 4
vectorizer = CountVectorizer(min_df=5, max_df=0.9,
                             stop_words='english', lowercase=True,
                             token_pattern=r'[a-zA-Z\-][a-zA-Z\-]{2,}')
data_vectorized = vectorizer.fit_transform(doc_complete)
# n_components is the current name of the older n_topics argument
lda_model = LatentDirichletAllocation(n_components=NUM_TOPICS, max_iter=10,
                                      learning_method='online')
lda_Z = lda_model.fit_transform(data_vectorized)
print(lda_Z.shape)
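Once the fit works, the plan is to list the top words per topic from lda_model.components_, roughly like the sketch below (it reuses the vectorizer and lda_model variables from the code above):

# Sketch: print the 10 highest-weighted words for each topic
words = vectorizer.get_feature_names()
for topic_idx, topic in enumerate(lda_model.components_):
    top_words = [words[i] for i in topic.argsort()[:-11:-1]]
    print("Topic %d: %s" % (topic_idx, ", ".join(top_words)))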
I am getting the following error. Can you please help me?
ImportError                               Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
      2 tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, max_features=no_features, stop_words='english')
      3 tf = tf_vectorizer.fit_transform(doc_complete)
      4 tf_feature_names = tf_vectorizer.get_feature_names()

C:\Users\律annu\AppData\Local\Enthought\Canopy\User\Lib\site-packages\sklearn\__init__.py in <module>()
    131     # process, as it may not be compiled yet
    132 else:
--> 133     from . import __check_build
    134     from .base import clone
    135     __check_build  # avoid flakes unused variable error

ImportError: cannot import name __check_build