我在 doctopic = clf.fit_transform(dtm) 这一行遇到了"索引超出范围"(IndexError: index out of bounds)的错误。我的 Data 文件夹中有两个 CSV 文件。有人可以解释一下如何修复这个索引错误吗?
import os

import numpy as np
import langdetect
from stop_words import get_stop_words

# Show the current working directory (handy when running interactively).
print(os.getcwd())

# Folder that holds the corpus files.
CORPUS_PATH = 'C:\\Users\\mike120\\Downloads\\TM 09-25\\Data'

# Build the sorted list of full paths to the corpus files.
# Keep regular files only: os.listdir() also returns sub-directories, and
# those cannot be opened later when the paths are read as documents.
filenames = sorted(
    path
    for path in (os.path.join(CORPUS_PATH, fn) for fn in os.listdir(CORPUS_PATH))
    if os.path.isfile(path)
)

# Bare expressions: only displayed in an interactive session / notebook.
len(filenames)
filenames[:5]
import sklearn.feature_extraction.text as text

# Language detection is currently disabled; English stop words are always used.
# NOTE(review): langdetect.detect() analyses text content, so passing the
# folder path would not detect the corpus language -- confirm before enabling.
#lang = langdetect.detect(CORPUS_PATH)
lang_stop = get_stop_words('en')

# input='filename' makes the vectorizer open and read each path itself.
# min_df=2 drops every term that occurs in fewer than two documents.
vectorizer = text.CountVectorizer(input='filename', stop_words=lang_stop, min_df=2)

# Dense document-term matrix: one row per file, one column per kept term.
dtm = vectorizer.fit_transform(filenames).toarray()

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old method on older releases.
if hasattr(vectorizer, 'get_feature_names_out'):
    vocab = np.array(vectorizer.get_feature_names_out())
else:
    vocab = np.array(vectorizer.get_feature_names())

# Bare expression: only displayed in an interactive session / notebook.
dtm.shape
from sklearn import decomposition

num_topics = 20
num_top_words = 20

# The SVD-based (NNDSVD) initialisation can only provide
# min(n_documents, n_terms) components. Requesting more -- e.g. 20 topics
# from a 2-document corpus -- makes older scikit-learn releases fail with
# "IndexError: index out of bounds" inside fit_transform(). Fall back to
# random initialisation in that case, which is what current scikit-learn
# documents as its default behaviour.
# NOTE(review): with fewer documents than topics the factorisation is not
# very meaningful; consider adding documents or lowering num_topics.
nmf_init = None if num_topics <= min(dtm.shape) else 'random'

clf = decomposition.NMF(n_components=num_topics, init=nmf_init, random_state=1)

# Document-topic matrix: one row per document, one column per topic.
doctopic = clf.fit_transform(dtm)