如何将 sklearn 的 LDA 模型输出保存到 CSV?它不像 gensim 的 LDA 模型那样提供 show_topics 命令。
def selected_topics(model, vectorizer, top_n=10):
    """Print the ``top_n`` highest-weighted terms of every topic in ``model``.

    For each topic a ``Topic <idx>:`` header is printed, followed by a list
    of ``(term, weight)`` pairs ordered from the largest weight down.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight arrays (each must support ``argsort`` and indexing).
        vectorizer: Object exposing ``get_feature_names_out()`` or the
            older ``get_feature_names()``; the returned sequence is indexed
            by term position.
        top_n: Number of terms to print per topic.
    """
    # Prefer the current scikit-learn accessor and fall back to the legacy
    # one, so the function works regardless of the installed version.
    get_names = getattr(vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    # Resolve the vocabulary once instead of once per printed term.
    feature_names = get_names()

    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        # argsort is ascending, so the reversed tail slice walks the top_n
        # largest weights first. str() keeps the printed term a plain string
        # even when the vocabulary is a NumPy array.
        print([(str(feature_names[i]), topic[i])
               for i in topic.argsort()[:-top_n - 1:-1]])
这对打印很有帮助,但是如何将这些结果保存到csv?
答案 0 :(得分:0)
def selected_topics(model, vectorizer, top_n=10):
    """Return the ``top_n`` highest-weighted terms of every topic.

    A ``Topic <idx>:`` header is still printed for each topic, as in the
    original snippet.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight arrays (each must support ``argsort``).
        vectorizer: Object exposing ``get_feature_names_out()`` or the
            older ``get_feature_names()``; the returned sequence is indexed
            by term position.
        top_n: Number of terms to keep per topic.

    Returns:
        dict mapping ``"Topic<idx>"`` to that topic's terms joined by single
        spaces, ordered from the largest weight down.
    """
    # Prefer the current scikit-learn accessor and fall back to the legacy
    # one, so the function works regardless of the installed version.
    get_names = getattr(vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    # Resolve the vocabulary once instead of once per selected term.
    feature_names = get_names()

    results = {}
    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        # argsort is ascending, so the reversed tail slice walks the top_n
        # largest weights first.
        top_terms = (str(feature_names[i])
                     for i in topic.argsort()[:-top_n - 1:-1])
        results["Topic" + str(idx)] = " ".join(top_terms)
    return results
您可以先将结果写入 JSON,然后再写入 CSV 文件:
import json,csv
# Collect the top keywords per topic; selected_topics is expected to return a
# dict such as {"Topic0": "word word ...", ...}.
results = selected_topics(model, vectorizer, top_n=10)

# Persist the results as JSON first. The context manager guarantees the file
# is flushed and closed before it is read back.
with open(outputFile, "w", encoding="utf-8") as res_file:
    json.dump(results, res_file)

# Re-open by *path*: the original passed the closed file object to open().
with open(outputFile, encoding="utf-8") as json_file:
    data = json.load(json_file)

# csv.writer needs a writable file object (not a filename); newline="" stops
# the csv module's own line endings from being translated a second time.
with open("output_csv.csv", "w", newline="", encoding="utf-8") as csv_file:
    output = csv.writer(csv_file)
    # `data` is a single dict, so the topic ids form the header row and the
    # keyword strings form the one data row beneath it.
    output.writerow(data.keys())  # header row
    output.writerow(data.values())
如果这对您没有帮助,请告诉我。
答案 1 :(得分:0)
我自己找到了一个解决方案。运行循环对我有用。
def show_topics(vectorizer=vectorizer, lda_model=lda, n_words=20):
    """Return the ``n_words`` highest-weighted keywords of every topic.

    Note that the ``vectorizer`` and ``lda_model`` defaults are bound to the
    module-level ``vectorizer`` and ``lda`` objects at the moment this
    function is defined.

    Args:
        vectorizer: Object exposing ``get_feature_names_out()`` or the
            older ``get_feature_names()``.
        lda_model: Object exposing ``components_``, an iterable of per-topic
            weight arrays.
        n_words: Number of keywords to keep per topic.

    Returns:
        list with one NumPy array of keywords per topic, ordered from the
        largest weight down.
    """
    # Prefer the current scikit-learn accessor and fall back to the legacy
    # one, so the function works regardless of the installed version.
    get_names = getattr(vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    keywords = np.asarray(get_names())

    topic_keywords = []
    for topic_weights in lda_model.components_:
        # Negating the weights makes the ascending argsort rank the largest
        # weights first.
        top_keyword_locs = (-topic_weights).argsort()[:n_words]
        topic_keywords.append(keywords.take(top_keyword_locs))
    return topic_keywords
# Fetch the 15 strongest keywords of each topic (one array per topic).
topic_keywords = show_topics(
    vectorizer=vectorizer,
    lda_model=lda,
    n_words=15,
)

# Topic - Keywords Dataframe: one row per topic, one column per keyword rank.
df_topic_keywords = pd.DataFrame(topic_keywords)
df_topic_keywords.columns = [
    "Word %d" % col for col in range(df_topic_keywords.shape[1])
]
df_topic_keywords.index = [
    "Topic %d" % row for row in range(df_topic_keywords.shape[0])
]
# Bare expression: displays the frame when run in a notebook/REPL cell.
df_topic_keywords
答案 2 :(得分:-1)
您可以先创建一个 pandas DataFrame,(通过循环)将 LDA 模型的结果保存到该 DataFrame 中,然后再将其导出为 csv 文件。
import pandas as pd
import csv
# Wrap `savedresults` in a DataFrame and write it straight to a CSV file.
# NOTE(review): `savedresults` is not defined in the visible code — presumably
# the LDA results collected in a loop beforehand; confirm against the caller.
pd.DataFrame(savedresults).to_csv("all_model_ouput.csv")