我有以下代码:
# Cluster the titles in user1/user1_complete_titles.txt with TF-IDF features
# and k-means, then print the ten highest-weighted terms of every cluster.
from matplotlib import pyplot
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics.pairwise import cosine_similarity

# Read all lines up front so the file handle is closed deterministically;
# each line of the file is treated as one document.
with open("user1/user1_complete_titles.txt", 'r') as titles_file:
    titles1 = titles_file.readlines()

# min_df=1 keeps every term that occurs in at least one document, which is the
# same vocabulary the original integer 0 selected.
# NOTE(review): recent scikit-learn releases validate parameters and are
# believed to reject an integer min_df of 0 -- confirm against the installed
# version.
vectorizer = TfidfVectorizer(max_df=0.9, max_features=500, min_df=1, use_idf=True)
X1 = vectorizer.fit_transform(titles1)
print(X1.shape)

# get_feature_names() was removed from newer scikit-learn releases in favour of
# get_feature_names_out(); fall back to the old name on older installations.
try:
    terms = vectorizer.get_feature_names_out()
except AttributeError:
    terms = vectorizer.get_feature_names()

# Pairwise cosine distance between documents. Nothing below reads it; it is
# kept as a module-level name for any later code (e.g. plotting).
dist = 1 - cosine_similarity(X1)

true_k = 10
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=500, n_init=100)
model.fit(X1)

print("Top terms per cluster:")
# For every centroid, feature indices sorted by descending centroid weight.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
for i in range(true_k):
    # One output line per cluster: the header followed by its top ten terms.
    # (The Python 2 trailing-comma / bare-print idiom does not do this under
    # Python 3.)
    top_terms = "".join(' %s' % terms[ind] for ind in order_centroids[i, :10])
    print("Cluster %d:" % i + top_terms)
此代码成功打印出了 10 个簇,以及每个簇中权重最高的 10 个词项。但是,我该如何把这些簇绘制出来?此外,是否有办法把聚类逐步收敛到最优状态的过程也绘制出来?谢谢。