我在项目中使用 K-Means 聚类,但运行耗时很长,迟迟得不到结果。下面的代码展示了我的用法,请问有人能帮我优化一下吗?先谢谢了!
# For each document: cluster its encoded rows with K-Means, pick the row
# nearest to each cluster centre, and join the corresponding entries of
# sentence[i] ordered by the mean position of each cluster's members.
# NOTE(review): presumably enc_sentence[i] is a 2-D array of sentence
# embeddings aligned with sentence[i] -- confirm against the caller.
# NOTE(review): the run time is most likely dominated by the KMeans fit,
# which is repeated once per document. If it is too slow, a smaller n_init
# or MiniBatchKMeans may help, but either can change the clustering result,
# so neither is applied here; check the scikit-learn docs before switching.
for i in range(n_sentence):
    enc_email = enc_sentence[i]
    n_points = len(enc_email)
    if n_points == 0:
        # Zero rows would give n_clusters == 0, which KMeans cannot fit;
        # an empty document simply gets an empty summary.
        summary[i] = ''
        continue

    # Square-root heuristic: ceil(sqrt(number of rows)) clusters.
    n_clusters = int(np.ceil(n_points ** 0.5))
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(enc_email)

    # Index of the row of enc_email closest to each cluster centre.
    closest, _ = pairwise_distances_argmin_min(
        kmeans.cluster_centers_, enc_email
    )

    # Mean row index of the members of each cluster. A cluster with no
    # members (possible when rows are duplicated) is skipped: its mean
    # would be NaN, and NaN keys make the sort order unreliable.
    avg = {}
    for j in range(n_clusters):
        idx = np.where(kmeans.labels_ == j)[0]
        if idx.size:
            avg[j] = np.mean(idx)

    # Clusters ordered by where their members sit in the document.
    ordering = sorted(avg, key=avg.get)
    summary[i] = ' '.join(sentence[i][closest[j]] for j in ordering)
print('Clustering Finished')