以下代码在 KMeans 中测试多个 n_clusters 取值,并尝试通过惯性(inertia)准则找到“最佳”的 n_clusters。但是,它的结果不可复现:即使固定了 random_state,每次我在同一数据集上调用 kmeans(df) 时,它都会生成不同的聚类,甚至是不同的 n_clusters。我是不是遗漏了什么?
import numpy as np
from sklearn.cluster import KMeans
from tqdm import tqdm_notebook
def kmeans(df, *, start=3, end=40, random_state=10):
    """Fit KMeans for a range of cluster counts and return the "elbow" model.

    One model is fitted for every ``k`` in ``range(start, end)``. The selected
    ``k`` is the one where the discrete second derivative of the inertia
    curve (``np.gradient`` applied twice) is largest.

    Args:
        df: Object exposing the samples as ``df.values`` (e.g. a pandas
            DataFrame); that array is passed to ``KMeans.fit``.
        start: Smallest number of clusters to try (inclusive).
        end: Upper bound on the number of clusters (exclusive).
        random_state: Seed forwarded to every ``KMeans`` instance.

    Returns:
        The fitted ``KMeans`` instance for the selected number of clusters.

    Raises:
        ValueError: If ``range(start, end)`` holds fewer than two candidates,
            since ``np.gradient`` needs at least two points.
    """
    if end - start < 2:
        raise ValueError(
            "need at least two candidate cluster counts: "
            f"got start={start}, end={end}"
        )

    data = df.values
    models = {}
    inertia = []
    for n_clusters in tqdm_notebook(range(start, end)):
        # n_jobs is intentionally not passed: it was deprecated in
        # scikit-learn 0.23 and removed in 1.0, where it raises TypeError.
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            n_init=50,
            random_state=random_state,
        ).fit(data)
        inertia.append(model.inertia_)
        models[n_clusters] = model

    # NOTE(review): an argmax over a second difference can flip between
    # neighbouring k when inertia values change slightly -- TODO confirm
    # whether this explains any run-to-run variation in the chosen k.
    curvature = np.gradient(np.gradient(np.array(inertia)))
    # Position i in the inertia list corresponds to k = start + i.
    best_k = int(np.argmax(curvature)) + start
    return models[best_k]