我正在尝试使用 Spark ML 的 KMeans 绘制肘部图(elbow plot)。
我已经设置了随机种子,但绘制曲线时发现,随着 k 增大,SumOfSquaredDistance 并没有像预期那样持续减小。
这是我当前得到的曲线:current_elbow
有人有什么解释吗?
/**
 * Fits a k-means model with `k` clusters on the given DataFrame.
 *
 * The estimator is configured with a fixed seed (123456789L) and reads its input
 * from the column named by `features_colname`. A progress line is printed before
 * training starts.
 *
 * @param k                number of clusters to request
 * @param df_with_features DataFrame the model is fitted on
 * @param features_colname name of the features column passed to `setFeaturesCol`
 * @return the fitted [[KMeansModel]]
 */
def getKmeansModel(k: Int, df_with_features: DataFrame, features_colname: String): KMeansModel = {
  println(s"Trains a k-means model with n of clusters: $k")
  new KMeans()
    .setK(k)
    .setSeed(123456789L)
    .setFeaturesCol(features_colname)
    .fit(df_with_features)
}
/**
 * Extracts one elbow-curve point from a fitted model.
 *
 * @param model fitted k-means model whose training summary is read
 * @return a pair of the summary's `k` and its `trainingCost`
 */
def getKmeanWithinSetSumSquaredErrors(model: KMeansModel): (Int, Double) = {
  val summary = model.summary
  summary.k -> summary.trainingCost
}
/**
 * Computes the points of an elbow curve by fitting one k-means model for every
 * `k` in the inclusive range `min_k..max_k`.
 *
 * The points are returned in ascending order of the requested `k`. The earlier
 * implementation accumulated them in a mutable hash map and called `.toList` on
 * it, which yields the entries in hash-iteration order; plotting that list as a
 * line made the curve look as if the cost was not decreasing with `k`.
 *
 * @param min_k            smallest number of clusters to try (inclusive)
 * @param max_k            largest number of clusters to try (inclusive)
 * @param df_with_features DataFrame every model is fitted on
 * @param features_colname name of the features column
 * @return one `(k, trainingCost)` pair per fitted model, ordered by requested `k`;
 *         empty when `min_k > max_k`
 */
def getElbowDf(min_k: Int, max_k: Int, df_with_features: DataFrame, features_colname: String): List[(Int, Double)] =
  // Mapping over the range keeps the k-ordering; no mutable accumulator is needed.
  (min_k to max_k).toList.map { k =>
    getKmeanWithinSetSumSquaredErrors(getKmeansModel(k, df_with_features, features_colname))
  }