用 JavaScript 实现的 k 均值聚类算法能够收敛,但结果不稳定

时间:2019-06-04 15:55:31

标签: javascript

虽然算法能够收敛,但每次刷新页面重新运行时,得到的结果都不相同——即使观测数据是同一个数据集,也会出现这种情况。谁能告诉我我的做法错在哪里?我无论如何也找不出这个过程到底是哪里出了问题。

// Returns true only when both this.state.username and this.state.password have length > 0.
// NOTE(review): this line appears unrelated to the k-means code that follows — confirm it belongs here.
validateForm = () => this.state.username.length > 0 && this.state.password.length > 0;

实现(调用示例,代码片段不完整):     '''     var k50 = new jsi.kmeans2(5,Array50,canvas,function(con,centroids){             var count50 = 0;

'''
/**
 * k-means clusterer over 2-D points. Construction stores the inputs and
 * immediately starts clustering by calling `init()`.
 *
 * @param {number} k - Number of clusters (centroids) to compute.
 * @param {Array} data - Observations; each is indexed as [x, y] by the methods on the prototype.
 * @param {*} canvas - Passed to `jsHS.GetDimensions`; the result is stored on `this.canvas`.
 * @param {Function} converge - Callback stored on `this.converge`.
 */
function kmeans2(k, data, canvas, converge) {
    // Property order is kept identical to the sequence of plain assignments it replaces.
    Object.assign(this, {
        canvas: jsHS.GetDimensions(canvas),
        k: k,
        centroids: [],          // current centroids
        centroids2compare: [],  // centroids of the previous iteration
        data: data,
        converge: converge
    });
    this.init();
}
/**
 * Euclidean distance between 2-D points.
 *
 * Points are consumed in consecutive pairs: (points[0], points[1]),
 * (points[2], points[3]), ... The squared x/y differences of every pair are
 * summed and the square root of that total is returned. With exactly two
 * points this is the ordinary Euclidean distance between them.
 *
 * Only indices 0 and 1 of each point are read.
 *
 * @param {...Array<number>} points - Points given as [x, y].
 * @returns {number} Square root of the summed squared differences; 0 when
 *     fewer than two points are supplied. A trailing unpaired point is ignored.
 */
kmeans2.prototype.distance = function (...points) {
    let sumOfSquares = 0;
    // `i += 2` (not `i + 2`): the step must actually advance, otherwise any
    // call with more than two points never terminates.
    // `i + 1 < length` keeps an odd trailing point from being dereferenced.
    for (let i = 0; i + 1 < points.length; i += 2) {
        const dx = points[i][0] - points[i + 1][0];
        const dy = points[i][1] - points[i + 1][1];
        sumOfSquares += dx * dx + dy * dy;
    }
    return Math.sqrt(sumOfSquares);
};
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {ArrayLike<number>} Array - Values to average (note: the parameter
 *     name shadows the global `Array` inside this function).
 * @returns {number} Sum of the values divided by `Array.length`; NaN for an
 *     empty list (0 / 0).
 */
kmeans2.prototype.Means = function (Array) {
    // Borrow reduce from an array literal so array-likes are accepted too.
    const total = [].reduce.call(Array, (runningSum, value) => runningSum + value, 0);
    return total / Array.length;
};
/**
 * Picks the k starting centroids from `this.data` and starts clustering.
 *
 * Seeds are drawn WITHOUT replacement (distinct data indices), so the same
 * observation is never chosen twice while k <= data.length. Drawing with
 * replacement could seed two identical centroids, which leaves one cluster
 * empty on the first assignment pass.
 *
 * The result of k-means depends on these seeds, so different runs can settle
 * on different clusterings. Pass a deterministic `random` function to get
 * reproducible results.
 *
 * @param {function(): number} [random=Math.random] - Returns a number in
 *     [0, 1); used as the only source of randomness.
 */
kmeans2.prototype.init = function (random = Math.random) {
    const size = this.data.length;
    const indices = Array.from({ length: size }, (_, i) => i);

    for (let l = 0; l < this.k; l++) {
        let pick;
        if (l < size) {
            // Partial Fisher-Yates shuffle: swap a random not-yet-used index
            // into position l, so the first k positions are all distinct.
            const j = l + Math.floor(random() * (size - l));
            [indices[l], indices[j]] = [indices[j], indices[l]];
            pick = indices[l];
        } else {
            // More clusters requested than observations: repeats are unavoidable.
            pick = Math.floor(random() * size);
        }
        this.centroids.push(this.data[pick]);
    }

    this.clusterCentroids(); // iterate until the centroids stop moving
};
/**
 * Runs the k-means iteration until the centroids stop changing, then calls
 * `this.converge(centroidBin, this.centroids)`.
 *
 * Each pass:
 *   1. assigns every observation to its nearest centroid (ties go to the
 *      lowest centroid index),
 *   2. moves each centroid to the mean x / mean y of its assigned observations,
 *   3. compares the new centroids with the previous ones.
 *
 * `centroidBin` is an array with one entry per centroid; each entry is the
 * list of `{ id, datapoint, centroid }` records assigned to that centroid,
 * where `id` is the index into `this.data`.
 *
 * A centroid that ends up with no observations keeps its previous position.
 * Averaging an empty list would give NaN, and every comparison against a NaN
 * distance is false, so a NaN centroid at index 0 would capture all points.
 *
 * Implemented as a loop rather than self-recursion so a slow convergence
 * cannot exhaust the call stack.
 */
kmeans2.prototype.clusterCentroids = function () {
    for (;;) {
        const previous = this.centroids;
        this.centroids2compare = previous;

        // Assignment step: put each observation in the bin of its nearest centroid.
        const centroidBin = [];
        for (let c = 0; c < this.k; c++) {
            centroidBin.push([]);
        }
        for (let d = 0; d < this.data.length; d++) {
            const point = this.data[d];
            let nearest = 0;
            let nearestDistance = this.distance(previous[0], point);
            for (let c = 1; c < this.k; c++) {
                const dis = this.distance(previous[c], point);
                if (dis < nearestDistance) {
                    nearest = c;
                    nearestDistance = dis;
                }
            }
            centroidBin[nearest].push({ 'id': d, 'datapoint': point, 'centroid': nearest });
        }

        // Update step: move each centroid to the mean of its members.
        const next = centroidBin.map((members, index) => {
            if (members.length === 0) {
                return previous[index]; // empty cluster: stay put instead of becoming NaN
            }
            const xAxis = members.map((member) => member.datapoint[0]);
            const yAxis = members.map((member) => member.datapoint[1]);
            return [this.Means(xAxis), this.Means(yAxis)];
        });
        this.centroids = next;

        // Convergence test: stop once a pass leaves every centroid unchanged.
        if (JSON.stringify(previous) === JSON.stringify(next)) {
            this.converge(centroidBin, this.centroids);
            return;
        }
    }
};
// Publish the constructor on the existing window.jsHS object.
window.jsHS.kmeans2 = kmeans2;
'''

'''

此示例能够把质心正确地绘制到画布区域上,但每次刷新浏览器后,质心的位置都会改变。

1 个答案:

答案 0 :(得分:0)

我没有仔细看代码,但我知道 k-means 算法在多次运行时会给出不同的结果。这是因为它高度依赖于初始质心(随机选择)的位置。该算法可能找到一个局部最小值,并“卡”在那里终止;无法保证首次运行就能找到全局最小值。