获取每个聚类(簇)的成员 - mahout

时间:2012-11-04 21:22:47

标签: mahout

    // Load the list of numbers from the CSV file through the example helper.
    KMeansExample example = new KMeansExample();
    ArrayList<Integer> numbers = example.readData("c2010.csv");

    // Split the numbers into k vectors; these same vectors are later passed
    // to the clusterer as the points to cluster.
    int k = 3;
    List<Vector> points = divideData(numbers, k);

    // Seed one initial cluster per vector, with ids assigned sequentially from 0.
    List<Cluster> initialClusters = new ArrayList<>();
    int nextClusterId = 0;
    for (Vector point : points) {
        initialClusters.add(new Cluster(point, nextClusterId, new EuclideanDistanceMeasure()));
        nextClusterId++;
    }

    // Run Mahout's KMeansClusterer on the vectors and print what it returns.
    // NOTE(review): the trailing arguments 3 and 50 are presumably the maximum
    // number of iterations and the convergence threshold -- confirm against the
    // Mahout version in use.
    List<List<Cluster>> clusteringResult = KMeansClusterer.clusterPoints(
            points, initialClusters, new EuclideanDistanceMeasure(), 3, 50);
    example.printFinalClusters(clusteringResult);


    其中 printFinalClusters 的定义如下:

    /**
     * Prints every cluster in the given nested list to standard output,
     * one line per cluster, using {@code Cluster.asFormatString()}.
     *
     * @param fc nested list of clusters to print; outer and inner lists are
     *           walked in order
     */
    private void printFinalClusters(List<List<Cluster>> fc) {
        for (int outer = 0; outer < fc.size(); outer++) {
            List<Cluster> group = fc.get(outer);
            for (int inner = 0; inner < group.size(); inner++) {
                String formatted = group.get(inner).asFormatString();
                System.out.println(formatted);
            }
        }
    }

输出结果如下:

  

CL-0:   {67:247.0,66:193.0,65:271.0,64:171.0,63:158.0,62:396.0,61:407.0,60:432.0,59:200.0,58:791.0,57:406.0,56:205.0,55 :55.0,54:394.0,53:230.0,52:38.0,51:508.0,50:414.0,49:52.0,48:572.0,47:121.0,46:724.0,45:1421.0,44:706.0,43:603.0 ,42:525.0,41:181.0,40:476.0,39:642.0,38:761.0,37:340.0,36:1273.0,35:1692.0,34:687.0,33:497.0,32:91.0,31:309.0,30 :1534.0,29:299.0,28:961.0,27:656.0,26:310.0,25:483.0,24:796.0,23:247.0,22:682.0,21:1348.0,20:1394.0,19:368.0,18:491.0 ,17:263.0,16:1117.0,15:1792.0,14:398.0,13:185.0,12:457.0,11:505.0,10:297.0,9:919.0,8:723.0,7:203.0,6:2566.0,5 :450.0,4:404.0,3:459.0,2:254.0,1:177.0,0:269.0} CL-1:   {67:193.0,66:271.0,65:171.0,64:158.0,63:396.0,62:407.0,61:432.0,60:200.0,59:791.0,58:406.0,57:205.0,56:55.0,55 :394.0,54:230.0,53:38.0,52:5​​08.0,51:414.0,50:52.0,49:572.0,48:121.0,47:724.0,46:1421.0,45:706.0,44:603.0,43:525.0 ,42:181.0,41:476.0,40:642.0,39:761.0,38:340.0,37:1273.0,36:1692.0,35:687.0,34:497.0,33:91.0,32:309.0,31:1534.0,30 :299.0,29:961.0,28:656.0,27:310.0,26:483.0,25:796.0,24:247.0,23:682.0,22:1348.0,21:1394.0,20:368.0,19:491.0,18:263.0 ,17:1117.0,16:1792.0,15:398.0,14:185.0,13:457.0,12:505.0,11:297.0,10:919.0,9:723.0,8:203.0,7:2566.0,6:450.0,5 :404.0,4:459.0,3:254.0,2:177.0,1:269.0,0:247.0} CL-2:   {67:271.0,66:171.0,65:158.0,64:396.0,63:407.0,62:432.0,61:200.0,60:791.0,59:406.0,58:205.0,57:55.0,56:394.0,55 :230.0,54:38.0,53:508.0,52:414.0,51:52.0,50:572.0,49:121.0,48:724.0,47:1421.0,46:706.0,45:603.0,44:525.0,43:181.0 ,42:476.0,41:642.0,40:761.0,39:340.0,38:1273.0,37:1692.0,36:687.0,35:497.0,34:91.0,33:309.0,32:1534.0,31:299.0,30 :961.0,29:656.0,28:310.0,27:483.0,26:796.0,25:247.0,24:682.0,23:1348.0,22:1394.0,21:368.0,20:491.0,19:263.0,18:1117.0 ,17:1792.0,16:398.0,15:185.0,14:457.0,13:505.0,12:297.0,11:919.0,10:723.0,9:203.0,8:2566.0,7:450.0,6:404.0,5 :459.0,4:254.0,3:177.0,2:269.0,1:247.0,0:193.0} VL-0:   {67:247.0,66:193.0,65:271.0,64:171.0,63:158.0,62:396.0,61:407.0,60:432.0,59:200.0,58:791.0,57:406.0,56:205.0,55 
:55.0,54:394.0,53:230.0,52:38.0,51:508.0,50:414.0,49:52.0,48:572.0,47:121.0,46:724.0,45:1421.0,44:706.0,43:603.0 ,42:525.0,41:181.0,40:476.0,39:642.0,38:761.0,37:340.0,36:1273.0,35:1692.0,34:687.0,33:497.0,32:91.0,31:309.0,30 :1534.0,29:299.0,28:961.0,27:656.0,26:310.0,25:483.0,24:796.0,23:247.0,22:682.0,21:1348.0,20:1394.0,19:368.0,18:491.0 ,17:263.0,16:1117.0,15:1792.0,14:398.0,13:185.0,12:457.0,11:505.0,10:297.0,9:919.0,8:723.0,7:203.0,6:2566.0,5 :450.0,4:404.0,3:459.0,2:254.0,1:177.0,0:269.0} VL-1:   {67:193.0,66:271.0,65:171.0,64:158.0,63:396.0,62:407.0,61:432.0,60:200.0,59:791.0,58:406.0,57:205.0,56:55.0,55 :394.0,54:230.0,53:38.0,52:5​​08.0,51:414.0,50:52.0,49:572.0,48:121.0,47:724.0,46:1421.0,45:706.0,44:603.0,43:525.0 ,42:181.0,41:476.0,40:642.0,39:761.0,38:340.0,37:1273.0,36:1692.0,35:687.0,34:497.0,33:91.0,32:309.0,31:1534.0,30 :299.0,29:961.0,28:656.0,27:310.0,26:483.0,25:796.0,24:247.0,23:682.0,22:1348.0,21:1394.0,20:368.0,19:491.0,18:263.0 ,17:1117.0,16:1792.0,15:398.0,14:185.0,13:457.0,12:505.0,11:297.0,10:919.0,9:723.0,8:203.0,7:2566.0,6:450.0,5 :404.0,4:459.0,3:254.0,2:177.0,1:269.0,0:247.0} VL-2:   {67:271.0,66:171.0,65:158.0,64:396.0,63:407.0,62:432.0,61:200.0,60:791.0,59:406.0,58:205.0,57:55.0,56:394.0,55 :230.0,54:38.0,53:508.0,52:414.0,51:52.0,50:572.0,49:121.0,48:724.0,47:1421.0,46:706.0,45:603.0,44:525.0,43:181.0 ,42:476.0,41:642.0,40:761.0,39:340.0,38:1273.0,37:1692.0,36:687.0,35:497.0,34:91.0,33:309.0,32:1534.0,31:299.0,30 :961.0,29:656.0,28:310.0,27:483.0,26:796.0,25:247.0,24:682.0,23:1348.0,22:1394.0,21:368.0,20:491.0,19:263.0,18:1117.0 ,17:1792.0,16:398.0,15:185.0,14:457.0,13:505.0,12:297.0,11:919.0,10:723.0,9:203.0,8:2566.0,7:450.0,6:404.0,5 :459.0,4:254.0,3:177.0,2:269.0,1:247.0,0:193.0}

但是,如何获得每个聚类的成员(即每个簇中包含哪些向量)?

1 个答案:

答案 0 :(得分:0)

上述方法能够找到各个聚类的质心,但是把每个向量分配给某个聚类质心的最后一步尚未完成。

下一步,您可以逐个获取每个向量并为其指定最近的质心。