Python Kruskal的聚类算法

时间:2017-08-27 22:52:40

标签: python algorithm python-3.x graph-algorithm kruskals-algorithm

我试图找出如何通过为一系列(x,y)坐标确定具有修改的Kruskal的聚类来找到不同聚类中的点之间的最小距离。在过去的半个星期里,我每天花费3-4个小时,但我不觉得我那么接近。我的代码如下:

#Uses python3
import sys
import math

class Point(object):
    x = ""
    y = ""
    index = ""
    set = ""
    neighbors = None

    def __init__(self, x, y, index, set):
        self.x = x
        self.y = y
        self.index = index
        self.set = set

    def setSet(self, set):
        self.set = set

    def getSet(self):
        return self.set

    def addNeighbor(self, point):
        if self.neighbors == None:
            self.neighbors = [point]
        else:
            self.neighbors.append(point)

    def getNeighbors(self):
        return self.neighbors

class Edge(object):
    point1 = None
    point2 = None
    length = None

    def __init__(self, pt1, pt2):
        length = (((pt1.x - pt2.x)**2) + ((pt1.y - pt2.y)**2))**(.5)
        # print("Pt 1: " + str(pt1.x) + ", " + str(pt1.y))
        # print("Pt 2: " + str(pt2.x) + ", " + str(pt2.y))
        # print(length)
        self.point1 = pt1
        self.point2 = pt2
        self.length = length

    def getLength(self):
        return self.length

def clustering(x, y, k):
    points = []
    numSets = len(x)
    edges = []
    #write your code here
    for i in range(len(x)):
        new_point = Point(x[i], y[i], i, i)
        points.append(new_point)
    for point in points:
        for neighbor in points:
            if not point is neighbor:
                if point.neighbors == None:
                    # print(point + ": " + neighbor)
                    edge = Edge(point, neighbor)
                    edges.append(edge)
                    point.addNeighbor(neighbor)
                    neighbor.addNeighbor(point)
                    # print (edge.getLength())
                else:
                    if neighbor not in point.neighbors:
                        edge = Edge(point, neighbor)
                        edges.append(edge)
                        point.addNeighbor(neighbor)
                        neighbor.addNeighbor(point)
    edges.sort(key=lambda edge: edge.getLength())
    if k == len(points):
        smallestEdge = edges[0]
        return smallestEdge.length
    while numSets > k:
        last_edge = 0
        for edge in edges:
            # print("edge" + edge)
            pointA = edge.point1
            pointB = edge.point2
            if pointA.set != pointB.set:
                # print("merge points")
                if pointA.set < pointB.set:
                    oldSet = pointB.set
                    pointB.setSet(pointA.set)
                    for point in points:
                        if point.set == oldSet:
                            point.setSet(pointA.set)
                    numSets = numSets - 1
                if pointB.set < pointA.set:
                    oldSet = pointA.set
                    pointA.setSet(pointB.set)
                    for point in points:
                        if point.set == oldSet:
                            point.setSet(pointB.set)
                    numSets = numSets - 1
            # print(numSets)s
            # print("Point A: (" + str(pointA.x) + ", " + str(pointA.y) + ")/Point B: (" + str(pointB.x) + ", " + str(pointB.y) + "), Length: " + str(edge.length))
            last_edge += 1
            if numSets <= k:
                break

        # print(len(sets)
    nextEdge = edges[last_edge + 1]
    pointA = nextEdge.point1
    pointB = nextEdge.point2
    # print("Point A: (" + str(pointA.x) + ", " + str(pointA.y) + ")/Point B: (" + str(pointB.x) + ", " + str(pointB.y) + "), Length: " + str(nextEdge.length))

    return nextEdge.length


if __name__ == '__main__':
    input = sys.stdin.read()
    data = list(map(int, input.split()))
    n = data[0]
    data = data[1:]
    x = data[0:2 * n:2]
    y = data[1:2 * n:2]
    data = data[2 * n:]
    k = data[0]
    print("{0:.9f}".format(clustering(x, y, k)))

输入的形式为:

Number of points
x1, y1
x2, y2
.
.
.
.
Number of clusters

它适用于我提供的测试用例(这是针对一个类),但是由于测试用例我不确定它是什么时尝试验证它然后失败,所以我真的不是确定导致此失败的原因是什么并且无法找出它是什么。我在这里做错了什么/多少事(我确定有很多)?

0 个答案:

没有答案