我的数据格式如下:
Group Item-1 Item-2
0 7 13
0 10 4
1 2 8
1 3 1
1 4 3
1 6 28
1 8 6
...
我需要用 Python 2.7 为每个组输出由 Item-1 和 Item-2 相互连接而成的链/树。例如,第 0 组中除了 7-13 和 10-4 这两对之外没有其他链/连接;而对于第 1 组,输出应类似于:
1 (2, 8, 6, 28), (4, 3, 1)
答案 0 :(得分:0)
这看起来是并查集(union-find / disjoint-set)算法的典型应用场景。下面是我一直放在工具箱里的一个实现:
from collections import defaultdict
class UnionFind(object):
    """Disjoint-set (union-find) structure over hashable items.

    Items are registered lazily: the first time an item is passed to
    ``find`` or ``union`` it becomes the only member of its own set.
    """

    def __init__(self):
        # Maps each known item to its parent; ``None`` marks the root of a set.
        self.leaders = defaultdict(lambda: None)

    def find(self, x):
        """Return the representative (root) of the set containing ``x``.

        The lookup is iterative, so arbitrarily long parent chains cannot
        exhaust the interpreter's recursion limit. Every item visited on the
        way up is re-pointed directly at the root (path compression).
        """
        leaders = self.leaders

        # Pass 1: climb to the root. Indexing the defaultdict also registers
        # a previously unseen ``x`` as the root of its own singleton set.
        root = x
        parent = leaders[root]
        while parent is not None:
            root = parent
            parent = leaders[root]

        # Pass 2: walk the same path again, pointing each item at the root.
        node = x
        while True:
            parent = leaders[node]
            if parent is None:
                break
            leaders[node] = root
            node = parent
        return root

    def union(self, x, y):
        """Merge the set containing ``x`` with the set containing ``y``.

        The root of ``x``'s set is attached beneath the root of ``y``'s set.
        Nothing changes when both items already share a root.
        """
        lx, ly = self.find(x), self.find(y)
        if lx != ly:
            self.leaders[lx] = ly

    def get_groups(self):
        """Return a ``defaultdict`` mapping each root to the set of its members.

        Only items that were previously passed to ``find`` or ``union`` are
        included.
        """
        groups = defaultdict(set)
        # Iterate over a snapshot of the keys: ``find`` rewrites entries of
        # ``self.leaders`` while compressing paths.
        for x in list(self.leaders):
            groups[self.find(x)].add(x)
        return groups
以下是如何将其应用于您的数据:
# parse data
data = """Group Item-1 Item-2
0 7 13
0 10 4
1 2 8
1 3 1
1 4 3
1 6 28
1 8 6"""
# Drop the header row and turn every remaining row into [group, item1, item2].
data = [[int(x) for x in line.split()] for line in data.splitlines()[1:]]

# get mapping {group_number: [list of pairs]}
groups = defaultdict(list)
for g, x, y in data:
    groups[g].append((x, y))

# for each group, add pairs to union find structure and get groups
for group, links in groups.items():
    # A fresh structure per group keeps items of different groups separate.
    union = UnionFind()
    for x, y in links:
        union.union(x, y)
    # Print one pre-formatted string: this emits the same text as the
    # Python 2 statement `print group, values` and is also valid on Python 3.
    print("%s %s" % (group, list(union.get_groups().values())))
输出是:
0 [set([10, 4]), set([13, 7])]
1 [set([1, 3, 4]), set([8, 2, 28, 6])]