我有一个元组列表:
[(3,4), (18,27), (4,14)]
我需要一段代码来合并含有相同数字的元组,生成另一个列表,使其中各个元组之间不再有重复的数字。结果列表应按元组长度从长到短排序,例如:
>>> MergeThat([(3,4), (18,27), (4,14)])
[(3,4,14), (18,27)]
>>> MergeThat([(1,3), (15,21), (1,10), (57,66), (76,85), (66,76)])
[(57,66,76,85), (1,3,10), (15,21)]
我知道这类似于层次聚类算法,我也读过相关资料,但仍然没能把它实现出来。
MergeThat()函数的代码是否相对简单?
答案 0 :(得分:5)
我想了很久,直到尝试了Ian的答案(谢谢!)之后才意识到这个问题的理论本质:输入是一个边列表,它定义了一个无向图,而我们要找的是这个图的连通分量。这样一来问题就很简单了。
虽然你可以自己高效地实现这个算法,但实际上没有必要自己动手!只需导入一个好用的图库:
import networkx as nx
# NOTE: this snippet uses Python 2 print-statement syntax.
# one of your examples: the list of pairs is handed to nx.Graph as-is
g1 = nx.Graph([(1,3), (15,21), (1,10), (57,66), (76,85), (66,76)])
print nx.connected_components(g1) # [[57, 66, 76, 85], [1, 10, 3], [21, 15]]
# my own test case: every pair chains onto another, so one group is printed
g2 = nx.Graph([(1,2),(2,10), (20,3), (3,4), (4,10)])
print nx.connected_components(g2) # [[1, 2, 3, 4, 10, 20]]
答案 1 :(得分:4)
import itertools
def merge_it(lot):
    """Merge tuples that share at least one element into disjoint sets.

    Args:
        lot: iterable of tuples (or any iterables of hashable items).

    Returns:
        A list of sets. Every input item appears in exactly one set, and
        no two returned sets have an element in common.
    """
    merged = [set(x) for x in lot]  # operate on sets only
    finished = False
    while not finished:
        finished = True
        for a, b in itertools.combinations(merged, 2):
            if a & b:
                # We merged in this iteration, so another sweep may be needed.
                finished = False
                # Both sets are still in the list: the pairs were taken from
                # the current contents and we leave the loop right after
                # changing it.
                merged.remove(a)
                merged.remove(b)
                merged.append(a.union(b))
                # Restart so the pairs are rebuilt from the updated list
                # instead of from stale, already-merged sets.
                break
    return merged
if __name__ == '__main__':
    # Demo runs. A parenthesised single-argument print works on both
    # Python 2 and Python 3. The expected results below are written in
    # Python 2 set repr.
    print(merge_it([(3, 4), (18, 27), (4, 14)]))
    # => [set([18, 27]), set([3, 4, 14])]
    print(merge_it([(1, 3), (15, 21), (1, 10), (57, 66), (76, 85), (66, 76)]))
    # => [set([21, 15]), set([1, 10, 3]), set([57, 66, 76, 85])]
    print(merge_it([(1, 2), (2, 3), (3, 4), (4, 5), (5, 9)]))
    # => [set([1, 2, 3, 4, 5, 9])]
这是一个片段(包括doctests):http://gist.github.com/586252
答案 2 :(得分:1)
def collapse(L):
    """Run one merging pass over a list of tuples.

    Each tuple is compared against the numbers of all tuples that came
    before it in L. A tuple that shares a number with an earlier one is
    folded into the accumulated result via merge(); otherwise it is
    appended unchanged. A single pass can leave overlapping tuples behind,
    so callers repeat the pass until no merge is reported.

    Args:
        L: list of tuples of hashable values. L itself is not modified.

    Returns:
        A pair (answer, merges): the new list of tuples and the number of
        input tuples that were merged into earlier ones during this pass.
    """
    answer = []
    merges = 0
    seen = set()  # every number found in the tuples processed so far
    for t in L:
        # Test for overlap before recording t's own numbers, so a value that
        # is merely repeated inside t is not mistaken for an earlier tuple's.
        if any(num in seen for num in t):
            # Rebind instead of extending in place: merge() returns the whole
            # updated list, whether or not it also empties its argument.
            answer = merge(t, answer)
            merges += 1
        else:
            answer.append(t)
        seen.update(t)
    return (answer, merges)
def merge(t, L):
    """Fold the elements of t into every tuple of L that overlaps with t.

    Args:
        t: tuple of hashable values.
        L: list of tuples. It is emptied as a side effect, exactly as the
            original consuming loop left it; callers that extend L in place
            with the return value rely on this.

    Returns:
        A new list with one entry per tuple of L, in the same order. A
        tuple sharing at least one element with t is replaced by a tuple of
        the distinct elements of both (element order unspecified); any
        other tuple is carried over unchanged.
    """
    wanted = set(t)
    answer = []
    for tup in L:
        if wanted.isdisjoint(tup):
            answer.append(tup)
        else:
            combined = set(tup)
            combined.update(t)
            answer.append(tuple(combined))
    # Preserve the historical contract: the input list ends up empty.
    del L[:]
    return answer
def sortByLength(L):
    """Return the tuples of L ordered from longest to shortest.

    Args:
        L: iterable of tuples (anything supporting len()).

    Returns:
        A new list with the same items as L in non-ascending order of
        length. Items of equal length keep their original relative order,
        because sorted() is stable even with reverse=True.
    """
    return sorted(L, key=len, reverse=True)
def MergeThat(L):
    """Merge tuples that share numbers and sort the result by length.

    Args:
        L: list of tuples.

    Returns:
        The list produced by repeating collapse() until a pass reports no
        merges, passed through sortByLength().
    """
    answer, merges = collapse(L)
    # One pass can leave tuples that still overlap; repeat until a pass
    # performs no merge at all.
    while merges:
        answer, merges = collapse(answer)
    return sortByLength(answer)
if __name__ == "__main__":
    # Demo runs. A parenthesised single-argument print works on both
    # Python 2 and Python 3.
    print('starting')
    print(MergeThat([(3, 4), (18, 27), (4, 14)]))
    # [(3, 4, 14), (18, 27)]
    print(MergeThat([(1, 3), (15, 21), (1, 10), (57, 66), (76, 85), (66, 76)]))
    # [(57, 66, 76, 85), (1, 10, 3), (15, 21)]
答案 3 :(得分:0)
这是另一个不使用 itertools、采用另一种稍显冗长的方法的解决方案。此方案的棘手之处在于:当 t0 in index and t1 in index 时,需要把两个集群合并起来。
import doctest
def MergeThat(a):
    """Merge pairs that share a number into clusters, largest cluster first.

    See http://stackoverflow.com/questions/3744048/python-how-to-merge-a-list-into-clusters

    Args:
        a: iterable of 2-item pairs of hashable, mutually orderable values.

    Returns:
        A list of tuples, one per cluster. Each tuple lists its values in
        ascending order. Tuples are ordered by decreasing length, and
        equally long tuples by their contents, so the result is
        deterministic.

    >>> MergeThat([(3,4), (18,27), (4,14)])
    [(3, 4, 14), (18, 27)]
    >>> MergeThat([(1,3), (15,21), (1,10), (57,66), (76,85), (66,76)])
    [(57, 66, 76, 85), (1, 3, 10), (15, 21)]
    """
    index = {}  # value -> the set object (cluster) that currently holds it
    for t0, t1 in a:
        c0 = index.get(t0)
        c1 = index.get(t1)
        if c0 is None and c1 is None:
            cluster = set()
        elif c0 is None:
            cluster = c1
        elif c1 is None or c0 is c1:
            cluster = c0
        else:
            # The pair links two distinct clusters. Absorb the smaller one
            # and repoint only its members; every key mapped to a cluster
            # is also a member of it, so no other key needs touching.
            if len(c0) < len(c1):
                c0, c1 = c1, c0
            c0 |= c1
            for x in c1:
                index[x] = c0
            cluster = c0
        cluster.add(t0)
        cluster.add(t1)
        index[t0] = index[t1] = cluster
    # Many keys share one set object; frozenset makes them hashable so the
    # duplicates collapse.
    clusters = [tuple(sorted(c)) for c in set(map(frozenset, index.values()))]
    return sorted(clusters, key=lambda c: (-len(c), c))
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings.
    import doctest
    doctest.testmod()
答案 4 :(得分:0)
其他人编写的代码肯定会起作用,但这是另一种选择,可能更容易理解,也可能算法复杂度更低。
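维护一个字典,把每个数字映射到它所属的集群(用 Python 的 set 实现),并且该数字本身也要放在对应的集合里。对每个输入对,按以下情况之一处理:
1. 两个数字都不在字典中:新建一个包含这两个数字的集合,并让字典中这两个数字都指向它。
2. 只有一个数字在字典中:把尚未出现的那个数字加入另一个数字所在的集合,并让字典把它指向该集合。
3. 两个数字都已出现,但属于不同的集合:取两个集合的并集,并把字典中所有相关数字都指向合并后的集合。
4. 两个数字已经在同一个集合中:无需任何操作。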
然后,只需从字典中收集唯一值,然后按大小的降序排序。这部分工作是O(m log n),因此不会支配运行时。
这应该只需一次通过。编写实际代码留给读者练习。
答案 5 :(得分:0)
这对于大型列表来说效率不高。
def merge_that(lot):
    """Group tuples that share elements into clusters.

    Args:
        lot: iterable of tuples of hashable values. It is copied, so the
            caller's list is left untouched.

    Returns:
        A list of tuples, one per cluster, in the order in which each
        cluster's first tuple appears in lot. Each tuple holds the distinct
        values of its cluster; the order inside a tuple is unspecified.
    """
    remaining = list(lot)
    final_list = []
    while remaining:
        cluster = set(remaining.pop(0))
        # A tuple absorbed late in a sweep can connect to tuples skipped
        # earlier in the same sweep, so keep sweeping until nothing is added.
        grew = True
        while grew:
            grew = False
            leftover = []
            for tup in remaining:
                if cluster.isdisjoint(tup):
                    leftover.append(tup)
                else:
                    cluster.update(tup)
                    grew = True
            # Rebuild the list instead of deleting by index, which shifts
            # the positions of the entries still to be examined.
            remaining = leftover
        final_list.append(tuple(cluster))
    return final_list
它看起来很难看但很有效。