如何高效地判断一个元组是否是另一个元组的子集

时间:2018-06-12 10:02:59

标签: python subset

我知道 set 的实现很好用。但是,它有一个很大的限制:它无法处理重复的元素。

让我们使用整数:

# Values to look for inside each candidate tuple. 100 is listed twice on
# purpose: duplicates matter, which is why a plain set cannot be used.
tuple1 = (100, 100, 375)
# Candidate tuples that the loop further down filters against tuple1.
layer = [(100, 100, 100, 375), (20, 100, 100, 375), (20, 20, 100, 375), (20, 20, 20, 375), (20, 20, 30, 375), (20, 20, 40, 375), (20, 20, 50, 375), (20, 20, 60, 375), (20, 20, 70, 375), (20, 20, 80, 375), (20, 30, 100, 375), (20, 30, 30, 375), (20, 30, 40, 375), (20, 30, 50, 375), (20, 30, 60, 375), (20, 30, 70, 375), (20, 30, 80, 375), (20, 40, 100, 375), (20, 40, 40, 375), (20, 40, 50, 375), (20, 40, 60, 375), (20, 40, 70, 375), (20, 40, 80, 375), (20, 50, 100, 375), (20, 50, 50, 375), (20, 50, 60, 375), (20, 50, 70, 375), (20, 50, 80, 375), (20, 60, 100, 375), (20, 60, 60, 375), (20, 60, 70, 375), (20, 60, 80, 375), (20, 70, 100, 375), (20, 70, 70, 375), (20, 70, 80, 375), (20, 80, 100, 375), (20, 80, 80, 375), (30, 100, 100, 375), (30, 30, 100, 375), (30, 30, 30, 375), (30, 30, 40, 375), (30, 30, 50, 375), (30, 30, 60, 375), (30, 30, 70, 375), (30, 30, 80, 375), (30, 40, 100, 375), (30, 40, 40, 375), (30, 40, 50, 375), (30, 40, 60, 375), (30, 40, 70, 375), (30, 40, 80, 375), (30, 50, 100, 375), (30, 50, 50, 375), (30, 50, 60, 375), (30, 50, 70, 375), (30, 50, 80, 375), (30, 60, 100, 375), (30, 60, 60, 375), (30, 60, 70, 375), (30, 60, 80, 375), (30, 70, 100, 375), (30, 70, 70, 375), (30, 70, 80, 375), (30, 80, 100, 100), (30, 80, 100, 375), (30, 80, 80, 375), (40, 100, 100, 375), (40, 40, 100, 375), (40, 40, 40, 375), (40, 40, 50, 375), (40, 40, 60, 375), (40, 40, 70, 375), (40, 40, 80, 375), (40, 50, 100, 375), (40, 50, 50, 375), (40, 50, 60, 375), (40, 50, 70, 375), (40, 50, 80, 375), (40, 60, 100, 375), (40, 60, 60, 375), (40, 60, 70, 375), (40, 60, 80, 375), (40, 70, 100, 375), (40, 70, 70, 375), (40, 70, 80, 375), (40, 80, 100, 375), (40, 80, 80, 375), (50, 100, 100, 375), (50, 50, 100, 375), (50, 50, 50, 375), (50, 50, 60, 375), (50, 50, 70, 375), (50, 50, 80, 375), (50, 50, 80, 80), (50, 60, 100, 375), (50, 60, 60, 375), (50, 60, 70, 375), (50, 60, 80, 375), (50, 70, 100, 375), (50, 70, 70, 375), (50, 70, 70, 80), (50, 70, 80, 375), (50, 80, 100, 375), 
(50, 80, 80, 375), (50, 80, 80, 80), (60, 100, 100, 375), (60, 60, 100, 375), (60, 60, 60, 375), (60, 60, 70, 375), (60, 60, 80, 375), (60, 70, 100, 375), (60, 70, 70, 375), (60, 70, 80, 375), (60, 80, 100, 375), (60, 80, 80, 375), (60, 80, 80, 80), (70, 100, 100, 375), (70, 70, 100, 375), (70, 70, 70, 375), (70, 70, 80, 375), (70, 80, 100, 100), (70, 80, 100, 375), (70, 80, 80, 375), (80, 100, 100, 375), (80, 80, 100, 375), (80, 80, 80, 100), (80, 80, 80, 375)]

我希望保留 layer 中以 tuple1 为子集的元素(即包含 tuple1 的全部元素,且重复次数也足够)。例如,前两个元素必须保留。

现在我正在使用for循环:

from collections import Counter

# Multiset of the values in tuple1: each value mapped to how many times it
# must occur in an element for that element to "contain" tuple1.
required = Counter(tuple1)

# Build new_layer from the elements of layer that do NOT contain tuple1 as a
# sub-multiset (elements that do contain it are skipped, as before).
# Counter subtraction keeps only positive counts, so a non-empty difference
# means at least one value of tuple1 is missing from elt or occurs too rarely.
# Unlike the earlier `len(copy) == 1` check, this does not assume that elt is
# exactly one item longer than tuple1, so it also works across layers whose
# lengths differ by more than one.
new_layer = [elt for elt in layer if required - Counter(elt)]

应该有比这更好的解决方案……而且最终要解决的问题实际上更复杂。

我有5层:

  • 第1层:len 为 2 的元素
  • 第2层:len 为 3 的元素
  • 第3层:len 为 4 的元素
  • 第4层:len 为 5 的元素
  • 第5层:len 为 6 的元素

目标是去掉第 N 层中由第 N-k 层的元素派生而来(即以第 N-k 层的某个元素为子集)的元素。

感谢您的帮助:)

1 个答案:

答案 0 :(得分:1)

一种方法是继承 Counter 并实现 __contains__(供 in 运算符使用):

from collections import Counter
class TupleCounter(Counter):
    """Counter whose ``in`` operator tests multiset containment.

    ``needle in TupleCounter(haystack)`` is True when every value of
    ``needle`` occurs in ``haystack`` at least as many times as it occurs in
    ``needle``. Only occurrence counts are compared; the order of the values
    is ignored.
    """

    def __init__(self, t=()):
        """Count the values of the iterable ``t`` (empty by default)."""
        super().__init__(t)

    def __contains__(self, other):
        """Return True if ``other`` is contained in this counter.

        ``other`` may be a Counter (used as is) or anything a Counter can be
        built from, such as a tuple or list, which is counted first. An
        operand that cannot be counted (for example a single int) is treated
        as an ordinary key lookup, so ``value in counter`` and
        ``del counter[value]`` (which uses ``in`` internally) keep working
        instead of raising TypeError.
        """
        if not isinstance(other, Counter):
            try:
                other = self.__class__(other)
            except TypeError:
                # Not iterable: fall back to plain dict key membership.
                return super().__contains__(other)

        # Contained only if no value is needed more often than it is present.
        return all(self.get(value, 0) >= count for value, count in other.items())

示例:

# Values every kept tuple must contain; 100 is listed twice, so a matching
# tuple needs at least two occurrences of 100.
needle = (100, 100, 375)
# Candidate tuples that are tested against needle below.
layer = [(100, 100, 100, 375), (20, 100, 100, 375), (20, 20, 100, 375), (20, 20, 20, 375), (20, 20, 30, 375), (20, 20, 40, 375), (20, 20, 50, 375), (20, 20, 60, 375), (20, 20, 70, 375), (20, 20, 80, 375), (20, 30, 100, 375), (20, 30, 30, 375), (20, 30, 40, 375), (20, 30, 50, 375), (20, 30, 60, 375), (20, 30, 70, 375), (20, 30, 80, 375), (20, 40, 100, 375), (20, 40, 40, 375), (20, 40, 50, 375), (20, 40, 60, 375), (20, 40, 70, 375), (20, 40, 80, 375), (20, 50, 100, 375), (20, 50, 50, 375), (20, 50, 60, 375), (20, 50, 70, 375), (20, 50, 80, 375), (20, 60, 100, 375), (20, 60, 60, 375), (20, 60, 70, 375), (20, 60, 80, 375), (20, 70, 100, 375), (20, 70, 70, 375), (20, 70, 80, 375), (20, 80, 100, 375), (20, 80, 80, 375), (30, 100, 100, 375), (30, 30, 100, 375), (30, 30, 30, 375), (30, 30, 40, 375), (30, 30, 50, 375), (30, 30, 60, 375), (30, 30, 70, 375), (30, 30, 80, 375), (30, 40, 100, 375), (30, 40, 40, 375), (30, 40, 50, 375), (30, 40, 60, 375), (30, 40, 70, 375), (30, 40, 80, 375), (30, 50, 100, 375), (30, 50, 50, 375), (30, 50, 60, 375), (30, 50, 70, 375), (30, 50, 80, 375), (30, 60, 100, 375), (30, 60, 60, 375), (30, 60, 70, 375), (30, 60, 80, 375), (30, 70, 100, 375), (30, 70, 70, 375), (30, 70, 80, 375), (30, 80, 100, 100), (30, 80, 100, 375), (30, 80, 80, 375), (40, 100, 100, 375), (40, 40, 100, 375), (40, 40, 40, 375), (40, 40, 50, 375), (40, 40, 60, 375), (40, 40, 70, 375), (40, 40, 80, 375), (40, 50, 100, 375), (40, 50, 50, 375), (40, 50, 60, 375), (40, 50, 70, 375), (40, 50, 80, 375), (40, 60, 100, 375), (40, 60, 60, 375), (40, 60, 70, 375), (40, 60, 80, 375), (40, 70, 100, 375), (40, 70, 70, 375), (40, 70, 80, 375), (40, 80, 100, 375), (40, 80, 80, 375), (50, 100, 100, 375), (50, 50, 100, 375), (50, 50, 50, 375), (50, 50, 60, 375), (50, 50, 70, 375), (50, 50, 80, 375), (50, 50, 80, 80), (50, 60, 100, 375), (50, 60, 60, 375), (50, 60, 70, 375), (50, 60, 80, 375), (50, 70, 100, 375), (50, 70, 70, 375), (50, 70, 70, 80), (50, 70, 80, 375), (50, 80, 100, 375), 
(50, 80, 80, 375), (50, 80, 80, 80), (60, 100, 100, 375), (60, 60, 100, 375), (60, 60, 60, 375), (60, 60, 70, 375), (60, 60, 80, 375), (60, 70, 100, 375), (60, 70, 70, 375), (60, 70, 80, 375), (60, 80, 100, 375), (60, 80, 80, 375), (60, 80, 80, 80), (70, 100, 100, 375), (70, 70, 100, 375), (70, 70, 70, 375), (70, 70, 80, 375), (70, 80, 100, 100), (70, 80, 100, 375), (70, 80, 80, 375), (80, 100, 100, 375), (80, 80, 100, 375), (80, 80, 80, 100), (80, 80, 80, 375)]

# Count the needle once, then keep every candidate whose own counts cover it;
# the `in` test is evaluated by TupleCounter.__contains__.
needle = TupleCounter(needle)
filtered = list(
    filter(lambda candidate: needle in TupleCounter(candidate), layer)
)
print(filtered)

输出:

[(100, 100, 100, 375), 
 (20, 100, 100, 375), 
 (30, 100, 100, 375), 
 (40, 100, 100, 375), 
 (50, 100, 100, 375), 
 (60, 100, 100, 375), 
 (70, 100, 100, 375), 
 (80, 100, 100, 375)
]

编辑:请注意,这种方法只关心元组中各个值出现的次数,而不关心它们在元组中的相对位置。因此,即使元素的顺序相反(例如 (375, 100, 100)),也同样会被匹配。