不使用集合(set)!
我不能使用Sets因为:
仅使用范围的端点,是否有一种减去两个范围列表的最佳方法?
r1 = (1, 1000), (1100, 1200)
r2 = (30, 50), (60, 200), (1150, 1300)
r1 - r2 = (1, 29), (51, 59), (201, 1000), (1100, 1149)
感谢。
答案 0 :(得分:11)
interval包可能会提供您所需要的一切。
# Uses the `interval` package, which is not part of the standard library.
from interval import Interval, IntervalSet
# Build one IntervalSet per range list from the question.
r1 = IntervalSet([Interval(1, 1000), Interval(1100, 1200)])
r2 = IntervalSet([Interval(30, 50), Interval(60, 200), Interval(1150, 1300)])
# The `-` operator computes the difference of the two sets.
# NOTE(review): the sample output below uses open/closed endpoint notation
# rather than the question's inclusive integer ranges -- confirm it fits.
print(r1 - r2)
>>> [1..30),(50..60),(200..1000],[1100..1150)
答案 1 :(得分:5)
一个解决方案(除了此处介绍的所有其他不同解决方案之外)是使用区间/段树(它们实际上是相同的):
http://en.wikipedia.org/wiki/Segment_tree
http://en.wikipedia.org/wiki/Interval_tree
这样做的一大好处是:同一段代码可以轻松实现任意布尔运算(不仅仅是减法)。de Berg 等人的著作对这种数据结构有标准的论述。要对两棵区间树执行任意布尔运算(包括减法),只需把它们合并即可。下面是一段(不可否认相当朴素的)Python 代码,用不平衡的范围树实现了这一点。树不平衡并不影响合并所需的时间,但这里的建树过程确实很笨,最终是二次复杂度(除非 reduce 是按分治方式执行的,对此我有些怀疑)。总之,代码如下:
class IntervalTree:
    """Internal node of a binary tree that splits the number line at ``h``.

    The tree traversal in this file treats ``left`` as describing the values
    below ``h`` and ``right`` as describing the values from ``h`` upwards.
    Each child is either another ``IntervalTree`` or a plain boolean leaf
    saying whether that slice is covered.
    """

    def __init__(self, h, left, right):
        self.h = h          # split point
        self.left = left    # subtree / leaf for values < h
        self.right = right  # subtree / leaf for values >= h
def merge(A, B, op, l=-float("inf"), u=float("inf")):
    """Combine two interval trees with a boolean operation.

    Args:
        A, B: ``IntervalTree`` nodes or boolean leaves.
        op: two-argument callable applied to pairs of leaves, e.g.
            ``lambda a, b: a and not b`` for a difference.
        l, u: bounds of the half-open slice ``[l, u)`` currently being
            merged; callers normally leave the defaults.

    Returns:
        The merged tree (an ``IntervalTree`` or a leaf value), or ``None``
        when the slice ``[l, u)`` is empty.
    """
    # An empty slice contributes nothing.  The test must be ``>=``: with
    # ``>`` the degenerate slice [l, l) was merged as if it held values and
    # later surfaced as a bogus range such as (1, 0).
    if l >= u:
        return None
    if not isinstance(A, IntervalTree):
        if isinstance(B, IntervalTree):
            # Always descend through a tree node: swap the operands and flip
            # the operator's argument order to compensate.
            opT = op
            A, B, op = B, A, (lambda x, y: opT(y, x))
        else:
            # Two leaves: apply the boolean operation directly.
            return op(A, B)
    left = merge(A.left, B, op, l, min(A.h, u))
    right = merge(A.right, B, op, max(A.h, l), u)
    if left is None:
        return right
    elif right is None or left == right:
        # One side was pruned, or both sides agree: no split is needed.
        return left
    return IntervalTree(A.h, left, right)
def to_range_list(T, l=-float("inf"), u=float("inf")):
    """Flatten an interval tree into a list of inclusive ``(start, end)`` pairs.

    Args:
        T: an ``IntervalTree`` node or a boolean leaf.
        l, u: bounds of the half-open slice ``[l, u)`` that ``T`` describes.

    Returns:
        One ``(l, u - 1)`` tuple for every truthy leaf, in left-to-right
        order.  Falsy leaves and empty slices contribute nothing.
    """
    if isinstance(T, IntervalTree):
        return to_range_list(T.left, l, T.h) + to_range_list(T.right, T.h, u)
    # ``l < u`` guards against emitting an inverted range for an empty slice.
    return [(l, u - 1)] if T and l < u else []
def range_list_to_tree(L):
    """Build an interval tree covering every inclusive ``(start, end)`` in ``L``.

    Each range becomes a small tree that is False below ``start``, True on
    ``[start, end + 1)`` and False afterwards; the trees are then OR-merged.

    Returns:
        The merged tree, or the leaf ``False`` (nothing covered) when ``L``
        is empty.
    """
    # ``reduce`` is only a builtin on Python 2; functools has it on both.
    from functools import reduce

    single_range_trees = [
        IntervalTree(R[0], False, IntervalTree(R[1] + 1, True, False))
        for R in L
    ]
    # Seeding with False (the empty set) makes an empty input valid instead
    # of raising TypeError from reduce().
    return reduce(
        lambda x, y: merge(x, y, lambda a, b: a or b),
        single_range_trees,
        False,
    )
这段代码写得比较仓促,也没有经过充分测试,所以可能有 bug。另请注意,它适用于任意布尔运算,而不仅仅是差集(只需把运算作为参数 op 传给 merge)。每次运算的时间复杂度与输出树的大小成线性关系(输出树的大小也就是结果中的区间数)。举个例子,我在你提供的用例上运行了它:
# Example: subtract the question's r2 from r1.
r1 = range_list_to_tree([(1, 1000), (1100, 1200)])
r2 = range_list_to_tree([(30, 50), (60, 200), (1150, 1300)])
# "in r1 and not in r2" is set difference.
diff = merge(r1, r2, lambda a, b: a and not b)
# print() with one argument behaves the same on Python 2 and Python 3.
print(to_range_list(diff))
我得到了以下输出:
[(1,29),(51,59),(201,1000),(1100,1149)]
这似乎与您的期望一致。如果想做其他布尔运算,可以用同一个函数这样实现:
# Intersection: covered by both inputs.
merge(r1, r2, lambda in_first, in_second: in_first and in_second)
# Union: covered by at least one input.
merge(r1, r2, lambda in_first, in_second: in_first or in_second)
# Xor: covered by exactly one input.
merge(r1, r2, lambda in_first, in_second: in_first != in_second)
答案 2 :(得分:2)
我可能误解了这个问题,但如果 r2 是 r1 的子集(r2 中的每个区间都完全落在 r1 的某个区间内),下面的代码应该可行:
class RangeSet:
    """A list of inclusive ``(start, end)`` ranges that supports subtraction.

    Subtraction only works when every range being removed lies completely
    inside one stored range; anything else raises ``ValueError``.
    """

    def __init__(self, elements):
        # Copy, so that mutating this set never touches the caller's data.
        self.ranges = list(elements)

    def __iter__(self):
        return iter(self.ranges)

    def __repr__(self):
        # Wrap in a 1-tuple so ``%`` formats the container as one value.
        return 'RangeSet: %r' % (self.ranges,)

    def has(self, tup):
        """Return ``(index, range)`` of the stored range that contains ``tup``.

        Raises:
            ValueError: if no single stored range fully contains ``tup``.
        """
        for pos, i in enumerate(self.ranges):
            if i[0] <= tup[0] and i[1] >= tup[1]:
                return pos, i
        raise ValueError('Invalid range or overlapping range')

    def minus(self, tup):
        """Remove ``tup`` in place, splitting the range that contains it.

        Raises:
            ValueError: propagated from ``has`` when ``tup`` is not contained.
        """
        pos, (x, y) = self.has(tup)
        out = []
        if x < tup[0]:
            out.append((x, tup[0] - 1))   # part left of the hole
        if y > tup[1]:
            out.append((tup[1] + 1, y))   # part right of the hole
        # Replace the containing range with its 0, 1 or 2 leftovers.
        self.ranges[pos:pos + 1] = out

    def __sub__(self, r):
        """Return a new RangeSet equal to ``self`` with every range in ``r`` removed."""
        r1 = RangeSet(self)
        r1.sub(r)
        return r1

    def sub(self, r):
        """Remove every range in ``r`` from this set, in place."""
        for i in r:
            self.minus(i)
然后,你这样做:
更新:请注意,为了满足上述前提(r2 是 r1 的子集),这里 r2 的最后一个区间与问题中的不同。
>>> r1 = RangeSet(((1, 1000), (1100, 1200)))
>>> r2 = RangeSet([(30, 50), (60, 200), (1150, 1200)])
>>> r1 - r2
RangeSet: [(1, 29), (51, 59), (201, 1000), (1100, 1149)]
>>> r1.sub(r2)
>>> r1
RangeSet: [(1, 29), (51, 59), (201, 1000), (1100, 1149)]
答案 3 :(得分:2)
这是一个有趣的问题!
我认为这是对的,它相当紧凑。它应该适用于所有类型的重叠范围,但它假定格式良好(即[x, y)
其中x < y
)。为简单起见,它使用[x, y)
样式范围。这是基于观察结果,实际上只有六种可能的安排(结果为()):
修改:我发现了一个更紧凑的代表:
(s1 e1) s2 e2
(s1 s2) e1 e2
(s1 s2) (e2 e1)
s2 e2 (s1 e1)
s2 s1 (e2 e1)
s2 s1 e1 e2 ()
给定排序的端点列表,如果endpoints[0] == s1
则前两个端点应该在结果中。如果endpoints[3] == e1
那么最后两个端点应该在结果中。如果两者都没有,那么应该没有结果。
我没有对它进行过多次测试,因此完全有可能出现问题。如果您发现错误,请告诉我!
import itertools
def range_diff(r1, r2):
    """Subtract the half-open range ``r2`` from the half-open range ``r1``.

    Both arguments are ``(start, end)`` pairs meaning ``[start, end)`` with
    ``start < end``.

    Returns:
        A list of zero, one or two non-empty ``(start, end)`` pairs covering
        the part of ``r1`` that is not in ``r2``.
    """
    s1, e1 = r1
    s2, e2 = r2
    endpoints = sorted((s1, s2, e1, e2))
    result = []
    # If r1 starts first, the stretch up to the next endpoint survives.
    if endpoints[0] == s1:
        result.append((endpoints[0], endpoints[1]))
    # If r1 ends last, the stretch after the previous endpoint survives.
    if endpoints[3] == e1:
        result.append((endpoints[2], endpoints[3]))
    # Shared endpoints (s1 == s2 or e1 == e2) pass the tests above but give
    # zero-width pieces such as (1, 1); drop them.
    return [(start, end) for start, end in result if start < end]
def multirange_diff(r1_list, r2_list):
    """Subtract every half-open range in ``r2_list`` from those in ``r1_list``.

    Each ``r2`` is applied in turn to all pieces that are still left.

    Returns:
        A list of ``(start, end)`` pairs.  ``r1_list`` itself is returned
        unchanged when ``r2_list`` is empty.
    """
    for r2 in r2_list:
        r1_list = [
            piece
            for r1 in r1_list
            for piece in range_diff(r1, r2)
            # Drop zero-width pieces so they cannot travel through later
            # subtractions and end up in the result.
            if piece[0] < piece[1]
        ]
    return r1_list
测试:
>>> r1_list = [(1, 1001), (1100, 1201)]
>>> r2_list = [(30, 51), (60, 201), (1150, 1301)]
>>> print multirange_diff(r1_list, r2_list)
[(1, 30), (51, 60), (201, 1001), (1100, 1150)]
答案 4 :(得分:1)
这是一个快速python函数,它执行减法,无论初始列表是否格式正确(即将列表转换为等效范围的最小列表,在进行减法之前排序):
def condense(l):
    """Return the ranges of ``l`` sorted, with overlapping ranges merged.

    Args:
        l: iterable of inclusive ``(start, end)`` pairs, in any order.

    Returns:
        A new sorted list in which no two ranges overlap.  An empty input
        gives an empty list.
    """
    merged = []
    for t in sorted(l):
        # ``merged`` is empty on the first pass (and for empty input), so
        # nothing is popped from an empty list.
        if merged and t[0] <= merged[-1][1]:
            # t starts inside the previous range: extend that range.
            previous = merged.pop()
            merged.append((previous[0], max(t[1], previous[1])))
        else:
            merged.append(t)
    return merged
def setSubtract(l1, l2):
    """Return the inclusive integer ranges of ``l1`` with those of ``l2`` removed.

    Both inputs may be unsorted and may contain overlapping ranges; they are
    normalised with ``condense`` first.

    Args:
        l1: sequence of inclusive ``(start, end)`` pairs to keep.
        l2: sequence of inclusive ``(start, end)`` pairs to cut away.

    Returns:
        A sorted list of ``(start, end)`` pairs.
    """
    # Guard the empty cases here so the result does not depend on how
    # condense() treats an empty sequence.
    l1 = condense(l1) if l1 else []
    l2 = condense(l2) if l2 else []

    result = []
    first_cut = 0  # index of the first cut that can still touch a range
    for start, end in l1:
        # Cuts ending before this range can never touch it or any later one.
        while first_cut < len(l2) and l2[first_cut][1] < start:
            first_cut += 1
        # Walk the cuts overlapping [start, end].  ``first_cut`` is left
        # alone because the last cut may reach into the next range too.
        k = first_cut
        while k < len(l2) and l2[k][0] <= end:
            cut_start, cut_end = l2[k]
            if cut_start > start:
                # The stretch before this cut survives.
                result.append((start, cut_start - 1))
            start = cut_end + 1
            if start > end:
                break  # the rest of the range is gone
            k += 1
        if start <= end:
            result.append((start, end))
    return result
# Ranges from the question, written as explicit tuples of (start, end) pairs.
r1 = ((1, 1000), (1100, 1200))
r2 = ((30, 50), (60, 200), (1150, 1300))
setSubtract(r1, r2)  # yields [(1, 29), (51, 59), (201, 1000), (1100, 1149)]
答案 5 :(得分:1)
有趣的问题!另一个实现,虽然你已经有很多。这很有趣! 涉及一些额外的“装饰”,使我做得更明确。
import itertools
def flatten_range_to_labeled_points(input_range, label):
    """Turn ``(start, end)`` pairs into a flat list of labelled endpoints.

    Args:
        input_range: iterable of ``(start, end)`` pairs.
        label: value interpolated into the tags, which identifies the list
            the points came from.

    Returns:
        ``[(start, 'start_<label>'), (end, 'end_<label>'), ...]`` in input
        order; an empty list for empty input.
    """
    range_with_labels = [
        ((start, 'start_%s' % label), (end, 'end_%s' % label))
        for (start, end) in input_range
    ]
    # chain.from_iterable flattens one level in a single pass and copes with
    # empty input.  The builtin reduce() used before exists only on Python 2
    # and raised TypeError on an empty list.
    flattened_range = list(itertools.chain.from_iterable(range_with_labels))
    return flattened_range
def unflatten_range_remove_labels(input_range):
    """Strip the labels from a flat endpoint list and pair the points up again.

    Args:
        input_range: sequence of ``(position, label)`` pairs that alternate
            start, end, start, end, ...

    Returns:
        A list of ``(start, end)`` tuples.  A trailing unpaired point is
        ignored.
    """
    without_labels = [x for (x, y) in input_range]
    # Builtin zip replaces itertools.izip, which does not exist on Python 3.
    # Returning a list gives the same result on both versions.
    grouped_into_pairs = list(zip(without_labels[::2], without_labels[1::2]))
    return grouped_into_pairs
def subtract_ranges(range1, range2):
    """Subtract the inclusive integer ranges in ``range2`` from ``range1``.

    Args:
        range1: iterable of inclusive ``(start, end)`` pairs to keep.
        range2: iterable of inclusive ``(start, end)`` pairs to remove.

    Returns:
        A sorted list of inclusive ``(start, end)`` pairs covering every
        integer that is in ``range1`` but not in ``range2``.
    """
    range1_labeled = flatten_range_to_labeled_points(range1, 1)
    range2_labeled = flatten_range_to_labeled_points(range2, 2)

    # Turn every endpoint into a half-open boundary: a range starts at
    # ``start`` and stops just before ``end + 1``.  All changes at one
    # boundary are collected and applied together, so the result no longer
    # depends on how tied start/end labels happen to sort.
    changes_at = {}
    for position, label in range1_labeled + range2_labeled:
        kind, source = label.split('_')
        if kind == 'start':
            boundary, delta = position, 1
        else:
            boundary, delta = position + 1, -1
        changes_at.setdefault(boundary, [0, 0])[int(source) - 1] += delta

    # Depth counters instead of flags, so overlapping ranges inside one
    # input are handled as well.
    depth1 = depth2 = 0
    inside = False
    new_starts_ends = []
    for boundary in sorted(changes_at):
        delta1, delta2 = changes_at[boundary]
        depth1 += delta1
        depth2 += delta2
        now_inside = depth1 > 0 and depth2 == 0
        if now_inside and not inside:
            new_starts_ends.append((boundary, 'start'))
        elif inside and not now_inside:
            # Back to an inclusive end point.
            new_starts_ends.append((boundary - 1, 'end'))
        inside = now_inside

    # strip the start/end labels, they're not used, they are kept for clarity
    return list(unflatten_range_remove_labels(new_starts_ends))
我得到了正确的输出:
r1 = (1, 1000), (1100, 1200)
r2 = (30, 50), (60, 200), (1150, 1300)
>>> subtract_ranges(r1,r2)
[(1, 29), (51, 59), (201, 1000), (1100, 1149)]
答案 6 :(得分:0)
这是相当丑陋但它确实适用于给定的例子
def minus1(a, b):
    """Subtract the inclusive integer range ``b`` from the inclusive range ``a``.

    Args:
        a: ``(start, end)`` pair to cut from.
        b: ``(start, end)`` pair to remove.

    Returns:
        ``[a]`` when the ranges do not overlap, otherwise a list of the zero,
        one or two pieces of ``a`` that lie outside ``b``.
    """
    if b[1] < a[0] or a[1] < b[0]:
        return [a]  # doesn't overlap
    # Keep only the sides of ``a`` that stick out past ``b``.  The old chain
    # of special cases returned inverted ranges such as (11, 5) whenever the
    # ranges shared an endpoint or merely touched.
    pieces = []
    if a[0] < b[0]:
        pieces.append((a[0], b[0] - 1))  # part of a before b
    if b[1] < a[1]:
        pieces.append((b[1] + 1, a[1]))  # part of a after b
    return pieces
def minus(r1, r2):
    """Subtract the inclusive ranges in ``r2`` from those in ``r1``.

    Args:
        r1: list of ``(start, end)`` pairs to keep, sorted by start.
        r2: list of ``(start, end)`` pairs to remove, sorted by start.

    Returns:
        A new list of ``(start, end)`` pairs; the inputs are not modified.
        An empty ``r1`` gives ``[]`` and an empty ``r2`` gives a copy of
        ``r1``.
    """
    # assume r1 and r2 are already sorted
    l = []
    first_cut = 0  # index of the first cut that can still touch a range
    for v in r1:
        # Cuts ending before v can never touch v or any later range.
        while first_cut < len(r2) and r2[first_cut][1] < v[0]:
            first_cut += 1
        pieces = [v]
        k = first_cut
        # Apply every cut that starts inside v.  ``first_cut`` is left alone
        # because the last cut may reach into the next range as well.
        while pieces and k < len(r2) and r2[k][0] <= v[1]:
            # Earlier pieces end before this cut starts, so only the
            # right-most piece can be affected.
            last = pieces.pop()
            # Skip any inverted (start > end) piece rather than reporting it.
            pieces.extend(p for p in minus1(last, r2[k]) if p[0] <= p[1])
            k += 1
        l.extend(pieces)
    return l
# Ranges from the question.
r1 = [(1, 1000), (1100, 1200)]
r2 = [(30, 50), (60, 200), (1150, 1300)]
# print() with one argument behaves the same on Python 2 and Python 3.
print(minus(r1, r2))
打印:
[(1, 29), (51, 59), (201, 1000), (1100, 1149)]