我是Python的新手,我一直试图找出y值 <= N 的区间。我有一个有序的字典条目:
# Sample data from the question: x-position (key) -> y-value (kept as strings).
d = {
    0: '92',
    11: '70',
    43: '77',
    44: '76',
    61: '77',
    64: '69',
    68: '67',
    84: '68',
    93: '87',
    108: '81',
    141: '74',
}
我想编写一个函数,允许我根据d的键(x值)识别y值 <= N 的“间隔”(a,b)。我的端点(a,b)应该是值开始下降到N以下、以及重新上升到N以上的位置,因此实际端点值将“高于”N,但两者之间的条目应低于N。
(a,b): {above, below, below, below, above}
例如,我对作为字典的区间感兴趣,这里N = 70:
{(0,43):{92,70,77}, (61,93): {77, 69, 67, 68, 87}} <-- includes the values at endpoints
但是,可以忽略那些值永远不会低于70的“间隔”,所以在这种情况下我们不需要:(43,61),(93,141)
有一种简单的方法吗?到目前为止,我已经能够识别出从“上方”变为“下方”70或反之亦然的点,但不确定如何继续创建间隔和值(例如在字典中)。我想我已经盯着这个太久了。
答案 0 :(得分:3)
由于我无法完全解释的原因,这个问题让我着迷。但我想我终于把它从我的系统中删除了。首先,一个基本的,简洁的解决方案:
# Collect, for every run of keys whose value is <= 70, the keys of that run
# together with the neighbouring key on each side (when one exists).
# `intervals` is a list of key lists; the last entry is the run being built.
intervals = [[]]
prev = None  # key visited on the previous iteration (None before the first)
# Sort explicitly: a plain dict gives no ordering guarantee on Python 2.
sorted_items = sorted(d.items())
for k, v in sorted_items:
    # The sample values are strings; comparing a str with an int is never
    # true on Python 2 and raises TypeError on Python 3, so convert first.
    if int(v) <= 70:
        # Opening a new run: also record the key just before it, unless this
        # is the very first key and there is nothing before it.
        ext = (k,) if (intervals[-1] or prev is None) else (prev, k)
        intervals[-1].extend(ext)
    elif intervals[-1]:
        # First key back above the threshold closes the current run.
        intervals[-1].append(k)
        intervals.append([])
    prev = k
# Drop the empty placeholder left behind when the data ends above threshold.
if not intervals[-1]:
    intervals.pop()
# Map (first key, last key) -> values of every key in the run.
print(dict(((iv[0], iv[-1]), [d[k] for k in iv]) for iv in intervals))
很容易抽象以上内容来创建迭代器:
def iter_intervals(vals, filter_f, _nil=object()):
    """Yield runs of items accepted by ``filter_f``, framed by their neighbours.

    Each yielded list holds the item immediately before the run (if there is
    one), every consecutive item for which ``filter_f`` is true, and the first
    item after the run for which it is false (if there is one).

    ``vals`` is any iterable and is consumed once, in order.  ``_nil`` is a
    private sentinel meaning "no previous item yet"; callers should not pass it.
    """
    previous = _nil
    run = []
    for item in vals:
        if filter_f(item):
            # A run is starting: put its left-hand neighbour in front, unless
            # the run begins with the very first item of the input.
            if not run and previous is not _nil:
                run.append(previous)
            run.append(item)
        elif run:
            # First rejected item after a run is its right-hand frame.
            run.append(item)
            yield run
            run = []
        previous = item
    # Input ended while a run was still open: emit it without a right frame.
    if run:
        yield run
# Feed the points in ascending key order: a plain dict does not guarantee any
# iteration order on Python 2, and the search depends on neighbours being
# adjacent.  The sample values are strings, so convert before comparing.
intervals = iter_intervals(sorted(d.items()), lambda x: int(x[1]) <= 70)
# Map (first key, last key) -> list of values for each interval found.
print(dict(((iv[0][0], iv[-1][0]), [v for k, v in iv]) for iv in intervals))
但这必须存储很多状态。我想知道是否有办法减少这种情况......
def iter_intervals(vals, filter_f, _nil=object()):
    """Yield runs of items accepted by ``filter_f``, framed by their neighbours.

    The input is walked as overlapping (previous, current, next) windows, so
    no "previous item" variable has to be carried between iterations.  Each
    yielded list holds the item before the run (if any), the accepted items,
    and the item after the run (if any), in input order.

    ``vals`` is any iterable.  ``_nil`` is a private sentinel used to pad both
    ends of the input; callers should not pass it.
    """
    # itertools.izip exists only on Python 2; the builtin zip is already lazy
    # on Python 3.
    lazy_zip = getattr(itertools, 'izip', zip)
    # Pad with the sentinel so the first and last items also get a window.
    iters = itertools.tee(itertools.chain((_nil,), vals, (_nil,)), 3)
    # Offset the copies by 0, 1 and 2 positions to form sliding triplets.
    next(iters[1]); next(iters[2]); next(iters[2])
    triplets = lazy_zip(*iters)
    # A list (not a set) keeps input order and duplicate items, and does not
    # require the items to be hashable or mutually orderable.
    interval = []
    for window in triplets:
        if filter_f(window[1]):
            # Consecutive windows overlap by two items: the first window of a
            # run contributes all three, later ones only their right-hand item.
            interval.extend(window if not interval else window[-1:])
        elif interval:
            yield [item for item in interval if item is not _nil]
            interval = []
    # Input ended inside a run; strip the trailing padding and emit it.
    if interval:
        yield [item for item in interval if item is not _nil]
# Feed the points in ascending key order: a plain dict does not guarantee any
# iteration order on Python 2, and the search depends on neighbours being
# adjacent.  The sample values are strings, so convert before comparing.
intervals = iter_intervals(sorted(d.items()), lambda x: int(x[1]) <= 70)
# Map (first key, last key) -> list of values for each interval found.
print(dict(((iv[0][0], iv[-1][0]), [v for k, v in iv]) for iv in intervals))
完成后,现在更明显的是如何调整ninjagecko的解决方案以避免强制它存储列表的前瞻/后视问题:
def framed_intervals(points, filter_f, _nil=object()):
    """Yield runs of points accepted by ``filter_f``, framed by their neighbours.

    The input is turned into overlapping (previous, current, next) windows and
    grouped by whether the middle point passes ``filter_f``.  For each passing
    group the result is the point before the run (if any), the run itself and
    the point after it (if any), in input order.

    ``points`` is any iterable.  ``_nil`` is a private sentinel used to pad
    both ends of the input; callers should not pass it.
    """
    # itertools.izip exists only on Python 2; the builtin zip is already lazy
    # on Python 3.
    lazy_zip = getattr(itertools, 'izip', zip)
    # Pad with the sentinel so the first and last points also get a window.
    iters = itertools.tee(itertools.chain((_nil,), points, (_nil,)), 3)
    # Offset the copies by 0, 1 and 2 positions to form sliding triplets.
    next(iters[1]); next(iters[2]); next(iters[2])
    triplets = lazy_zip(*iters)
    # Normalise the key to bool so differing truthy results from filter_f
    # cannot split one run into several groups.
    for below, group in itertools.groupby(triplets, lambda w: bool(filter_f(w[1]))):
        if below:
            # Windows overlap by two points: take the first one whole and only
            # the right-hand point of each later one.  This keeps input order
            # and duplicates, unlike collecting the windows into a set.
            interval = list(next(group))
            interval.extend(w[-1] for w in group)
            # To drop incomplete intervals instead, `continue` here when
            # interval[0] is _nil or interval[-1] is _nil.
            yield [pt for pt in interval if pt is not _nil]
# Feed the points in ascending key order: a plain dict does not guarantee any
# iteration order on Python 2, and the search depends on neighbours being
# adjacent.  The sample values are strings, so convert before comparing.
intervals = framed_intervals(sorted(d.items()), lambda x: int(x[1]) <= 70)
# Map (first key, last key) -> list of values for each interval found.
print(dict(((iv[0][0], iv[-1][0]), [v for k, v in iv]) for iv in intervals))
答案 1 :(得分:1)
# Sample data: x-position -> y-value (strings, converted with int() below).
d = {0: '92', 11: '70', 43: '77', 44: '76', 61: '77', 64: '69',
     68: '67', 84: '68', 93: '87', 108: '81', 141: '74'}

# Result: list of ((start_key, end_key), values) for every stretch that dips
# to 70 or below, including the above-threshold point on either side.
r = []
k = None           # start key of the interval being collected, else None
v = None           # values collected so far for that interval
last_above = None  # most recent key whose value is above the threshold
for i in sorted(d):
    above = int(d[i]) > 70
    if k is not None:
        v.append(d[i])
        if above:
            # Back above the threshold: this key closes the interval.
            r.append(((k, i), v))
            k = v = None
    elif not above and last_above is not None:
        # A dip begins; it starts at the last point seen above the threshold.
        # Opening only here (not on every above-threshold point) avoids
        # emitting stretches that never dip and losing the real start point.
        k = last_above
        v = [d[last_above], d[i]]
    if above:
        last_above = i
print(r)
答案 2 :(得分:1)
这是一个有点冗长的解决方案:
import collections
# Sample (x, y) pairs in ascending x order; y-values are kept as strings.
values = [
    (0, '92'), (11, '70'), (43, '77'), (44, '76'),
    (61, '77'), (64, '69'), (68, '67'), (84, '68'),
    (93, '87'), (108, '81'), (141, '74'),
]
# An OrderedDict remembers insertion order, so iteration follows `values`.
d = collections.OrderedDict((x, y) for x, y in values)
def intervals(d, n):
    """Return the stretches of ``d`` whose values dip to ``n`` or below.

    ``d`` is an ordered mapping of key -> value, where each value is accepted
    by ``int()``; it is walked in its own iteration order.  The result is an
    ``OrderedDict`` mapping ``(start_key, end_key)`` to the list of integer
    values from the last entry above ``n`` before the dip, through the dip,
    to the first entry above ``n`` after it.

    Dips with no above-``n`` entry on one side (at the very start or end of
    the data) are not reported.
    """
    result = collections.OrderedDict()
    interval = []
    lastk, last_above, startk = None, False, None
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for k, v in d.items():
        above = int(v) > n
        if above:
            if startk is not None:
                # First entry back above n closes the open interval.
                interval.append(int(v))
                result[(startk, k)] = interval
                interval = []
                startk = None
        elif startk is not None:
            interval.append(int(v))
        elif last_above:
            # A dip begins right after an above-n entry.  Start from a fresh
            # list so values from an unframed leading dip cannot leak in.
            startk = lastk
            interval = [int(d[lastk]), int(v)]
        lastk, last_above = k, above
    return result
# Script entry point: show the intervals of the sample data for threshold 70.
if __name__ == '__main__':
    found = intervals(d, 70)
    print(found)
当我运行它时会打印:
OrderedDict([((0, 43), [92, 70, 77]), ((61, 93), [77, 69, 67, 68, 87])])
这是期望的结果。
答案 3 :(得分:1)
以下代码应该为您提供所要求的输出:
# Build dd: (start_key, end_key) -> list of float y-values for every stretch
# of `d` that dips to 70 or below, including the framing point on each side
# when there is one.  A dip at the very start or end of the data is kept,
# with the first or last below-threshold key acting as its endpoint.
oninterval = False
dd = {}
keys = sorted(d)     # works on Python 2 and 3, and on an empty dict
start_key = None     # key of the latest point above the threshold, if any
first_val = None     # its y-value as a float
cur_list = []
for k in keys:
    v = float(d[k])
    # Compare the float itself; int(v) would truncate 70.5 down to 70.
    below = v <= 70
    if oninterval:
        cur_list.append(v)
        if not below:  # interval ends
            oninterval = False
            dd[(start_key, k)] = cur_list
            # The closing point is also the candidate start of the next dip.
            start_key, first_val = k, v
    elif below:
        if start_key is None:
            # Data begins below the threshold: the dip has no left frame.
            start_key = k
            cur_list = [v]
        else:
            cur_list = [first_val, v]
        oninterval = True
    else:
        start_key, first_val = k, v
# Data ended inside a dip: close it on the last key.
if oninterval:
    dd[(start_key, keys[-1])] = cur_list
编辑:
扩展了代码片段,使其接受第一个或最后一个项目的y值<= 70的情况,并将y值视为浮点数
答案 4 :(得分:1)
旁注:您的字典包含字符串值,而不是int值。在您的例子中,您的意思可能是 <= 而不是 <。
为了更清楚地重申您的问题,您:
(x,y)
[71,70,{71],70,71}
算法如下:
from itertools import *
def dippingIntervals(points, threshold=70):
    """Yield slices of ``points`` that dip to ``threshold`` or below.

    ``points`` is an indexable sequence of ``(x, y)`` pairs.  Each yielded
    slice contains the point just before a run of points with
    ``y <= threshold``, the run itself, and the point just after it.

    Runs touching the first or last point have no framing point on that side
    and are skipped.
    """
    def yBelowThreshold(i):
        return points[i][1] <= threshold

    # Group consecutive indices by whether their point is below the threshold.
    for below, g in groupby(range(len(points)), yBelowThreshold):
        if below:
            interval = list(g)
            start, end = interval[0], interval[-1]
            # Require a neighbour on both sides.  The right-hand bound is
            # len(points)-1: a run ending on the second-to-last index is still
            # closed by the final point.  Modify if "open" intervals are also
            # desired.
            if start > 0 and end < len(points) - 1:
                yield points[start - 1:end + 2]
演示:
>>> d = [(0, 92), (11, 70), (43, 77), (44, 76), (61, 77), (64, 69), (68, 67), (84, 68), (93, 87), (108, 81), (141, 74)]
>>> pprint(list( dippingIntervals(d) ))
[((0, 92), (11, 70), (43, 77)),
((61, 77), (64, 69), (68, 67), (84, 68), (93, 87))]
您可以毫不费力地对数据进行后期处理,例如将其转换为您想要的格式,修改上述函数,如下所示:
... yield (start,end), {xy[1] for xy in points[start-1 : end+2]}
这种方法的缺点是它不适用于迭代器;以下内容适用于迭代器,并且更为“经典”的方式:
def getY(point):
    """Return the y-component (second element) of an ``(x, y)`` point."""
    return point[1]


def dippingIntervals(points, threshold=70, key=getY):
    """Yield ``(start, run, end)`` for each dip to ``threshold`` or below.

    ``points`` is any iterable and is consumed once.  ``key`` extracts the
    value compared against ``threshold`` (by default the second element of
    each point).  For every run of consecutive points whose key is
    ``<= threshold``, ``start`` is the point just before the run, ``run`` is
    the list of points in it, and ``end`` is the point just after it.

    Runs at the very start or end of the input have no framing point on that
    side and are not reported.

    >>> list(dippingIntervals([71, 70, 74, 64, 64, 70, 71], key=lambda x: x))
    [(71, [70], 74), (74, [64, 64, 70], 71)]
    """
    def match(point):
        return key(point) <= threshold

    unset = object()        # "no previous point yet"; safe even for None points
    previous = unset
    previous_below = False
    start = None
    run = None              # points of the dip being collected, else None
    for p in points:
        below = match(p)
        if previous is not unset:
            if below and not previous_below:
                # Crossing downwards: the previous point frames the new dip.
                start, run = previous, [p]
            elif below and run is not None:
                run.append(p)
            elif not below and run is not None:
                # Crossing back upwards closes the dip.
                yield start, run, p
                run = None
            # Otherwise we are above the threshold, or still inside a leading
            # dip with no start point; there is nothing to record.
        previous, previous_below = p, below