我的代码中的一个常见模式是:“搜索列表,直到找到特定元素,然后查看它之前和之后的元素。”
作为一个例子,我可能想查看一个日志文件,其中重要事件用星号标记,然后拉出重要事件的上下文。
在以下示例中,我想知道超空间驱动器(hyperdrive)为什么会爆炸:
Spinning up the hyperdrive
Hyperdrive speed 100 rpm
Hyperdrive speed 200 rpm
Hyperdrive lubricant levels low (100 gal.)
* CRITICAL EXISTENCE FAILURE
Hyperdrive exploded
我想要一个函数 get_item_with_context(),它允许我找到带有星号的第一行,然后返回该行之前的 n 行以及之后的 m 行。
我的尝试如下:
import collections, itertools
def get_item_with_context(predicate, iterable, items_before=0, items_after=0):
    """Return the first item matching ``predicate``, optionally with context.

    Searches ``iterable`` until an item for which ``predicate(item)`` is true
    is found.

    Args:
        predicate: One-argument callable; a true result marks the target item.
        iterable: Any iterable; it is consumed only as far as necessary.
        items_before: Maximum number of items preceding the target to return.
        items_after: Maximum number of items following the target to return.

    Returns:
        ``None`` if no item matches (including for empty input).
        The matching item itself if both ``items_before`` and ``items_after``
        are 0.
        Otherwise a list holding up to ``items_before`` preceding items, the
        matching item, and up to ``items_after`` following items, in their
        original order. Fewer context items are returned when the match is
        close to either end of the input.
    """
    # A bounded deque remembers only the most recent `items_before` items;
    # with maxlen=0 it discards everything appended to it.
    preceding = collections.deque(maxlen=items_before)
    iterator = iter(iterable)

    # An explicit loop is used instead of itertools.takewhile: takewhile
    # consumes the first item that fails its test and throws it away, which
    # would lose the very item we are looking for.
    for item in iterator:
        if predicate(item):
            break
        preceding.append(item)
    else:
        # Input exhausted without a match (or input was empty).
        return None

    if items_before == 0 and items_after == 0:
        return item

    # islice simply stops early if fewer than `items_after` items remain.
    following = itertools.islice(iterator, items_after)
    return list(preceding) + [item] + list(following)
用法应该是:
>>> get_item_with_context(lambda x: x == 3, [1,2,3,4,5,6],
items_before = 1, items_after = 1)
[2, 3, 4]
问题:
我用 not(predicate(d[-1])) 来检查是否确实找到了匹配项,但由于某种原因它不起作用——它总是返回 false。
另外,如果匹配项之后剩余的元素少于 items_after 个,那么结果就是垃圾。
能否就如何让这段代码正常工作、并使其更健壮给我一些建议?或者,如果我是在重新发明轮子,也请直接告诉我。
答案 0 :(得分:2)
您可以使用collections.deque对象获取上下文的环形缓冲区。要获得+/- 2行上下文,请按以下方式对其进行初始化:
context = collections.deque(maxlen=5)
然后迭代你喜欢的任何内容,为每一行调用它:
context.append(line)
对 context[2] 进行匹配,并在每次匹配成功时输出整个双端队列的内容。
答案 1 :(得分:2)
这似乎可以正确处理边缘情况:
from collections import deque
def item_with_context(predicate, seq, before=0, after=0):
    """Return the first item of ``seq`` matching ``predicate`` with context.

    Args:
        predicate: One-argument callable; a true result marks the target item.
        seq: Any iterable.
        before: Maximum number of preceding items to include.
        after: Maximum number of following items to include.

    Returns:
        A list of up to ``before`` preceding items, the matching item, and up
        to ``after`` following items, or ``None`` if nothing matches.
    """
    # Bounded buffer of the most recently seen non-matching items.
    recent = deque(maxlen=before)
    it = iter(seq)
    for item in it:
        if predicate(item):
            # range() comes first in zip() so that zip stops on the range
            # before pulling a surplus element out of `it`.
            trailing = [x for _, x in zip(range(after), it)]
            return list(recent) + [item] + trailing
        recent.append(item)
    return None
答案 2 :(得分:1)
这可能是一个完全“不 Pythonic”的解决方案:
import itertools
def get_item_with_context(predicate, iterable, items_before=0, items_after=0):
    """Find the first item matching ``predicate`` and split out its context.

    Args:
        predicate: One-argument callable; a true result marks the target item.
        iterable: Any iterable; it is consumed only as far as necessary.
        items_before: Maximum number of preceding items to return.
        items_after: Maximum number of following items to return.

    Returns:
        A tuple ``(before, found_element, after)`` where ``before`` and
        ``after`` are lists of at most ``items_before`` / ``items_after``
        items in their original order, or ``None`` if nothing matches.
    """
    # Fixed-size circular buffer holding the most recent non-matching items.
    before = [None] * items_before
    write_pos = 0  # slot of the circular buffer that is overwritten next

    iterator = iter(iterable)
    for found_index, found_element in enumerate(iterator):
        if predicate(found_element):
            break
        if items_before > 0:
            before[write_pos] = found_element
            write_pos = (write_pos + 1) % items_before
    else:
        # Ran off the end of the input without a match.
        return None

    # Rotate the circular buffer so the oldest item comes first.
    before = before[write_pos:] + before[:write_pos]
    if found_index < items_before:
        # The buffer was never filled: drop the unused leading None slots.
        before = before[items_before - found_index:]

    # islice stops early if fewer than `items_after` items remain.
    after = list(itertools.islice(iterator, items_after))
    return before, found_element, after
# Demo: look up every value from 0 to 7 in a six-element list, asking for one
# item of leading context and two items of trailing context.
for index in range(0, 8):
    result = get_item_with_context(
        lambda x: x == index, [1, 2, 3, 4, 5, 6], items_before=1, items_after=2
    )
    print(index, result)
输出:
0 None
1 ([], 1, [2, 3])
2 ([1], 2, [3, 4])
3 ([2], 3, [4, 5])
4 ([3], 4, [5, 6])
5 ([4], 5, [6])
6 ([5], 6, [])
7 None
我冒昧地改变了输出,以便更清楚地匹配谓词和之前和之后的内容:
([2], 3, [4, 5])
^ ^ ^
| | +-- after the element
| +------- the element that matched the predicate
+----------- before the element
该函数处理以下情况:
未找到匹配项时返回 None(如果你想返回其他内容,请修改最后一行);
匹配项之前或之后的元素不足 N 个。
它使用一个环形缓冲区来保存匹配项之前的元素。
答案 3 :(得分:1)
from itertools import takewhile, tee, chain
from collections import deque
def contextGet(iterable, predicate, before, after):
    """Return the first item matching ``predicate`` together with its context.

    Args:
        iterable: Any iterable; it is consumed only as far as necessary.
        predicate: One-argument callable; a true result marks the target item.
        before: Maximum number of preceding items to include.
        after: Maximum number of following items to include.

    Returns:
        An ``itertools.chain`` iterator yielding up to ``before`` preceding
        items, the matching item, and up to ``after`` following items. The
        iterator is empty when no item matches.
    """
    iterator = iter(iterable)
    # Bounded buffer: only the last `before` non-matching items are kept.
    before_log = deque(maxlen=before)
    for item in iterator:
        if predicate(item):
            break
        before_log.append(item)
    else:
        # No match: yield nothing rather than the tail of the input.
        return chain()

    after_log = [item]
    # range() comes first in zip() so that zip stops on the range before
    # pulling a surplus element out of `iterator`.
    after_log.extend(x for _, x in zip(range(after), iterator))
    return chain(before_log, after_log)
或者:
def contextGet(iterable, predicate, before, after):
    """Return a window around the first item matching ``predicate``.

    A single bounded deque of size ``before + after + 1`` slides over the
    input until the match is reached and is then topped up with up to
    ``after`` following items.

    Caveat: because one buffer is shared for leading and trailing context,
    the result can hold more than ``before`` leading items when fewer than
    ``after`` items follow the match.

    Args:
        iterable: Any iterable; it is consumed only as far as necessary.
        predicate: One-argument callable; a true result marks the target item.
        before: Number of preceding items wanted.
        after: Number of following items wanted.

    Returns:
        A ``collections.deque`` with the window contents; it is empty when
        no item matches.
    """
    iterator = iter(iterable)
    log = deque(maxlen=before + after + 1)
    for item in iterator:
        log.append(item)
        if predicate(item):
            break
    else:
        # No match: do not hand back the tail of the input as "context".
        log.clear()
        return log

    # range() comes first in zip() so that zip stops on the range before
    # pulling a surplus element out of `iterator`.
    log.extend(x for _, x in zip(range(after), iterator))
    return log
如果列表剩余部分的长度小于 after 参数,则第二个版本可能会返回过多的“之前”元素。
答案 4 :(得分:0)
我不确定自己是否误解了问题,但这可以很简单地实现:
def get_item_with_context(predicate, iterable, items_before=0, items_after=0):
    """Return the first item matching ``predicate`` with surrounding items.

    Args:
        predicate: One-argument callable; a true result marks the target item.
        iterable: Any iterable; it is consumed only as far as necessary.
        items_before: Maximum number of preceding items to include.
        items_after: Maximum number of following items to include.

    Returns:
        A list of up to ``items_before`` preceding items, the matching item,
        and up to ``items_after`` following items. The list is empty when no
        item matches.
    """
    # Sliding window that always ends with the item currently being examined.
    window = collections.deque(maxlen=items_before + 1)
    iterator = iter(iterable)
    for element in iterator:
        window.append(element)
        if predicate(element):
            break
    else:
        # Nothing matched (or the input was empty).
        return []

    result = list(window)
    # range() comes first in zip() so that zip stops on the range before
    # pulling a surplus element out of `iterator`.
    result.extend(x for _, x in zip(range(items_after), iterator))
    return result
>>> get_item_with_context(lambda x: x == 0, [1,2,3,4,5,6],items_before = 2, items_after = 1)
[]
>>> get_item_with_context(lambda x: x == 4, [1,2,3,4,5,6],items_before = 2, items_after = 1)
[2, 3, 4, 5]
>>> get_item_with_context(lambda x: x == 1, [1,2,3,4,5,6],items_before = 2, items_after = 1)
[1, 2]
>>> get_item_with_context(lambda x: x == 6, [1,2,3,4,5,6],items_before = 2, items_after = 1)
[4, 5, 6]
>>> get_item_with_context(lambda x: x == 4, [1,2,3,4,5,6],items_before = 20, items_after = 10)
[1, 2, 3, 4, 5, 6]
答案 5 :(得分:0)
import collections
def context_match(predicate, iterable, before=0, after=0):
    """Return the first item matching ``predicate`` with surrounding items.

    Args:
        predicate: One-argument callable; a true result marks the target item.
        iterable: Any iterable. Exactly the items up to the match plus at
            most ``after`` further items are consumed from it.
        before: Maximum number of preceding items to include.
        after: Maximum number of following items to include.

    Returns:
        A list of up to ``before`` preceding items, the matching item, and up
        to ``after`` following items, or ``None`` if nothing matches.
    """
    # Sliding window that always ends with the item currently being examined.
    window = collections.deque(maxlen=before + 1)
    iterator = iter(iterable)
    for el in iterator:
        window.append(el)
        if predicate(el):
            break
    else:
        # Nothing matched (or the input was empty).
        return None

    output = list(window)
    # range() comes first in zip() so that zip stops on the range before
    # pulling a surplus element out of `iterator`; a caller that passed an
    # iterator can therefore keep reading right after the returned context.
    output.extend(x for _, x in zip(range(after), iterator))
    return output
# Demo: look up every value from 0 to 7 with two items of context on each
# side. range() and the parenthesised single-argument print() behave the same
# on Python 2 and Python 3.
for val in range(8):
    print(context_match(lambda x: x == val, [1, 2, 3, 4, 5, 6], before=2, after=2))
#Output:
None
[1, 2, 3]
[1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6]
[4, 5, 6]
None
答案 6 :(得分:0)
这是一个更短的版本:
import collections
from itertools import islice
def windowfilter(pred, it, before=0, after=0):
    """Return the first item matching ``pred`` with its surrounding items.

    Args:
        pred: One-argument callable; a true result marks the target item.
        it: Any iterable; it is consumed only as far as necessary.
        before: Maximum number of preceding items to include.
        after: Maximum number of following items to include.

    Returns:
        A tuple of up to ``before`` preceding items, the matching item, and
        up to ``after`` following items, or ``None`` if nothing matches.
    """
    # Only leading context goes through the bounded buffer, so no slicing is
    # needed afterwards when the input ends soon after the match.
    leading = collections.deque(maxlen=before)
    iterator = iter(it)
    for x in iterator:
        if pred(x):
            # islice simply stops early if fewer than `after` items remain.
            trailing = tuple(islice(iterator, after))
            return tuple(leading) + (x,) + trailing
        leading.append(x)
    return None
测试输出:
# Demo: look up every value from 0 to 7 with two items of leading and one
# item of trailing context.
seq = [1, 2, 3, 4, 5, 6]
for elem in range(8):
    print(elem, windowfilter(lambda x: x == elem, seq, 2, 1))
# Output:
0 None
1 (1, 2)
2 (1, 2, 3)
3 (1, 2, 3, 4)
4 (2, 3, 4, 5)
5 (3, 4, 5, 6)
6 (4, 5, 6)
7 None
答案 7 :(得分:0)
我的回答是这个,
# NOTE(review): this is a fragment. The enclosing `def` is not shown, the
# indentation has been lost, and `res`, `token`, `window`, `items_before` and
# `items_after` are not defined anywhere in the visible snippet.
# Walks `iterable` with indices and collects a space-joined slice of `token`
# around every position whose value equals `predicate`.
for k,v in enumerate(iterable):
#if cmp(v,predicate) == 0:
# `predicate` is compared to the item by equality here; it is not called.
if v == predicate:
# Enough items remain after position k: take the full before/after slice.
if k+items_after < len(iterable):
# NOTE(review): when k < items_before the slice start is negative, which
# Python slicing counts from the end -- confirm this is intended.
res.append((' '.join(token[(k-items_before):(k+items_after+1)])))
# NOTE(review): presumably `window` and `token` mirror `items_after` and
# `iterable` -- confirm against the full function.
elif k+window == len(token):
res.append((' '.join(token[(k-items_before):])))
# This branch has the same body as the `elif` branch above.
else:
res.append((' '.join(token[(k-items_before):])))
return res