假设我有一个元组生成器,我模拟如下:
g = (x for x in (1,2,3,97,98,99))
对于这个特定的生成器,我希望编写一个函数来输出以下内容:
(1,2,3)
(2,3,97)
(3,97,98)
(97,98,99)
(98,99)
(99)
所以我一次迭代三个连续的项目并打印它们,除非我接近尾声。
我的函数的第一行是否应该是:
t = tuple(g)
换句话说,是直接处理元组更好,还是使用生成器可能更有益?如果这两种方法都可以解决这个问题,请说明它们各自的优缺点。此外,如果使用生成器的方法是明智的,那么这样的解决方案会是什么样子?
以下是我目前所做的事情:
def f(data, l):
    """Print each run of up to ``l`` consecutive items of ``data``.

    ``data`` is first materialised into a tuple, then one slice is printed
    per starting position, so the slices near the end are shorter than ``l``.
    """
    items = tuple(data)
    windows = (items[start:start + l] for start in range(len(items)))
    for window in windows:
        print(window)


data = (x for x in (1, 2, 3, 4, 5))
f(data, 3)
更新:
请注意,我已更新我的函数以获取指定窗口长度的第二个参数。
答案 0 :(得分:3)
针对一次返回三个元素的这个具体例子,可以这样写:
def yield3(gen):
    """Yield overlapping windows of three consecutive items from ``gen``.

    Full 3-tuples are produced while the input lasts; once it is exhausted
    the remaining items are emitted as a shrinking tail (a 2-tuple, then a
    1-tuple). Inputs with fewer than three items yield only that shrinking
    tail, and an empty input yields nothing.

    Args:
        gen: any iterable (a generator, iterator, list, ...).

    Yields:
        Tuples of at most three consecutive items.
    """
    window = []
    # A plain ``for`` loop works on Python 2 and 3 and cannot leak
    # StopIteration out of the generator (a RuntimeError under PEP 479).
    for item in gen:
        window.append(item)
        if len(window) == 3:
            yield tuple(window)
            del window[0]  # slide the window: drop the oldest item
    # Input exhausted: emit what is left, dropping the oldest item each time.
    while window:
        yield tuple(window)
        del window[0]


g = (x for x in (1, 2, 3, 97, 98, 99))
for l in yield3(g):
    print(l)
答案 1 :(得分:3)
实际上itertools模块中有这方面的功能 - tee()和izip_longest():
>>> from itertools import izip_longest, tee  # Python 2 name; on Python 3 izip_longest is called zip_longest
>>> g = (x for x in (1,2,3,97,98,99))
>>> a, b, c = tee(g, 3)  # three independent iterators over the same generator
>>> next(b, None)  # advance b once so it runs one item ahead of a
>>> next(c, None)  # advance c twice so it runs two items ahead of a
>>> next(c, None)
>>> # izip_longest pads the shorter iterators with None by default; the inner
>>> # comprehension strips that padding (it would also drop genuine None items).
>>> [[x for x in l if x is not None] for l in izip_longest(a, b, c)]
[[1, 2, 3], [2, 3, 97], [3, 97, 98], [97, 98, 99], [98, 99], [99]]
来自文档:
Return n independent iterators from a single iterable. Equivalent to:
def tee(iterable, n=2):
    """Return ``n`` independent iterators over a single iterable.

    Each returned iterator has its own deque. Whenever an iterator finds its
    deque empty it pulls one value from the shared source and appends it to
    every deque, so the other iterators see that value later.

    Args:
        iterable: the source iterable; it is consumed lazily.
        n: number of iterators to create (default 2).

    Returns:
        A tuple of ``n`` generators.
    """
    import collections  # local import: the snippet uses collections.deque

    it = iter(iterable)
    deques = [collections.deque() for i in range(n)]

    def gen(mydeque):
        while True:
            if not mydeque:             # when the local deque is empty
                try:
                    newval = next(it)   # fetch a new value and
                except StopIteration:
                    # Source exhausted: finish cleanly. A StopIteration that
                    # escapes a generator becomes RuntimeError (PEP 479).
                    return
                for d in deques:        # load it to all the deques
                    d.append(newval)
            yield mydeque.popleft()

    return tuple(gen(d) for d in deques)
答案 2 :(得分:2)
如果您可能需要一次处理三个以上的元素,并且不想把整个生成器加载到内存中,我建议使用标准库 collections 模块中的 deque 来存储当前的元素集合。
deque(发音为“deck”,意思是“双端队列”)可以在两端高效地推入和弹出值。
from collections import deque
from itertools import islice
def get_tuples(gen, n):
    """Yield overlapping windows of up to ``n`` consecutive items.

    Full ``n``-tuples are produced while the input lasts; after it is
    exhausted the window shrinks by one item per step until it is empty.
    Only the current window is kept in memory.

    Args:
        gen: any iterable (a generator, iterator, list, ...).
        n: maximum window length; ``0`` yields nothing.

    Yields:
        Tuples of at most ``n`` consecutive items.
    """
    # Take one iterator up front so that islice() and next() share the same
    # position; calling next() directly on a list or tuple would fail.
    it = iter(gen)
    q = deque(islice(it, n))        # pre-load the queue with `n` values
    while q:                        # run until the queue is empty
        yield tuple(q)              # yield a tuple copied from the current queue
        q.popleft()                 # remove the oldest value from the queue
        try:
            q.append(next(it))      # try to add a new value from the input
        except StopIteration:
            pass                    # none left: the window just keeps shrinking
答案 3 :(得分:1)
这其实要视情况而定。
对于非常大的集合,生成器可能很有用,因为您不需要把所有元素都存储在内存中就能得到所需的结果。 另一方面,既然你需要把结果打印出来,可以合理地推测这个集合并不是很大,所以两种做法没有太大区别。
不过,下面是一个能实现你所需功能的生成器
def part(gen, size):
    """Yield overlapping windows of up to ``size`` consecutive items.

    Full windows of length ``size`` are produced while the input lasts;
    after it is exhausted the window shrinks from the left by one item per
    step, down to a 1-tuple. If the input has fewer than ``size`` items,
    the partial window is yielded once and then shrunk the same way.

    Args:
        gen: any iterable (a generator, iterator, list, ...).
        size: window length, at least 1.

    Yields:
        Tuples of at most ``size`` consecutive items.

    Raises:
        ValueError: if ``size`` is smaller than 1 (raised when iteration
            starts).
    """
    if size < 1:
        raise ValueError("size must be at least 1")
    window = ()
    # A ``for`` loop replaces the Python 2-only ``gen.next()`` calls and
    # works unchanged on Python 2 and 3.
    for item in gen:
        if len(window) == size:
            window = window[1:]     # slide: drop the oldest item
        window += (item,)
        if len(window) == size:
            yield window
    # Fewer than ``size`` items in total: the partial window has not been
    # emitted by the loop above, so emit it before shrinking.
    if 0 < len(window) < size:
        yield window
    while len(window) > 1:
        window = window[1:]
        yield window
>>> a = (x for x in range(10))
>>> list(part(a, 3))  # windows of three, then the shrinking tail
[(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6), (5, 6, 7), (6, 7, 8), (7, 8, 9), (8, 9), (9,)]
>>> a = (x for x in range(10))  # a fresh generator: the previous call consumed the first one
>>> list(part(a, 5))  # same input with a window size of five
[(0, 1, 2, 3, 4), (1, 2, 3, 4, 5), (2, 3, 4, 5, 6), (3, 4, 5, 6, 7), (4, 5, 6, 7, 8), (5, 6, 7, 8, 9), (6, 7, 8, 9), (7, 8, 9), (8, 9), (9,)]
>>>
注意:这段代码其实并不算优雅,但当你需要按其他长度(比如 5 个一组)拆分时,它同样可以工作。
答案 4 :(得分:1)
使用生成器绝对是最好的,因为你不想把所有东西都放在内存中。
使用双端队列可以非常简单地完成。
from collections import deque
from itertools import islice
def overlapping_chunks(size, iterable, *, head=False, tail=False):
    """
    Get overlapping subsections of an iterable of a specified size.

        print(*overlapping_chunks(3, (1,2,3,97,98,99)))
        #>>> [1, 2, 3] [2, 3, 97] [3, 97, 98] [97, 98, 99]

    If head is truthy, the "warm up" before the specified maximum
    number of items is included.

        print(*overlapping_chunks(3, (1,2,3,97,98,99), head=True))
        #>>> [1] [1, 2] [1, 2, 3] [2, 3, 97] [3, 97, 98] [97, 98, 99]

    If tail is truthy, the "cool down" after the iterable is exhausted
    is included.

        print(*overlapping_chunks(3, (1,2,3,97,98,99), tail=True))
        #>>> [1, 2, 3] [2, 3, 97] [3, 97, 98] [97, 98, 99] [98, 99] [99]

    Args:
        size: maximum chunk length, at least 1.
        iterable: any iterable; it is consumed lazily.
        head: keyword-only; also yield the growing chunks at the start.
        tail: keyword-only; also yield the shrinking chunks at the end.

    Yields:
        A new list for every chunk.

    Raises:
        ValueError: if size is smaller than 1 (raised when iteration starts).
    """
    if size < 1:
        # Without this check the failure surfaces as an unrelated-looking
        # ValueError from islice() about its stop argument.
        raise ValueError("size must be at least 1")

    # A bounded deque drops its oldest item automatically on append.
    chunker = deque(maxlen=size)
    iterator = iter(iterable)

    # Warm up: load the first size-1 items so that the next append
    # completes the first full chunk.
    for item in islice(iterator, size-1):
        chunker.append(item)

        if head:
            yield list(chunker)

    for item in iterator:
        chunker.append(item)
        yield list(chunker)

    if tail:
        # Cool down: shrink from the left until one item remains.
        while len(chunker) > 1:
            chunker.popleft()
            yield list(chunker)
答案 5 :(得分:0)
我认为你现在所做的事情似乎比以上任何一个都容易得多。如果没有任何特别需要使其更复杂,我的意见是保持简单。换句话说,最好直接使用元组。
答案 6 :(得分:0)
这是一个适用于Python 2.7.5和3.3.2的生成器。在内部,它尽可能使用迭代器和生成器,因此它应该具有相对内存效率。
try:
from itertools import izip, izip_longest, takewhile
except ImportError: # Python 3
izip = zip
from itertools import zip_longest as izip_longest, takewhile
_NULL = type('_NULL', (object,), {})()  # unique singleton object


def tuple_window(n, iterable):
    """Yield overlapping windows of up to ``n`` consecutive items.

    Full ``n``-tuples are produced while the input lasts; once the leading
    iterators run dry the windows shrink, down to a 1-tuple.

    Args:
        n: window length; values below 1 yield nothing.
        iterable: any iterable, including one-shot generators and iterators.

    Yields:
        Tuples of at most ``n`` consecutive items.
    """
    from itertools import tee  # local import: not among the file-level imports

    if n < 1:
        return
    # tee() gives n independent iterators even when ``iterable`` is a
    # generator; calling iter() on a generator n times returns the same
    # object every time, so those "copies" would share a single position.
    iterators = tee(iterable, n)
    for offset, iterator in enumerate(iterators):
        for _ in range(offset):
            # The default avoids StopIteration when the input is shorter
            # than the offset.
            next(iterator, None)
    # Exhausted iterators are padded with _NULL, and the padding can only
    # appear at the end of a row, so takewhile() trims it off.
    for t in izip_longest(*iterators, fillvalue=_NULL):
        yield tuple(takewhile(lambda v: v is not _NULL, t))


if __name__ == '__main__':
    data = (1, 2, 3, 97, 98, 99)
    for t in tuple_window(3, data):
        print(t)
输出:
(1, 2, 3)
(2, 3, 97)
(3, 97, 98)
(97, 98, 99)
(98, 99)
(99,)