我有一个如下代码:
for v1, v2 in zip(iter1, iter2):
print len(v1) # prints 0
但是当我将zip更改为itertools.izip时,它会打印1
for v1, v2 in izip(iter1, iter2):
print len(v1) # prints 1
其他每个代码都是一样的。我只是用izip替换zip并且它有效。 izip的输出是正确的。
编辑:添加整个代码:
#!/bin/python
"""
How to use:
>>> from color_assign import Bag, assign_colors
>>> from pprint import pprint
>>> old_topics = set([
... Bag(name='T1', group=0, color=1, count=16000),
... Bag(name='T2', group=0, color=1, count=16000),
... Bag(name='T3', group=1, color=2, count=16000),
... Bag(name='T4', group=2, color=3, count=16000),
... ])
>>> new_topics = set([
... Bag(name='T1', group=0, color=None, count=16000),
... Bag(name='T2', group=4, color=None, count=16000),
... Bag(name='T3', group=1, color=None, count=16000),
... Bag(name='T4', group=1, color=None, count=16000),
... ])
>>> color_ranges = [ [1,10] ]
>>> assign_colors(old_topics, new_topics, color_ranges)
>>> pprint(sorted(new_topics, key=attrgetter('name')))
[Bag(name=T1, group=0, color=1, count=16000),
Bag(name=T2, group=4, color=3, count=16000),
Bag(name=T3, group=1, color=2, count=16000),
Bag(name=T4, group=1, color=2, count=16000)]
>>>
"""
from itertools import groupby, izip
from operator import attrgetter
class Bag:
def __init__(self, name, group, color=None, count=None):
self.name = name
self.group = group
self.color = color
self.count = count
def __repr__(self):
return "Bag(name={self.name}, group={self.group}, color={self.color}, count={self.count})".format(self=self)
def __key(self):
return self.name
def __hash__(self):
return hash(self.__key())
def __eq__(self, other):
return type(self) is type(other) and self.__key() == other.__key()
def color_range_gen(color_ranges, used_colors):
color_ranges = sorted(color_ranges)
color_iter = iter(sorted(used_colors))
next_used = next(color_iter, None)
for start_color, end_color in color_ranges:
cur_color = start_color
end_color = end_color
while cur_color <= end_color:
if cur_color == next_used:
next_used = next(color_iter, None)
else:
yield cur_color
cur_color = cur_color + 1
def assign_colors(old_topics, new_topics, color_ranges):
old_topics -= (old_topics-new_topics) #Remove topics from old_topics which are no longer present in new_topics
used_colors = set()
def group_topics(topics):
by_group = attrgetter('group')
for _, tgrp in groupby(sorted(topics, key=by_group), by_group):
yield tgrp
for topic_group in group_topics(old_topics):
oldtset = frozenset(topic_group)
peek = next(iter(oldtset))
try:
new_group = next(topic.group for topic in new_topics if topic.name == peek.name and not topic.color)
except StopIteration:
continue
newtset = frozenset(topic for topic in new_topics if topic.group == new_group)
if oldtset <= newtset:
for topic in newtset:
topic.color = peek.color
used_colors.add(peek.color)
free_colors = color_range_gen(color_ranges, used_colors)
unassigned_topics = (t for t in new_topics if not t.color)
for tset, color in zip(group_topics(unassigned_topics), free_colors):
for topic in tset:
topic.color = color
if __name__ == '__main__':
import doctest
doctest.testmod()
用法:
my_host:my_dir$ /tmp/color_assign.py
**********************************************************************
File "/tmp/color_assign.py", line 21, in __main__
Failed example:
pprint(sorted(new_topics, key=attrgetter('name')))
Expected:
[Bag(name=T1, group=0, color=1, count=16000),
Bag(name=T2, group=4, color=3, count=16000),
Bag(name=T3, group=1, color=2, count=16000),
Bag(name=T4, group=1, color=2, count=16000)]
Got:
[Bag(name=T1, group=0, color=None, count=16000),
Bag(name=T2, group=4, color=3, count=16000),
Bag(name=T3, group=1, color=2, count=16000),
Bag(name=T4, group=1, color=2, count=16000)]
**********************************************************************
1 items had failures:
1 of 7 in __main__
***Test Failed*** 1 failures.
my_host:my_dir$ sed -i 's/zip(/izip(/g' /tmp/color_assign.py
my_host:my_dir$ /tmp/color_assign.py
my_host:my_dir$
更新:
问题是groupby
在使用zip
答案 0 :(得分:6)
是的,他们的输出是一样的。唯一的区别是zip
在内存中创建一个列表,而izip
返回一个迭代器。
>>> from itertools import izip
>>> zip(range(5), 'abcde')
[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'e')]
>>> it = izip(range(5), 'abcde')
>>> it
<itertools.izip object at 0xa660fcc>
>>> next(it)
(0, 'a')
>>> next(it)
(1, 'b')
请注意,我在Python3中删除了izip
,zip
在那里返回iterator
。
答案 1 :(得分:3)
您遇到的问题是由两个因素共同造成的。首先,izip
仅根据需要推进底层迭代器,而zip
需要立即获取所有项。其次,当groupby
对象被提升时,the previous iterators are no longer valid:
返回的组本身就是一个迭代器,它与
groupby()
共享底层的iterable。由于源是共享的,因此当groupby()
对象前进时,前一个组不再可见。因此,如果以后需要该数据,则应将其存储为列表:
作为一个简单的解决方法,您可以更改group_topics
在其群组中调用list
,然后再产生它们。