我有一个User对象,有两个布尔属性,如下所示:
class User(object):
    """A user record carrying two independent boolean flags."""

    def __init__(self, a, b):
        self.a = a  # Always a bool
        self.b = b  # Always a bool
我有一个名为user_list
的对象列表,我希望得到四个频率计数:有多少个对象满足 a == True、a == False、b == True、b == False。
我最初的方法是使用collections.Counter,但这需要在列表中循环两次:
a_count = collections.Counter(u.a for u in user_list)
b_count = collections.Counter(u.b for u in user_list)
print a_count[True], a_count[False], b_count[True], b_count[False]
我还想过直接使用4个计数器变量,但这样写很难看,而且不够Pythonic:
a_true_count = 0
a_false_count = 0
b_true_count = 0
b_false_count = 0
for u in user_list:
if u.a:
a_true_count += 1
else:
a_false_count += 1
if u.b:
b_true_count += 1
else:
a_false_count += 1
print a_true_count, a_false_count, b_true_count, b_false_count
有更有效的方法吗?输出可以是任何东西:4个单独的变量,带有值的字典,列表,元组,等等,只要它有4个值。
提前致谢!
答案 0 :(得分:3)
我认为使用collections.Counter
是正确的想法,只需使用单个Counter
和单循环以更通用的方式进行:
from collections import Counter
user_list = [User(True, False), User(False, True), User(True, True), User(False, False)]
user_attr_count = Counter()
for user in user_list:
user_attr_count['a_%s' % user.a] += 1
user_attr_count['b_%s' % user.b] += 1
print user_attr_count
# Counter({'b_False': 2, 'a_True': 2, 'b_True': 2, 'a_False': 2})
答案 1 :(得分:2)
为什么不使用两个计数器,并从user_list
的长度中减去以找到其他两个值?
# Every user is either True or False for an attribute, so the False count is
# whatever remains after the True count is taken out of the list length.
a_false_count, b_false_count = (
    len(user_list) - a_true_count,
    len(user_list) - b_true_count,
)
这样的显式循环可能是时间上最有效的解决方案,但是如果你正在寻找一些更简洁的代码,你可以试试filter()
:
# filter() keeps the users whose attribute is truthy, so each length is a
# True count; subtract it from len(user_list) to get the False count.
# (Python 2: filter() returns a list, so len() works directly.)
a_true_count = len(filter(lambda x: x.a, user_list))
b_true_count = len(filter(lambda x: x.b, user_list))
答案 2 :(得分:1)
您可以使用位屏蔽:
def count(user_list, mask):
    """Tally users by their (a, b) flags packed into two bits and masked.

    Each user is encoded as ``a << 1 | b`` (bit 1 is ``a``, bit 0 is ``b``) and
    then AND-ed with ``mask``, so the mask selects which flags distinguish
    the buckets.

    Returns a Counter mapping each masked bit pattern to its frequency.
    """
    return Counter((u.a << 1 | u.b) & mask for u in user_list)
a=0b10
b=0b01
aANDb=0b11
print count(user_list,aANDb)
答案 3 :(得分:1)
from collections import Counter
c = Counter()
for u in user_list:
c['a'] += u.a
c['b'] += u.b
print c['a'], len(user_list) - c['a'], c['b'], len(user_list) - c['b']
答案 4 :(得分:1)
这是一个与你的第一种方法相近的解决方案,不同之处在于它只迭代列表一次。它创建两个计数器,遍历列表,并针对每个用户分别更新这两个计数器。实际进行计数的步骤如下:
# Single pass: each user contributes one observation to each counter.
# update() takes an iterable of observations, hence the one-element tuples.
for user in user_list:
    a_count.update((user.a,))
    b_count.update((user.b,))
它使用update方法更新每个Counter对象。你可以这样做,而不必像第一个例子那样用生成器表达式在一行中创建计数器。完整的代码示例如下:
import collections
class User(object):
    """A user record carrying two independent boolean flags, ``a`` and ``b``."""

    def __init__(self, a, b):
        self.a = a
        self.b = b
user_list = [
User(True, False),
User(False, True),
User(True, True),
User(False, False)
]
a_count = collections.Counter()
b_count = collections.Counter()
for user in user_list:
a_count.update([user.a])
b_count.update([user.b])
print a_count[True], a_count[False], b_count[True], b_count[False]
答案 5 :(得分:1)
from collections import Counter
# for test, import random:
import random
# define class
class User(object):
    """A user record with two boolean attributes."""

    def __init__(self, a, b):
        self.a = a  # Always a bool
        self.b = b  # Always a bool
# create an arbitrary set: 100 users derived from random integers in [0, 100]
# (a is True for even draws, b is True for multiples of 3)
users = [
    User(r % 2 == 0, r % 3 == 0)
    for r in (random.randint(0, 100) for _ in xrange(100))
]
# and... count: transpose the (a, b) pairs into one column per attribute,
# then build a Counter from each column
a_column, b_column = zip(*[(user.a, user.b) for user in users])
aCounter, bCounter = Counter(a_column), Counter(b_column)
**更新**
map(sum, zip(*tuples))
在较小的样本上略快于for循环,但对于较大的样本,for循环的扩展性要好得多。与其他方法不同,for循环并不会因为直接处理元组列表而获得明显的性能提升,可能是因为它本身已经相当高效了。
collections.Counter
仍然很慢。
import random
import itertools
import time
from collections import Counter
# define class
class User(object):
    """Benchmark subject: a record with two boolean attributes."""

    def __init__(self, a, b):
        self.a = a  # Always a bool
        self.b = b  # Always a bool
# create an arbitrary sample: 100 users derived from random integers in [0, 100]
users = [
    User(r % 2 == 0, r % 3 == 0)
    for r in (random.randint(0, 100) for _ in xrange(100))
]
# create a list of tuples of the arbitrary sample, so the "*2" benchmarks
# can skip the per-user attribute lookups
users2 = [(user.a, user.b) for user in users]
# useful function-timer decorator
def timer(times=1):
    """Build a decorator that runs the wrapped function repeatedly and prints timing.

    The decorated function is called ``times`` times with the same arguments.
    The total wall-clock time and the resulting calls per second are printed,
    and the result of the last call is returned (``None`` when ``times`` is 0).
    """
    import functools  # local import keeps this snippet self-contained

    def outer(fn):
        @functools.wraps(fn)  # keep fn's name and docstring on the timed wrapper
        def wrapper(*args, **kwargs):
            r = None  # stays None when times == 0 instead of being unbound
            runs = range(times)  # built before the clock starts
            t0 = time.time()
            for _ in runs:
                r = fn(*args, **kwargs)
            dt = time.time() - t0
            # A coarse clock can report dt == 0 for very fast runs; avoid
            # dividing by zero in that case.
            rate = times / dt if dt else float('inf')
            print('{} ran {} times in {} seconds with {:f} ops/sec'.format(
                fn.__name__, times, dt, rate))
            return r
        return wrapper
    return outer
# now create the timeable functions
n = 10000  # repetitions per benchmark


@timer(times=n)
def time_sum():
    """Transpose (a, b) pairs read from the User objects and sum each column."""
    return map(sum, zip(*((user.a, user.b) for user in users)))
@timer(times=n)
def time_counter():
    """Transpose (a, b) pairs read from the User objects and build a Counter per column."""
    return map(Counter, zip(*((user.a, user.b) for user in users)))
@timer(times=n)
def time_for():
    """Count True a's and True b's with a plain loop over the User objects."""
    a_true, b_true = 0, 0
    for user in users:
        if user.a is True:
            a_true += 1
        if user.b is True:
            b_true += 1
    return a_true, b_true
@timer(times=n)
def time_itermapzip():
    """Same as time_sum, but with the lazy itertools imap/izip variants."""
    return list(itertools.imap(sum, itertools.izip(*((user.a, user.b) for user in users))))
@timer(times=n)
def time_sum2():
    """Sum each column of the pre-built (a, b) tuple list."""
    return map(sum, zip(*users2))
@timer(times=n)
def time_counter2():
    """Build a Counter for each column of the pre-built (a, b) tuple list."""
    return map(Counter, zip(*users2))
@timer(times=n)
def time_for2():
    """Count True a's and True b's with a plain loop over the (a, b) tuple list."""
    a_true, b_true = 0, 0
    for flag_a, flag_b in users2:
        if flag_a is True:
            a_true += 1
        if flag_b is True:
            b_true += 1
    return a_true, b_true
@timer(times=n)
def time_itermapzip2():
    """Same as time_sum2, but with the lazy itertools imap/izip variants."""
    return list(itertools.imap(sum, itertools.izip(*users2)))
# Run every benchmark in order; each call prints its own timing line and
# v ends up holding the result of the last one.
for benchmark in (
    time_sum,
    time_counter,
    time_for,
    time_itermapzip,
    time_sum2,
    time_counter2,
    time_for2,
    time_itermapzip2,
):
    v = benchmark()
# time_sum ran 10000 times in 0.446894168854 seconds with 22376.662523 ops/sec
# time_counter ran 10000 times in 1.29836297035 seconds with 7702.006471 ops/sec
# time_for ran 10000 times in 0.267076015472 seconds with 37442.523554 ops/sec
# time_itermapzip ran 10000 times in 0.459508895874 seconds with 21762.364319 ops/sec
# time_sum2 ran 10000 times in 0.174293994904 seconds with 57374.323226 ops/sec
# time_counter2 ran 10000 times in 0.989939928055 seconds with 10101.623055 ops/sec
# time_for2 ran 10000 times in 0.183295965195 seconds with 54556.574605 ops/sec
# time_itermapzip2 ran 10000 times in 0.193426847458 seconds with 51699.131384 ops/sec
print "True a's: {}\t False a's: {}\nTrue b's: {}\t False b's:{}".format(v[0], len(users)-v[0], v[1], len(users)-v[1])
# True a's: 53 False a's: 47
# True b's: 31 False b's:69
v
# [53, 31]
样本大小为1000的相同代码:
# time_sum ran 10000 times in 9.30428719521 seconds with 1074.773359 ops/sec
# time_counter ran 10000 times in 16.7009849548 seconds with 598.767080 ops/sec
# time_for ran 10000 times in 2.61371207237 seconds with 3825.976130 ops/sec
# time_itermapzip ran 10000 times in 9.40824103355 seconds with 1062.897939 ops/sec
# time_sum2 ran 10000 times in 5.70988488197 seconds with 1751.348794 ops/sec
# time_counter2 ran 10000 times in 13.4643371105 seconds with 742.702735 ops/sec
# time_for2 ran 10000 times in 2.49017906189 seconds with 4015.775473 ops/sec
# time_itermapzip2 ran 10000 times in 6.10926699638 seconds with 1636.857581 ops/sec