让我们说一场比赛即将到来,您必须帮助设置。
该比赛有N支队伍,例如:
teams = [Team1, Team2, ..., TeamN]
在每个团队中,可以从1-3名球员中的任何一个从他们自己的球员列表中选择。
比方说,每个列表代表可能的领导者,菜鸟,助手。
每个团队都必须有一个领导者,但可能没有潜在的新秀或助手。
例如,Team1可能看起来像
Team1.Leaders = [Leader1, Leader2, ..., LeaderX1]
Team1.Rookies = [Rookie1, Rookie2, ..., RookieX2]
Team1.Helpers = [Helper1, Helper2, ..., HelperX3]
或者它的Rookies或Helpers或两者都可能有一个空列表。
团队,领导者,新秀和助手的每个列表的大小可能有所不同。
您必须为每个团队选出1个领导者,并可能选择1个新秀或1个助手,或两者都选,这取决于是否存在新秀和助手。 (同样,每个团队可以选择1-3名球员)
获取可以为每个团队挑选球员的所有方式的组合列表。
例如
Team1 = Team()
Team1.Leaders = [Leader1, Leader2]
Team1.Rookies = [Rookie1]
Team1.Helpers = []
Team2 = Team()
Team2.Leaders = [Leader3]
Team2.Rookies = []
Team2.Helpers = [Helper1, Helper2]
combinations = team_combinations(teams)
组合的预期输出如下:
[{"Team1":[Leader1, Rookie1], "Team2":[Leader3, Helper1]},
{"Team1":[Leader1, Rookie1], "Team2":[Leader3, Helper2]},
{"Team1":[Leader2, Rookie1], "Team2":[Leader3, Helper1]},
{"Team1":[Leader2, Rookie1], "Team2":[Leader3, Helper2]}]
其中每本词典都是您如何选择玩家的组合。
我正在尝试尽可能快地获得组合,就像您可以想象的那样,当有很多玩家可供选择时,您甚至可能拥有数百万种不同的组合。
我尝试使用递归,其中基本情况是len(teams)== 1,而递归步骤是其他任何事情。我设法创建了一个有效的函数,但是获取组合的过程甚至可能需要一个小时才能完成。
最快的组合方式是什么? 使用itertools会有所帮助吗? 有没有更快的方法?
答案 0 :(得分:2)
首先,您只能做一件事来减少问题的复杂性-减少球队和球员的数量。这是可靠地加速代码的一种肯定方法。
反之,如果组合总数少于10**9
,则任何微观优化都会变得很重要。也可以使用低级编程语言。对于Python,我要强调以下几点:
tuple
作为leaders
,rookies
和helpers
的类型。它具有相当快的迭代器,可以避免隐式复制数据(例如,在itertools.product
中); sys.getrecursionlimit()
的范围。for
-循环插入堆栈; for
循环组成的这是一个小草图。我写的所有类都没有__init__
,应该通过具体的实现来继承。
from itertools import product, chain
from operator import attrgetter
try:
from math import prod # python 3.8
except ImportError:
from functools import reduce
from operator import mul
prod = lambda i: reduce(mul, i, 1)
class Team(object):
__slots__ = ('leaders', 'rookies', 'helpers')
leaders: tuple
rookies: tuple
helpers: tuple
def l_product(self):
return product(self.leaders) # optimization tricky
def l_r_product(self):
return product(self.leaders, self.rookies)
def l_h_product(self):
return product(self.leaders, self.helpers)
def l_r_h_product(self):
return product(self.leaders, self.rookies, self.helpers)
def __iter__(self):
return chain(self.l_product(), self.l_r_product(), self.l_h_product(), self.l_r_h_product())
def __len__(self):
return (
(l := len(self.leaders)) +
(l * (r := len(self.rookies))) +
(l * (h := len(self.helpers))) +
(l * r * h)
)
@property
def combs_size(self):
# empiric value indicating quite close size of all produced tuples-combinations in memory
return (
(l := len(self.leaders)) +
((l * (r := len(self.rookies))) * 2) +
((l * (h := len(self.helpers))) * 2) +
(l * r * h * 3)
)
class CachedTeam(Team):
__slots__ = Team.__slots__ + ('_cached_combs',)
def __init__(self, team):
self.leaders = team.leaders
self.rookies = team.rookies
self.helpers = team.helpers
self._cached_combs = tuple(team)
def __iter__(self):
return iter(self._cached_combs)
class Tournament(object):
_teams: list
_producer: ... # Callable
_free_memory = float('inf') # empiric value indicating ALL memory that can be occupied
# while producing elements
_max_effective_teams_length = 100 # empiric value, it is related with max-memory
# that _compiled_producer can occupy
_min_required_memory_multiplier = 4 # empiric value, it is related with memory required to
# _stack_producer
CachedTeam = CachedTeam
_wrapper = tuple
def __len__(self):
return prod(map(len, self._teams))
def __iter__(self):
try:
return self._producer()
except AttributeError:
self._create_producer()
return self._producer()
@property
def teams_combs_size(self):
return sum(map(attrgetter('combs_size'), self._teams))
def _create_producer(self):
teams = self._teams
teams_length = len(teams)
if not teams_length:
self._producer = ((),).__iter__
elif self.teams_combs_size < self._free_memory:
# itertools.product creates tuple-copy for all non-tuple input
self._producer = self._product_producer
else:
self._check_memory()
self._configure_cache()
if teams_length <= max(2, self._max_effective_teams_length):
self._producer = self._compiled_producer
else:
self._producer = self._stack_producer
def _product_producer(self):
# it should be used when we have a lot of memory
return product(*self._teams)
@property
def _compiled_producer(self):
# e.g: ((t0,t1,t2,) for t0 in teams[0] for t1 in teams[1] for t2 in teams[2])
# it should be used when number of teams is not big
teams_length = len(self._teams)
assert teams_length >= 1
producer_code = 'lambda: (({yielded}){for_loops})'
return eval(
producer_code.format(
yielded = ''.join(map('t{},'.format, range(0, teams_length))),
for_loops = ''.join(map(' for t{0} in teams[{0}]'.format,
range(0, teams_length))),
),
{'teams': self._teams}
)
def _stack_producer(self):
# it should be used when number of teams is big
wrapper = self._wrapper
teams = self._teams
teams_length = len(teams)
assert teams_length >= 2
common = [None] * teams_length # common memory
teams_iterators = [None] * teams_length
teams_iterators[0] = iter(teams[0])
last_team = teams[-1]
x = 0 # stack pointer
max_x = teams_length - 1
pre_max_x = max_x - 1
while True:
try:
common[x] = next(teams_iterators[x])
# except StopIteration:
except:
if x: # if first place loop is not over:
x -= 1 # decrement stack pointer
continue
return
if x == pre_max_x:
for common[max_x] in last_team:
yield wrapper(common) # don't forget to use wrapper that creates copy!!
else:
# create next place loop and increment stack pointer
teams_iterators[x] = iter(teams[(x := x + 1)])
def _check_memory(self):
self._free_memory -= (len(self._teams) * self._min_required_memory_multiplier)
if self._free_memory < 0:
raise ValueError('not enough memory to operate with teams')
def _configure_cache(self):
teams = self._teams
CachedTeam = self.CachedTeam
free_memory = self._free_memory
# We want to cache small teams at first. It is also quite easy to get their iterators,
# so we want to iterate them more often than not-cached teams:
teams.sort(key=len, reverse=True)
try:
for i, t in enumerate(reversed(self._teams), 1):
if (cs := t.combs_size) > free_memory:
return
teams[-i] = CachedTeam(t)
free_memory -= cs
finally:
self._free_memory = free_memory
我已经写了三种获取所有团队笛卡尔积的基本实现:_product_producer
,_compiled_producer
和_stack_producer
。要选择其中之一,我们需要知道我们拥有的近似内存值。这就是为什么我们需要声明combs_size
,_free_memory
,_max_effective_teams_length
,_min_required_memory_multiplier
,teams_combs_size
。
内部_teams
应该是私有的,因为产生的组合的顺序取决于团队的顺序。为了优化生产者,我们可能需要求助团队。
_configure_cache
内部的某个地方调用 __init__
,但是对我来说这似乎是错误的,因为初始化的时间不应该太长。您还可以根据需要将其丢弃。
这里有一些测试:
team0 = Team()
team0.leaders = ('L00',)
team0.rookies = ('R00', 'R01', 'R02')
team0.helpers = ('H00', 'H01')
team1 = Team()
team1.leaders = ('L10', 'L11')
team1.rookies = ()
team1.helpers = ()
team2 = Team()
team2.leaders = ('L20',)
team2.rookies = ()
team2.helpers = ('H20', 'H21')
team3 = Team()
team3.leaders = ('L30',)
team3.rookies = ('R30', 'R31', 'R32', 'R33', 'R34')
team3.helpers = ()
tm0 = Tournament()
tm0._teams = [team0, team1, team2, team3]
tm1 = Tournament()
tm1._teams = list(tm0._teams)
tm1._free_memory = 30
tm2 = Tournament()
tm2._teams = list(tm0._teams)
tm2._free_memory = 30
tm2._max_effective_teams_length = 3
。
>>> list(team2)
[('L20',), ('L20', 'H20'), ('L20', 'H21')]
>>> for t in tm0._teams:
... print(len(list(t)), len(t), type(t))
12 12 <class '__main__.Team'>
2 2 <class '__main__.Team'>
3 3 <class '__main__.Team'>
6 6 <class '__main__.Team'>
>>> len(tm0) == len(list(tm0)) == len(list(tm1)) == len(list(tm2)) == 12 * 2 * 3 * 6 == 432
True
>>> for t in tm1._teams:
... print(len(t), type(t))
12 <class '__main__.Team'>
6 <class '__main__.Team'>
3 <class '__main__.CachedTeam'>
2 <class '__main__.CachedTeam'>
>>> list(tm0._product_producer()) == list(tm0._compiled_producer()) == list(tm0._stack_producer())
True