在煎饼排序(pancake sorting)中,寻找最短的翻转序列是NP难问题,但我仍然想找出所有这样的最短序列,并统计它们的数量。
也就是说,对于每个排列,我想找出所有能将其还原为恒等排列、且长度不超过最短序列长度的前缀反转序列。
这是我到目前为止所得到的:
#!/bin/env python3
# coding: utf-8
from math import factorial
import itertools
from multiprocessing import cpu_count, Manager, Pool
import numpy
import scipy.sparse
def flip(x, value):
    """Return ``value`` with its first ``x`` elements reversed, as a tuple.

    Args:
        x: Length of the prefix to reverse.
        value: Sliceable sequence (tuple or list) supporting ``+``.

    Returns:
        A new tuple: the reversed prefix followed by the untouched rest.
    """
    return tuple(value[:x][::-1] + value[x:])
def rank(perm):
    """Return the lexicographic rank of ``perm`` among its own arrangements.

    Args:
        perm: Sequence of mutually comparable elements.

    Returns:
        An int; 0 for the ascending arrangement and ``len(perm)! - 1`` for
        the descending one when all elements are distinct.
    """
    n = len(perm)
    fact = factorial(n)
    r = 0
    for i, head in enumerate(perm):
        # After this division ``fact`` equals (n - i - 1)!, the weight of
        # position i in the factorial number system.
        fact //= n - i
        # Count the later elements smaller than the current one; no
        # temporary list is needed just to take its length.
        r += sum(1 for x in perm[i + 1:] if x < head) * fact
    return r
def unrank(i, items):
    """Return the arrangement of ``items`` with lexicographic index ``i``.

    Args:
        i: Index of the wanted arrangement; it is reduced modulo
            ``len(items)!``.
        items: Sequence of elements in base order. It is copied, never
            mutated, and may be a list, tuple or any other iterable sequence.

    Returns:
        A tuple containing every element of ``items`` exactly once.
    """
    # list() rather than items[:] so that tuples (which have no .pop) work.
    its = list(items)
    perm = []
    n = len(its)
    fact = factorial(n)
    r = i % fact
    while its:
        # ``fact`` becomes (n - 1)!: the number of arrangements that share
        # the same choice for the current position.
        fact //= n
        c, r = divmod(r, fact)
        perm.append(its.pop(c))
        n -= 1
    return tuple(perm)
def get_colex_row(r, n, _fact):
    """Build one row of the flip adjacency matrix as a sparse 1 x n! matrix.

    Args:
        r: Rank (as understood by ``rank``/``unrank``) of the permutation
            of ``range(n)`` whose row is wanted.
        n: Number of elements being permuted.
        _fact: Sequence with ``_fact[k] == (k + 1)!`` for ``k`` in
            ``range(n)`` (the callers in this file build it that way).

    Returns:
        A ``scipy.sparse.dok_matrix`` of shape ``(1, _fact[n - 1])`` and dtype
        int8. For every ``i`` in ``range(n)`` it holds ``i + 1`` in the column
        computed for reversing the last ``i + 1`` elements of the unranked
        permutation.
    """
    row = scipy.sparse.dok_matrix((1, _fact[n - 1]), dtype=numpy.int8)
    perm = unrank(r, list(range(n)))
    for i in range(n):
        # ``r - r % _fact[i]`` keeps the part of the rank contributed by the
        # untouched leading elements; ``perm[:-i - 2:-1]`` is the last i + 1
        # elements in reversed order, and its rank replaces the remainder.
        column = r - r % _fact[i] + rank(perm[:-i - 2:-1])
        row[0, column] = i + 1
    return row
def get_colex_matrix(n):
    """Assemble the full n! x n! flip adjacency matrix row by row.

    Args:
        n: Number of elements being permuted.

    Returns:
        A ``scipy.sparse.dok_matrix`` of dtype int8 whose row ``r`` is
        ``get_colex_row(r, n, fact)``.
    """
    # fact[k] == (k + 1)!, the layout get_colex_row indexes into.
    fact = [factorial(i) for i in range(1, n + 1)]
    size = fact[n - 1]
    m = scipy.sparse.dok_matrix((size, size), dtype=numpy.int8)
    for r in range(size):
        m[r] = get_colex_row(r, n, fact)
    return m
def get_distance(n, items):
    """Compute the flip distance of every arrangement of ``items``.

    The search starts from ``unrank(n! - 1, items)[::-1]`` at distance 0 and
    expands level by level through the matrix from ``get_colex_matrix``.

    Args:
        n: Number of elements; expected to equal ``len(items)``.
        items: List of the elements being permuted.

    Returns:
        A dict mapping each arrangement (tuple) to the level at which it was
        first reached.
    """
    nfact = factorial(n)
    stack = {unrank(i, items) for i in range(nfact)}
    m = get_colex_matrix(n)
    distance = {unrank(nfact - 1, items)[::-1]: 0}
    new_distance = {nfact - 1}
    d = 0
    while distance.keys() != stack:
        new_new_distance = set()
        d += 1
        for visiting in new_distance:
            # Densify the sparse row once per node (not once per flip) and
            # flatten it to a plain list so ``.index`` can locate a flip.
            row = m[visiting].toarray()[0].tolist()
            for i in range(2, n + 1):
                key_index = row.index(i)
                key = unrank(key_index, items)[::-1]
                if key not in distance:
                    distance[key] = d
                    new_new_distance.add(key_index)
        new_distance = new_new_distance
    return distance
def get_paths_serial(items):
    """Collect, in one process, every shortest flip sequence per arrangement.

    The search starts from ``unrank(n! - 1, items)[::-1]`` with the single
    empty path and expands level by level.

    Args:
        items: List of the elements being permuted.

    Returns:
        A dict mapping each arrangement (tuple) to a set of tuples of flip
        lengths; each tuple is one path from the start arrangement to it.
    """
    n = len(items)
    nfact = factorial(n)
    stack = {unrank(i, items) for i in range(nfact)}
    m = get_colex_matrix(n)
    distance = {unrank(nfact - 1, items)[::-1]: {()}}
    new_distance = {nfact - 1}
    while distance.keys() != stack:
        new_new_distance = set()
        for visiting_index in new_distance:
            # Everything that depends only on the visited node is computed
            # once here instead of once per flip length.
            row = m[visiting_index].toarray()[0].tolist()
            visiting = unrank(visiting_index, items)[::-1]
            paths = distance[visiting]
            # All paths stored for a node have the same length, so any one
            # of them tells the node's depth.
            prev_len = len(next(iter(paths)))
            for i in range(2, n + 1):
                key_index = row.index(i)
                key = unrank(key_index, items)[::-1]
                if key not in distance:
                    distance[key] = {path + (i,) for path in paths}
                    new_new_distance.add(key_index)
                    continue
                curr_len = len(next(iter(distance[key])))
                if prev_len + 1 < curr_len:
                    print("Shouldn't happen!")
                    distance[key] = {path + (i,) for path in paths}
                elif prev_len + 1 == curr_len:
                    # Another equally short route to an already known node.
                    distance[key] |= {path + (i,) for path in paths}
                # Otherwise the new route is longer and not relevant.
        new_distance = new_new_distance
    return distance
def _worker(ns, index):
    """Expand one node of the search; intended to run in a pool process.

    Args:
        ns: Object exposing ``n``, ``fact``, ``items`` and ``distance``
            (``get_paths_parallel`` passes a manager ``Namespace``).
        index: Rank of the arrangement to expand.

    Returns:
        A pair ``(new_indices, out)``: the ranks of arrangements that were
        not yet present in ``ns.distance``, and a dict mapping reached
        arrangements to the set of paths found through this node. The caller
        is expected to merge ``out`` into its own table.
    """
    # Read each shared attribute exactly once. On a manager Namespace proxy
    # every attribute read is a round trip that copies the whole value, so
    # touching ``ns.distance`` inside the loop would re-transfer the dict
    # several times per flip.
    n = ns.n
    items = ns.items
    distance = ns.distance
    row = get_colex_row(index, n, ns.fact).toarray().tolist()[0]
    visiting = unrank(index, items)[::-1]
    paths = distance[visiting]
    prev_len = len(next(iter(paths)))
    out = {}
    my_new_distance = set()
    for i in range(2, n + 1):
        key_index = row.index(i)
        key = unrank(key_index, items)[::-1]
        if key not in distance:
            out[key] = {path + (i,) for path in paths}
            my_new_distance.add(key_index)
            continue
        curr_len = len(next(iter(distance[key])))
        if prev_len + 1 < curr_len:
            print("Shouldn't happen!")
            out[key] = {path + (i,) for path in paths}
        elif prev_len + 1 == curr_len:
            # ``out`` is local and never holds ``key`` yet at this point, so
            # calling ``out[key].update`` would raise KeyError; emit the
            # paths and let the caller union them in.
            out[key] = {path + (i,) for path in paths}
    return my_new_distance, out
def get_paths_parallel(items):
    """Collect every shortest flip sequence per arrangement using a pool.

    Each search level is fanned out to ``_worker`` through ``Pool.starmap``;
    the partial results are merged here and the shared snapshot is refreshed
    once per level.

    Args:
        items: List of the elements being permuted.

    Returns:
        A dict mapping each arrangement (tuple) to a set of tuples of flip
        lengths, in the same format as ``get_paths_serial``.
    """
    n = len(items)
    fact = [factorial(i) for i in range(1, n + 1)]
    last = fact[n - 1] - 1
    distance = {unrank(last, items)[::-1]: {()}}
    stack = {unrank(i, items) for i in range(fact[n - 1])}
    already_visited = set()
    visiting = {last}
    # Both the manager and the pool own helper processes; the ``with``
    # statement guarantees they are shut down even if a worker raises.
    with Manager() as mgr, Pool(2 * cpu_count()) as pool:
        namespace = mgr.Namespace()
        namespace.fact = fact
        namespace.distance = distance
        namespace.items = items
        namespace.n = n
        while distance.keys() != stack:
            result = pool.starmap(
                _worker, ((namespace, job) for job in visiting))
            visiting = set()
            for next_to_visit, visited in result:
                visiting |= next_to_visit
                for k, v in visited.items():
                    if k in distance:
                        distance[k] |= v
                    else:
                        distance[k] = v
            visiting -= already_visited
            already_visited |= visiting
            # Publish the merged table so the next level's workers see it.
            namespace.distance = distance
    return distance
def colex(value, other):
    """Tell whether ``value`` sorts after ``other`` in co-lexicographic order.

    The sequences are compared from the last position down to the first; the
    first position at which they differ decides.

    Args:
        value: Indexable sequence.
        other: Indexable sequence, at least as long as ``value``.

    Returns:
        True if ``value`` is greater at the deciding position, False if it
        is smaller or if no compared position differs.
    """
    # Stop at -1 (exclusive) so index 0 is compared too; stopping at 0
    # silently ignored the first element.
    for i in range(len(value) - 1, -1, -1):
        if value[i] != other[i]:
            return value[i] > other[i]
    return False
def ordered_by(order_cmp):
    """Convert a "greater than" predicate into a ``key=`` class for sorting.

    Args:
        order_cmp: Callable ``order_cmp(a, b)`` returning a truthy value when
            ``a`` sorts after ``b``, or None.

    Returns:
        None when ``order_cmp`` is None (so it can be passed straight to
        ``sorted(key=...)``), otherwise a wrapper class whose instances
        compare through ``order_cmp``.

    Raises:
        ValueError: When two wrapped values of different lengths are
            compared.
    """
    if order_cmp is None:
        return None

    class K(object):
        """Wrap a value so that ``<`` and ``>`` delegate to ``order_cmp``."""

        def __init__(self, obj):
            self.value = obj

        def __gt__(self, other):
            if len(self.value) != len(other.value):
                # ``assert "text"`` is always true, so raise explicitly.
                raise ValueError("Not the same length")
            return order_cmp(self.value, other.value)

        def __lt__(self, other):
            # Sorting uses ``<``; make that explicit instead of relying on
            # Python falling back to the reflected ``__gt__``.
            return other.__gt__(self)

    return K
def get_ordered(n, order):
    """Return all permutations of ``1..n`` sorted by the given ordering.

    Args:
        n: Number of elements; the permuted values are ``range(1, n + 1)``.
        order: Predicate accepted by ``ordered_by``, or None to sort the
            tuples by their natural order.

    Returns:
        A list of tuples.
    """
    every_permutation = itertools.permutations(range(1, n + 1))
    return sorted(every_permutation, key=ordered_by(order))
def get_matrix(n, order=None):
    """Build the dense flip adjacency matrix for permutations of ``1..n``.

    Args:
        n: Number of elements being permuted.
        order: Ordering passed to ``get_ordered``; it decides which row and
            column each permutation gets.

    Returns:
        A square ``numpy`` int8 array ``m`` where ``m[i, j] == x`` when
        flipping the first ``x`` elements of permutation ``i`` yields
        permutation ``j`` (so the diagonal holds 1), and 0 elsewhere.
    """
    stack = get_ordered(n, order)
    # Permutations are unique, so a dict gives the same position that
    # ``stack.index`` would, without a linear scan on every lookup.
    position = {perm: idx for idx, perm in enumerate(stack)}
    m = numpy.zeros((len(stack), len(stack)), numpy.int8)
    for i, s in enumerate(stack):
        for x in range(1, n + 1):
            m[i, position[flip(x, s)]] = x
    return m
我不确定我做错了什么,但get_paths_parallel
的运行速度比get_paths_serial
慢,请帮助!
我真的应该(也可能很快)更好地记录我的代码。
暂时,我还要说几句话:
它使用逆字典序(co-lexicographic order)对排列进行排名,并据此确定邻接矩阵中的索引。矩阵中存储的是把一个排列变换为另一个排列的翻转长度:如果对排名为 $i$ 的排列执行长度为 $k$ 的前缀反转后得到排名为 $j$ 的排列,则 $A(i,j)=k$。为了节省内存,我不存储整个矩阵,而是按需生成各行,并排除已访问过的行以减少访问;出于同样的原因,我还使用了scipy.sparse.dok_matrix。
除此之外,它只是对图进行泛洪式遍历,直到到达所有排列为止。
有些函数(例如get_matrix)并未用到上述全部或任何一项考虑,它们只是用来验证其他函数(例如get_colex_matrix)是否按预期工作。
我创建key函数的方式有点复杂,但这只是因为在最终决定使用逆字典序(co-lex)之前,我还尝试过其他排序方式。
答案 0 :(得分:0)
使用multiprocessing.Manager
在进程之间共享数据会使速度变慢。
解决方案是将所需数据复制到每个进程的内存空间(将它们作为参数传递)或者为它们使用全局变量。
此外,使用scipy.sparse.dok_matrix有些大材小用,普通的dict就足够了。
我会整理我找到的关于这个主题的文献,然后在这里附上链接。