Computing the shortest flip sequences in pancake sorting

Date: 2018-01-12 20:06:38

Tags: python algorithm sorting optimization multiprocessing

Now, finding the shortest flip sequence in pancake sorting is NP-hard, but I don't want just one of them: I want to find every shortest sequence, and count them.

Meaning, for every permutation I want to find all sequences of prefix reversals that restore the identity but are no longer than the shortest such sequence.
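
For example, take n = 3 and the permutation (3, 1, 2). With the convention used in the code below, where flip(x, p) reverses the first x elements, the shortest way back to the identity takes two flips, so I want to enumerate every sequence of at most two flips that sorts it:

flip(3, (3, 1, 2))  # -> (2, 1, 3)
flip(2, (2, 1, 3))  # -> (1, 2, 3), the identity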

Here's what I've got so far:

#!/usr/bin/env python3
# coding: utf-8

from math import factorial
import itertools

from multiprocessing import cpu_count, Manager, Pool

import numpy
import scipy.sparse

def flip(x, value):
    # Reverse the first x elements of value (a prefix reversal, i.e. a pancake flip).
    return tuple(value[:x][::-1] + value[x:])

def rank(perm):
    # Lehmer-code (lexicographic) rank of perm among the permutations of its elements.
    n = len(perm)
    fact = factorial(n)
    r = 0
    for i in range(n):
        fact //= n - i
        r += len([x for x in perm[i:] if x < perm[i]]) * fact
    return r

def unrank(i, items):
    # Inverse of rank: the permutation of items whose rank is i (taken mod n!).
    its = items[:]
    perm = []
    n = len(items)
    fact = factorial(n)
    r = i % fact
    while its:
        fact //= n
        c, r = divmod(r, fact)
        perm.append(its.pop(c))
        n -= 1
    return tuple(perm)

def get_colex_row(r, n, _fact):
    # Build row r of the flip adjacency matrix on demand: entry (0, j) = k
    # means a length-k prefix reversal takes the rank-r permutation to the
    # rank-j permutation.
    row = scipy.sparse.dok_matrix((
        1, _fact[n - 1]), dtype=numpy.int8)
    perm = unrank(r, [i for i in range(n)])
    for i in range(n):
        column = r - r % _fact[i] + rank(perm[:-i - 2:-1])
        row[0, column] = i + 1
    return row

def get_colex_matrix(n):
    fact = [factorial(i) for i in range(1, n + 1)]
    m = scipy.sparse.dok_matrix(
            (fact[n - 1], fact[n - 1]), dtype=numpy.int8)
    items = [_ for _ in range(1, n + 1)]

    for r in range(fact[n - 1]):
        row = get_colex_row(r, n, fact)
        m[r] = row
    return m

def get_distance(n, items):
    nfact = factorial(n)
    stack = {unrank(i, items) for i in range(nfact)}
    m = get_colex_matrix(n)
    distance = {unrank(nfact - 1, items)[::-1] : 0}
    new_distance = {nfact - 1}
    d = 0
    while distance.keys() != stack:
        new_new_distance = set()
        d += 1
        for visiting in new_distance:
            for i in range(2, n + 1):
                key_index = m[visiting].toarray().tolist()[0].index(i)
                key = unrank(key_index, items)[::-1]
                if key not in distance:
                    distance[key] = d
                    new_new_distance.add(key_index)
        new_distance = new_new_distance
    return distance

def get_paths_serial(items):
    n = len(items)
    nfact = factorial(n)
    stack = {unrank(i, items) for i in range(nfact)}
    m = get_colex_matrix(n)
    distance = {unrank(nfact - 1, items)[::-1]: {()}}
    new_distance = {nfact - 1}
    while distance.keys() != stack:
        new_new_distance = set()
        for visiting_index in new_distance:
            for i in range(2, n + 1):
                key_index = m[visiting_index].toarray().tolist()[0].index(i)
                key = unrank(key_index, items)[::-1]
                visiting = unrank(visiting_index, items)[::-1]
                paths = distance[visiting]

                prev_sample = next(iter(paths))

                if key not in distance:
                    distance[key] = {path + (i,) for path in paths}
                    new_new_distance.add(key_index)
                else:
                    curr_sample = next(iter(distance[key]))
                    if len(prev_sample) + 1 < len(curr_sample):
                        print("Shouldn't happen!")
                        distance[key] = {path + (i,) for path in paths}
                    elif len(prev_sample) + 1 == len(curr_sample):
                        distance[key] |= {path + (i,) for path in paths}
                    else:
                        # not relevant
                        pass
        new_distance = new_new_distance
    return distance

def _worker(ns, index):
    row = get_colex_row(index, ns.n, ns.fact).toarray().tolist()[0]
    visiting = unrank(index, ns.items)[::-1]
    paths = ns.distance[visiting]
    prev_sample = next(iter(paths))
    out = {}
    my_new_distance = set()
    for i in range(2, ns.n + 1):
        key_index = row.index(i)
        key = unrank(key_index, ns.items)[::-1]
        if key not in ns.distance:
            out[key] = {path + (i,) for path in paths}
            my_new_distance.add(key_index)
        else:
            curr_sample = next(iter(ns.distance[key]))
            if len(prev_sample) + 1 < len(curr_sample):
                print("Shouldn't happen!")
                out[key] = {path + (i,) for path in paths}
            elif len(prev_sample) + 1 == len(curr_sample):
                # out is per-call, so the key may not be present in it yet.
                out.setdefault(key, set()).update(path + (i,) for path in paths)
    return my_new_distance, out

def get_paths_parallel(items):
    n = len(items)
    fact = [factorial(i) for i in range(1, n + 1)]
    distance = {unrank(fact[n - 1] - 1, items)[::-1]: {()}}
    stack = {unrank(i, items) for i in range(fact[n - 1])}
    already_visited = set()
    visiting = {fact[n - 1] - 1}
    mgr = Manager()

    namespace = mgr.Namespace()

    namespace.fact = fact
    namespace.distance = distance
    namespace.items = items
    namespace.n = n

    with Pool(2 * cpu_count()) as pool:
        while distance.keys() != stack:
            result = pool.starmap(_worker, ((namespace, job)
                    for job in visiting))

            visiting = set()
            for next_to_visit, visited in result:
                visiting |= next_to_visit
                for k, v in visited.items():
                    if k in distance:
                        distance[k] |= v
                    else:
                        distance[k] = v
            visiting -= already_visited
            already_visited |= visiting
            namespace.distance = distance
    return distance

def colex(value, other):
    for i in range(len(value) - 1, 0, -1):
        if value[i] == other[i]:
            continue
        return value[i] > other[i]
    return False

def ordered_by(order_cmp):
    'Convert a cmp= function into a key= function'
    if order_cmp is None:
        return None

    class K(object):
        def __init__(self, obj):
            self.value = obj
        def __gt__(self, other):
            if len(self.value) != len(other.value):
                raise ValueError("Not the same length")
            return order_cmp(self.value, other.value)
    return K

def get_ordered(n, order):
    return sorted(itertools.permutations(range(1, n + 1)),
                  key=ordered_by(order))


def get_matrix(n, order=None):
    stack = get_ordered(n, order)
    m = numpy.zeros((len(stack), len(stack)), numpy.int8)
    for i,s in enumerate(stack):
         for x in range(1, n + 1):
             m[i, stack.index(flip(x, s))] = x
    return m

I'm not sure what I'm doing wrong, but get_paths_parallel runs slower than get_paths_serial. Please help!

I really should (and probably will soon) document my code better.

In the meantime, a few words about how it works:

It uses co-lexicographic ordering to rank the permutations and to find the indices into an adjacency matrix, where I store the flip lengths that transform one permutation into another: A(i, j) = k if a prefix reversal of length k applied to the permutation with rank i yields the permutation with rank j. To save memory, instead of storing the whole matrix I generate the rows on demand, and I limit lookups by excluding already-visited rows; I also use scipy.sparse.dok_matrix for the same reason.
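
As a quick sanity-check sketch of that ranking (my own illustration, not part of the solver): rank and unrank invert each other, so a row of the matrix can be produced from its index alone:

n = 4  # small n, chosen just for the illustration
fact = [factorial(i) for i in range(1, n + 1)]
items = list(range(1, n + 1))
assert all(rank(unrank(r, items)) == r for r in range(fact[n - 1]))

row = get_colex_row(2, n, fact)  # one row, generated on demand
print(dict(row.items()))  # {(0, j): k, ...} -- a length-k flip sends rank 2 to rank j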

Beyond that, it simply floods the graph level by level (a breadth-first search) until every permutation has been reached.
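
A minimal smoke test of the flooding for n = 3 (my addition):

items = [1, 2, 3]
paths = get_paths_serial(items)
assert len(paths) == factorial(len(items))  # every permutation was reached
print(paths[(1, 2, 3)])  # {()} -- the identity needs no flips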

Some functions don't apply all (or any) of the above considerations, e.g. get_matrix, but they exist only to verify that the others, e.g. get_colex_matrix, work as intended.
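
Concretely, the cross-check looks like this (my sketch; only feasible for small n, since get_matrix builds the dense n! x n! matrix):

n = 4
print((get_colex_matrix(n).toarray() == get_matrix(n, order=colex)).all())
# expected: True, if the two constructions agree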

I construct the key function in a somewhat convoluted way, but only because I had tried other orderings before settling on co-lex.
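
For reference, the standard library has this conversion built in; an equivalent of ordered_by via functools.cmp_to_key (my sketch, exploiting that colex(a, b) is a boolean "a greater than b") would be:

from functools import cmp_to_key

def get_ordered_stdlib(n, greater):
    # greater(a, b) -> bool, e.g. colex; turn it into a -1/0/1 comparator.
    key = cmp_to_key(lambda a, b: greater(a, b) - greater(b, a))
    return sorted(itertools.permutations(range(1, n + 1)), key=key)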

1 Answer:

Answer 0 (score: 0)

Sharing data between processes with multiprocessing.Manager is what makes it slow: every access to the shared namespace is a round-trip to the manager process.

The solution is either to copy the needed data into each process's own memory space (pass it as arguments) or to use global variables for it.
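
A sketch of that fix (the helper names here are mine): hand each worker its own copy of the read-mostly data once, through a Pool initializer and a module-level global, instead of routing every access through a Manager proxy:

from multiprocessing import Pool, cpu_count

_g = {}  # per-process copy of the shared data

def _init_worker(fact, distance, items, n):
    # Runs once in each worker process; afterwards reads are plain lookups.
    _g.update(fact=fact, distance=distance, items=items, n=n)

def make_pool(fact, distance, items, n):
    # distance grows between BFS levels, so a pool built this way must be
    # re-created (or the new entries re-shipped) at each level.
    return Pool(cpu_count(), initializer=_init_worker,
                initargs=(fact, distance, items, n))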

Also, using scipy.sparse.dok_matrix is overkill here; a plain dict will do.
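
For instance, get_colex_row could return a plain dict keyed by flip length (a sketch along those lines, reusing rank and unrank from the question):

def get_colex_row_dict(r, n, _fact):
    # flip length -> rank of the permutation that the flip produces; the
    # linear row.index(i) scan in _worker becomes a direct row[i] lookup.
    perm = unrank(r, list(range(n)))
    return {i + 1: r - r % _fact[i] + rank(perm[:-i - 2:-1])
            for i in range(n)}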

I'll dig up the literature I found on this topic and link it here.