如何从嵌套列表的每个子列表中各选取一个值,且所选的值互不重复

时间:2016-11-14 08:26:12

标签: python

你有一个Python列表,如下所示:

# Each sub-list holds the candidate values for one position of the result.
# Values are written without leading zeros: a literal such as ``05`` is a
# syntax error on Python 3.
l = [[ 1,  2,  3],
     [18, 20, 22],
     [ 3, 14, 16],
     [ 1,  3,  5],
     [18,  2, 16]]

您如何从每个子列表中各选择一个值,使所选的值互不重复,并使结果列表的总和最小?

# Expected answer for the list above: one value per sub-list, all values
# distinct, with the smallest possible total.
result = [1, 18, 3, 5, 2]

2 个答案:

答案 0 :(得分:2)

这是一个紧凑的暴力解法,因此它必须执行 columns**rows 次测试,效率很差。我猜想存在一种回溯(backtracking)算法,通常效率更高,但在最坏的情况下,它仍可能需要检查所有可能性。

from itertools import product

# Candidate values: one value has to be picked from every row.
lst = [
    [1, 2, 3],
    [18, 20, 22],
    [3, 14, 16],
    [1, 3, 5],
    [18, 2, 16],
]

# A selection is free of repeats exactly when the set built from it is as
# large as the number of rows it was drawn from.
nrows = len(lst)
m = min(
    filter(lambda t: len(set(t)) == nrows, product(*lst)),
    key=sum,
)
print(m)

输出

(1, 18, 3, 5, 2)

这是一个使用递归生成器而不是itertools.product的更快版本。

def select(data, seq):
    """Lazily yield selections that take one value from each row of ``data``.

    Rows are consumed from first to last; a value is only appended when it
    does not already occur in the partial selection built so far.

    Args:
        data: Sequence of rows, each an iterable of candidate values.
        seq: List of values chosen so far.  It is extended by building new
            lists, never modified in place.

    Yields:
        Lists consisting of ``seq`` followed by one value per row.
    """
    if not data:
        # No rows left to choose from: the prefix is a complete selection.
        yield seq
        return
    head, last_row = data[:-1], data[-1]
    for partial in select(head, seq):
        for value in last_row:
            if value not in partial:
                yield partial + [value]

def solve(data):
    """Return the selection produced by ``select`` that has the smallest sum.

    Args:
        data: Sequence of rows to pick one value from each.

    Returns:
        The first minimal-sum list yielded by ``select(data, [])``.

    Raises:
        ValueError: If ``select`` yields nothing (``min`` of an empty
            iterable).
    """
    candidates = select(data, [])
    return min(candidates, key=sum)

这是递归生成器的一个修改版本,它在运行过程中进行排序,但当然速度较慢,而且会消耗更多的内存。如果输入数据已经排序,它通常能很快找到最小解,但我没能找到一种简单的方法,让它在找到最小选择后立即停止。

def select(data, selected):
    """Yield repeat-free selections, expanding low-sum partial results first.

    Works like the plain recursive generator, except that at every level all
    partial selections from the preceding rows are collected and ordered by
    their sum before they are extended with values from the last row.

    Args:
        data: Sequence of rows, each an iterable of candidate values.
        selected: List of values chosen so far; new lists are built from it.

    Yields:
        Lists consisting of ``selected`` followed by one value per row.
    """
    if not data:
        yield selected
        return
    # Materialise every partial selection so they can be ordered by sum; the
    # sort is stable, so equal-sum partials keep their generation order.
    partials = list(select(data[:-1], selected))
    partials.sort(key=sum)
    for partial in partials:
        for value in data[-1]:
            if value in partial:
                continue
            yield partial + [value]

下面是一些计时代码,用来比较莫里斯(Maurice)和我的解决方案的速度。它可以在Python 2和Python 3上运行。我在一台运行旧版Debian衍生Linux的老旧2GHz 32位机器上,用Python 2.6和Python 3.6得到了相近的计时结果。

from __future__ import print_function, division
from timeit import Timer
from itertools import product
from random import seed, sample, randrange

# Pick a random 32-bit seed and print it before seeding the generator, so a
# run can be reproduced by seeding with the reported value.
n = randrange(0, 1 << 32)
print('seed', n)
seed(n)

def show(data):
    """Print ``data`` as a bracketed block with one indented row per line.

    Every row is rendered with a four-space indent and a trailing comma, and
    the closing bracket is followed by a blank line.

    Args:
        data: Iterable of rows; each row is formatted with ``str.format``.
    """
    pad = ' ' * 4
    lines = []
    for row in data:
        lines.append('{0}{1},'.format(pad, row))
    print('[\n{0}\n]\n'.format('\n'.join(lines)))

def make_data(rows, cols):
    """Build a random ``rows`` x ``cols`` table of positive integers.

    Every row is an independent sample without replacement, so values are
    unique within a row but may repeat across rows.

    Args:
        rows: Number of rows to generate.
        cols: Number of values per row.

    Returns:
        A list of ``rows`` lists, each holding ``cols`` distinct integers.
    """
    maxn = rows * cols
    # The population is normally 1 .. maxn - 1.  With a single row that is one
    # value short of ``cols`` and sample() would raise ValueError, so the upper
    # bound is widened just enough to always hold ``cols`` values.  For two or
    # more rows the population is unchanged.
    nums = range(1, max(maxn, cols + 1))
    return [sample(nums, cols) for _ in range(rows)]

def sort_data(data):
    """Return a sorted copy of ``data`` without modifying the input.

    Each row is sorted in ascending order, then the rows themselves are
    ordered by descending row sum (rows with equal sums keep their relative
    order).

    Args:
        data: Iterable of rows of numbers.

    Returns:
        A new list of new, sorted row lists.
    """
    ascending_rows = (sorted(row) for row in data)
    return sorted(ascending_rows, key=sum, reverse=True)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def solve_Maurice(data):
    """Brute-force search with an explicit loop over every combination.

    Args:
        data: Sequence of rows to pick one value from each.

    Returns:
        The first tuple of pairwise-distinct values, one per row, that has
        the smallest sum, or ``None`` when no repeat-free combination exists.
    """
    best = None
    for candidate in product(*data):
        # A combination with repeated values collapses to a smaller set.
        if len(set(candidate)) < len(candidate):
            continue
        # Strict comparison: on equal sums the earlier combination is kept.
        if best is None or sum(candidate) < sum(best):
            best = candidate
    return best

def solve_prodgen(data):
    """Brute-force search expressed as ``min`` over a filtered product.

    Args:
        data: Sequence of rows to pick one value from each.

    Returns:
        The first tuple of pairwise-distinct values, one per row, that has
        the smallest sum.

    Raises:
        ValueError: If no repeat-free combination exists (``min`` of an
            empty iterable).
    """
    width = len(data)
    distinct = (
        combo for combo in product(*data)
        if len(set(combo)) == width
    )
    return min(distinct, key=sum)

def select(data, seq):
    """Recursively generate repeat-free selections, one value per row.

    Args:
        data: Sequence of rows, each an iterable of candidate values.
        seq: List of values chosen so far; it is copied when extended.

    Yields:
        Lists consisting of ``seq`` followed by one value from every row,
        where each appended value was absent from the list before it.
    """
    if not data:
        # Base case: nothing left to pick, so the prefix itself is a result.
        yield seq
        return
    for prefix in select(data[:-1], seq):
        for item in data[-1]:
            if item in prefix:
                continue
            yield prefix + [item]

def solve_recgen(data):
    """Search using the recursive ``select`` generator.

    Args:
        data: Sequence of rows to pick one value from each.

    Returns:
        The first minimal-sum list yielded by ``select(data, [])``.

    Raises:
        ValueError: If ``select`` yields nothing (``min`` of an empty
            iterable).
    """
    selections = select(data, [])
    return min(selections, key=sum)

# Solver functions that verify() and time_test() iterate over.
funcs = (
    solve_Maurice,
    solve_prodgen,
    solve_recgen,
)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def verify():
    """Run every solver in ``funcs`` on the global ``data`` and print results.

    One line is printed per solver, holding its name (padded to 14 columns)
    and the selection it returned, followed by a blank line.
    """
    for solver in funcs:
        print('{0:14} {1}'.format(solver.__name__, solver(data)))
    print()

def time_test(loops, reps):
    """Print timing stats for all the functions in ``funcs``.

    Each solver is timed with ``timeit.Timer`` through a statement that
    imports the solver and ``data`` from ``__main__``.  The timings of every
    solver are sorted in ascending order, and the solvers are then printed
    ordered by their timing lists.

    Args:
        loops: Number of calls per timing measurement.
        reps: Number of measurements taken per solver.
    """
    timings = []
    for solver in funcs:
        fname = solver.__name__
        timer = Timer(fname + '(data)', 'from __main__ import data, ' + fname)
        timings.append((sorted(timer.repeat(reps, loops)), fname))

    for result, fname in sorted(timings):
        print('{0:14} {1}'.format(fname, result))

# Problem size: a brute-force search has to examine cols ** rows selections.
rows = 6
cols = 4
print('Number of selections:', cols ** rows)

# ``data`` has to be a module-level name because the Timer setup string in
# time_test() imports it from __main__.
data = sort_data(make_data(rows, cols))
show(data)

verify()

# Calls per measurement and number of measurements per solver.
loops = 100
reps = 3
time_test(loops, reps)

典型输出

seed 22290
Number of selections: 4096
[
    [6, 11, 22, 23],
    [9, 14, 17, 19],
    [5, 9, 16, 22],
    [5, 6, 9, 13],
    [1, 3, 6, 22],
    [4, 5, 6, 13],
]

solve_Maurice  (11, 9, 5, 6, 1, 4)
solve_prodgen  (11, 9, 5, 6, 1, 4)
solve_recgen   [11, 9, 5, 6, 1, 4]

solve_recgen   [0.5476037560001714, 0.549133045002236, 0.5647858490046929]
solve_prodgen  [1.2500368960027117, 1.296529343999282, 1.3022710209988873]
solve_Maurice  [1.485518219997175, 1.489505891004228, 1.784105566002836]

答案 1 :(得分:1)

编辑:我之前的解决方案只在大多数情况下有效,下面这个应该适用于所有情况:

from itertools import product
# Candidate values: one value has to be picked from every sub-list.
l = [
    [1, 2, 3],
    [18, 20, 22],
    [3, 14, 16],
    [1, 3, 5],
    [18, 2, 16],
]

# Walk through every combination and keep the first repeat-free one with the
# smallest sum; ``result`` stays None if no such combination exists.
result = None
for candidate in product(*l):
    if len(set(candidate)) == len(candidate):
        if result is None or sum(candidate) < sum(result):
            result = candidate
print(result)

输出

(1, 18, 3, 5, 2)