Question

我有一个n个整数的数组（不一定是不同的！），我想迭代所有大小为k的子集。但是，我想排除所有重复的子集。

e.g。

array = {1,2,2,3,3,3,3}, n = 7, k = 2

然后我想迭代的子集（每次一次）是：

{1,2},{1,3},{2,2},{2,3},{3,3}

这样做的有效算法是什么？递归方法是最有效/优雅的吗？

如果您有特定于语言的答案，我正在使用C ++。

Answer 1

我喜欢这个问题。当然，它限制你的向量中只有32个元素，但它仍然很酷。

首先，给定位掩码，确定下一个位掩码置换（source）：

uint32_t next(uint32_t v) {
    uint32_t t = v | (v - 1);
    return (t + 1) | (((~t & -~t) - 1) >> (__builtin_ctz(v) + 1));  
}

接下来，给定一个vector和一个位掩码，根据该掩码提供一个新的vector：

std::vector<int> filter(const std::vector<int>& v, uint32_t mask) {
    std::vector<int> res;
    while (mask) {
        res.push_back(v[__builtin_ctz(mask)]);
        mask &= mask - 1;
    }   
    return res;
}

有了这个，我们只需要一个循环：

std::set<std::vector<int>> get_subsets(const std::vector<int>& arr, uint32_t k) {   
    std::set<std::vector<int>> s;
    uint32_t max = (1 << arr.size());
    for (uint32_t v = (1 << k) - 1; v < max; v = next(v)) {
        s.insert(filter(arr, v));
    }
    return s;
}

int main()
{
    auto s = get_subsets({1, 2, 2, 3, 3, 3, 3}, 2);
    std::cout << s.size() << std::endl; // prints 5
}

Answer 2

用于以字典顺序生成一组唯一值的组合的相同（或几乎相同）算法可用于以字典顺序生成多集的组合。这样做可以避免重复数据删除的必要性，这是非常昂贵的，并且还避免了维护所有生成的组合的必要性。它确实需要对原始值列表进行排序。

以下简单实现在平均（和最坏情况）时间O（ n n 中找到 n 多组的下一个 k 组合EM>）。它需要两个范围：第一个范围是排序的 k 组合，第二个范围是排序的多个集合。（如果任一范围未排序或第一个范围中的值不构成第二个范围的子（多）组，则行为未定义;不进行完整性检查。）

实际上只使用了第二个范围的结束迭代器，但我认为这使得调用约定有点奇怪。

template<typename BidiIter, typename CBidiIter,
         typename Compare = std::less<typename BidiIter::value_type>>
int next_comb(BidiIter first, BidiIter last,
              CBidiIter /* first_value */, CBidiIter last_value,
              Compare comp=Compare()) {
  /* 1. Find the rightmost value which could be advanced, if any */
  auto p = last;
  while (p != first && !comp(*(p - 1), *--last_value)) --p;
  if (p == first) return false;
  /* 2. Find the smallest value which is greater than the selected value */
  for (--p; comp(*p, *(last_value - 1)); --last_value) { }
  /* 3. Overwrite the suffix of the subset with the lexicographically smallest
   *    sequence starting with the new value */
  while (p != last) *p++ = *last_value++;
  return true;
}

应该清楚的是，步骤1和2组合最多进行O（ n ）比较，因为每个 n 值最多用于一次比较。第3步复制最多O（ k ）值，我们知道 k ≤ n 。

在没有重复值的情况下，通过将当前组合作为迭代器的容器保存到值列表而不是实际值，可以将其改进为O（ k ）。这也可以避免复制值，但需要额外的解引用。如果另外我们缓存将每个值迭代器与迭代器关联到下一个最大值的第一个实例的函数，我们可以消除步骤2并将算法减少到O（ k ），即使重复值也是如此。如果有大量的重复并且比较费用很高，这可能是值得的。

这是一个简单的使用示例：

std::vector<int> values = {1,2,2,3,3,3,3};
/* Since that's sorted, the first subset is just the first k values */
const int k = 2;
std::vector<int> subset{values.cbegin(), values.cbegin() + k};

/* Print each combination */
do {
  for (auto const& v : subset) std::cout << v << ' ';
  std::cout << '\n';
} while (next_comb(subset.begin(),  subset.end(),
                   values.cbegin(), values.cend()));

住在coliru

Answer 3

与之前的答案不同，这不是那么有效，也没有做任何像花絮那样花哨的事情。但是，它不会限制数组的大小或子集的大小。

此解决方案使用std::next_permutation生成组合，并利用std::set的唯一性属性。

#include <algorithm>
#include <vector>
#include <set>
#include <iostream>
#include <iterator>

using namespace std;

std::set<std::vector<int>> getSubsets(const std::vector<int>& vect, size_t numToChoose)
{
    std::set<std::vector<int>> returnVal;
    // return the whole thing if we want to
    // choose everything 
    if (numToChoose >= vect.size())
    {
        returnVal.insert(vect);
        return returnVal;
    }

    // set up bool vector for combination processing
    std::vector<bool> bVect(vect.size() - numToChoose, false);

    // stick the true values at the end of the vector
    bVect.resize(bVect.size() + numToChoose, true); 

    // select where the ones are set in the bool vector and populate
    // the combination vector
    do
    {
        std::vector<int> combination;
        for (size_t i = 0; i < bVect.size() && combination.size() <= numToChoose; ++i)
        {
            if (bVect[i])
                combination.push_back(vect[i]);
        }
        // sort the combinations
        std::sort(combination.begin(), combination.end());

        // insert this new combination in the set
        returnVal.insert(combination);
    } while (next_permutation(bVect.begin(), bVect.end()));
    return returnVal;
}

int main()
{
    std::vector<int> myVect = {1,2,2,3,3,3,3};

    // number to select
    size_t numToSelect = 3;

    // get the subsets
    std::set<std::vector<int>> subSets = getSubsets(myVect, numToSelect);

    // output the results
    for_each(subSets.begin(), subSets.end(), [] (const vector<int>& v) 
    { cout << "subset "; copy(v.begin(), v.end(), ostream_iterator<int>(cout, " ")); cout << "\n"; });
}

实例：http://coliru.stacked-crooked.com/a/beb800809d78db1a

基本上我们设置了一个bool向量，并使用与bool向量中true项的位置对应的值填充向量。然后我们将它排序并插入到集合中。 std::next_permutation将bool数组中的true值混洗，我们重复一遍。

不可否认，它不像以前的答案那样复杂，而且可能比以前的答案慢，但它应该能够胜任。

Answer 4

this solution的基本思想是像next_permutation这样的函数，但它生成下一个＆＃34;数字＆＃34;的升序序列。这里称为ascend_ordered。

template< class It >
auto ascend_ordered( const int n_digits, const It begin, const It end )
    -> bool
{
    using R_it = reverse_iterator< It >;
    const R_it r_begin  = R_it( end );
    const R_it r_end    = R_it( begin );

    int max_digit = n_digits - 1;
    for( R_it it = r_begin ; it != r_end; ++it )
    {
        if( *it < max_digit )
        {
            ++*it;
            const int n_further_items = it - r_begin;
            for( It it2 = end - n_further_items; it2 != end; ++it2 )
            {
                *it2 = *(it2 - 1) + 1;
            }
            return true;
        }
        --max_digit;
    }
    return false;
}

手头案件的主要计划：

auto main() -> int
{
    vector<int> a = {1,2,2,3,3,3,3};
    assert( is_sorted( begin( a ), end( a ) ) );
    const int k = 2;
    const int n = a.size();
    vector<int> indices( k );
    iota( indices.begin(), indices.end(), 0 );      // Fill with 0, 1, 2 ...
    set<vector<int>> encountered;
    for( ;; )
    {
        vector<int> current;
        for( int const i : indices ) { current.push_back( a[i] ); }
        if( encountered.count( current ) == 0 )
        {
            cout << "Indices " << indices << " -> values " << current << endl;
            encountered.insert( current );
        }
        if( not ascend_ordered( n, begin( indices ), end( indices ) ) )
        {
            break;
        }
    }
}

支持包含和i / o：

#include <algorithm>
using std::is_sorted;

#include <assert.h>

#include <iterator>
using std::reverse_iterator;

#include <iostream>
using std::ostream; using std::cout; using std::endl;

#include <numeric>
using std::iota;

#include <set>
using std::set;

#include <utility>
using std::begin; using std::end;

#include <vector>
using std::vector;

template< class Container, class Enable_if = typename Container::value_type >
auto operator<<( ostream& stream, const Container& c )
    -> ostream&
{
    stream << "{";
    int n_items_outputted = 0;
    for( const int x : c )
    {
        if( n_items_outputted >= 1 ) { stream << ", "; }
        stream << x;
        ++n_items_outputted;
    }
    stream << "}";
    return stream;
}

遍历大小为k的不同子集

4 个答案: