从含 n 个未排序正整数(允许重复)的包中查找所有大小为 k、总和为 s 的子集

时间:2015-05-02 19:22:01

标签: c# algorithm .net-2.0 dynamic-programming subset-sum

请注意,这是 C#.NET 2.0 项目( Linq不允许)所必需的。

我知道这里已经提出过非常类似的问题,我也已经写出了一些可以运行的代码(见下文),但仍然想知道在给定 k 和 s 的条件下如何让算法运行得更快。

这是我迄今为止所学到的: 动态编程是查找ONE(不是所有)子集的最有效方法。如果我错了,请纠正我。有没有办法反复调用DP代码来生成更新的子集,直到包(重复设置)用完为止?

如果没有,那么有没有办法加快下面的回溯递归算法?它确实能生成我需要的结果,但我认为即使考虑了 s 和 k,它的运行时间仍然是 O(2^n)。

这是我固定的数字包,不会改变,n = 114,数字范围从3到286:

    // Fixed input bag: 114 unsorted positive integers with duplicates, values from 3 to 286.
    // The element order is significant because results are reported as indices into this array.
    int[] numbers = new int[]
    {
        7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
        123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
        112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
        34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
        54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
        60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
        14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
        28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
        29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
        15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
        11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
        5, 4, 5, 6
    };

要求

  • 空间限制为最大 2-3GB,但时间应为 O(n^某常数),而不是 O(某常数^n)。

  • 不得对包进行排序,也不得删除其中的重复项。

  • 结果应该是匹配中数字的索引 子集,而不是数字本身(因为我们有重复)。

动态编程尝试

这是C#动态编程版本,改编自stackoverflow.com上类似问题的答案:

using System;
using System.Collections.Generic;

namespace Utilities
{
    /// <summary>
    /// Subset-sum helper based on dynamic programming over reachable sums.
    /// It finds ONE subset of the input whose elements add up to a requested sum and
    /// lets the caller read back the indices of the elements of that subset.
    /// </summary>
    /// <remarks>
    /// The DP tables are static and are rebuilt by every <see cref="FindSubset"/> call, so
    /// <see cref="GetLastIndex"/> always refers to the most recent <see cref="FindSubset"/> call.
    /// The algorithm assumes positive input values.
    /// </remarks>
    public static class Combinations
    {
        // m_memo[s] is true once some subset of the elements processed so far sums to s.
        private static Dictionary<int, bool> m_memo = new Dictionary<int, bool>();

        // m_previous[s] = (index, value) of the element that first made the sum s reachable.
        // The entry for 0 uses index -1 as the end-of-chain marker.
        private static Dictionary<int, KeyValuePair<int, int>> m_previous = new Dictionary<int, KeyValuePair<int, int>>();

        static Combinations()
        {
            Reset();
        }

        /// <summary>
        /// Restores both tables to the state "only the empty subset (sum 0) is known".
        /// </summary>
        private static void Reset()
        {
            m_memo.Clear();
            m_previous.Clear();
            m_memo[0] = true;
            m_previous[0] = new KeyValuePair<int, int>(-1, 0);
        }

        /// <summary>
        /// Determines whether some subset of <paramref name="set"/> sums to <paramref name="sum"/>.
        /// </summary>
        /// <param name="set">The input values; each element may be used at most once.</param>
        /// <param name="sum">The target sum.</param>
        /// <returns><c>true</c> when a matching subset exists; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
        public static bool FindSubset(IList<int> set, int sum)
        {
            if (set == null)
            {
                throw new ArgumentNullException("set");
            }

            // Start from a clean table; otherwise sums discovered by an earlier call
            // (possibly for a different set) would leak into this result.
            Reset();

            for (int i = 0; i < set.Count; ++i)
            {
                int num = set[i];

                // Walk downwards so that m_memo[s - num] still describes the state before
                // element i was considered, i.e. every element is used at most once.
                for (int s = sum; s >= num; --s)
                {
                    bool reachable;
                    if (m_memo.TryGetValue(s - num, out reachable) && reachable)
                    {
                        m_memo[s] = true;

                        // Keep only the first element that reached s so the back-pointers
                        // always lead to strictly smaller indices.
                        if (!m_previous.ContainsKey(s))
                        {
                            m_previous[s] = new KeyValuePair<int, int>(i, num);
                        }
                    }
                }
            }

            bool found;
            return m_memo.TryGetValue(sum, out found) && found;
        }

        /// <summary>
        /// Enumerates the indices of the subset found by the last <see cref="FindSubset"/> call
        /// for <paramref name="sum"/>, from the highest index to the lowest.
        /// </summary>
        /// <param name="sum">A sum that was reachable in the last <see cref="FindSubset"/> call.</param>
        /// <returns>The element indices; empty when <paramref name="sum"/> was not reached.</returns>
        public static IEnumerable<int> GetLastIndex(int sum)
        {
            KeyValuePair<int, int> step;

            // Follow the back-pointers until the sentinel stored for sum 0 is reached.
            while (m_previous.TryGetValue(sum, out step) && step.Key != -1)
            {
                yield return step.Key;
                sum -= step.Value;
            }
        }

        /// <summary>
        /// Demo entry point: looks for one subset of the fixed bag that sums to 400 and prints it
        /// as "position.value" pairs (positions are 1-based).
        /// </summary>
        public static void SubsetSumMain(string[] args)
        {
            int[] numbers = new int[]
            {
                7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
                123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
                112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
                34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
                54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
                60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
                14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
                28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
                29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
                15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
                11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
                5, 4, 5, 6
            };

            int sum = 400;
            //int size = 4; // don't know to use in dynamic programming

            // call dynamic programming
            if (Combinations.FindSubset(numbers, sum))
            {
                foreach (int index in Combinations.GetLastIndex(sum))
                {
                    Console.Write((index + 1) + "." + numbers[index] + "\t");
                }
                Console.WriteLine();
            }
            Console.WriteLine();

            Console.ReadKey();
        }
    }
}

递归编程尝试

这是C#递归编程版本,改编自stackoverflow.com上类似问题的答案:

using System;
using System.Collections.Generic;

namespace Utilities
{
    /// <summary>
    /// Backtracking search for subsets of an integer bag that add up to a given sum,
    /// optionally restricted to subsets of an exact size. Subsets are reported as
    /// lists of indices into the input array, because the bag may contain duplicates.
    /// </summary>
    /// <remarks>
    /// Every call starts from a fresh result, so repeated calls do not accumulate
    /// counts or subsets from earlier calls.
    /// </remarks>
    public static class Combinations
    {
        /// <summary>
        /// Counts the subsets of <paramref name="numbers"/>[<paramref name="index"/>..] that,
        /// added to the running total <paramref name="current"/>, reach <paramref name="sum"/>.
        /// </summary>
        /// <param name="numbers">The bag of values.</param>
        /// <param name="index">First index that may still be chosen; pass 0 to search the whole bag.</param>
        /// <param name="current">Sum of the elements chosen so far; pass 0 initially.</param>
        /// <param name="sum">The target sum.</param>
        /// <param name="size">Required subset size, or 0 to accept subsets of any size.</param>
        /// <param name="result">Indices chosen so far, or null when nothing has been chosen yet.</param>
        /// <returns>The number of matching subsets.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="numbers"/> is null.</exception>
        public static int CountSubsets(int[] numbers, int index, int current, int sum, int size, List<int> result)
        {
            if (numbers == null)
            {
                throw new ArgumentNullException("numbers");
            }

            int alreadyChosen = (result == null) ? 0 : result.Count;
            return CountFrom(numbers, index, current, sum, size, alreadyChosen);
        }

        // Recursive worker of CountSubsets; only the number of chosen elements is tracked.
        private static int CountFrom(int[] numbers, int index, int current, int sum, int size, int chosen)
        {
            if ((numbers.Length <= index) || (current > sum))
            {
                return 0;
            }

            int found = 0;
            int included = current + numbers[index];
            if (included == sum)
            {
                if ((size == 0) || (chosen + 1 == size))
                {
                    found++;
                }
            }
            else if ((included < sum) && ((size == 0) || (chosen + 1 < size)))
            {
                // Only descend while the subset can still grow; once it already holds
                // 'size' elements, every deeper match would be too large.
                found += CountFrom(numbers, index + 1, included, sum, size, chosen + 1);
            }

            // Branch that leaves numbers[index] out.
            found += CountFrom(numbers, index + 1, current, sum, size, chosen);
            return found;
        }

        /// <summary>
        /// Finds the subsets of <paramref name="numbers"/>[<paramref name="index"/>..] that,
        /// added to the running total <paramref name="current"/>, reach <paramref name="sum"/>.
        /// </summary>
        /// <param name="numbers">The bag of values.</param>
        /// <param name="index">First index that may still be chosen; pass 0 to search the whole bag.</param>
        /// <param name="current">Sum of the elements chosen so far; pass 0 initially.</param>
        /// <param name="sum">The target sum.</param>
        /// <param name="size">Required subset size, or 0 to accept subsets of any size.</param>
        /// <param name="result">Indices chosen so far, or null when nothing has been chosen yet. Not modified.</param>
        /// <returns>A new list holding one ascending index list per matching subset.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="numbers"/> is null.</exception>
        public static List<List<int>> FindSubsets(int[] numbers, int index, int current, int sum, int size, List<int> result)
        {
            if (numbers == null)
            {
                throw new ArgumentNullException("numbers");
            }

            List<List<int>> subsets = new List<List<int>>();
            List<int> chosen = (result == null) ? new List<int>() : new List<int>(result);
            CollectFrom(numbers, index, current, sum, size, chosen, subsets);
            return subsets;
        }

        // Recursive worker of FindSubsets. 'chosen' is one shared list that is pushed to and
        // popped from while backtracking; it is copied only when a match is recorded.
        private static void CollectFrom(int[] numbers, int index, int current, int sum, int size, List<int> chosen, List<List<int>> subsets)
        {
            if ((numbers.Length <= index) || (current > sum))
            {
                return;
            }

            int included = current + numbers[index];
            if (included == sum)
            {
                if ((size == 0) || (chosen.Count + 1 == size))
                {
                    List<int> match = new List<int>(chosen);
                    match.Add(index);
                    subsets.Add(match);
                }
            }
            else if ((included < sum) && ((size == 0) || (chosen.Count + 1 < size)))
            {
                // Only descend while the subset can still grow; once it already holds
                // 'size' elements, every deeper match would be too large.
                chosen.Add(index);
                CollectFrom(numbers, index + 1, included, sum, size, chosen, subsets);
                chosen.RemoveAt(chosen.Count - 1);
            }

            // Branch that leaves numbers[index] out.
            CollectFrom(numbers, index + 1, current, sum, size, chosen, subsets);
        }

        /// <summary>
        /// Demo entry point: counts and lists all 2-element subsets of the fixed bag that sum to 17.
        /// </summary>
        public static void SubsetSumMain(string[] args)
        {
            int[] numbers = new int[]
            {
                7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
                123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
                112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
                34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
                54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
                60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
                14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
                28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
                29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
                15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
                11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
                5, 4, 5, 6
            };

            int sum = 17;
            int size = 2;

            // call backtracking recursive programming
            Console.WriteLine("CountSubsets");
            int count = Combinations.CountSubsets(numbers, 0, 0, sum, size, null);
            Console.WriteLine("Count = " + count);
            Console.WriteLine();

            // call backtracking recursive programming
            Console.WriteLine("FindSubsets");
            List<List<int>> subsets = Combinations.FindSubsets(numbers, 0, 0, sum, size, null);
            for (int i = 0; i < subsets.Count; i++)
            {
                Console.Write((i + 1).ToString() + ":\t");
                for (int j = 0; j < subsets[i].Count; j++)
                {
                    int index = subsets[i][j];
                    Console.Write((index + 1) + "." + numbers[index] + " ");
                }
                Console.WriteLine();
            }
            Console.WriteLine("Count = " + subsets.Count);

            Console.ReadKey();
        }
    }
}

请让我知道如何将动态编程版本限制为大小为k的子集,如果我可以重复调用它,那么它会在每次调用时返回不同的子集,直到没有更多匹配的子集。

此外,我不确定应该在何处初始化DP算法的备忘录。我是在静态构造函数中完成的,它会在首次访问任何方法时自动运行。这是正确的初始化位置,还是需要移到FindSubset()方法内部[目前已注释掉]?

至于递归版本,它是回溯吗?我们怎样才能加快速度。它工作正常,考虑到k和s,但效率很低。

让这个主题成为所有C#SubsetSum相关问题的母亲!

3 个答案:

答案 0 :(得分:0)

只需搜索大小为K的所有组合,并检查每个组合是否满足条件。

适合您情况的k组合的最快算法是:

// Pseudocode: k nested loops enumerate every strictly increasing index tuple
// (i1 < i2 < ... < ik) and test whether the selected elements add up to `sum`.
// The "..." lines stand for the loops and terms that are not written out.
// NOTE(review): the bounds use `<= n`; if n is the array length rather than the last
// valid index, arr[n] is out of range and the bounds should be `< n` - confirm what n denotes.
for (var i1 = 0; i1 <= n; i1++)
{
    for (var i2 = i1 + 1; i2 <= n; i2++)
    {
        for (var i3 = i2 + 1; i3 <= n; i3++)
        {
            ...

            for (var ik = iOneBeforeK + 1; ik <= n; ik++)
            {
                if (arr[i1] + arr[i2] + ... + arr[ik] == sum)
                {
                    // this is a valid subset
                }
            }
        }
    }
}

但是你在谈论数字并总结它们,这意味着你可以用更聪明的算法来制作截止数据。

由于所有数字都是正数,因此您知道如果单个数字足够大,就无法再向其添加任何正数并使总和为 s。假设 s=6、k=4,搜索中需要包含的最大数字为 s-k+1=3。3+1+1+1 共 k 个数字(1 是可能的最小数字),总和为 6。任何大于 3 的数字都无法再加上另外 3 个正数并保持总和 <= 6。

但请等一下,您的最小可能值不是 1,而是 3。那就更好了。因此,假设 k=10、n=60、min=3(这里的 n 表示目标总和)。“最大数字情形”是 x+min*(k-1)=n → x=60-3*9=33。因此,需要考虑的最大数字是 33。

这会减少要考虑的数组中的相关数量,因此会减少要查找的组合数量。

但还可以更好。假设 k=10、n=60、min=3。数组中的第一个数字恰好是 20,因此它是相关的,应该被检查。让我们查找包含这个 20 的相关子集:
一个新的“子问题”出现了!k=10-1、n=60-20、min=3。现在您可以从子问题中剔除许多数字,并且每深入一步都可以再次剔除更多。

通过计算子问题中 k-1 个最小数字的平均值并将其用作 min,可以进一步改进这一点。

还可以更进一步:预先计算子问题 [0..n] 中 k 个最小数字的平均值、子问题 [1..n] 中 k-1 个最小数字的平均值、子问题 [2..n] 中 k-2 个最小数字的平均值,依此类推,然后直接使用这些值,而不是在每次评估子问题时反复重新计算相同的内容。

答案 1 :(得分:0)

可以通过与背包问题类似的解决方案来解决

dp[i][j][k] = 使用前 i 个元素、大小为 k 且总和等于 j 的子集的数量

dp [i] [j] [k] = dp [i-1] [j] [k] + dp [i-1] [j-a [i]] [k-1]

是dp的更新(使用ith元素还是不使用ith元素)

// Base case: for every prefix there is exactly one subset of size 0 with sum 0 (the empty one).
for (int i = 0; i <= n; i++)
{
    dp[i][0][0] = 1;
}

// Fill the table prefix by prefix; a[i - 1] is the i-th element.
for (int i = 1; i <= n; i++)
{
    for (int j = 0; j <= w; j++)
    {
        for (int k = 1; k <= i; k++)
        {
            // Subsets that skip the i-th element, plus (when it fits into j)
            // subsets that take it on top of a (k - 1)-element subset.
            dp[i][j][k] = dp[i - 1][j][k] + (j >= a[i - 1] ? dp[i - 1][j - a[i - 1]][k - 1] : 0);
        }
    }
}

答案 2 :(得分:0)

有多种解决方案,但是没有人展示如何使用动态编程来找到答案。

关键是使用动态编程来建立一个数据结构,以便以后可以从中找到所有解决方案。

除了所要求的功能外,我还记录了共有多少个解决方案,并编写了 FindSolution(node, position),用于返回从 node 开始的第 position 个解决方案,而无需计算其余的解。如果您想要全部解决方案,那么使用该函数效率会很低。但是,例如,借助此函数,可以算出把 10000 表示为 20 个质数之和的第十亿种方式。使用其他已给出的方法,这是不可行的。

using System;
using System.Collections.Generic;

/// <summary>
/// Finds all fixed-size subsets of an integer array that add up to a target value.
/// Dynamic programming first builds a compact linked structure that summarizes every
/// solution; the solutions can then be listed, counted, or fetched by position.
/// </summary>
public class SubsetSum
{
    /// <summary>
    /// One node of the solution summary. A node stands for "pick the element at
    /// <see cref="Index"/> first"; <see cref="Tail"/> leads to the ways of completing the
    /// subset and <see cref="Next"/> to the alternatives for this pick.
    /// </summary>
    public class SolutionNode<T>
    {
        // The index we found the value at
        public int Index {get; set;}
        // The value we add for this solution
        public T Value {get; set;}
        // How many solutions are reachable from this node, including those behind Next.
        public int Count {get; set;}
        // The rest of this solution (null when this node is the last element of the subset).
        public SolutionNode<T> Tail {get; set;}
        // The next solution.
        public SolutionNode<T> Next {get; set;}
    }

    /// <summary>
    /// Uses dynamic programming to create a summary of all subsets of
    /// <paramref name="numbers"/> that have exactly <paramref name="subsetSize"/> elements
    /// and sum to <paramref name="target"/>.
    /// </summary>
    /// <param name="numbers">The input values; each index is used at most once per subset.</param>
    /// <param name="target">The required sum.</param>
    /// <param name="subsetSize">The required number of elements.</param>
    /// <returns>The head node of the solution summary.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="numbers"/> is null.</exception>
    /// <exception cref="InvalidOperationException">No subset matches.</exception>
    public static SolutionNode<int> FindSolution(int[] numbers, int target, int subsetSize)
    {
        if (numbers == null)
        {
            throw new ArgumentNullException("numbers");
        }

        // By how many are in our list, by what they sum to, what SolutionNode<int> has our answer?
        List<Dictionary<int, SolutionNode<int>>> solutionOf = new List<Dictionary<int, SolutionNode<int>>>();

        // Initialize empty solutions.
        for (int i = 0; i <= subsetSize; i++)
        {
            solutionOf.Add(new Dictionary<int, SolutionNode<int>>());
        }

        // We discover from the last number in the list forward.
        // So discovering from the last index forward makes them ordered.
        for (int i = numbers.Length - 1; -1 < i; i--)
        {
            int number = numbers[i];
            // Descending here so we don't touch solutionOf[j-1] until after we have solutionOf[j] updated.
            for (int j = subsetSize; 0 < j; j--)
            {
                // All previously found sums with j entries
                Dictionary<int, SolutionNode<int>> after = solutionOf[j];
                // All previously found sums with j-1 entries
                Dictionary<int, SolutionNode<int>> before = solutionOf[j - 1];
                foreach (KeyValuePair<int, SolutionNode<int>> pair in before)
                {
                    // Every (j-1)-element solution can be extended by this number.
                    Prepend(after, pair.Key + number, i, number, pair.Value, pair.Value.Count);
                }

                // And special case empty set: the number on its own is one 1-element solution.
                if (1 == j)
                {
                    Prepend(after, number, i, number, null, 1);
                }
            }
        }

        // Return what we found.
        SolutionNode<int> root;
        if (subsetSize < 0 || !solutionOf[subsetSize].TryGetValue(target, out root))
        {
            throw new InvalidOperationException("No solutions found");
        }
        return root;
    }

    // Puts a new node for (index, value) in front of the chain stored under 'sum'.
    // 'ownCount' is the number of solutions that start with this node; the solutions of the
    // nodes already in the chain are added on top so Count covers the whole chain.
    private static void Prepend(Dictionary<int, SolutionNode<int>> level, int sum, int index, int value, SolutionNode<int> tail, int ownCount)
    {
        SolutionNode<int> node = new SolutionNode<int>();
        node.Index = index;
        node.Value = value;
        node.Tail = tail;
        node.Count = ownCount;

        SolutionNode<int> existing;
        if (level.TryGetValue(sum, out existing))
        {
            node.Next = existing;
            node.Count += existing.Count;
        }
        level[sum] = node;
    }

    /// <summary>
    /// Lazily enumerates every solution summarized by <paramref name="node"/>.
    /// </summary>
    /// <param name="node">The head node returned by FindSolution; null yields nothing.</param>
    /// <returns>One ascending index list per solution; every list is an independent copy.</returns>
    public static IEnumerable<List<int>> ListSolutions (SolutionNode<int> node)
    {
        List<int> solution = new List<int>();
        // Stack of alternatives still to visit. A null entry marks "this level is exhausted"
        // and makes the loop drop one more index from the current solution.
        List<SolutionNode<int>> solutionPath = new List<SolutionNode<int>>();

        // Initialize with starting information.
        solution.Add(0); // This will be removed when we get node
        solutionPath.Add(node); // This will be our start.

        while (0 < solutionPath.Count)
        {
            // Erase the tail of our previous solution
            solution.RemoveAt(solution.Count - 1);
            // Pick up our next.
            SolutionNode<int> current = solutionPath[solutionPath.Count - 1];
            solutionPath.RemoveAt(solutionPath.Count - 1);
            while (current != null)
            {
                solution.Add(current.Index);
                solutionPath.Add(current.Next);
                if (current.Tail == null)
                {
                    // Hand out a snapshot: the working list keeps changing while we iterate.
                    yield return new List<int>(solution);
                }
                current = current.Tail;
            }
        }
    }

    /// <summary>
    /// Returns the solution at the given position (in <see cref="ListSolutions"/> order)
    /// without enumerating the others.
    /// </summary>
    /// <param name="node">The head node returned by FindSolution.</param>
    /// <param name="position">Zero-based position, less than <c>node.Count</c>.</param>
    /// <returns>The indices of the selected solution in ascending order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="position"/> is out of range.</exception>
    public static List<int> FindSolution(SolutionNode<int> node, int position)
    {
        if (node == null)
        {
            throw new ArgumentNullException("node");
        }
        if (position < 0 || position >= node.Count)
        {
            throw new ArgumentOutOfRangeException("position");
        }

        // Switch to counting from the end.
        position = node.Count - position - 1;
        List<int> solution = new List<int>();
        while (node != null)
        {
            // Skip nodes while the wanted solution lies further down the chain.
            while (node.Next != null && position < node.Next.Count)
            {
                node = node.Next;
            }

            // Discard the solutions that belong to the rest of the chain so that the
            // position is relative to this node's own solutions before descending.
            if (node.Next != null)
            {
                position -= node.Next.Count;
            }

            solution.Add(node.Index);
            node = node.Tail;
        }
        return solution;
    }

    /// <summary>
    /// Demo: prints the index lists of all 4-element subsets of the fixed bag that sum to 400.
    /// </summary>
    public static void Main(string[] args)
    {
        SolutionNode<int> solution = FindSolution(
            new[]{
                7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
                123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
                112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
                34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
                54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
                60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
                14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
                28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
                29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
                15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
                11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
                5, 4, 5, 6}
            , 400, 4);
        IEnumerable<List<int>> listing = ListSolutions(solution);
        foreach (List<int> indices in listing)
        {
            Console.WriteLine ("solution {0}", string.Join(", ", indices.ToArray()));
        }
    }
}

顺便说一句,这是我第一次尝试编写C#。太痛苦了。