使用一个TKey数组对多个TValue数组进行排序

时间:2014-02-09 17:48:04

标签: c# .net performance algorithm sorting

我有一个外部数组,其中包含 N 个内部数组,每个内部数组的大小为 M。我想根据另一个键数组 K 对每个内部数组进行排序,排序方式与 .NET 内置方法 Array.Sort<TKey, TValue>(TKey[], TValue[], IComparer<TKey>) 相同。

该方法在排序后会修改Key数组,因此我只能用它对单个内部数组进行排序。为了对多个数组进行排序,我为每个内部数组把Key数组复制到另一个KeyBuffer数组中,在每个排序步骤中重用KeyBuffer,从而避免分配和GC。如果典型的 N 是 10K-100K 且 M < 1000,那么这是最有效的方法吗?鉴于 M 很小,复制和排序应该都能在CPU缓存中完成——这已经是我能达到的最快速度了吗?

我担心的是,通过这样做,我正在对缓冲区进行排序并丢弃结果(N-1)次,这是一种浪费。此外,我正在进行N次实际排序,但在第一次排序后,我已经知道旧索引到新索引的映射,我可以以某种方式重用该映射用于其他(N-1)步骤。

您如何避免不必要的排序并将已知的映射从第一步应用到其他步骤?

以下是我现在如何做的代码。问题是,是否有可能更有效地做到这一点。

using System;
using System.Collections.Generic;

namespace MultiSorting {
    /// <summary>
    /// Baseline ("naive") demonstration: sorts every inner array of <c>outer</c> by the same
    /// key array, calling <c>Array.Sort(keys, items)</c> once per inner array.
    /// </summary>
    class Program {
        /// <summary>
        /// Builds N inner arrays of M strings, sorts each of them by a shared key array,
        /// then prints the sorted keys and every sorted inner array to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args) {

            var N = 10;
            var M = 5;
            var outer = new List<string[]>(N);

            for (var i = 0; i < N; i++) {
                string[] inner = { "a" + i, "d" + i, "c" + i, "b" + i, "e" + i };
                outer.Add(inner);
            }

            int[] keys = { 1, 4, 3, 2, 5 };

            // Array.Sort(keys, items) reorders the key array too, so each pass works on a
            // fresh copy of the keys held in a single reusable buffer (no per-pass allocation).
            var keysBuffer = new int[M];
            for (int i = 0; i < N; i++) {
                Array.Copy(keys, keysBuffer, M);
                // doing sort N times, but we know the map 
                // old_index -> new_index from the first sorting
                // plus we sort keysBuffer N times but use the result only one time
                Array.Sort(keysBuffer, outer[i]); 
            }
            // After the last pass the buffer holds the keys in sorted order.
            keys = keysBuffer;

            foreach (var key in keys) {
                Console.Write(key + " "); // 1, 2, 3, 4, 5
            }
            Console.WriteLine("");
            for (var i = 0; i < N; i++) {
                foreach (var item in outer[i]) {
                    Console.Write(item + " "); // a{i}, b{i}, c{i}, d{i}, e{i}
                }
                Console.WriteLine("");
            }
            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
    }
}

1 个答案:

答案 0 :(得分:2)

刚刚试验了一下,直接在for循环中实现了映射重用。我没想到用一个简单的循环代替原生内置方法竟然能加快速度——可能是因为我低估了排序相对于数组遍历的算法成本,而且每当分析器显示大部分工作都是在.NET方法内部完成时,我往往就放松了警惕……

Naive是问题中的代码,ReuseMap是问题文字中描述的做法,Linq来自@L.B的答案。带InPlace后缀的版本会修改输入,带Copy后缀的版本不会。

结果,N = 2000,M = 500,10次运行,以毫秒为单位:

  • NaiveInPlace:1005
  • ReuseMapInPlace:129(Log2(500)= 9.0,加速= 7.8x)
  • NaiveCopy:1181
  • ReuseMapCopy:304
  • LinqCopy:3284

整个测试如下:

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace MultiSorting {
    class Program {
        /// <summary>
        /// Entry point. Prints a shuffled key array and its sorted form, runs every
        /// multi-sort variant once on a tiny input and prints each result so the outputs
        /// can be compared by eye, then runs the timed benchmarks and waits for Enter.
        /// </summary>
        static void Main() {

            const int n = 2;
            const int m = 10;

            // Shuffled keys as generated.
            int[] keys = GenerateKeys(m);
            for (var k = 0; k < keys.Length; k++) {
                Console.Write(keys[k] + " ");
            }
            Console.WriteLine("");

            // The same keys in sorted order, for reference.
            var sortedKeys = (int[])keys.Clone();
            Array.Sort(sortedKeys);
            for (var k = 0; k < sortedKeys.Length; k++) {
                Console.Write(sortedKeys[k] + " ");
            }
            Console.WriteLine("");

            // warm up, check that output is the same
            PrintResults(MultiSortNaiveInPlace(keys, GenerateOuter(n, m)));
            PrintResults(MultiSortNaiveCopy(keys, GenerateOuter(n, m)));
            PrintResults(MultiSortReuseMapInPlace(keys, GenerateOuter(n, m)));
            PrintResults(MultiSortReuseMapCopy(keys, GenerateOuter(n, m)));
            PrintResults(MultiSortLinqCopy(keys, GenerateOuter(n, m)));

            // tests
            const int benchRows = 2000;
            const int benchCols = 500;
            keys = GenerateKeys(benchCols);
            NaiveInPlace(benchRows, benchCols, keys);
            ReuseMapInPlace(benchRows, benchCols, keys);
            NaiveCopy(benchRows, benchCols, keys);
            ReuseMapCopy(benchRows, benchCols, keys);
            LinqCopy(benchRows, benchCols, keys);

            Console.ReadLine();
        }

        /// <summary>Times <c>MultiSortNaiveInPlace</c> and prints the elapsed milliseconds.</summary>
        /// <param name="n">Number of inner arrays per round.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <param name="keys">Key array every inner array is sorted by.</param>
        private static void NaiveInPlace(int n, int m, int[] keys) {
            RunBenchmark("NaiveInPlace", n, m, keys, MultiSortNaiveInPlace);
        }

        /// <summary>Times <c>MultiSortReuseMapInPlace</c> and prints the elapsed milliseconds.</summary>
        /// <param name="n">Number of inner arrays per round.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <param name="keys">Key array every inner array is sorted by.</param>
        private static void ReuseMapInPlace(int n, int m, int[] keys) {
            RunBenchmark("ReuseMapInPlace", n, m, keys, MultiSortReuseMapInPlace);
        }

        /// <summary>Times <c>MultiSortNaiveCopy</c> and prints the elapsed milliseconds.</summary>
        /// <param name="n">Number of inner arrays per round.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <param name="keys">Key array every inner array is sorted by.</param>
        private static void NaiveCopy(int n, int m, int[] keys) {
            RunBenchmark("NaiveCopy", n, m, keys, MultiSortNaiveCopy);
        }

        /// <summary>Times <c>MultiSortReuseMapCopy</c> and prints the elapsed milliseconds.</summary>
        /// <param name="n">Number of inner arrays per round.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <param name="keys">Key array every inner array is sorted by.</param>
        private static void ReuseMapCopy(int n, int m, int[] keys) {
            RunBenchmark("ReuseMapCopy", n, m, keys, MultiSortReuseMapCopy);
        }

        /// <summary>Times <c>MultiSortLinqCopy</c> and prints the elapsed milliseconds.</summary>
        /// <param name="n">Number of inner arrays per round.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <param name="keys">Key array every inner array is sorted by.</param>
        private static void LinqCopy(int n, int m, int[] keys) {
            RunBenchmark("LinqCopy", n, m, keys, MultiSortLinqCopy);
        }

        /// <summary>
        /// Shared benchmark harness: pre-generates the input for ten rounds, forces a
        /// garbage collection, then times ten calls of <paramref name="sort"/> and prints
        /// "<paramref name="label"/>: elapsed-milliseconds".
        /// </summary>
        /// <param name="label">Name printed in front of the timing.</param>
        /// <param name="n">Number of inner arrays per round.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <param name="keys">Key array handed to every call of <paramref name="sort"/>.</param>
        /// <param name="sort">Sort routine under test; receives the keys and one round's input.</param>
        private static void RunBenchmark(string label, int n, int m, int[] keys,
                                         Func<int[], List<string[]>, List<string[]>> sort) {
            const int rounds = 10;

            // Build every round's input before the clock starts so that data generation
            // is not part of the measurement.
            var source = new List<List<string[]>>(rounds);
            for (int i = 0; i < rounds; i++) {
                source.Add(GenerateOuter(n, m));
            }

            // Collect the garbage produced by setup (and by earlier benchmarks) before timing.
            GC.Collect();

            var sw = Stopwatch.StartNew();
            for (int i = 0; i < rounds; i++) {
                // Store the result back, exactly as the individual harnesses did.
                source[i] = sort(keys, source[i]);
            }
            sw.Stop();
            Console.WriteLine(label + ": " + sw.ElapsedMilliseconds);
        }

        /// <summary>
        /// Writes every inner array on its own console line; each item is followed by a
        /// single space.
        /// </summary>
        /// <param name="outer">The arrays to print, one per line.</param>
        private static void PrintResults(List<string[]> outer) {
            foreach (string[] row in outer) {
                for (var col = 0; col < row.Length; col++) {
                    Console.Write(row[col] + " ");
                }
                Console.WriteLine();
            }
        }

        /// <summary>
        /// Returns the integers 0 .. <paramref name="m"/>-1 in random order.
        /// </summary>
        /// <param name="m">Number of keys to generate.</param>
        /// <returns>A new array of length <paramref name="m"/> holding each value exactly once.</returns>
        private static int[] GenerateKeys(int m) {
            var keys = new int[m];
            for (int i = 0; i < m; i++) { keys[i] = i; }

            // Fisher-Yates shuffle: O(m), in place, and free of the tie bias and the extra
            // allocations of shuffling by sorting on random keys.
            var rnd = new Random();
            for (int i = m - 1; i > 0; i--) {
                int j = rnd.Next(i + 1); // 0 <= j <= i
                int tmp = keys[i];
                keys[i] = keys[j];
                keys[j] = tmp;
            }
            return keys;
        }

        /// <summary>
        /// Builds <paramref name="n"/> string arrays of length <paramref name="m"/>; the cell
        /// at row r, column c holds the text "R{r}C{c}".
        /// </summary>
        /// <param name="n">Number of inner arrays.</param>
        /// <param name="m">Length of each inner array.</param>
        /// <returns>A new list containing the generated arrays in row order.</returns>
        private static List<string[]> GenerateOuter(int n, int m) {
            var rows = new List<string[]>(n);
            var row = 0;
            while (row < n) {
                var cells = new string[m];
                string prefix = "R" + row + "C";
                for (var col = 0; col < cells.Length; col++) {
                    cells[col] = prefix + col;
                }
                rows.Add(cells);
                row++;
            }
            return rows;
        }

        /// <summary>
        /// Sorts every inner array in place by <paramref name="keys"/>, running a full
        /// <c>Array.Sort(keys, items)</c> for each one.
        /// </summary>
        /// <param name="keys">Sort keys; left unmodified because each pass sorts a scratch copy.</param>
        /// <param name="outer">Arrays to sort; their contents are reordered in place.</param>
        /// <returns>The same <paramref name="outer"/> instance.</returns>
        private static List<string[]> MultiSortNaiveInPlace(int[] keys, List<string[]> outer) {
            // One scratch buffer is shared by all passes to avoid per-row allocations.
            var scratchKeys = new int[keys.Length];
            for (var row = 0; row < outer.Count; row++) {
                // Array.Sort reorders its key array as well, so restore the original
                // key order before each pass.
                keys.CopyTo(scratchKeys, 0);
                // The full sort is repeated for every row even though the resulting
                // permutation is the same each time; this is the baseline being measured.
                Array.Sort(scratchKeys, outer[row]);
            }
            return outer;
        }

        /// <summary>
        /// Returns sorted copies of the inner arrays, running a full
        /// <c>Array.Sort(keys, items)</c> for each one. The input arrays are not modified.
        /// </summary>
        /// <param name="keys">Sort keys; left unmodified because each pass sorts a scratch copy.</param>
        /// <param name="outer">Arrays to sort; each must hold at least <c>keys.Length</c> items.</param>
        /// <returns>
        /// A new list with one new array per input array, holding that array's first
        /// <c>keys.Length</c> items ordered by key.
        /// </returns>
        private static List<string[]> MultiSortNaiveCopy(int[] keys, List<string[]> outer) {
            var result = new List<string[]>(outer.Count);
            var keysBuffer = new int[keys.Length];

            // foreach instead of re-evaluating LINQ Count() on every iteration.
            foreach (var inner in outer) {
                var newInner = new string[keys.Length];
                // Array.Sort reorders its key array as well, so restore the original
                // key order before each pass.
                Array.Copy(keys, keysBuffer, keys.Length);
                Array.Copy(inner, newInner, keys.Length);
                // The full sort is repeated for every row even though the resulting
                // permutation is the same each time; this is the baseline being measured.
                Array.Sort(keysBuffer, newInner);
                result.Add(newInner);
            }
            return result;
        }

        /// <summary>
        /// Sorts every inner array in place by <paramref name="keys"/>. The keys are sorted
        /// only once to obtain the sorted-position -> original-index map, which is then
        /// applied to each inner array with a plain loop.
        /// </summary>
        /// <param name="keys">Sort keys; left unmodified because a copy is sorted.</param>
        /// <param name="outer">
        /// Arrays to reorder in place; each must hold at least <c>keys.Length</c> items.
        /// Items beyond <c>keys.Length</c> are left where they are, as with
        /// <c>Array.Sort(keys, items)</c>.
        /// </param>
        /// <returns>The same <paramref name="outer"/> instance.</returns>
        private static List<string[]> MultiSortReuseMapInPlace(int[] keys, List<string[]> outer) {
            var sortedKeys = (int[])keys.Clone();

            // Start from the identity permutation; sorting it alongside the keys leaves
            // map[i] = original index of the item that belongs at sorted position i.
            var map = new int[sortedKeys.Length];
            for (int m = 0; m < map.Length; m++) {
                map[m] = m;
            }
            Array.Sort(sortedKeys, map);

            // Scratch buffer shared by all rows to avoid per-row allocations.
            var itemsBuffer = new string[map.Length];
            foreach (var inner in outer) {
                for (int m = 0; m < map.Length; m++) {
                    itemsBuffer[m] = inner[map[m]];
                }
                // Copy back exactly the permuted prefix. Using inner.Length here would
                // over-read the scratch buffer and throw when a row is longer than keys.
                Array.Copy(itemsBuffer, inner, map.Length);
            }
            return outer;
        }

        /// <summary>
        /// Returns sorted copies of the inner arrays. The keys are sorted only once to obtain
        /// the sorted-position -> original-index map, which is then applied to each inner
        /// array with a plain loop. The input arrays are not modified.
        /// </summary>
        /// <param name="keys">Sort keys; left unmodified because a copy is sorted.</param>
        /// <param name="outer">Arrays to sort; each must hold at least <c>keys.Length</c> items.</param>
        /// <returns>
        /// A new list with one new array of length <c>keys.Length</c> per input array,
        /// ordered by key.
        /// </returns>
        private static List<string[]> MultiSortReuseMapCopy(int[] keys, List<string[]> outer) {
            var sortedKeys = (int[])keys.Clone();

            // Start from the identity permutation; sorting it alongside the keys leaves
            // map[i] = original index of the item that belongs at sorted position i.
            var map = new int[sortedKeys.Length];
            for (int m = 0; m < map.Length; m++) {
                map[m] = m;
            }
            Array.Sort(sortedKeys, map);

            var result = new List<string[]>(outer.Count);
            // foreach instead of re-evaluating LINQ Count() on every iteration.
            foreach (var inner in outer) {
                var newInner = new string[keys.Length];
                for (int m = 0; m < map.Length; m++) {
                    newInner[m] = inner[map[m]];
                }
                result.Add(newInner);
            }
            return result;
        }

        /// <summary>
        /// LINQ variant: pairs every item with the key at the same index, orders the pairs
        /// by key and projects the items into a new array. The input arrays are not modified.
        /// </summary>
        /// <param name="keys">Sort keys, looked up by item index.</param>
        /// <param name="outer">Arrays to sort.</param>
        /// <returns>A new list with one new, key-ordered array per input array.</returns>
        private static List<string[]> MultiSortLinqCopy(int[] keys, List<string[]> outer) {
            var sortedRows =
                from row in outer
                select row.Select((item, index) => new KeyValuePair<int, string>(keys[index], item))
                          .OrderBy(pair => pair.Key)
                          .Select(pair => pair.Value)
                          .ToArray(); // allocates one array per row
            return sortedRows.ToList(); // allocates the result list
        }

    }
}