我有一个N个内部数组的外部数组,大小为M.我想根据另一个数组K
对每个内部数组进行排序,其方式与内置Array.Sort<TKey, TValue> Method (TKey[], TValue[], IComparer<TKey>)
{{3 }} .NET method。
该方法在排序后修改Key数组,因此我可以使用它仅对单个内部数组进行排序。为了对许多数组进行排序,我将Key
数组复制到每个内部数组的另一个KeyBuffer
数组,在每个排序步骤重用KeyBuffer并避免分配和GC。如果典型的N是10K-100K且M <1,那么这是最有效的方法吗? 1000?鉴于M的大小很小,复制和排序应该在CPU缓存中完成 - 这是我能得到的最快的?
我担心的是,通过这样做,我正在对缓冲区进行排序并丢弃结果(N-1)次,这是一种浪费。此外,我正在进行N次实际排序,但在第一次排序后,我已经知道旧索引到新索引的映射,我可以以某种方式重用该映射用于其他(N-1)步骤。
您如何避免不必要的排序并将已知的映射从第一步应用到其他步骤?
以下是我现在如何做的代码。问题是,是否有可能更有效地做到这一点。
using System;
using System.Collections.Generic;
namespace MultiSorting {
class Program {
static void Main(string[] args) {
var N = 10;
var M = 5;
var outer = new List<string[]>(N);
for (var i = 0; i < N; i++) {
string[] inner = { "a" + i, "d" + i, "c" + i, "b" + i, "e" + i };
outer.Add(inner);
}
int[] keys = { 1, 4, 3, 2, 5 };
var keysBuffer = new int[M];
for (int i = 0; i < N; i++) {
Array.Copy(keys, keysBuffer, M);
// doing sort N times, but we know the map
// old_index -> new_index from the first sorting
// plus we sort keysBuffer N times but use the result only one time
Array.Sort(keysBuffer, outer[i]);
}
keys = keysBuffer;
foreach (var key in keys) {
Console.Write(key + " "); // 1, 2, 3, 4, 5
}
Console.WriteLine("");
for (var i = 0; i < N; i++) {
foreach (var item in outer[i]) {
Console.Write(item + " "); // a{i}, b{i}, c{i}, d{i}, e{i}
}
Console.WriteLine("");
}
Console.ReadLine();
}
}
答案 0 :(得分:2)
刚刚使用此方法并直接在for
循环中实现了映射重用。我没想到一个简单的循环而不是本机内置方法可能会加快速度,可能是因为我低估了排序的算法成本和阵列循环的成本,而且当一个分析器说这个工作主要在里面完成时,我常常放松.NET方法......
Naive
是问题中的代码,ReuseMap
是问题中的文字所描述的内容,Linq
来自@ L.B的答案。 ...InPlace
修改了输入,...Copy
没有。
结果,N = 2000,M = 500,10次运行,以毫秒为单位:
整个测试如下:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace MultiSorting {
class Program {
static void Main() {
const int n = 2;
const int m = 10;
var keys = GenerateKeys(m);
foreach (var key in keys) {
Console.Write(key + " ");
}
Console.WriteLine("");
var keysBuffer = new int[keys.Length];
Array.Copy(keys, keysBuffer, keys.Length);
Array.Sort(keysBuffer);
foreach (var key in keysBuffer) {
Console.Write(key + " ");
}
Console.WriteLine("");
// warm up, check that output is the same
List<string[]> outer = MultiSortNaiveInPlace(keys, GenerateOuter(n, m));
PrintResults(outer);
outer = MultiSortNaiveCopy(keys, GenerateOuter(n, m));
PrintResults(outer);
outer = MultiSortReuseMapInPlace(keys, GenerateOuter(n, m));
PrintResults(outer);
outer = MultiSortReuseMapCopy(keys, GenerateOuter(n, m));
PrintResults(outer);
outer = MultiSortLinqCopy(keys, GenerateOuter(n, m));
PrintResults(outer);
// tests
keys = GenerateKeys(500);
NaiveInPlace(2000, 500, keys);
ReuseMapInPlace(2000, 500, keys);
NaiveCopy(2000, 500, keys);
ReuseMapCopy(2000, 500, keys);
LinqCopy(2000, 500, keys);
Console.ReadLine();
}
private static void NaiveInPlace(int n, int m, int[] keys) {
const int rounds = 10;
var source = new List<List<string[]>>(rounds);
for (int i = 0; i < rounds; i++) {
source.Add(GenerateOuter(n, m));
}
GC.Collect();
var sw = Stopwatch.StartNew();
for (int i = 0; i < rounds; i++) {
source[i] = MultiSortNaiveInPlace(keys, source[i]);
}
sw.Stop();
Console.WriteLine("NaiveInPlace: " + sw.ElapsedMilliseconds);
}
private static void ReuseMapInPlace(int n, int m, int[] keys) {
const int rounds = 10;
var source = new List<List<string[]>>(rounds);
for (int i = 0; i < rounds; i++) {
source.Add(GenerateOuter(n, m));
}
GC.Collect();
var sw = Stopwatch.StartNew();
for (int i = 0; i < rounds; i++) {
source[i] = MultiSortReuseMapInPlace(keys, source[i]);
}
sw.Stop();
Console.WriteLine("ReuseMapInPlace: " + sw.ElapsedMilliseconds);
}
private static void NaiveCopy(int n, int m, int[] keys) {
const int rounds = 10;
var source = new List<List<string[]>>(rounds);
for (int i = 0; i < rounds; i++) {
source.Add(GenerateOuter(n, m));
}
GC.Collect();
var sw = Stopwatch.StartNew();
for (int i = 0; i < rounds; i++) {
source[i] = MultiSortNaiveCopy(keys, source[i]);
}
sw.Stop();
Console.WriteLine("NaiveCopy: " + sw.ElapsedMilliseconds);
}
private static void ReuseMapCopy(int n, int m, int[] keys) {
const int rounds = 10;
var source = new List<List<string[]>>(rounds);
for (int i = 0; i < rounds; i++) {
source.Add(GenerateOuter(n, m));
}
GC.Collect();
var sw = Stopwatch.StartNew();
for (int i = 0; i < rounds; i++) {
source[i] = MultiSortReuseMapCopy(keys, source[i]);
}
sw.Stop();
Console.WriteLine("ReuseMapCopy: " + sw.ElapsedMilliseconds);
}
private static void LinqCopy(int n, int m, int[] keys) {
const int rounds = 10;
var source = new List<List<string[]>>(rounds);
for (int i = 0; i < rounds; i++) {
source.Add(GenerateOuter(n, m));
}
GC.Collect();
var sw = Stopwatch.StartNew();
for (int i = 0; i < rounds; i++) {
source[i] = MultiSortLinqCopy(keys, source[i]);
}
sw.Stop();
Console.WriteLine("LinqCopy: " + sw.ElapsedMilliseconds);
}
private static void PrintResults(List<string[]> outer) {
for (var i = 0; i < outer.Count; i++) {
foreach (var item in outer[i]) {
Console.Write(item + " "); // a{i}, b{i}, c{i}, d{i}, e{i}
}
Console.WriteLine("");
}
}
private static int[] GenerateKeys(int m) {
var keys = new int[m];
for (int i = 0; i < m; i++) { keys[i] = i; }
var rnd = new Random();
keys = keys.OrderBy(x => rnd.Next()).ToArray();
return keys;
}
private static List<string[]> GenerateOuter(int n, int m) {
var outer = new List<string[]>(n);
for (var o = 0; o < n; o++) {
var inner = new string[m];
for (int i = 0; i < m; i++) { inner[i] = "R" + o + "C" + i; }
outer.Add(inner);
}
return outer;
}
private static List<string[]> MultiSortNaiveInPlace(int[] keys, List<string[]> outer) {
var keysBuffer = new int[keys.Length];
foreach (var inner in outer) {
Array.Copy(keys, keysBuffer, keys.Length);
// doing sort N times, but we know the map
// old_index -> new_index from the first sorting
// plus we sort keysBuffer N times but use the result only one time
Array.Sort(keysBuffer, inner);
}
return outer;
}
private static List<string[]> MultiSortNaiveCopy(int[] keys, List<string[]> outer) {
var result = new List<string[]>(outer.Count);
var keysBuffer = new int[keys.Length];
for (var n = 0; n < outer.Count(); n++) {
var inner = outer[n];
var newInner = new string[keys.Length];
Array.Copy(keys, keysBuffer, keys.Length);
Array.Copy(inner, newInner, keys.Length);
// doing sort N times, but we know the map
// old_index -> new_index from the first sorting
// plus we sort keysBuffer N times but use the result only one time
Array.Sort(keysBuffer, newInner);
result.Add(newInner);
}
return result;
}
private static List<string[]> MultiSortReuseMapInPlace(int[] keys, List<string[]> outer) {
var itemsBuffer = new string[keys.Length];
var keysBuffer = new int[keys.Length];
Array.Copy(keys, keysBuffer, keysBuffer.Length);
var map = new int[keysBuffer.Length];
for (int m = 0; m < keysBuffer.Length; m++) {
map[m] = m;
}
Array.Sort(keysBuffer, map);
for (var n = 0; n < outer.Count(); n++) {
var inner = outer[n];
for (int m = 0; m < map.Length; m++) {
itemsBuffer[m] = inner[map[m]];
}
Array.Copy(itemsBuffer, outer[n], inner.Length);
}
return outer;
}
private static List<string[]> MultiSortReuseMapCopy(int[] keys, List<string[]> outer) {
var keysBuffer = new int[keys.Length];
Array.Copy(keys, keysBuffer, keysBuffer.Length);
var map = new int[keysBuffer.Length];
for (int m = 0; m < keysBuffer.Length; m++) {
map[m] = m;
}
Array.Sort(keysBuffer, map);
var result = new List<string[]>(outer.Count);
for (var n = 0; n < outer.Count(); n++) {
var inner = outer[n];
var newInner = new string[keys.Length];
for (int m = 0; m < map.Length; m++) {
newInner[m] = inner[map[m]];
}
result.Add(newInner);
}
return result;
}
private static List<string[]> MultiSortLinqCopy(int[] keys, List<string[]> outer) {
var result = outer.Select(arr => arr.Select((item, inx) => new { item, key = keys[inx] })
.OrderBy(x => x.key)
.Select(x => x.item)
.ToArray()) // allocating
.ToList(); // allocating
return result;
}
}
}