我有一个2D数组,有4列和1921980行string array = new string[1921980, 4]
。数组应按第四列中的值排序。所以我从Bubble Sort开始,它的速度非常慢,甚至是并行化的。它还会产生很高的CPU利用率。我切换到快速排序(递归),事情变得更快。问题是我在排序时调用了2个方法:
Console.Write
显示进度StringToInt32
自定义方法,速度高于Convert.ToInt32
或Int32.Parse
但是这会在一段时间后产生StackOverFlowException。所以我决定选择Quick Sort(迭代)。感觉就像Bubble Sort。我想我的代码有些问题。
static int StringToInt32(string s)
{
int tmp = 0;
switch (s[0])
{
case '-':
for (int counter = 1; counter < s.Length; counter++)
{
tmp = tmp * 10 + (s[counter] - '0');
}
tmp *= -1;
return tmp;
break;
default:
for (int counter = 0; counter < s.Length; counter++)
{
tmp = tmp * 10 + (s[counter] - '0');
}
return tmp;
break;
}
}
static void QuickSort(ref string[,] array, int length)
{
Stack<int> stack = new Stack<int>();
stack.Push(length - 1);
stack.Push(0);
int percent = -1;
int look = 0;
while (stack.Count != 0)
{
/**
* Pop array or sub array start and end index
*/
int left = stack.Pop();
int right = stack.Pop();
int newPivotIndex = Partition(ref array, left, right);
/**
* divide right i.e. second sub array from newPivotIndex+1 to right
* here newPivotIndex+1 is index where right elements are greater or = than pivot
* use stack for storing array index and in while loop pop that right sub array indexes.
*/
if ((newPivotIndex + 1) < right)
{
stack.Push(right);
stack.Push(newPivotIndex + 1);
}
/**
* divide sub array from left to mid or newPivotIndex-1
* here newPivotIndex-1 is index where left elements are lesser or = than pivot
* use stack for storing array index and in while loop pop that left sub array indexes.
*/
if ((newPivotIndex - 1) > left)
{
stack.Push(newPivotIndex - 1);
stack.Push(left);
}
look++;
if (look * 100 / length != percent)
{
percent = look * 100 / length;
Console.Write("\r{0}%", percent);
}
}
Console.Write("\r100%");
}
static int GetMedianPivot(ref string[,] array, int left, int right)
{
int mid = ((left + right) / 2);
/** middle number of array is less than left number of array
* then Swap middle and left number
*/
if (StringToInt32(array[mid, 3]) < StringToInt32(array[left, 3]))
{
Swap(ref array, left, mid);
}
/** rightmost number of array is less than left number of array
* then Swap right and left number
*/
if (StringToInt32(array[right, 3]) < StringToInt32(array[left, 3]))
{
Swap(ref array, left, right);
}
/**
* now right number is less than mid then Swap number which
* shifts median of three numbers into mid position
*/
if (StringToInt32(array[right, 3]) < StringToInt32(array[mid, 3]))
{
Swap(ref array, mid, right);
}
/**
* Shift Median or pivot from mid to rightmost position
* i.e. out of partitioning index
*/
Swap(ref array, mid, right);
/**return pivot number value which is right most now.**/
return StringToInt32(array[right, 3]);
}
static int Partition(ref string[,] array, int left, int right)
{
int pivot = StringToInt32(array[right, 3]); ;
/**
* Initialize low = left i.e. start index of array or logical sub-array
* Initialize high = right-1 i.e. end index of array or logical sub array;
* becuase right is pivot element so start from right-1;
*/
int low = left;
int high = right - 1;
do
{
while (StringToInt32(array[low, 3]) < pivot && low < right - 1)
{
low++;
}
while (StringToInt32(array[high, 3]) >= pivot && high > left)
{
high--;
}
/**
* Swap elements when any left element is greater than pivot or
* any right element is less than pivot.
*/
if (low < high)
{
Swap(ref array, low, high);
low++;
high--;
}
} while (low < high);
/**
* Swap right most pivot to its right position i.e. at low, then
* left elements are lesser and right elements are greater than pivot.
*/
if (StringToInt32(array[low, 3]) > pivot)
{
Swap(ref array, low, right);
}
return low;
}
static void Swap(ref string[,] array, int i, int j)
{
string[] buffer = new string[4];
buffer[0] = array[i, 0];
buffer[1] = array[i, 1];
buffer[2] = array[i, 2];
buffer[3] = array[i, 3];
array[i, 0] = array[j, 0];
array[i, 1] = array[j, 1];
array[i, 2] = array[j, 2];
array[i, 3] = array[j, 3];
array[j, 0] = buffer[0];
array[j, 1] = buffer[1];
array[j, 2] = buffer[2];
array[j, 3] = buffer[3];
}
答案 0 :(得分:0)
为什么你到处都使用refs?数组已经是一个指针,你可以有效地使用它作为参数,使用ref并不是更快。 Sami Kuhmonen也有一些重要的观察结果。
我会用这句话:
int ArrSize = 1921980;
long[] tmp = new long[ArrSize];
for (int i = 0; i < ArrSize; ++i)
{
tmp[i] = (((long)StringToInt32(array[i, 3])) << 32) | (long)i;
}
Array.Sort(tmp);
string[,] array2 = new string[ArrSize, 4];
for (int i = 0; i < ArrSize; ++i)
{
int index = (int)tmp[i];
array2[i, 0] = array[index, 0];
array2[i, 1] = array[index, 1];
array2[i, 2] = array[index, 2];
array2[i, 3] = array[index, 3];
}
负数是一个问题吗?将负数int转换为高位32位的长产品1。但是在我们的案例中他们被转移了。高32位最终等于数组中的原始数字。为了进行比较,(不是这样做,但你可以imagine that is)你一点一点地进行比较,如果你发现差异就产生结果。从中可以看出,只要高32位存在差异,低32位就无关紧要了。 (long)i
是安全的,因为我是数组中的索引,它始终是正数。 (int)tmp[i]
只丢弃高32位,不会以任何方式改变低位。结论是,这不是问题。
另一个问题可能是,这是否稳定,这意味着,这是否会保留原始序列中物品的顺序,以确定相同的项目?如果它们相等,则它们在高32位中没有区别。低32位是原始数组中的索引,这意味着它们代表原始顺序。如果高32位相等,则通过低32位进行比较,这意味着按原始顺序排序。这可能有点令人惊讶,但即使在Array.Sort
本身不是这样的情况下,以这种方式排序也是稳定的。