我分别使用 Java 的 fork/join 池和 .NET 的 Parallel.Invoke 实现了并行归并排序(mergesort),并观察到两个版本之间的性能差异:对 2GB 的文件排序,Java 用时 5 秒,而 .NET 用时 45 秒。这几乎是一个数量级的差异。
这是.Net代码:
/// <summary>
/// Sorts <c>values</c> with the recursive parallel merge sort implemented by
/// <c>Sort</c>, storing the tick count taken before and after the sort in
/// <c>Start</c> and <c>End</c>.
/// </summary>
public void ParallelSort()
{
    Start = DateTime.Now.Ticks;

    int lo = 0;
    int hi = values.Length - 1;
    int mid = values.Length / 2;

    // Every parallel level doubles the number of concurrent branches, so
    // ceil(log2(cores)) levels already give each core a branch. Using the raw
    // core count as the depth would fan out to 2^cores tasks instead.
    int maxDepth = 0;
    for (int branches = 1; branches < Environment.ProcessorCount; branches *= 2)
    {
        maxDepth++;
    }

    Sort(lo, mid, hi, maxDepth);

    End = DateTime.Now.Ticks;
}
/// <summary>
/// Recursively sorts the index range <paramref name="lo"/>..<paramref name="hi"/>
/// of <c>values</c> (<paramref name="hi"/> is the last index of the range),
/// splitting it at <paramref name="mid"/>.
/// </summary>
/// <param name="lo">First index of the range.</param>
/// <param name="mid">Last index of the left half; the right half starts at <c>mid + 1</c>.</param>
/// <param name="hi">Last index of the range.</param>
/// <param name="maxDepth">
/// Number of recursion levels that may still run their two halves through
/// <c>Parallel.Invoke</c>; at 0 the halves are sorted sequentially.
/// </param>
private void Sort(int lo, int mid, int hi, int maxDepth)
{
    // Short ranges are handed to the insertion sort helper.
    if (hi - lo < 16)
    {
        InsertionSort.Sort(values, lo, hi);
        return;
    }

    // "low + (high - low) / 2" yields the same midpoint as "(low + high) / 2"
    // but cannot overflow int on very large arrays.
    int leftMid = lo + (mid - lo) / 2;
    int rightMid = mid + (hi - mid) / 2;

    if (maxDepth > 0)
    {
        Parallel.Invoke(
            () => Sort(lo, leftMid, mid, maxDepth - 1),
            () => Sort(mid + 1, rightMid, hi, maxDepth - 1));
    }
    else
    {
        Sort(lo, leftMid, mid, 0);
        Sort(mid + 1, rightMid, hi, 0);
    }

    // Both halves are sorted at this point (Parallel.Invoke returns only
    // after both delegates have finished).
    Merge(lo, mid, hi);
}
据我从 Concurrency Visualizer 中看到的,线程的大部分时间都花在等待、同步和垃圾回收(GC)上(我的声望值不够,无法发布图片)。
怎样才能达到与 Java fork/join 实现(implementation)相当的速度?还是说,.NET 并行引擎的设计在激进的多线程(多任务)场景下本来就效率低下?
PS:Java代码
/**
 * Fork/join task that merge-sorts the index range lo..hi of {@code values}.
 *
 * NOTE(review): the fields {@code values}, {@code lo}, {@code mid}, {@code hi},
 * the {@code (lo, mid, hi)} constructor and {@code merge} are used below but
 * are not part of this excerpt.
 */
public class MergeSort extends RecursiveAction
{
    /**
     * Sorts the whole of {@code values} by running a root task on a fresh
     * {@link ForkJoinPool}. Failures are printed to standard error rather
     * than rethrown.
     */
    public void parallelSort()
    {
        int lo = 0;
        int hi = values.length - 1;
        int mid = values.length / 2;
        ForkJoinPool p = new ForkJoinPool();
        try
        {
            p.invoke(new MergeSort(lo, mid, hi));
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            // Shut the pool down even when the sort fails, so its worker
            // threads are not left behind.
            p.shutdown();
        }
    }

    /**
     * Splits ranges spanning more than 16 indices into two subtasks and
     * merges their results; shorter ranges go to the insertion sort helper.
     */
    @Override
    protected void compute()
    {
        if (hi - lo > 16)
        {
            // "low + (high - low) / 2" yields the same midpoint as
            // "(low + high) / 2" but cannot overflow int on huge arrays.
            int leftMid = lo + (mid - lo) / 2;
            int rightMid = mid + (hi - mid) / 2;
            invokeAll(
                new MergeSort(lo, leftMid, mid),
                new MergeSort(mid + 1, rightMid, hi));
            merge(lo, mid, hi);
        }
        else
        {
            ElementarySort.insertionSort(values, lo, hi);
        }
    }
}
更新1:有趣的是,自下而上的实现显示出相反的结果:.NET 的 Parallel.For() 表现出色,而 Java 的 ExecutorService.invokeAll() 则慢得多——对 256MB 的文件排序,分别用时 4 秒和 193 秒。我不确定原因,也许是我哪里做错了。Java代码:
/**
 * Bottom-up merge sort: for run lengths 1, 2, 4, ... builds one
 * {@code MergeSort} task per pair of adjacent runs and hands each pass to a
 * fixed pool of 8 threads, waiting for the pass to finish before starting the
 * next one. The number of tasks of every pass is printed to standard output.
 *
 * @throws Exception if waiting for a pass is interrupted
 */
public void parallelBottomUpSort() throws Exception
{
    ExecutorService p = Executors.newFixedThreadPool(8);
    try
    {
        final int length = values.length;
        // Run lengths are tracked as long: with int, doubling sz (and the
        // 2 * sz products below) wraps around for arrays longer than 2^30.
        for (long sz = 1; sz < length; sz *= 2)
        {
            long width = 2 * sz;
            // Integer ceiling of length / width, without a detour via double.
            int N = (int) ((length + width - 1) / width);
            System.out.println(N);
            List<MergeSort> ms = new ArrayList<>(N);
            for (int i = 0; i < N; i++)
            {
                long first = width * i;
                int lo = (int) first;
                int hi = (int) Math.min(first + width, length) - 1;
                // For a short final chunk mid may lie beyond hi, exactly as
                // the task has always been given it.
                int mid = (int) (first + sz - 1);
                ms.add(new MergeSort(values, aux, lo, mid, hi));
            }
            // NOTE(review): the futures returned by invokeAll are discarded,
            // so an exception thrown inside a task goes unnoticed here.
            p.invokeAll(ms);
        }
    }
    finally
    {
        // Release the pool threads even when a pass is interrupted.
        p.shutdown();
    }
}
.Net代码:
/// <summary>
/// Bottom-up merge sort of <c>values</c>: for run lengths 1, 2, 4, ... every
/// pair of adjacent runs is merged, and the merges of one pass are spread
/// over <c>Parallel.For</c>. Each iteration works on its own index range.
/// </summary>
public void ParallelBottomUpSort()
{
    int length = values.Length;

    // Run lengths are tracked as long: with int, doubling sz (and the 2 * sz
    // products below) wraps around for arrays longer than 2^30 elements.
    for (long sz = 1; sz < length; sz *= 2)
    {
        long runLength = sz;
        long width = 2 * runLength;

        // Integer ceiling of length / width, without a detour through double.
        int pairCount = (int)((length + width - 1) / width);

        Parallel.For(0, pairCount, i =>
        {
            long first = width * i;
            int lo = (int)first;
            int hi = (int)Math.Min(first + width, length) - 1;
            // For a short final chunk mid may lie beyond hi, exactly as
            // Merge has always been given it.
            int mid = (int)(first + runLength - 1);
            Merge(lo, mid, hi);
        });
    }
}
看起来 .NET 的并行引擎总体上更稳定,而 Java 的并发包则表现两极:既有最好的情况,也有最糟糕的情况。
更新2:可编译的源代码
/// <summary>
/// Parallel merge sort over an <c>int</c> array, offered in a bottom-up
/// (<see cref="ParallelBottomUpSort"/>) and a top-down
/// (<see cref="ParallelTopDownSort()"/>) variant. Both sort the array passed
/// to the constructor in place and share one scratch buffer of the same length.
/// </summary>
class Sort
{
    private readonly int[] values, aux;

    /// <summary>Wraps <paramref name="values"/> and allocates a scratch buffer of equal length.</summary>
    /// <param name="values">Array to be sorted in place.</param>
    public Sort(int[] values)
    {
        this.values = values;
        this.aux = new int[values.Length];
    }

    /// <summary>
    /// Merges the sorted runs <c>values[lo..mid]</c> and <c>values[mid+1..hi]</c>
    /// (all bounds inclusive) into one sorted run, using <c>aux</c> as scratch space.
    /// </summary>
    /// <param name="lo">First index of the left run.</param>
    /// <param name="mid">Last index of the left run; may be at or beyond <paramref name="hi"/> when there is no right run.</param>
    /// <param name="hi">Last index of the right run.</param>
    private unsafe void Merge(int lo, int mid, int hi)
    {
        // No right-hand run (this also covers lo == hi): the range is a
        // single sorted run already, so there is nothing to copy or merge.
        if (mid >= hi)
        {
            return;
        }

        // The runs are already in order relative to each other.
        if (values[mid] <= values[mid + 1])
        {
            return;
        }

        // Array.Copy works with element indices. Buffer.BlockCopy takes int
        // byte offsets and counts, which overflow once sizeof(int) * index
        // reaches 2^31, i.e. for ranges of 2^29 elements and more.
        Array.Copy(values, lo, aux, lo, hi - lo + 1);

        int i = lo;
        int j = mid + 1;
        fixed (int* a = values, b = aux)
        {
            for (int k = lo; k <= hi; k++)
            {
                if (i > mid)
                {
                    a[k] = b[j++];      // left run exhausted
                }
                else if (j > hi)
                {
                    a[k] = b[i++];      // right run exhausted
                }
                else if (b[i] <= b[j])
                {
                    a[k] = b[i++];      // ties take the left element
                }
                else
                {
                    a[k] = b[j++];
                }
            }
        }
    }

    /// <summary>
    /// Bottom-up merge sort: for run lengths 1, 2, 4, ... every pair of
    /// adjacent runs is merged, and the merges of one pass are spread over
    /// <c>Parallel.For</c>. Each iteration works on its own index range.
    /// </summary>
    public void ParallelBottomUpSort()
    {
        int length = values.Length;

        // Run lengths are tracked as long: with int, doubling sz (and the
        // 2 * sz products below) wraps around for arrays longer than 2^30.
        for (long sz = 1; sz < length; sz *= 2)
        {
            long runLength = sz;
            long width = 2 * runLength;

            // Integer ceiling of length / width, without a detour through double.
            int pairCount = (int)((length + width - 1) / width);

            Parallel.For(0, pairCount, i =>
            {
                long first = width * i;
                int lo = (int)first;
                int hi = (int)Math.Min(first + width, length) - 1;
                // For a short final chunk mid lies at or beyond hi; Merge
                // treats that as "nothing to merge".
                int mid = (int)(first + runLength - 1);
                Merge(lo, mid, hi);
            });
        }
    }

    /// <summary>
    /// Top-down merge sort of the whole array; the upper recursion levels run
    /// their two halves in parallel.
    /// </summary>
    public void ParallelTopDownSort()
    {
        int lo = 0;
        int hi = values.Length - 1;
        int mid = values.Length / 2;

        // ceil(log2(cores)) parallel levels: every level doubles the number of
        // branches, and rounding up keeps all cores busy when the core count
        // is not a power of two. Counting in integers also avoids truncating
        // a floating-point logarithm.
        int maxDepth = 0;
        for (int branches = 1; branches < Environment.ProcessorCount; branches *= 2)
        {
            maxDepth++;
        }

        ParallelTopDownSort(lo, mid, hi, maxDepth);
    }

    /// <summary>
    /// Recursively sorts <c>values[lo..hi]</c> (inclusive), split at <paramref name="mid"/>.
    /// </summary>
    /// <param name="lo">First index of the range.</param>
    /// <param name="mid">Last index of the left half; the right half starts at <c>mid + 1</c>.</param>
    /// <param name="hi">Last index of the range.</param>
    /// <param name="maxDepth">
    /// Number of recursion levels that may still run their halves through
    /// <c>Parallel.Invoke</c>; at 0 the halves are sorted sequentially.
    /// </param>
    private void ParallelTopDownSort(int lo, int mid, int hi, int maxDepth)
    {
        // Short ranges are handed to the insertion sort helper.
        if (hi - lo < 16)
        {
            InsertionSort.Sort(values, lo, hi);
            return;
        }

        // "low + (high - low) / 2" yields the same midpoint as
        // "(low + high) / 2" but cannot overflow int on very large arrays.
        int leftMid = lo + (mid - lo) / 2;
        int rightMid = mid + (hi - mid) / 2;

        if (maxDepth > 0)
        {
            Parallel.Invoke(
                () => ParallelTopDownSort(lo, leftMid, mid, maxDepth - 1),
                () => ParallelTopDownSort(mid + 1, rightMid, hi, maxDepth - 1));
        }
        else
        {
            ParallelTopDownSort(lo, leftMid, mid, 0);
            ParallelTopDownSort(mid + 1, rightMid, hi, 0);
        }

        Merge(lo, mid, hi);
    }

    /// <summary>
    /// Loads the file named by the first argument as an array of 32-bit
    /// integers, sorts it with <see cref="ParallelBottomUpSort"/> and prints
    /// the load time in milliseconds and the sort time in whole seconds.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the file to sort.</param>
    static unsafe void Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.Error.WriteLine("Usage: pass the path of the file to be sorted as the first argument");
            return;
        }

        // path to file to be sorted
        String path = args[0];
        Console.WriteLine("Reading file");

        // Loading file
        var loadTimer = System.Diagnostics.Stopwatch.StartNew();
        long intCount = (new FileInfo(path)).Length / sizeof(int);
        int[] values = new int[intCount];

        // A file without a single complete int needs no mapping (and an empty
        // file cannot be memory-mapped at all).
        if (values.Length > 0)
        {
            using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(path))
            using (MemoryMappedViewAccessor mma = mmf.CreateViewAccessor())
            {
                byte* ptr = null;
                try
                {
                    mma.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
                    Marshal.Copy(new IntPtr(ptr), values, 0, values.Length);
                }
                finally
                {
                    // Every successful AcquirePointer must be paired with a
                    // ReleasePointer, otherwise the view handle is never freed.
                    if (ptr != null)
                    {
                        mma.SafeMemoryMappedViewHandle.ReleasePointer();
                    }
                }
            }
        }

        loadTimer.Stop();
        Console.WriteLine("Loaded in " + loadTimer.ElapsedMilliseconds + " ms");

        // Sorting
        Console.WriteLine("Sorting " + values.Length + " ints");
        Sort ms = new Sort(values);
        var sortTimer = System.Diagnostics.Stopwatch.StartNew();
        ms.ParallelBottomUpSort();
        sortTimer.Stop();
        Console.WriteLine("Done sorting");
        // Elapsed time in whole seconds.
        Console.WriteLine(sortTimer.ElapsedMilliseconds / 1000);
    }
}