Single vs MultiThread:Mergesort性能比较

时间:2015-09-23 14:17:47

标签: java arrays multithreading performance sorting

任何人都可以查看下面的代码段吗?

这是一个测试类,用来比较 mergesort 的两种方案:多线程方案(把一个单词数组拆分成10个子数组分别排序)和单线程方案(把整个数组作为参数直接传入)。

由于某种原因,多线程版本只有在数组大小为100万时才更快;当我把数组大小增加到1000万时,它反而比单线程方案更慢(比单线程方案多花约2秒)。

可能是因为最后对拼接起来的各个有序列表再做一次合并排序的那一步(第120行)。

我猜数据量大到一定程度之后,拆分数组、用10个线程处理数据集、再对合并后的结果排序,就不再有性能优势了。

我知道它不是一个合适的基准,跳过JVM预热步骤可能会对时间产生一点影响。

数组大小 = 100万时的平均时间:

单线程:450毫秒

多线程:350毫秒

数组大小 = 1000万时的平均时间:

单线程:3.5s

多线程:5s

package brandao;

import com.fnf.xes.services.test.util.generators.impl.StupidDictionary;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Rough benchmark comparing a single-threaded sort of a list of words with a
 * multi-threaded variant that sorts contiguous slices of the list in parallel
 * and then sorts the concatenation of those sorted slices.
 *
 * <p>This is not a rigorous benchmark: there is no JVM warm-up phase and each
 * variant is timed exactly once.
 */
public class ThreadTest {

    /** Number of worker threads, and therefore slices, used by the multi-threaded variant. */
    private static final int THREAD_COUNT = 10;

    private static final StupidDictionary stupidDictionary = new StupidDictionary();
    private static final ArrayList<String> list = new ArrayList<String>();
    private static final ArrayList<String> list2 = new ArrayList<String>();

    /**
     * Fills two identical lists with random words, then times the single-threaded
     * sort on the first list and the multi-threaded sort on the second.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ThreadTest test = new ThreadTest();
        int length = 10000000;

        stupidDictionary.loadDefault();

        list.ensureCapacity(length);
        list2.ensureCapacity(length);
        // Strictly less-than: generate exactly `length` words, not length + 1.
        for (int i = 0; i < length; i++) {
            String word = stupidDictionary.getRandomWord();
            list.add(word);
            list2.add(word);
        }

        // nanoTime is the monotonic clock intended for measuring elapsed time.
        long startTime = System.nanoTime();
        test.testSingleThread(list);
        long elapsedTime = (System.nanoTime() - startTime) / 1000000L;
        System.out.println("Single Thread : " + elapsedTime + "ms");

        startTime = System.nanoTime();
        test.testMultiThread(list2);
        elapsedTime = (System.nanoTime() - startTime) / 1000000L;
        System.out.println("Multi Thread : " + elapsedTime + "ms");
    }

    /**
     * Sorts the given list in place on the calling thread.
     *
     * @param array the list to sort
     */
    public void testSingleThread(ArrayList<String> array) {
        mergeSort(array);
    }

    /**
     * Sorts the given list using {@code THREAD_COUNT} worker threads.
     *
     * <p>The list is divided into contiguous, non-overlapping {@code subList} views of
     * near-equal size, whatever the size of the list. Each view is sorted on its own
     * thread; because the views write through to {@code obj}, {@code obj} ends up
     * sorted slice by slice. The method waits for every worker to finish, copies the
     * sorted slices into a new list, and sorts that list to combine the slices.
     *
     * @param obj the list to sort; modified in place (each slice becomes sorted)
     * @return a new, fully sorted list holding the same elements as {@code obj}
     * @throws IllegalStateException if the calling thread is interrupted while waiting
     *         for the workers; the interrupt flag is restored before throwing
     */
    public ArrayList<String> testMultiThread(final ArrayList<String> obj) {
        final int size = obj.size();
        final List<MyThread> workers = new ArrayList<MyThread>(THREAD_COUNT);
        final List<Thread> threads = new ArrayList<Thread>(THREAD_COUNT);

        for (int i = 0; i < THREAD_COUNT; i++) {
            // long arithmetic so size * i cannot overflow for very large lists.
            int from = (int) ((long) size * i / THREAD_COUNT);
            int to = (int) ((long) size * (i + 1) / THREAD_COUNT);

            MyThread worker = new MyThread(obj.subList(from, to));
            Thread thread = new Thread(worker);
            workers.add(worker);
            threads.add(thread);
            thread.start();
        }

        // Wait for every worker before reading its slice; otherwise the copy below
        // races with the workers still writing sorted elements back into obj.
        for (Thread thread : threads) {
            try {
                thread.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IllegalStateException("Interrupted while waiting for sort workers", e);
            }
        }

        ArrayList<String> result = new ArrayList<String>(size);
        for (MyThread worker : workers) {
            result.addAll(worker.getToSort());
        }

        // The concatenation consists of THREAD_COUNT sorted runs; sort it to combine them.
        mergeSort(result);

        return result;
    }

    /**
     * Sorts the given list in place into natural order by delegating to
     * {@link Collections#sort(List)}.
     *
     * @param obj the list to sort
     */
    public void mergeSort(List<String> obj) {
        Collections.sort(obj);
    }

    /** Runnable that sorts the list it was constructed with, in place. */
    public class MyThread implements Runnable {
        List<String> toSort;

        /**
         * @param toSort the list (or list view) this task will sort when run
         */
        public MyThread(List<String> toSort) {
            this.toSort = toSort;
        }

        /** Sorts the list passed to the constructor. */
        @Override
        public void run() {
            mergeSort(toSort);
        }

        /**
         * @return the list passed to the constructor; sorted only after {@link #run()} has completed
         */
        public List<String> getToSort() {
            return this.toSort;
        }
    }
}

1 个答案:

答案 0 :(得分:1)

Mergesort 应该是受内存带宽限制的,不过每个内核各自的缓存可能会有所帮助。也许可以试试用4个或8个线程,而不是10个。各部分排序完成后,直接把这4个或8个已排序的部分归并起来,而不是先拼接再重新排序一遍。

我不确定这会有多大帮助。用 C/C++ 对400万个64位无符号整数做归并排序,在我的系统(Intel 2600K,3.4 GHz)上只需不到半秒。