我如何对二进制文件进行外部合并排序

时间:2013-05-01 03:58:42

标签: java sorting external mergesort

这个简短的程序生成一个名为data.bin的二进制文件,其中包含一堆随机生成的项目,每个项目为1024字节。每个项目的前24个字节是键。那么如何从data.bin文件中读取每个项目并对其进行外部合并排序?

import java.math.*;
import java.io.*;
import java.util.*;

/**
 * Generates {@code data.bin}: a sequence of pseudo-random items of 1024 bytes each.
 *
 * <p>The number of items is read from standard input. The random generator is
 * created with a fixed seed, so it produces the same byte sequence on every run.
 */
public class CreateRandomDataFile {

   /**
    * Prompts for an item count and writes that many 1024-byte random items to
    * {@code data.bin}.
    *
    * @param args ignored
    * @throws Exception if the count cannot be read or the file cannot be written
    */
   public static void main(String[] args) throws Exception {

      Random r = new Random(482010);
      Scanner input = new Scanner(System.in);
      byte[] item = new byte[1024];

      System.out.print("How many items (perhaps 800000)\n> ");
      int number = input.nextInt();

      // try-with-resources closes the file even when a write fails part-way.
      try (RandomAccessFile data = new RandomAccessFile("data.bin","rws")) {
         // Opening a RandomAccessFile does not truncate an existing file; drop
         // any old content so the file holds exactly `number` items afterwards.
         data.setLength(0);

         for (int i=0; i<number; i++) {
            r.nextBytes(item);
            data.write(item);
         }
      }
      System.out.println("Done");
   }
}

1 个答案:

答案 0 :(得分:1)

查看http://www.vogella.com/articles/JavaAlgorithmsMergesort/article.html

使用比较器对任何对象进行此操作:

    package com.sel2in.sort;

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;

    /**
     * Top-down merge sort that orders elements with a caller-supplied {@link Comparator}.
     * Based on http://www.vogella.com/articles/JavaAlgorithmsMergesort/article.html
     *
     * <p>Elements that compare as equal keep their relative order, because the
     * merge step takes from the left run on a tie.
     *
     * <p>The working lists are kept in instance fields, so a single instance must
     * not run two sorts at the same time.
     *
     * @param <T> element type
     */
    public class MergeSort<T> {

        /** The list being sorted; the sorted result is written back into it. */
        private List<T> items;
        /** Scratch copy with the same size as {@link #items}, used while merging. */
        private List<T> helper;

        private Comparator<? super T> comprtr;
        private int cnt;

        /**
         * Sorts the given array in place.
         *
         * @param values  array to sort; its elements are reordered
         * @param comprtr ordering to apply
         */
        public void sort(T[] values, Comparator<? super T> comprtr) {
            // Arrays.asList is a fixed-size view backed by the array, so every
            // set() done by the list-based sort writes straight through to `values`.
            sort(Arrays.asList(values), comprtr);
        }

        /**
         * Sorts the given list in place.
         *
         * @param values  list to sort; it must support {@code set}
         * @param comprtr ordering to apply
         */
        public void sort(List<T> values, Comparator<? super T> comprtr) {
            items = values;
            cnt = values.size();
            // The helper must already contain cnt elements: merge() overwrites
            // positions with set(), which fails on an empty list.
            this.helper = new ArrayList<T>(items);
            this.comprtr = comprtr;
            mergesort(0, cnt - 1);
        }

        /** Recursively sorts the inclusive index range {@code [low, high]}. */
        private void mergesort(int low, int high) {
            // A range with fewer than two elements is already sorted
            if (low < high) {
                // Midpoint computed without adding low and high (avoids int overflow)
                int middle = low + (high - low) / 2;
                // Sort the left half
                mergesort(low, middle);
                // Sort the right half
                mergesort(middle + 1, high);
                // Combine them both
                merge(low, middle, high);
            }
        }

        /** Merges the sorted runs {@code [low, middle]} and {@code [middle + 1, high]}. */
        private void merge(int low, int middle, int high) {

            // Copy both runs into the helper list
            for (int i = low; i <= high; i++) {
                helper.set(i, items.get(i));
            }

            int i = low;
            int j = middle + 1;
            int k = low;
            // Copy the smaller head element of either run back into the target list
            while (i <= middle && j <= high) {
                int cm = comprtr.compare(helper.get(i), helper.get(j));
                if (cm <= 0) {
                    // Ties are taken from the left run
                    items.set(k, helper.get(i));
                    i++;
                } else {
                    items.set(k, helper.get(j));
                    j++;
                }
                k++;
            }
            // Copy what is left of the left run. Leftovers of the right run are
            // already at their final positions in the target list.
            while (i <= middle) {
                items.set(k, helper.get(i));
                k++;
                i++;
            }

        }

    }

用于 Entry 类型的字符串比较器——这对你有用,因为你的每条记录都由键和其余内容组成。我们也可以定义自己的对象而不使用 Entry,但 Entry 已经现成可用。

    package com.sel2in.sort;

    import java.util.Comparator;    
    import java.util.Map.Entry;

    /** compare 2 strings, taken from the key of a Map entry - Tushar Kapila */
    public class MapStrKeyComparator implements Comparator<java.util.Map.Entry<String,Object>> {
        @Override
        public int compare(Entry<String,Object> o1, Entry<String,Object> o2) {
            String s = o1.getKey().toString();
            String n = o2.getKey().toString();
            return s.compareTo(n);

        }
    }

测试

    package com.sel2in.sort;

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Map.Entry;
    import java.util.Set;

    /**
     * Small demo: copies the entries of a map into a list, sorts the list by key
     * with {@code MergeSort} and {@code MapStrKeyComparator}, and prints the list
     * before and after sorting.
     */
    public class TstMege {

        /**
         * @param args ignored
         */
        public static void main(String[] args) {
            // here instead of the test values load from your file and separate key and value
            Map<String, String> sample = new HashMap<String, String>();
            sample.put("tt", "nxcn3er3e33");
            sample.put("aa", "gjkrmt454");
            sample.put("zz", "rft56GDD");
            // same key and value again: the map content does not change
            sample.put("zz", "rft56GDD");

            // The sorter works on a List, so copy the entry set into one.
            Set<Entry<String, String>> entrySet = sample.entrySet();
            ArrayList<Entry<String, String>> toSort =
                    new ArrayList<Entry<String, String>>(entrySet.size());
            toSort.addAll(entrySet);
            System.out.println("before " + toSort);

            // Raw types are used on purpose: the comparator is declared for
            // Entry<String,Object> while the list holds Entry<String,String>.
            MapStrKeyComparator keyOrder = new MapStrKeyComparator();
            MergeSort mergeSorter = new MergeSort();
            mergeSorter.sort(toSort, keyOrder);
            System.out.println("sorted " + toSort);
        }

    }

输出

before [tt=nxcn3er3e33, zz=rft56GDD, aa=gjkrmt454]
sorted [aa=gjkrmt454, tt=nxcn3er3e33, zz=rft56GDD]

我们自己的 Entry 类

package com.sel2in.sort;

import java.util.Map.Entry;

/**
 * Minimal {@link Entry} implementation: a fixed key paired with a replaceable value.
 *
 * <p>{@code equals} and {@code hashCode} follow the definitions given in the
 * {@code Map.Entry} documentation, so an instance is equal to any other entry
 * implementation that holds an equal key and an equal value.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class TEntry<K,V> implements Entry<K, V> {
    final K key;
    V value;

    /** Creates an entry whose key and value are both {@code null}. */
    TEntry(){
        key = null;
    }

    /** Creates an entry with the given key and a {@code null} value. */
    TEntry(K k){
        key = k;
    }

    /** Creates an entry with the given key and value. */
    TEntry(K k, V v) {
        value = v;
        key = k;
    }

    @Override
    public K getKey() {
        return key;
    }

    @Override
    public V getValue() {
        return value;
    }

    /**
     * Replaces the value.
     *
     * @param newValue value to store
     * @return the value held before the call
     */
    @Override
    public V setValue(V newValue) {
        V oldValue = value;
        value = newValue;
        return oldValue;
    }

    /** Two entries are equal when their keys are equal and their values are equal. */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof Entry)) {
            return false;
        }
        Entry<?, ?> that = (Entry<?, ?>) other;
        return sameOrBothNull(key, that.getKey()) && sameOrBothNull(value, that.getValue());
    }

    /** XOR of the key hash and the value hash, with {@code null} counting as 0. */
    @Override
    public int hashCode() {
        return (key == null ? 0 : key.hashCode()) ^ (value == null ? 0 : value.hashCode());
    }

    @Override
    public String toString(){
        return key + ":" + value;
    }

    /** Null-safe equality check used by {@link #equals(Object)}. */
    private static boolean sameOrBothNull(Object a, Object b) {
        return a == null ? b == null : a.equals(b);
    }

}

读取二进制文件并排序

    package com.sel2in.sort;

    import java.io.RandomAccessFile;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Map.Entry;

        /**
         * Demo: loads every complete 1024-byte record of {@code data.bin} into
         * memory, sorts the records by their first 24 bytes (the key) with
         * {@code MergeSort}, and prints the list before and after sorting.
         */
        public class TstFileMegeSort {

            /**
             * @param args ignored
             */
            public static void main(String[] args) {
                ArrayList<Entry<String, String>> lst = new ArrayList<Entry<String, String>> ();

                // Mode "r": the file is only read here, so no write access is requested.
                // try-with-resources closes the file on every exit path.
                try (RandomAccessFile data = new RandomAccessFile("data.bin","r")) {
                    long recs = data.length() / 1024;
                    byte []b = new byte[1024];

                    for (long cnt = 0; cnt < recs; cnt++) {
                        data.readFully(b);
                        // ISO-8859-1 turns each byte into exactly one char, so arbitrary
                        // binary content is kept intact instead of being replaced where
                        // the platform default charset cannot decode it.
                        String key = new String(b, 0, 24,
                                java.nio.charset.StandardCharsets.ISO_8859_1);
                        String value = new String(b, 24, 1024 - 24,
                                java.nio.charset.StandardCharsets.ISO_8859_1);
                        lst.add(new TEntry<String, String>(key, value));
                    }
                } catch (Exception e) {
                    // Report the problem and carry on with whatever was loaded so far.
                    e.printStackTrace();
                }

                // Raw types are used on purpose: the comparator is declared for
                // Entry<String,Object> while the list holds Entry<String,String>.
                MapStrKeyComparator cpmr = new MapStrKeyComparator();
                MergeSort sorter = new MergeSort();
                System.out.println("before " + lst);
                sorter.sort(lst, cpmr);
                System.out.println("sorted " + lst);

            }

        }