这个简短的程序生成一个名为data.bin的二进制文件,其中包含一堆随机生成的项目,每个项目为1024字节。每个项目的前24个字节是键。那么如何从data.bin文件中读取每个项目并对其进行外部合并排序?
import java.math.*;
import java.io.*;
import java.util.*;
public class CreateRandomDataFile {
public static void main(String[] args) throws Exception {
RandomAccessFile data = new RandomAccessFile("data.bin","rws");
Random r = new Random(482010);
Scanner input = new Scanner(System.in);
int number = 100000;
byte[] item = new byte[1024];
System.out.print("How many items (perhaps 800000)\n> ");
number = input.nextInt();
for (int i=0; i<number; i++) {
r.nextBytes(item);
data.write(item);
}
data.close();
System.out.println("Done");
}
}
答案 0 :(得分:1)
查看http://www.vogella.com/articles/JavaAlgorithmsMergesort/article.html
使用比较器对任何对象进行此操作:
package com.sel2in.sort;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
/** based on http://www.vogella.com/articles/JavaAlgorithmsMergesort/article.html */
public class MergeSort<T> {
private List<T> items;
private List<T> helper;
private Comparator<T> comprtr;
private int cnt;
public void sort(T[] values, Comparator<T> comprtr) {
items = new ArrayList<T>();
items.addAll(Arrays.asList(values));
cnt = values.length;
this.helper = new ArrayList<T>(cnt);
this.comprtr = comprtr;
mergesort(0, cnt - 1);
}
public void sort(List<T> values, Comparator<T> comprtr) {
items = values;
//items.addAll(Arrays.asList(values));
cnt = values.size();
this.helper = new ArrayList<T>(cnt);
helper.addAll(items);
this.comprtr = comprtr;
mergesort(0, cnt - 1);
}
private void mergesort(int low, int high) {
// Check if low is smaller then high, if not then the array is sorted
if (low < high) {
// Get the index of the element which is in the middle
int middle = low + (high - low) / 2;
// Sort the left side of the array
mergesort(low, middle);
// Sort the right side of the array
mergesort(middle + 1, high);
// Combine them both
merge(low, middle, high);
}
}
private void merge(int low, int middle, int high) {
// Copy both parts into the helper array
for (int i = low; i <= high; i++) {
// helper[i] = items[i];
helper.set(i, items.get(i));
}
int i = low;
int j = middle + 1;
int k = low;
// Copy the smallest values from either the left or the right side back
// to the original array
while (i <= middle && j <= high) {
int cm = comprtr.compare(helper.get(i), helper.get(j));
//(helper[i] <= helper[j])
if (cm <= 0) {
//items[k] = helper[i];
items.set(k, helper.get(i));
i++;
} else {
//items[k] = helper[j];
items.set(k, helper.get(j));
j++;
}
k++;
}
// Copy the rest of the left side of the array into the target array
while (i <= middle) {
//items[k] = helper[i];
items.set(k, helper.get(i));
k++;
i++;
}
}
}
字符串比较器,用于输入类型 - 对你有用,因为你有键和其余的行,我们可以让我们自己的对象不必使用Entry但它已经存在
package com.sel2in.sort;
import java.util.Comparator;
import java.util.Map.Entry;
/** compare 2 strings, taken from the key of a Map entry - Tushar Kapila */
public class MapStrKeyComparator implements Comparator<java.util.Map.Entry<String,Object>> {
@Override
public int compare(Entry<String,Object> o1, Entry<String,Object> o2) {
String s = o1.getKey().toString();
String n = o2.getKey().toString();
return s.compareTo(n);
}
}
测试
package com.sel2in.sort;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
public class TstMege {
/**
* @param args
*/
public static void main(String[] args) {
Map<String, String>mp = new HashMap<String, String>();
//here instead of the test values load from your file and seperate key and value
mp.put("tt", "nxcn3er3e33");
mp.put("aa", "gjkrmt454");
mp.put("zz", "rft56GDD");
mp.put("zz", "rft56GDD");
//sort
Set<Entry<String, String>> entries = mp.entrySet();
MapStrKeyComparator cpmr = new MapStrKeyComparator();
MergeSort sorter = new MergeSort();
//sort. sort(List<T> values, Comparator<T> comprtr)
ArrayList<Entry<String, String>> lst = new ArrayList<Entry<String, String>>(entries.size());
lst.addAll(entries);
System.out.println("before " + lst);
sorter.sort(lst, cpmr);
System.out.println("sorted " + lst);
}
}
输出
before [tt=nxcn3er3e33, zz=rft56GDD, aa=gjkrmt454] sorted [aa=gjkrmt454, tt=nxcn3er3e33, zz=rft56GDD]
我们自己的入门级
package com.sel2in.sort;
import java.util.Map.Entry;
public class TEntry<K,V> implements Entry<K, V> {
final K key;
V value;
TEntry(){
key = null;
}
TEntry(K k){
key = k;
}
TEntry(K k, V v) {
value = v;
key = k;
}
@Override
public K getKey() {
return key;
}
@Override
public V getValue() {
return value;
}
@Override
public V setValue(V newValue) {
V oldValue = value;
value = newValue;
return oldValue;
}
public String toString(){
return key + ":" + value;
}
//can copy hashcode , equals but not important now
}
文件读取二进制和排序
package com.sel2in.sort;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map.Entry;
public class TstFileMegeSort {
/**
* @param args
*/
public static void main(String[] args) {
//Map<String, String>mp = new HashMap<String, String>();
ArrayList<Entry<String, String>> lst = new ArrayList<Entry<String, String>> ();
TEntry<String, String> en = null;
//here instead of the test values load from your file and seperate key and value
try {
RandomAccessFile data = new RandomAccessFile("data.bin","rws");
long l = data.length();
long recs = l / 1024;
long cnt = 0;
byte []b = new byte[1024];
while(cnt < recs){
cnt++;
data.readFully(b);
byte []key = Arrays.copyOfRange(b, 0, 24);
byte []value = Arrays.copyOfRange(b, 24, 1024);
en = new TEntry<String, String>(new String(key), new String(value));
lst.add(en);
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
//sort
//Set<Entry<String, String>> entries = mp.entrySet();
MapStrKeyComparator cpmr = new MapStrKeyComparator();
MergeSort sorter = new MergeSort();
//sort. sort(List<T> values, Comparator<T> comprtr)
//ArrayList<Entry<String, String>> lst = new ArrayList<Entry<String, String>>(entries.size());
//lst.addAll(entries);
System.out.println("before " + lst);
sorter.sort(lst, cpmr);
System.out.println("sorted " + lst);
}
}