我有一些数据,需要把它们分到不同的桶中,并且在向每个桶分配数据时,要保证同一个桶中的重复元素不超过5个。
例如:
build.gradle
在这个例子的第二个桶中,只有5个元素是重复的。我有超过 100k 条数据需要按上述逻辑处理。该怎么做?
答案 0 :(得分:1)
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
/**
 * Distributes integers into fixed-size buckets.
 *
 * <p>A value that occurs more than once in the input is a "repeated" value. Each bucket receives
 * at most one occurrence of any repeated value and at most a configurable number of distinct
 * repeated values; remaining slots are filled with values that occur exactly once.
 */
public class SeparateData {

    /** Runs the distribution on a sample data set and prints the resulting table. */
    public static void main(String[] args) {
        Integer[] intArr = {1, 2, 3, 4, 5, 35, 7, 8, 9, 10, 1, 2, 3, 4, 5, 73, 26, 19, 15, 1, 2, 3, 4, 5, 6, 33, 21,
                22, 12, 88, 3, 2, 4, 5, 74, 13, 14, 17, 20, 1, 44, 30, 31, 37, 5, 4, 3, 99, 66, 11, 2, 5, 7, 43, 27};
        separate(intArr, 10, 5);
    }

    /**
     * Splits the values into buckets and prints the buckets as a tab-separated table on
     * standard output (one column per bucket).
     *
     * <p>The input array is not modified; a sorted copy is used internally. If
     * {@code dupilicateLimit} is larger than {@code countPerBucket} it is capped at
     * {@code countPerBucket}, so no value is dropped for lack of room.
     *
     * @param intArr          values to distribute; must not be {@code null}, and {@code null}
     *                        elements are not supported
     * @param countPerBucket  number of slots in each bucket; must be at least 1
     * @param dupilicateLimit maximum number of repeated values placed in one bucket; must be at
     *                        least 1
     * @throws IllegalArgumentException if {@code countPerBucket} or {@code dupilicateLimit} is
     *                                  smaller than 1
     * @throws NullPointerException     if {@code intArr} is {@code null}
     */
    static void separate(Integer[] intArr, int countPerBucket, int dupilicateLimit) {
        if (countPerBucket < 1) {
            throw new IllegalArgumentException("countPerBucket must be >= 1 but was " + countPerBucket);
        }
        if (dupilicateLimit < 1) {
            throw new IllegalArgumentException("dupilicateLimit must be >= 1 but was " + dupilicateLimit);
        }

        // Sort a private copy so the caller's array keeps its original order.
        Integer[] sorted = Arrays.copyOf(intArr, intArr.length);
        Arrays.sort(sorted);

        List<Integer> singles = new ArrayList<Integer>();
        LinkedHashMap<Integer, Integer> repeatCounts = new LinkedHashMap<Integer, Integer>();
        collectRuns(sorted, singles, repeatCounts);

        // A bucket cannot hold more repeated values than it has slots.
        int repeatsPerBucket = Math.min(dupilicateLimit, countPerBucket);
        List<List<Integer>> rounds = planRepeatRounds(repeatCounts, repeatsPerBucket);

        List<Integer[]> buckets = fillBuckets(rounds, singles, countPerBucket);
        writeResult(countPerBucket, buckets);
    }

    /**
     * Scans a sorted array and classifies each distinct value: values occurring once are appended
     * to {@code singles}; values occurring several times are stored in {@code repeatCounts}
     * together with their number of occurrences.
     */
    private static void collectRuns(Integer[] sorted, List<Integer> singles,
            LinkedHashMap<Integer, Integer> repeatCounts) {
        int start = 0;
        while (start < sorted.length) {
            int end = start + 1;
            // equals(), not ==: boxed Integers are only identity-equal inside the small-value cache.
            while (end < sorted.length && sorted[end].equals(sorted[start])) {
                end++;
            }
            int runLength = end - start;
            if (runLength == 1) {
                singles.add(sorted[start]);
            } else {
                repeatCounts.put(sorted[start], runLength);
            }
            start = end;
        }
    }

    /**
     * Builds one list of repeated values per bucket. Each round takes one occurrence of up to
     * {@code repeatsPerBucket} values, in map order, until every occurrence has been handed out.
     * The map is consumed (left empty) by this method.
     */
    private static List<List<Integer>> planRepeatRounds(LinkedHashMap<Integer, Integer> repeatCounts,
            int repeatsPerBucket) {
        List<List<Integer>> rounds = new ArrayList<List<Integer>>();
        while (!repeatCounts.isEmpty()) {
            List<Integer> round = new ArrayList<Integer>();
            Iterator<Integer> values = repeatCounts.keySet().iterator();
            while (values.hasNext() && round.size() < repeatsPerBucket) {
                Integer value = values.next();
                round.add(value);
                int remaining = repeatCounts.get(value) - 1;
                if (remaining == 0) {
                    values.remove();
                } else {
                    // Re-putting an existing key only changes its value, so the iterator stays valid.
                    repeatCounts.put(value, remaining);
                }
            }
            rounds.add(round);
        }
        return rounds;
    }

    /**
     * Creates the buckets: each round of repeated values starts a bucket, which is then topped up
     * with single values; leftover single values go into additional buckets. Unused slots stay
     * {@code null}.
     */
    private static List<Integer[]> fillBuckets(List<List<Integer>> rounds, List<Integer> singles,
            int countPerBucket) {
        List<Integer[]> buckets = new ArrayList<Integer[]>();
        int nextSingle = 0;

        for (List<Integer> round : rounds) {
            Integer[] bucket = new Integer[countPerBucket];
            int slot = 0;
            for (Integer value : round) {
                bucket[slot++] = value;
            }
            while (slot < countPerBucket && nextSingle < singles.size()) {
                bucket[slot++] = singles.get(nextSingle++);
            }
            buckets.add(bucket);
        }

        while (nextSingle < singles.size()) {
            Integer[] bucket = new Integer[countPerBucket];
            // Stop at the end of the list: the last bucket may be only partially filled.
            for (int slot = 0; slot < countPerBucket && nextSingle < singles.size(); slot++) {
                bucket[slot] = singles.get(nextSingle++);
            }
            buckets.add(bucket);
        }
        return buckets;
    }

    /**
     * Prints the buckets as a table: a header line naming each bucket, followed by
     * {@code countPerBucket} rows. Each present value is followed by a tab; empty
     * ({@code null}) slots print nothing.
     *
     * @param countPerBucket number of rows to print
     * @param buckets        buckets to print, one column each
     */
    private static void writeResult(int countPerBucket, List<Integer[]> buckets) {
        for (int column = 0; column < buckets.size(); column++) {
            System.out.print("bucket" + column + "\t");
        }
        System.out.println();

        for (int row = 0; row < countPerBucket; row++) {
            for (Integer[] bucket : buckets) {
                if (row < bucket.length && bucket[row] != null) {
                    System.out.print(bucket[row] + "\t");
                }
            }
            System.out.println();
        }
    }
}
**输出:**
bucket0 bucket1 bucket2 bucket3 bucket4 bucket5
1 1 1 1 2 5
2 2 2 2 3 7
3 3 3 3 4 74
4 4 4 4 5 88
5 5 5 5 7 99
6 12 19 27 37
8 13 20 30 43
9 14 21 31 44
10 15 22 33 66
11 17 26 35 73