我正在尝试将2D KD树实现放在一起。此时它可以工作,但运行时间爆炸超过~100k点。 100k需要15s,1e6需要30分钟。起初我认为瓶颈是找到中值的排序,但似乎是使用subList和addAll方法。任何改进建议都会很棒。
谢谢,
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Random;
public class KDtree {
//****************************************************
//setting up a data set for input
//****************************************************
public kdLite() {
long startTime = System.currentTimeMillis() / 1000;
//select random values to generate data set
double[][] dataSet = new double[2][100000];
for (int i = 0; i < 100000; i++) {
dataSet[0][i] = (Math.random() * (99));
dataSet[1][i] = (Math.random() * (99));
//System.out.print(dataSet[0][i] + "\t" + dataSet[1][i] + "\n");
}
//System.out.print("\n");
//setup a point class for simple data manipulation and add data to it
ArrayList<Point> preSorted = new ArrayList<Point>();
for (int i = 0; i < dataSet[0].length; i++) {
Point point = new Point(i, dataSet[0][i], dataSet[1][i], 0);
preSorted.add(point);
}
//split and sort the list
ArrayList<Point> outList = splitList(preSorted);
// add the list to the binary tree structure
BinaryST buildKD = new BinaryST();
for (int i = 0; i < outList.size(); i++) {
buildKD.insertNode(outList.get(i));
}
long endTime = System.currentTimeMillis() / 1000;
System.out.println((int) (endTime - startTime) / 60 + " Minutes and " + (endTime - startTime) + " Seconds");
// buildKD.printTree();
//****************************************************
}
//****************************************************
//the brunt of the code. this method takes a list of Point objects
//solves for the axis to split on and cuts the list into 2^i segments
//****************************************************
public ArrayList<Point> splitList(ArrayList<Point> arrToSplit) {
ArrayList<ArrayList<Point>> splitList = new ArrayList<ArrayList<Point>>();
ArrayList<Point> Meds = new ArrayList<Point>();
int axis = 0;
int toSplit = 0;
double maxXdif = 0;
double maxYdif = 0;
//populate first bucket
splitList.add(new ArrayList<Point>());
for (int i = 0; i < arrToSplit.size(); i++) {
splitList.get(0).add(arrToSplit.get(i));
}
for (int slice = 0; slice < arrToSplit.size(); slice++) {
//get first bucket that has more than one value then use it first
for (int i = 0; i < splitList.size(); i++) {
if (splitList.get(i).size() >= 1) {
toSplit = i;
if (splitList.get(i).size() > 1) {
break;
}
}
}
if (splitList.get(toSplit).size() > 1) {
sortByX(splitList.get(toSplit));
maxXdif = Math.abs(splitList.get(toSplit).get(0).x - splitList.get(toSplit).get(splitList.get(toSplit).size() - 1).x);
sortByY(splitList.get(toSplit));
maxYdif = Math.abs(splitList.get(toSplit).get(0).y - splitList.get(toSplit).get(splitList.get(toSplit).size() - 1).y);
//arrange by splitting axis according to largest distance to find splitting axis
if (maxXdif > maxYdif) {
axis = 0;
sortByX(splitList.get(toSplit));
} else {
axis = 1;
sortByY(splitList.get(toSplit));
}
//solve for median point .. arbitrate if no point lies on axis (uneven split)
int Med = (int) Math.floor(splitList.get(toSplit).size() / 2);
//take median point, assign splitting axis
splitList.get(toSplit).get(Med).axis = axis;
Meds.add(splitList.get(toSplit).get(Med));
splitList.get(toSplit).remove(Med);
---- >>>>>> PROBLEM CODE
// relocate all points except median to new list, delete the median value
List<Point> head = splitList.get(toSplit).subList(Med, splitList.get(toSplit).size());
splitList.add(new ArrayList<Point>());
splitList.get(splitList.size() - 1).addAll(head);
head.clear();
splitList.get(toSplit).subList(Med - 1, splitList.get(toSplit).size() - 1).clear();
} else {
//these are the leftover points so ordering is arbitrary
//randomize axis to ensure balance
Random random = new Random();
int randomAxis = random.nextInt(2 - 0);
Meds.add(splitList.get(toSplit).get(0));
splitList.get(toSplit).get(0).axis = randomAxis;
splitList.remove(toSplit);
}
}
return Meds;
}
//****************************************************
//****************************************************
//sorting methods for sorting a list by x or y
//must use comparator to sort by custom object attributes
//****************************************************
private ArrayList<Point> sortByX(ArrayList<Point> xList) {
Collections.sort(xList, new Comparator<Point>() {
public int compare(Point p1, Point p2) {
return Double.compare(p1.getX(), p2.getX());
}
});
return xList;
}
private ArrayList<Point> sortByY(ArrayList<Point> yList) {
Collections.sort(yList, new Comparator<Point>() {
public int compare(Point p1, Point p2) {
return Double.compare(p1.getY(), p2.getY());
}
});
return yList;
}
//****************************************************
}
答案 0 :(得分:1)
使用此:
ArrayList<Point>(int capacity);
因为默认情况下会创建一个新的ArrayList,容量为10个元素。它通过创建一个新数组,每次达到它的大小时,它将当前容量增加一倍,旧数据被垃圾收集器破坏。因此,在您目前的情况下,您的ArrayList容量为10-> 20-> 40-> 80-> 160-> ...
答案 1 :(得分:0)
在splitList()函数中有一个sortByX()和sortByY()调用,它们所采用的参数彼此不相关。 所以我认为..只要你的CPU功率有一些额外的资源,也许你可以让这两个计算在不同的线程中运行并在完成后使用它。
创建ArrayList时设置初始ArrayList容量也是个好主意。默认值为32左右,填充ArrayList时发生的情况是......它创建的内部数组比原始数据大两倍,并将现有的内部项目项目复制到新项目中。对于较小的阵列长度来说这是可以的,但在像你这样的情况下可能会有问题。
IIRC,有一些实现差异,因此性能也适用于subList(),因此如果您使用Java6运行测试,只需尝试使用Java7。