在遍历数据集的过程中,如何以最佳方式按排序顺序只保留到目前为止最大的10个数字?
解决方案:最终自己实现了泛型(Generic)的最小堆和最大堆。遗憾的是,作者在Java库中没有找到现成的实现,在互联网上也不容易找到。以下代码不作任何保证(no guarantees)。
import java.util.ArrayList;
/**
 * A binary max-heap stored in an {@link ArrayList}.
 *
 * <p>The element at index {@code i} has its children at indices
 * {@code 2*i + 1} and {@code 2*i + 2}; every parent compares greater than or
 * equal to its children, so the largest element is always at index 0.
 *
 * <p>{@code null} elements are rejected because ordering relies on
 * {@link Comparable#compareTo(Object)}.
 *
 * @param <K> element type, ordered by its natural ordering
 */
public class MaxHeapGeneric <K extends Comparable<? super K>> {
    // ArrayList holding the heap in level order (root at index 0).
    ArrayList<K> h = new ArrayList<K>();

    /** Creates an empty heap. */
    public MaxHeapGeneric()
    {
    }

    /**
     * Returns the number of elements currently stored.
     *
     * @return the heap size
     */
    public int getSize()
    {
        return h.size();
    }

    /**
     * Inserts {@code key}, moving it up from the last position until its
     * parent is not smaller than it.
     *
     * @param key the element to insert; must not be {@code null}
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void add(K key){
        // Validate before touching the list: otherwise a null key would leave a
        // dangling null slot behind (or become the root of an empty heap).
        if (key == null) {
            throw new NullPointerException("key must not be null");
        }
        h.add(null); // open a hole at the end; it is filled once the final slot is known
        int k = h.size() - 1;
        while (k > 0) {
            int parent = (k - 1) / 2;
            K parentValue = h.get(parent);
            // Max-heap: stop as soon as the parent is at least as large as the key.
            if (key.compareTo(parentValue) <= 0) {
                break;
            }
            h.set(k, parentValue); // pull the smaller parent down into the hole
            k = parent;
        }
        h.set(k, key);
    }

    /**
     * Returns the largest element without removing it.
     *
     * @return the element at the root of the heap
     * @throws IndexOutOfBoundsException if the heap is empty
     */
    public K getMax()
    {
        return h.get(0);
    }

    /**
     * Places {@code key} in the subtree rooted at index {@code k}, moving larger
     * children up until {@code key} is not smaller than either child.
     *
     * <p>NOTE: despite its name this method moves the hole <em>down</em> the
     * tree (a "sift down"); the name is kept for compatibility with callers.
     * Does nothing when the heap is empty.
     *
     * @param k   index of the hole to fill
     * @param key the element to place; must not be {@code null}
     * @throws NullPointerException if the heap is non-empty and {@code key} is {@code null}
     */
    public void percolateUp(int k, K key){
        if (h.isEmpty()) {
            return;
        }
        if (key == null) {
            throw new NullPointerException("key must not be null");
        }
        // Only indices below size/2 have at least one child.
        while (k < h.size() / 2) {
            int child = 2 * k + 1; // left child
            // Prefer the right child when it exists and is larger than the left one.
            if (child < h.size() - 1 && (h.get(child).compareTo(h.get(child + 1)) < 0)) {
                child++;
            }
            if (key.compareTo(h.get(child)) >= 0) {
                break;
            }
            h.set(k, h.get(child));
            k = child;
        }
        h.set(k, key);
    }

    /**
     * Removes and returns the largest element.
     *
     * @return the element that was at the root of the heap
     * @throws IndexOutOfBoundsException if the heap is empty
     */
    public K remove()
    {
        K removeNode = h.get(0);
        // Detach the last leaf and re-insert it starting from the root.
        K lastNode = h.remove(h.size() - 1);
        percolateUp(0, lastNode);
        return removeNode;
    }

    /**
     * Reports whether the heap holds no elements.
     *
     * @return {@code true} if the heap is empty
     */
    public boolean isEmpty()
    {
        return h.isEmpty();
    }

    /** Small demo: inserts a few integers and prints them in descending order. */
    public static void main(String[] args)
    {
        MaxHeapGeneric<Integer> test = new MaxHeapGeneric<Integer>();
        test.add(5);
        test.add(9);
        test.add(445);
        test.add(1);
        test.add(534);
        test.add(23);
        while (!test.isEmpty())
        {
            System.out.println(test.remove());
        }
    }
}
最小堆
import java.util.ArrayList;
/**
 * A binary min-heap stored in an {@link ArrayList}.
 *
 * <p>The element at index {@code i} has its children at indices
 * {@code 2*i + 1} and {@code 2*i + 2}; every parent compares less than or
 * equal to its children, so the smallest element is always at index 0.
 *
 * <p>{@code null} elements are rejected because ordering relies on
 * {@link Comparable#compareTo(Object)}.
 *
 * @param <K> element type, ordered by its natural ordering
 */
public class MinHeapGeneric <K extends Comparable<? super K>> {
    // ArrayList holding the heap in level order (root at index 0).
    ArrayList<K> h = new ArrayList<K>();

    /** Creates an empty heap. */
    public MinHeapGeneric()
    {
    }

    /**
     * Returns the number of elements currently stored.
     *
     * @return the heap size
     */
    public int getSize()
    {
        return h.size();
    }

    /**
     * Inserts {@code key}, moving it up from the last position until its
     * parent is smaller than it.
     *
     * @param key the element to insert; must not be {@code null}
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void add(K key){
        // Validate before touching the list: otherwise a null key would leave a
        // dangling null slot behind (or become the root of an empty heap).
        if (key == null) {
            throw new NullPointerException("key must not be null");
        }
        h.add(null); // open a hole at the end; it is filled once the final slot is known
        int k = h.size() - 1;
        while (k > 0) {
            int parent = (k - 1) / 2;
            K parentValue = h.get(parent);
            // Min-heap: stop as soon as the key is larger than its parent.
            if (key.compareTo(parentValue) > 0) {
                break;
            }
            h.set(k, parentValue); // pull the parent down into the hole
            k = parent;
        }
        h.set(k, key);
    }

    /**
     * Returns the smallest element without removing it.
     *
     * @return the element at the root of the heap
     * @throws IndexOutOfBoundsException if the heap is empty
     */
    public K getMin()
    {
        return h.get(0);
    }

    /**
     * Returns the root of the heap, which for this min-heap is the
     * <em>smallest</em> element. The name is misleading and is kept only so
     * existing callers continue to compile.
     *
     * @return the element at the root of the heap
     * @throws IndexOutOfBoundsException if the heap is empty
     * @deprecated use {@link #getMin()} instead
     */
    @Deprecated
    public K getMax()
    {
        return getMin();
    }

    /**
     * Places {@code key} in the subtree rooted at index {@code k}, moving smaller
     * children up until {@code key} is smaller than both children.
     *
     * <p>NOTE: despite its name this method moves the hole <em>down</em> the
     * tree (a "sift down"); the name is kept for compatibility with callers.
     * Does nothing when the heap is empty.
     *
     * @param k   index of the hole to fill
     * @param key the element to place; must not be {@code null}
     * @throws NullPointerException if the heap is non-empty and {@code key} is {@code null}
     */
    public void percolateUp(int k, K key){
        if (h.isEmpty()) {
            return;
        }
        if (key == null) {
            throw new NullPointerException("key must not be null");
        }
        // Only indices below size/2 have at least one child.
        while (k < h.size() / 2) {
            int child = 2 * k + 1; // left child
            // Prefer the right child when it exists and is not larger than the left one.
            if (child < h.size() - 1 && (h.get(child).compareTo(h.get(child + 1)) >= 0)) {
                child++;
            }
            if (key.compareTo(h.get(child)) < 0) {
                break;
            }
            h.set(k, h.get(child));
            k = child;
        }
        h.set(k, key);
    }

    /**
     * Removes and returns the smallest element.
     *
     * @return the element that was at the root of the heap
     * @throws IndexOutOfBoundsException if the heap is empty
     */
    public K remove()
    {
        K removeNode = h.get(0);
        // Detach the last leaf and re-insert it starting from the root.
        K lastNode = h.remove(h.size() - 1);
        percolateUp(0, lastNode);
        return removeNode;
    }

    /**
     * Reports whether the heap holds no elements.
     *
     * @return {@code true} if the heap is empty
     */
    public boolean isEmpty()
    {
        return h.isEmpty();
    }

    /** Small demo: inserts a few integers and prints them in ascending order. */
    public static void main(String[] args)
    {
        MinHeapGeneric<Integer> test = new MinHeapGeneric<Integer>();
        test.add(5);
        test.add(9);
        test.add(445);
        test.add(1);
        test.add(534);
        test.add(23);
        while (!test.isEmpty())
        {
            System.out.println(test.remove());
        }
    }
}
答案 0 :(得分:2)
使用最小堆(优先级队列)来跟踪最大的10个项目。对于二叉堆,时间复杂度为O(N log M),其中N是项目总数,M是10。
与把前M个项目存放在数组中相比,当M较大时这种方法更快:基于数组的方法是O(NM)。链表也是如此。
在伪代码中:
heap = empty min-heap
for each datum d:
heap.push(d) // add the new element onto the heap
if heap.size > 10:
heap.pop() // remove the smallest element
endif
endfor
现在堆中包含最大的10个项目。要将它们逐个弹出:
while heap is not empty:
item = heap.pop() // remove and return the smallest remaining element
print item
endwhile