使用堆进行中位数维护

时间:2017-04-10 15:58:28

标签: python algorithm data-structures heap

我试图在python3中实现这个问题。从1到10,000的整数流的答案应该是1213,我没有得到。我怀疑我的堆实现存在问题,而不是main()函数中的实际解决方案。

问题陈述:此问题的目标是实施"中位维护"算法。文本文件包含未排序顺序的1到10000之间的整数列表;你应该把它视为一个数字流,一个接一个地到达。令xi表示文件的第i个数,第k个中值mk被定义为数字x1,...,xk的中值。 (因此,如果k是奇数,则mk是((k + 1)/ 2)x1,...,xk中的最小数;如果k是偶数,则mk是x1中的第(k / 2)个最小数, ...,XK。)

找出10000个中位数的总和,模数为10000。

#A = [4, 1, 3, 2, 16, 9, 10, 14, 8 , 7]
max_heap, min_heap = [], []
M, max_size, min_size, data_count, heap = 0, 0, 0, 0, True #True: MAX, False: MIN

def main():
    global M
    data = list(map(int, open('Median.txt').read().splitlines()))
    #data = [2, 1, 3, 4, 5, 6, 7, 8, 9, 10]
    insert(min_heap, data[0], not heap)
    M += data[0]
    insert(max_heap, data[1], heap)
    M += data[1]
    del data[:2]

    for x in data:
        if x < max_heap[0]:
            insert(max_heap, x, heap)
        else:
            insert(min_heap, x, not heap)

        if abs(max_size - min_size) > 1:
            if max_size > min_size:
                y = extract(max_heap, heap)
                insert(min_heap, y, not heap)
            else:
                y = extract(min_heap, not heap)
                insert(max_heap, y, heap)

        if (min_size + max_size)%2 == 0:
            M += max_heap[0]
        else:
            if max_size > min_size:
                M += max_heap[0]
            else:
                M += min_heap[0]
        print(max_heap, min_heap)
    print(len(max_heap), len(min_heap))
    print(M%10000)

def parent(i):
    return i//2

def left(i):
    return 2*i

def right(i):
    return (2*i) + 1

def heapify(A, i, heap):
    l, r = left(i), right(i)
    if heap:
        largest = None
        if l < max_size and A[l] > A[i]:
            largest = l
        else:
            largest = i
        if r < max_size and A[r] > A[largest]:
            largest = r

        if largest != i:
            A[i], A[largest] = A[largest], A[i]
            heapify(A, largest, heap)
    else:
        smallest = None
        if l < min_size and A[l] < A[i]:
            smallest = l
        else:
            smallest = i
        if r < min_size and A[r] < A[smallest]:
            smallest = r

        if smallest != i:
            A[i], A[smallest] = A[smallest], A[i]
            heapify(A, smallest, heap)

def buildHeap(A, heap):
    global max_size, min_size

    if heap:
        max_size = len(A)
    else:
        min_size = len(A)

    for i in reversed(range(len(A)//2)):
        heapify(A, i, heap)

def insert(A, key, heap):
    global max_size, min_size

    if heap:
        if max_size == len(A):
            A.append(key)
        else:
            A[max_size] = key
        max_size += 1
        insertUtils(A, parent(max_size), heap)
    else:
        if min_size == len(A):
            A.append(key)
        else:
            A[min_size] = key
        min_size += 1
        insertUtils(A, parent(min_size), heap)

def insertUtils(A, i, heap):
    heapify(A, i, heap)
    if parent(i) != 0:
        insertUtils(A, parent(i), heap)
    heapify(A, parent(i), heap)

def extract(A, heap): #THE PROBLEM IS HERE I BELIEVE!
    global max_size, min_size

    if heap:
        if max_size < 1:
            raise Exception('Heap underflow!')

        max = A[0]
        A[0] = A[max_size - 1]
        max_size -= 1
        heapify(A, 0, heap)
        return max
    else:
        if min_size < 1:
            raise Exception('Heap underflow!')

        min = A[0]
        A[0] = A[min_size - 1]
        min_size -= 1
        heapify(A, 0, not heap)
        return min

main()
'''
buildHeap(A, heap)
insert(A, 17, heap)
for i in range(11):
    print(A)
    print(extract(A, heap))
insert(A, 18, heap)
print(A)
print(extract(A, heap))
'''

我知道,我经常提问,这是一段相当长的代码。但我真的很感激一些帮助!谢谢! :)

1 个答案:

答案 0 :(得分:0)

启动

一开始似乎有一个问题:

insert(min_heap, data[0], not heap)
M += data[0]
insert(max_heap, data[1], heap)
M += data[1]

此代码假定前两个项目按降序排列。

如果前两个项目的顺序递增,那么您的堆将被初始化为前,而第二个计算中位数也将不正确。

索引

主要问题似乎是基于零的索引(在数组查找中使用)和基于一的索引(在父/左/右函数中使用)之间的混淆。

将所有内容更改为从零开始的索引应该会有所帮助。 堆函数变为:

def parent(i):
    return (i+1)//2-1

def left(i):
    return 2*(i+1)-1

def right(i):
    return (2*(i+1))

def insertUtils(A, i, heap):
    heapify(A, i, heap)
    if parent(i) >= 0:
        insertUtils(A, parent(i), heap)

def insert(A, key, heap):
    global max_size, min_size

    if heap:
        if max_size == len(A):
            A.append(key)
        else:
            A[max_size] = key
        max_size += 1
        insertUtils(A, parent(max_size-1), heap)
    else:
        if min_size == len(A):
            A.append(key)
        else:
            A[min_size] = key
        min_size += 1
        insertUtils(A, parent(min_size-1), heap)