使用二进制堆构建霍夫曼树

时间:2018-10-22 19:26:32

标签: java huffman-code binary-heap

我目前正在尝试编写一个程序,该程序读取文本文件并通过创建HuffmanTree对其进行编码。我在优先级队列的二进制堆中使用并行数组,并跟踪我的霍夫曼树。

我知道从堆中删除两分钟,合并它们,然后再将它们重新插入到堆中直到剩下一分钟的原则,但是我在将逻辑/算法转换为代码时遇到了麻烦。

这是我的HuffmanEncode类:

public class HuffmanEncode {

    public HuffmanEncode(String in, String out) {
        // Implements the Huffman encoding algorithm
        // Add private methods and instance variables as needed
        int[] freqs = new int[128]; // character counts
        char[] chars = new char[128]; //characters

        freqs = countFrequencies(in);
        HuffmanTree[] trees = new HuffmanTree[128]; //single node trees

        for(int i= 0; i < freqs.length; i++) {
            chars[i] = (char)i;
            trees[i] = new HuffmanTree(chars[i]);
        }  

        BinaryHeap heap = new BinaryHeap(128); // create a binary heap
        for(int i = 0; i < 128; i++) { 
            heap.insert(freqs[i], trees[i]); 
        }

        // STUCK HERE

        buildTree(heap);
        HuffmanTree root = new HuffmanTree();

        // STUCK HERE

    }

    private void buildTree(BinaryHeap h) {
        // grab two smallest
        while (h.getSize() > 1) { //repeat until there is only one left
            int temp1, temp2;
            HuffmanTree t1, t2;
            temp1 = h.getMinPriority();
            temp2 = h.getMinPriority();
            // add their frequency to create new single node tree with char 128
            int sum = temp1 + temp2;
            HuffmanTree node = new HuffmanTree((char)128);
            // insert it back into the heap
            h.insert(sum, node);
        }
    }

    // count the frequencies of all characters in ascii 0-127 and store them in an array
    private int[] countFrequencies(String input) {
        File f1 = new File(input);
        int[] count = new int[128];
        try {
            BufferedReader in = new BufferedReader (new FileReader (f1));
            int nextChar;
            char ch;

            while((nextChar = in.read()) != -1) { // stop when end of file is reached
                ch = ((char) nextChar);
                if(ch <= 127)
                    count[ch]++;
            }
            in.close();
        } catch (FileNotFoundException e) {
            System.out.println("file not found");
        } catch (IOException e) {
            System.out.println("Buffered Reader error");
        }
        return count;
    }

这是我的Binary Heap类:

public class BinaryHeap {
    // implements a binary heap where the heap rule is the value in the parent 
    // node is less than or equal to the values in the child nodes

    // implementation uses parallel arrays to store the priorities and the trees
    // must use this implementation

    int priority[];
    HuffmanTree trees[];
    int size;

    public BinaryHeap(int s) {
        priority = new int[s+1];
        trees = new HuffmanTree[s+1];
        size = 0;
    }

    public void removeMin() {
        // PRE: size != 0;
        // removes the priority and the tree at the root of the heap
        int parent;
        int child;
        int x = priority[size];
        HuffmanTree z = trees[size];
        size--;
        child = 2;
        while(child <= size) {
            parent = child / 2;
            if(child < size && priority[child+1] < priority[child])
                child++;
            if(x < priority[child]) break;
            priority[parent] = priority[child];
            trees[parent] = trees[child];
            child = 2 * child;
        }
        priority[child/2] = x;
        trees[child/2] = z;
    }

    public int getMinPriority() {
        // PRE: size != 0
        // return the priority in the root of the heap
        int min = priority[1];
        removeMin();
        return min;
    }

    public boolean full() {
        // return true if the heap is full otherwise return false
        return size == priority.length-1;
    }

    public void insert(int p, HuffmanTree t) {
        // insert the priority p and the associated tree t into the heap
        // PRE: !full()
        int parent;
        int child;
        size++;
        child = size;
        parent = child/2;
        priority[0] = p;
        trees[0] = t;
        while (priority[parent] > p) {
            priority[child] = priority[parent];
            trees[child] = trees[parent];
            child = parent;
            parent = child/2;
        }
        priority[child] = p;
        trees[child] = t;
    }

    public int getSize() {
        // return the number of values (priority, tree) pairs in the heap
        return size;
    }
}

这是HuffmanTree对象的类:

import java.util.*;

public class HuffmanTree {

    private class Node{
        private Node left;
        private char data;
        private Node right;
        private Node parent;

        private Node(Node L, char d, Node R, Node P) {
            left = L;
            data = d;
            right = R;
            parent = P;
        }
    }

    private Node root;
    private Node current; // value is changed by move methods

    public HuffmanTree() {
        root = null;
        current = null;
    }

    public HuffmanTree(char d) {
        // single node tree
        root = new Node(null, d, null, null);
        current = null;
    }

    public HuffmanTree(String t, char nonLeaf) {
        // Assumes t represents a post order representation of the tree
        // nonLeaf is the char value of the data in the non-leaf nodes
        // use (char) 128 for the non-leaf value
    }

    public HuffmanTree(HuffmanTree b1, HuffmanTree b2, char d) {
        // makes a new tree where b1 is the left subtree and b2 is the right subtree and d is the data in root
        root = new Node(b1.root, d, b2.root, null);
        current = null;
    }

    // use the move methods to traverse the tree
    // the move methods change the value of current
    // use these in the decoding process

    public void moveToRoot() {
        // change current to reference the root of the tree
        current = root;
    }

    public void moveToLeft() {
        // PRE: the current node is not a leaf
        current = current.left;
    }

    public void moveToRight() {
        // PRE: the current node is not a leaf
        current = current.right;
    }

    public void moveToParent() {
        // PRE: the current node is not the root
        current = current.parent;
    }

    public boolean atRoot() {
        // returns true if the current node is the root otherwise returns false
        if(current.equals(root)) {
            return true;
        }
        return false;
    }

    public boolean atLeaf() {
        // returns true if the current references a leaf otherwise return false
        if(current.left == null && current.right == null && current.parent != null) {
            return true;
        }
        return false;
    }

    public char current() {
        // returns the data value in the node referenced by current
        return current.data;
    }

    public Iterator<String> iterator(){
        //return a new path iterator object
        return new PathIterator();
    }

    public String toString() {
        // returns a string representation of the tree using postorder format
        return toString(root);
    }

    private String toString(Node r) {
        if(r == null)
            return "";

        toString(r.left);

        toString(r.right);

        return r.data + "";
    }

    public class PathIterator implements Iterator<String>{
        // the iterator returns the path (a series of 0s and 1s) to each leaf
        // DO NOT compute all paths in the constructor
        // only compute them as needed (similar to what you did in homework 2)
        // add private methods and variables as needed

        public PathIterator() {
        }

        public boolean hasNext() {
            return true;
        }

        public String next() {
            // the format of the string should be leaf value, a space, a sequence of 0s and 1s
            // the 0s and 1s indicate the path from the root the node containing the leaf value
            String result = "";
            return result;
        }

        public void remove() {
            // optional method not implemented
        }
    }

}

我了解并非所有代码都是完整的,它仍在进行中。目前,我正在尝试使用HuffmanEncode类来构建Tree。

我的问题是如何使用Binary Heap的并行数组构造二叉树?我尝试过从数组中拉出两个元素,添加它们的频率以创建一个新节点,然后将它们插入树中(如代码所示),但是我不知道如何使用HuffmanTree使其保持平行构造器将两棵树合并在一起。我如何保证一切顺利进行?

2 个答案:

答案 0 :(得分:0)

在使新节点重新插入到堆中时,需要首先将其左分支和右分支连接到从堆中拉出的两个节点。

答案 1 :(得分:0)

我已经使用min堆实现了霍夫曼编码。您可能会从中得到一些想法。

internal class HuffmanTree
{
    internal HuffmanTreeNode rootNode;
    private Dictionary<char, int> dictFrequencies;
    HuffmanPriorityQueue huffmanPriorityQueue;

    internal void Build(string input)
    {
        dictFrequencies = input.GroupBy(x => x).ToDictionary(k => k.Key, v => v.Count());
        huffmanPriorityQueue = new HuffmanPriorityQueue(dictFrequencies.Count);

        foreach (var item in dictFrequencies)
        {
            huffmanPriorityQueue.Push(new HuffmanTreeNode { Symbol = item.Key, Frequency = item.Value, Left = null, Right = null });
        }

        while(huffmanPriorityQueue.Count() >= 2)
        {
            HuffmanTreeNode leftNode = huffmanPriorityQueue.Pop();
            HuffmanTreeNode rightNode = huffmanPriorityQueue.Pop();

            HuffmanTreeNode parentNode = new HuffmanTreeNode
            {
                Frequency = leftNode.Frequency + rightNode.Frequency,
                Symbol = '*',
                Left = leftNode,
                Right = rightNode
            };

            this.rootNode = parentNode;
            huffmanPriorityQueue.Push(parentNode);
        }
    }

internal class HuffmanPriorityQueue
{
    HuffmanTreeNode[] items;
    int top = 0;

    public HuffmanPriorityQueue(int size)
    {
        items = new HuffmanTreeNode[size + 1];
    }

    internal void Push(HuffmanTreeNode node)
    {
        int i = ++top;

        while (i > 1 && items[i / 2].Frequency > node.Frequency)
        {
            items[i] = items[i / 2];
            i = i / 2;
        }

        items[i] = node;
    }

    internal HuffmanTreeNode Pop()
    {
        HuffmanTreeNode data = items[1];
        items[1] = items[top];
        items[top--] = null;

        int i = 1;
        int j = i * 2;

        while (j <= top)
        {
            if ((j + 1) <= top && items[j + 1].Frequency < items[j].Frequency)
            {
                j += 1;
            }

            if (items[j].Frequency < items[i].Frequency)
            {
                HuffmanTreeNode temp = items[j];
                items[j] = items[i];
                items[i] = temp;

                i = j;
                j = i * 2;
            }
            else
                break;
        }

        return data;
    }

    internal int Count()
    {
        return top;
    }
}

internal class HuffmanTreeNode
{
    internal char Symbol { get; set; }
    internal int Frequency { get; set; }
    internal HuffmanTreeNode Right { get; set; }
    internal HuffmanTreeNode Left { get; set; }
}