霍夫曼树产生错误的编码

时间:2017-03-14 17:50:15

标签: java huffman-code

我写了一个用霍夫曼树对字符串进行编码的程序。它可以运行,但输出的编码是错误的。对字符串“abcdefgh”进行编码时,我得到的结果是:

Char,  Freq,  Code
'a' -- 1 -- '000'
'b' -- 1 -- '111'
'c' -- 1 -- '11'
'd' -- 1 -- '101'
'e' -- 1 -- '10'
'f' -- 1 -- '100'
'g' -- 1 -- '110'
'h' -- 1 -- '1'

而正确的结果应该是每个字母的编码都占三位,如下所示:

char, Freq, Code
a    1   000
h    1   001
e    1   010
c    1   011
f    1   100
d    1   101
g    1   110
b    1   111

问题是我不知道为什么会这样。这是我的代码:

Node.java

    /**
     * One node in the Huffman tree. Has getters and setters and a implementation of
     * the Comparable interface so we can compare the nodes when in the heap.
     * 
     */
    public class Node implements Comparable<Node> {

    public class Node implements Comparable<Node> {

    private char data;
    private int freq;
    private Node left;
    private Node right;
    private int code = -1;
    private int length = 0;

    public Node(char data, int freq) {
        this.data = data;
        this.freq = freq;
    }

    public Node(char data, int freq, Node left, Node right) {
        this.data = data;
        this.freq = freq;
        this.left = left;
        this.right = right;
    }

    public char getData() {
        return data;
    }

    public Node getLeft() {
        return left;
    }

    public Node getRight() {
        return right;
    }

    public int getCode() {
        return code;
    }

    public String getCodeAsString() {



        String str = Integer.toBinaryString(code);
        String leadZero = "";

        for (int i = str.length(); i < length; i++ ) {
            leadZero += "0";
        }
//      System.out.println("Leading zeros: " +leadZero + " The binary string: " + str + " length " + length);

        return leadZero + str;
    }

    public void setCode(int code) {
        this.code = code;
    }

    public int getFreq() {
        return freq;
    }

    /**
     * If left or right child is null, this is a leaf.
     * 
     * @return
     */
    public boolean isLeafNode() {
        return left == null && right == null;
    }

    /**
     * Needs to be able to compare nodes in the order of their frequencies for
     * the heap.
     */
    @Override
    public int compareTo(Node o) {
        if (o instanceof Node) {
            return freq - ((Node) o).freq;
        }
        return -1;
    }

    public void setLength(int length2) {
        this.length=length2;

    }
}

HuffmanTree.java

import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.stream.Collectors;

public class HuffmanTree {

    private PriorityQueue<Node> pq;
    private Node[] nodeArray;
    private String s;
    private char chars[];
    private int freqs[];

    /**
     * Constructor that initializes variables and arrays.
     * 
     * @param s- the string to compress.
     */
    public HuffmanTree(String s) {
        System.out.println("String to encode: '" + s + "'");
        this.s = s;
        buildArrays();
        pq = new PriorityQueue<Node>(freqs.length);
        nodeArray = new Node[chars.length];

        // Create Nodes from the chars and occurrences and put them in a array.
        for (int i = 0; i < chars.length; i++) {
            nodeArray[i] = new Node(chars[i], freqs[i]);
        }
        buildTree();
    }

    private void buildTree() {
        Node left, right, top;
        // Put the Nodes from the node array in a min-heap/priorityQueue.
        for (int i = 0; i < nodeArray.length; i++) {
            pq.add(nodeArray[i]);
        }
        // Find two trees with least freq and creates a new node and inserts it.
        while (pq.size() > 1) {
            left = pq.remove();
            right = pq.remove();
            int newFreq = left.getFreq() + right.getFreq();
            top = new Node('$', newFreq, left, right);
            pq.add(top);
        }
        // Now the min heap only contains one node with the character $
        // and it has all the other nodes as children.
        // It's frequency should be the same as the total
        // number of characters in the string.
        // This is our complete tree.
        encode(pq.remove(), 0);
    }

    /**
     * Set's the encoding for every node by depth first traversal through the
     * tree. Used recursivly and using bitwise operators.
     * 
     * @param n - the current node.
     * @param c - the code for the current node.
     */
private int encode(Node n, int c) {

    if (!n.isLeafNode()) {
        System.out.println("COUNTED");
        length++;


        // While going left append 0
        c = c << 1;
        c = encode(n.getLeft(), c);
        // while going right, append 1
        length++;
        c = (c << 1) | 1;
        c = encode(n.getRight(), c);

    } else {
        // Set the code of the node.
        System.out.println("The code as int " + c + " and the length is " + length + "  char is " + n.getData());
        n.setLength(length);
        n.setCode(c);

    }
    length=0;
    return c >> 1;
}

    /**
     * Finds occurencess of each letter in the given string and initializes the
     * arrays containing the letters and their frequencies.
     * 
     * @param s
     */
    private void buildArrays() {
        List<String> original = s.chars().mapToObj(i -> (char) i).map(String::valueOf).collect(Collectors.toList());
        List<String> duplicateRemoved = s.chars().mapToObj(i -> (char) i).map(String::valueOf).distinct()
                .collect(Collectors.toList());

        ArrayList<Integer> Occurrences = new ArrayList<>();
        int counter = 1;
        for (String aList : duplicateRemoved) {
            counter = (int) original.stream().filter(s1 -> s1.equals(aList)).count();
            Occurrences.add(counter);
        }
        // Assign the values to the arrays:
        freqs = new int[duplicateRemoved.size()];
        chars = new char[duplicateRemoved.size()];
        for (int i = 0; i < chars.length; i++) {
            chars[i] = duplicateRemoved.get(i).charAt(0);
            freqs[i] = Occurrences.get(i);
        }
    }

    /**
     * Just for pretty printing all the values. Loops through the nodes and
     * arrays to print their values. Also does some calculations to show the
     * number of bits and the percentage.
     */
    public void printEncoding() {
        System.out.println("Printing encoding...");
        int CHARSIZE = 16;
        int bits = 0;
        Map<Character, String> ht = new Hashtable<>();
        System.out.println("Char,  Freq,  Code");
        for (Node n : nodeArray) {
            bits += n.getFreq() * n.getCodeAsString().length();
            System.out.println("'" + n.getData() + "' -- " + n.getFreq() + " -- '" + n.getCodeAsString() + "'");
            ht.put(n.getData(), n.getCodeAsString());
        }
        System.out.println("'" + s + "'" + " is encoded as:");
        char[] arr = s.toCharArray();
        for (char c : arr) {
            System.out.print(ht.get(c) + " ");
        }
        int original = (s.length() * CHARSIZE);
        int difference = (s.length() * CHARSIZE) - bits;
        float p1 = bits * 1f / original;
        float p2 = (1 - p1) * 100;
        System.out.println("\nOrg   compr  diff   percent");
        System.out.println(original + "----" + bits + "----" + difference + "----" + p2 + "%");
        System.out.println("\n");
    }
}

已根据评论更新了代码。正如评论所指出的,我漏掉了前导零。现在我不知道该如何正确地修复它。我可能需要以某种方式保存每个编码的长度。

0 个答案:

没有答案