所以我构建了一个用霍夫曼树编码字符串的程序。它工作,但它输出错误的编码。我从编码字符串“abcdefgh”得到的结果是
Char, Freq, Code
'a' -- 1 -- '000'
'b' -- 1 -- '111'
'c' -- 1 -- '11'
'd' -- 1 -- '101'
'e' -- 1 -- '10'
'f' -- 1 -- '100'
'g' -- 1 -- '110'
'h' -- 1 -- '1'
虽然正确答案应该是每个字母都有三位,如下所示:
char, Freq, Code
a 1 000
h 1 001
e 1 010
c 1 011
f 1 100
d 1 101
g 1 110
b 1 111
问题是我不知道为什么会这样。这是我的代码:
Node.java
/**
* One node in the Huffman tree. Has getters and setters and a implementation of
* the Comparable interface so we can compare the nodes when in the heap.
*
*/
public class Node implements Comparable<Node> {
public class Node implements Comparable<Node> {
private char data;
private int freq;
private Node left;
private Node right;
private int code = -1;
private int length = 0;
public Node(char data, int freq) {
this.data = data;
this.freq = freq;
}
public Node(char data, int freq, Node left, Node right) {
this.data = data;
this.freq = freq;
this.left = left;
this.right = right;
}
public char getData() {
return data;
}
public Node getLeft() {
return left;
}
public Node getRight() {
return right;
}
public int getCode() {
return code;
}
public String getCodeAsString() {
String str = Integer.toBinaryString(code);
String leadZero = "";
for (int i = str.length(); i < length; i++ ) {
leadZero += "0";
}
// System.out.println("Leading zeros: " +leadZero + " The binary string: " + str + " length " + length);
return leadZero + str;
}
public void setCode(int code) {
this.code = code;
}
public int getFreq() {
return freq;
}
/**
* If left or right child is null, this is a leaf.
*
* @return
*/
public boolean isLeafNode() {
return left == null && right == null;
}
/**
* Needs to be able to compare nodes in the order of their frequencies for
* the heap.
*/
@Override
public int compareTo(Node o) {
if (o instanceof Node) {
return freq - ((Node) o).freq;
}
return -1;
}
public void setLength(int length2) {
this.length=length2;
}
}
HuffmanTree.java
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.stream.Collectors;
public class HuffmanTree {
private PriorityQueue<Node> pq;
private Node[] nodeArray;
private String s;
private char chars[];
private int freqs[];
/**
* Constructor that initializes variables and arrays.
*
* @param s- the string to compress.
*/
public HuffmanTree(String s) {
System.out.println("String to encode: '" + s + "'");
this.s = s;
buildArrays();
pq = new PriorityQueue<Node>(freqs.length);
nodeArray = new Node[chars.length];
// Create Nodes from the chars and occurrences and put them in a array.
for (int i = 0; i < chars.length; i++) {
nodeArray[i] = new Node(chars[i], freqs[i]);
}
buildTree();
}
private void buildTree() {
Node left, right, top;
// Put the Nodes from the node array in a min-heap/priorityQueue.
for (int i = 0; i < nodeArray.length; i++) {
pq.add(nodeArray[i]);
}
// Find two trees with least freq and creates a new node and inserts it.
while (pq.size() > 1) {
left = pq.remove();
right = pq.remove();
int newFreq = left.getFreq() + right.getFreq();
top = new Node('$', newFreq, left, right);
pq.add(top);
}
// Now the min heap only contains one node with the character $
// and it has all the other nodes as children.
// It's frequency should be the same as the total
// number of characters in the string.
// This is our complete tree.
encode(pq.remove(), 0);
}
/**
* Set's the encoding for every node by depth first traversal through the
* tree. Used recursivly and using bitwise operators.
*
* @param n - the current node.
* @param c - the code for the current node.
*/
private int encode(Node n, int c) {
if (!n.isLeafNode()) {
System.out.println("COUNTED");
length++;
// While going left append 0
c = c << 1;
c = encode(n.getLeft(), c);
// while going right, append 1
length++;
c = (c << 1) | 1;
c = encode(n.getRight(), c);
} else {
// Set the code of the node.
System.out.println("The code as int " + c + " and the length is " + length + " char is " + n.getData());
n.setLength(length);
n.setCode(c);
}
length=0;
return c >> 1;
}
/**
* Finds occurencess of each letter in the given string and initializes the
* arrays containing the letters and their frequencies.
*
* @param s
*/
private void buildArrays() {
List<String> original = s.chars().mapToObj(i -> (char) i).map(String::valueOf).collect(Collectors.toList());
List<String> duplicateRemoved = s.chars().mapToObj(i -> (char) i).map(String::valueOf).distinct()
.collect(Collectors.toList());
ArrayList<Integer> Occurrences = new ArrayList<>();
int counter = 1;
for (String aList : duplicateRemoved) {
counter = (int) original.stream().filter(s1 -> s1.equals(aList)).count();
Occurrences.add(counter);
}
// Assign the values to the arrays:
freqs = new int[duplicateRemoved.size()];
chars = new char[duplicateRemoved.size()];
for (int i = 0; i < chars.length; i++) {
chars[i] = duplicateRemoved.get(i).charAt(0);
freqs[i] = Occurrences.get(i);
}
}
/**
* Just for pretty printing all the values. Loops through the nodes and
* arrays to print their values. Also does some calculations to show the
* number of bits and the percentage.
*/
public void printEncoding() {
System.out.println("Printing encoding...");
int CHARSIZE = 16;
int bits = 0;
Map<Character, String> ht = new Hashtable<>();
System.out.println("Char, Freq, Code");
for (Node n : nodeArray) {
bits += n.getFreq() * n.getCodeAsString().length();
System.out.println("'" + n.getData() + "' -- " + n.getFreq() + " -- '" + n.getCodeAsString() + "'");
ht.put(n.getData(), n.getCodeAsString());
}
System.out.println("'" + s + "'" + " is encoded as:");
char[] arr = s.toCharArray();
for (char c : arr) {
System.out.print(ht.get(c) + " ");
}
int original = (s.length() * CHARSIZE);
int difference = (s.length() * CHARSIZE) - bits;
float p1 = bits * 1f / original;
float p2 = (1 - p1) * 100;
System.out.println("\nOrg compr diff percent");
System.out.println(original + "----" + bits + "----" + difference + "----" + p2 + "%");
System.out.println("\n");
}
}
根据评论更新了代码。 正如评论所提到的,我错过了领先的零。现在我不知道如何正确修复它。我可能应该以某种方式存储每个代码的长度。