我自己实现了一版霍夫曼编码,代码没有任何语法错误,可以通过编译,但输出的值不正确。我的某些函数中应该存在逻辑错误,但我找不到错在哪里。
我尝试使用MS Visual Studio调试器对其进行调试,但未成功。
这是我实现的霍夫曼编码。
我正在发布整个程序,因为我不确定我的错误在哪里。
/*
Author: Stevan Milic
Date: 05.05.2018.
Course: Data Structures II
Professor: Dr. Claude Chaudet
Description: Huffman Codes
*/
#include <cstdlib>
#include <iostream>
#include <new>
using std::cout;
#define MAX_TREE_HEIGHT 1000
// One node of the Huffman tree; pointers to these nodes are also what the min heap stores.
struct MinHeapNode
{
    // Symbol held by this node. A char is enough because the inputs are single letters.
    char codeword;
    // How many times the symbol occurs. Unsigned, since an occurrence count can never be negative.
    unsigned freq;
    // Left child; NULL when the node has no left subtree.
    struct MinHeapNode* left;
    // Right child; NULL when the node has no right subtree.
    struct MinHeapNode* right;
};
// Min heap of Huffman tree nodes, kept as an array of node pointers.
// The heap routines in this file treat array[0] as the root and
// array[2*i + 1] / array[2*i + 2] as the children of array[i].
struct MinHeap // Collection of nodes
{
unsigned size; // Number of node pointers currently stored in array
unsigned capacity; // Number of slots allocated for array
struct MinHeapNode** array; // Heap node pointers array
};
// Allocates (with malloc) a new heap/tree node holding the given character and its frequency.
// Parameters:
//   codeword - the character stored in the node
//   freq     - how many times that character occurs
// Returns the new node. Both child pointers are NULL, so the node starts out as a leaf.
// The caller owns the node and releases it with free().
// Throws std::bad_alloc when the allocation fails; the unchecked version dereferenced
// the NULL that malloc returns in that case.
struct MinHeapNode* newHeapNode(char codeword, unsigned freq)
{
    struct MinHeapNode* temp = static_cast<struct MinHeapNode*>(std::malloc(sizeof(struct MinHeapNode)));
    if (temp == NULL)
        throw std::bad_alloc();
    temp->left = temp->right = NULL;
    temp->codeword = codeword;
    temp->freq = freq;
    return temp;
}
// Creates an empty min heap (allocated with malloc) with room for `capacity` node pointers.
// Parameters:
//   capacity - number of node-pointer slots to allocate
// Returns the new heap with size 0. The caller owns it and releases both
// heap->array and the heap itself with free().
// Throws std::bad_alloc when either allocation fails; nothing is leaked in that case.
struct MinHeap* createMinHeap(unsigned capacity)
{
    struct MinHeap* minHeap = static_cast<struct MinHeap*>(std::malloc(sizeof(struct MinHeap)));
    if (minHeap == NULL)
        throw std::bad_alloc();
    minHeap->size = 0; // The heap starts out empty
    minHeap->capacity = capacity;
    minHeap->array = static_cast<struct MinHeapNode**>(std::malloc(capacity * sizeof(struct MinHeapNode*)));
    // malloc(0) is allowed to return NULL, so only a non-empty request counts as a failure.
    if (minHeap->array == NULL && capacity != 0)
    {
        std::free(minHeap); // do not leak the header when the slot array cannot be allocated
        throw std::bad_alloc();
    }
    return minHeap;
}
// Exchanges the two node pointers that a and b point to (the nodes themselves are not touched).
void swap(struct MinHeapNode** a, struct MinHeapNode** b)
{
    struct MinHeapNode* const held = *b;
    *b = *a;
    *a = held;
}
// Sifts the node at `index` down the heap: as long as one of its children has a strictly
// smaller frequency, the node is exchanged with the smallest child and the walk continues
// from that child's slot. Stops as soon as neither child is smaller.
void minHeapify(struct MinHeap* minHeap, int index)
{
    int current = index;
    for (;;)
    {
        const int leftSon = 2 * current + 1;
        const int rightSon = 2 * current + 2;
        int smallest = current;

        // The left child is examined first; the right child is then compared against
        // whichever of the two is smaller so far.
        if (static_cast<unsigned>(leftSon) < minHeap->size
            && minHeap->array[leftSon]->freq < minHeap->array[smallest]->freq)
        {
            smallest = leftSon;
        }
        if (static_cast<unsigned>(rightSon) < minHeap->size
            && minHeap->array[rightSon]->freq < minHeap->array[smallest]->freq)
        {
            smallest = rightSon;
        }

        if (smallest == current)
            return; // neither child is smaller, nothing left to move

        // Exchange the two slots and keep sifting from the child's position.
        struct MinHeapNode* const displaced = minHeap->array[smallest];
        minHeap->array[smallest] = minHeap->array[current];
        minHeap->array[current] = displaced;
        current = smallest;
    }
}
// Returns 1 when the heap holds exactly one node, 0 otherwise.
int heapSizeOne(struct MinHeap* minHeap)
{
    const bool exactlyOne = (1u == minHeap->size);
    return exactlyOne ? 1 : 0;
}
// Removes and returns the node with the smallest frequency (the heap root).
// The last node is moved into the root slot, the size shrinks by one and the
// heap order is restored with minHeapify.
// Returns NULL when the heap is empty.
struct MinHeapNode* extractMin(struct MinHeap* minHeap)
{
    // size is unsigned: on an empty heap `size - 1` would wrap around and index far
    // outside the array, so bail out before touching it.
    if (minHeap->size == 0)
        return NULL;
    struct MinHeapNode* temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return temp;
}
// Inserts a node into the min heap, keeping the heap ordered by frequency.
// Parameters:
//   minHeap     - heap to insert into
//   minHeapNode - node to add; the heap stores the pointer, it does not copy the node
// When the heap is already full its array is grown (doubled) first, so the new slot is
// always inside the allocation.
// Throws std::bad_alloc when the array cannot be grown; the heap is left unchanged then.
void insert(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode)
{
    if (minHeap->size >= minHeap->capacity)
    {
        const unsigned grown = (minHeap->capacity == 0) ? 1u : minHeap->capacity * 2u;
        if (grown <= minHeap->capacity) // the doubling wrapped around
            throw std::bad_alloc();
        // realloc leaves the old block intact on failure, so the heap stays valid if we throw.
        void* const resized = std::realloc(minHeap->array, grown * sizeof(struct MinHeapNode*));
        if (resized == NULL)
            throw std::bad_alloc();
        minHeap->array = static_cast<struct MinHeapNode**>(resized);
        minHeap->capacity = grown;
    }
    ++minHeap->size;
    int i = minHeap->size - 1;
    // Sift up: pull parents with a strictly larger frequency down one level until the
    // right slot for the new node is found (or the root is reached).
    while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq)
    {
        minHeap->array[i] = minHeap->array[(i - 1) / 2];
        i = (i - 1) / 2;
    }
    minHeap->array[i] = minHeapNode;
}
// Turns the heap's array into a valid min heap by running minHeapify on every
// index from the parent of the last element down to the root.
void build(struct MinHeap* minHeap)
{
    const int lastIndex = minHeap->size - 1;
    int parent = (lastIndex - 1) / 2;
    while (parent >= 0)
    {
        minHeapify(minHeap, parent);
        --parent;
    }
}
// Writes the first n entries of arr to standard output back to back (no separators),
// followed by a newline.
void display(int arr[], int n)
{
    int printed = 0;
    while (printed < n)
    {
        std::cout << arr[printed];
        ++printed;
    }
    std::cout << "\n";
}
// Returns 1 when the node has neither a left nor a right child, 0 otherwise.
int isLeaf(struct MinHeapNode* root)
{
    if (root->left)
        return 0;
    return root->right ? 0 : 1;
}
// Builds a min heap whose capacity equals `size` and which contains one node per
// (codeword[i], freq[i]) pair, ordered by frequency.
struct MinHeap* create(char codeword[], int freq[], int size)
{
    struct MinHeap* const heap = createMinHeap(size);

    // Fill the slots in input order first; the heap order is established afterwards.
    int slot = 0;
    while (slot < size)
    {
        heap->array[slot] = newHeapNode(codeword[slot], freq[slot]);
        ++slot;
    }

    heap->size = size;
    build(heap);
    return heap;
}
// Function that builds the Huffman tree
struct MinHeapNode* buildHT(char codeword[], int freq[], int size)
{
struct MinHeapNode *left, *right, *top;
// Creating a min heap with given capacity equivalent to size and inserts all the codewords and their frequency.
struct MinHeap* minHeap = create(codeword, freq, size);
// while loop runs as long as the size of heap doesn't reach 1
while (!heapSizeOne(minHeap))
{
// Getting the two minimums from min heap
left = extractMin(minHeap);
right = extractMin(minHeap);
// The frequency of top is computed as the sum of the frequencies of left and right nodes.
top = newHeapNode('_', left->freq + right->freq);
top->left = left;
top->right = right;
insert(minHeap, top);
}
// The remaining value is the root node which completes the tree
return extractMin(minHeap);
}
// Prints the Huffman code of every leaf in the subtree rooted at `root`.
// Parameters:
//   root - subtree to print; NULL (an empty tree) prints nothing
//   arr  - scratch buffer holding the bits of the path from the tree root to `root`;
//          it must have room for at least as many entries as the tree is deep
//   top  - number of bits currently stored in arr
// Each leaf is printed as "<character>: <bits>" on its own line.
void displayHC(struct MinHeapNode* root, int arr[], int top)
{
    // An empty tree has no codes; without this check the NULL root would be dereferenced.
    if (root == NULL)
        return;
    // Going left appends a 0 to the code
    if (root->left)
    {
        arr[top] = 0;
        displayHC(root->left, arr, top + 1);
    }
    // Going right appends a 1 to the code
    if (root->right)
    {
        arr[top] = 1;
        displayHC(root->right, arr, top + 1);
    }
    // A leaf carries a real character: print it together with the bits collected so far.
    if (isLeaf(root))
    {
        std::cout << root->codeword << ": ";
        display(arr, top);
    }
}
// Building a Huffman Tree and displaying the codes
void HuffmanCodes(char codeword[], int freq[], int size)
{
// Building a HT
struct MinHeapNode* root = buildHT(codeword, freq, size);
// Displaying the HT we built
int arr[MAX_TREE_HEIGHT], top = 0;
displayHC(root, arr, top);
}
// Entry point: prints the frequency table and then the Huffman code of every symbol.
// The data is the example from the PP presentation in the Files section - The Huffman Coding.
int main()
{
    // NOTE(review): the table printed here lists 28 entries ("C" appears twice) while arr/freq
    // below hold 27 symbols, so from E onward the printed pairs do not match the
    // arr[i]/freq[i] pairs that are actually encoded. Confirm which side is intended.
    // std::endl must be qualified: only std::cout is brought into scope by a using-declaration.
    std::cout << "A|4\t B|0\t C|2\t D|1\t C|5\t E|1\t F|0\t G|1\t H|1\t I|0\t J|0\t K|3\t L|2\t M|0\t N|1\t\nO|2\t P|0\t Q|3\t R|5\t S|4\t T|2\t U|0\t V|0\t W|1\t X|0\t Y|0\t Z|0\t _|6\n" << std::endl;
    char arr[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_' };
    int freq[] = { 4, 0, 2, 1, 5, 1, 0, 1, 1, 0, 0, 3, 2, 0, 1, 2, 0, 3, 5, 4, 2, 0, 0, 1, 0, 0, 6};
    // Number of symbols, derived from the array itself
    int size = sizeof(arr) / sizeof(arr[0]);
    HuffmanCodes(arr, freq, size);
    std::cout << "\n\n";
    return 0;
}
例如,我期望程序输出 A: 0101,但它输出的是 A: 001。再比如,我期望 C 的编码是 C: 1111,但实际输出为 C: 11111。
答案 0 :(得分:0)
我认为您的代码没有任何问题。实际上,问题出在您的提问本身:您假设对于给定的频率分布只存在一个正确的霍夫曼编码,而事实从来不是这样。即使码长不能互换,只要在任意节点上互换 0 和 1,也能得到许多不同的霍夫曼编码。就您的具体情况而言,C 属于频率为 2 的字母组 C、M、P、U。您可以随意交换它们的码长和编码,而不会影响平均码长。具体来说,其中两个字母的码长应为 5,另外两个的码长应为 4。请检查您手工构造的结果是否满足这一点。频率同为 4 的 A 和 T 也是同样的道理。因此,您应该检查 A 和 T 的码长是否分别为 3 和 4,或者反过来。
一种简单的检查方法是计算分布的平均代码长度,该长度应为3.886,非常接近于熵3.818。
我认为您没有检查数据放入堆之后被取出的顺序,因此可能会出现与预期不符的结果。