我正在努力实现一个霍夫曼树。我无法为树生成代码字典。以下是我到目前为止的情况:
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.code = None
self.left = None
self.right = None
# add any necessary functions you need
#returns true if tree rooted at node a comes before tree rooted at node b
#function used to determine the order of the nodes in the list of huffman nodes
#input: 2 huffman nodes: a, b
#output: boolean value: True if a comes before b, else False
def comes_before (a, b) :
if a.freq == b.freq:
if a.char < b.char:
return True
else:
return False
if a.freq < b.freq:
return True
else:
return False
#COUNT OCCURENCE
#opens a text file with a givern file name and counts the
#frequency of occurences of all the characters within that file.
#store the recoreded frequency in a Python list with size 256
#input: text file (name of the file passed as a string)
#output: a Python list, size of 256, contain the frequency of occurrences
#of all the characters within the file.
#NOTE: THE INDEX OF EACH ELEMENT IS ACTUALLY THE ASCII CODE OF A CHARACTER
#AND THE VALUE OF THE ELEMENT IS THE FREQUENCY
def cnt_freq(filename):
with open(filename, encoding='utf-8-sig') as file:
frequency = [0]*256
string = file.read()
for character in string:
index = ord(character)
frequency[index] += 1
file.close()
return frequency
#FIND THE MINIMUM NODE:
#with the help from function comes_before(), this function will find the
#smallest node
#input: list of unsorted huffman nodes
#output: reference to the minimu node
def findMin(list):
min_node = list[0]
for index in range(0, len(list)):
current_node = list[index]
if comes_before(current_node,min_node):
min_node = current_node
return min_node
#BUILD A HUFFMAN TREE
#This function will scan the frequency list to find the characters that have frequency
#greater than 1. For each characters, the function creates a huffman node to hold the
#ascii value of the character and the freq. These nodes will be put together into a list.
#Then it is going to use the findMin function to group up 2 smallest nodes.
#The end result will be a huffman tree
#input: frequency list
#output: reference to the root node of the huffman tree
def create_huff_tree(char_freq):
list = []
#making a list of huffman nodes
for index in range (0,256):
if char_freq[index] != 0:
new_node = HuffmanNode(index,char_freq[index])
list.append(new_node)
while len(list) > 1:
node1 = findMin(list)
list.remove(node1)
node2 = findMin(list)
list.remove(node2)
combine_freq = node1.freq + node2.freq
combine_char = min(node1.char, node2.char)
combine_node = HuffmanNode(combine_char,combine_freq)
combine_node.left = node1
combine_node.right = node2
list.insert(0,combine_node)
return list[0]
#BUILD A LIST FOR THE CHARACTER CODE
#this function will create a list of strings, size of 256
#it will call the helper function to get the list of codes and return the list
#input: root node of the huffman tree
#output: list of code for each character
def create_code (node):
code_list = [""]*256
code_list = helper_function(node, code_list)
return code_list
#this function will traverse the huffman tree to find the leaves. (only the leaves matter)
#it will call the function generate_code to get the code of the character of the leaf
#then it will store the code into the list at appropriate position. Then it will return the list.
#input: the node to get code, the root node, the list to store code
#output: the list of codes
def helper_function(node, list):
generating_code(node)
if (node.left == None) and (node.right == None) :
list[node.char] = node.code
else:
if (node.left != None):
helper_function(node.left, list)
if (node.right != None):
helper_function(node.right, list)
return list
def generating_code(node, temp = ""):
if (node.left != None):
history_left = temp
temp = temp + "0"
generating_code(node.left, temp)
temp = history_left
if (node.right != None):
history_temp = temp
temp = temp + "1"
generating_code(node.right, temp)
temp = history_temp
if (node.left == None) and (node.right == None):
node.code = temp
print (chr(node.char),temp)
#reads an input text file and writes, using the huffman code, the encoded
#text into an output file
#input: name of input and output file (as strings)
#output: output file holds the encoded text
def huffman_encode(in_file, out_file):
freqlist = cnt_freq(in_file)
hufftree = create_huff_tree(freqlist)
codes = create_code(hufftree)
fout = open(out_file, 'w')
with open(in_file, encoding='utf-8-sig') as fin:
string = fin.read()
for character in string:
converted_code = codes[ord(character)]
fout.write(converted_code)
fin.close()
fout.close()
def huffman_decode(freqs, encoded_file, decode_file):
pass
def tree_preord (node):
pass
所以我遇到的麻烦是当我尝试运行generate_code函数时,print(chr(node.char),temp)生成以下内容:
a 0000
f 0001
b 001
c 01
d 1
a 000
f 001
b 01
c 1
a 00
f 01
b 1
a 0
f 1
a
f
b
c
d
F.a 0000
f 0001
b 001
c 01
d 1
a 000
f 001
b 01
c 1
a 00
f 01
b 1
a 0
f 1
a
f
b
c
d
F.
所以,正如你所看到的,对于前5行,该函数确实为每个字符获得了正确的代码......对于我的测试用例。但随后,它继续前进,最终搞砸了一切。 任何人都可以帮我这个功能???? 提前谢谢!!