在PYTHON中为Huffman Tree创建代码字典

时间:2017-10-28 02:27:47

标签: python dictionary tree huffman-code

我正在努力实现一个霍夫曼树。我无法为树生成代码字典。以下是我到目前为止的情况:

class HuffmanNode:
   def __init__(self, char, freq):
      self.char = char
      self.freq = freq
      self.code = None
      self.left = None
      self.right = None
      # add any necessary functions you need

#returns true if tree rooted at node a comes before tree rooted at node b
#function used to determine the order of the nodes in the list of huffman nodes 
#input: 2 huffman nodes: a, b
#output: boolean value: True if a comes before b, else False
def comes_before (a, b) :
   if a.freq == b.freq:
      if a.char < b.char:
             return True
      else:
             return False
   if a.freq < b.freq:
      return True
   else:
      return False

#COUNT OCCURENCE
#opens a text file with a givern file name and counts the
#frequency of occurences of all the characters within that file.
#store the recoreded frequency in a Python list with size 256
#input: text file (name of the file passed as a string)
#output: a Python list, size of 256, contain the frequency of occurrences
#of all the characters within the file.
#NOTE: THE INDEX OF EACH ELEMENT IS ACTUALLY THE ASCII CODE OF A CHARACTER
#AND THE VALUE OF THE ELEMENT IS THE FREQUENCY
def cnt_freq(filename):
   with open(filename, encoding='utf-8-sig') as file:
      frequency = [0]*256
      string = file.read()
      for character in string:
          index = ord(character)
          frequency[index] += 1
   file.close()
   return frequency

#FIND THE MINIMUM NODE:
#with the help from function comes_before(), this function will find the
#smallest node
#input: list of unsorted huffman nodes
#output: reference to the minimu node
def findMin(list):
    min_node = list[0]
    for index in range(0, len(list)):
         current_node = list[index]
         if comes_before(current_node,min_node):
             min_node = current_node
    return min_node    

#BUILD A HUFFMAN TREE
#This function will scan the frequency list to find the characters that have frequency
#greater than 1. For each characters, the function creates a huffman node to hold the
#ascii value of the character and the freq. These nodes will be put together into a list.
#Then it is going to use the findMin function to group up 2 smallest nodes.
#The end result will be a huffman tree
#input: frequency list
#output: reference to the root node of the huffman tree    
def create_huff_tree(char_freq):
    list = []
     #making a list of huffman nodes
    for index in range (0,256):
         if char_freq[index] != 0:
             new_node = HuffmanNode(index,char_freq[index])
             list.append(new_node)
    while len(list) > 1:
         node1 = findMin(list)
         list.remove(node1)
         node2 = findMin(list)
         list.remove(node2)
         combine_freq = node1.freq + node2.freq
         combine_char = min(node1.char, node2.char)
         combine_node = HuffmanNode(combine_char,combine_freq)
         combine_node.left = node1
         combine_node.right = node2
         list.insert(0,combine_node)
    return list[0]


#BUILD A LIST FOR THE CHARACTER CODE
#this function will create a list of strings, size of 256
#it will call the helper function to get the list of codes and return the list
#input: root node of the huffman tree
#output: list of code for each character
def create_code (node):
   code_list = [""]*256
   code_list = helper_function(node, code_list)
   return code_list

#this function will traverse the huffman tree to find the leaves. (only the leaves matter)
#it will call the function generate_code to get the code of the character of the leaf
#then it will store the code into the list at appropriate position. Then it will return the list.
#input: the node to get code, the root node, the list to store code
#output: the list of codes
def helper_function(node, list):
     generating_code(node)
     if (node.left == None) and (node.right == None)  :
         list[node.char] = node.code
     else:
         if (node.left != None):
             helper_function(node.left, list)
         if (node.right != None):
             helper_function(node.right, list)
     return list

def generating_code(node, temp = ""):
   if (node.left != None):
      history_left = temp
      temp = temp + "0"
      generating_code(node.left, temp)
      temp = history_left
   if (node.right != None):
      history_temp = temp
      temp = temp + "1"
      generating_code(node.right, temp)
      temp = history_temp
   if (node.left == None) and (node.right == None):
      node.code = temp 
      print (chr(node.char),temp)   


#reads an input text file and writes, using the huffman code, the encoded
#text into an output file
#input: name of input and output file (as strings)
#output: output file holds the encoded text

def huffman_encode(in_file, out_file):
   freqlist = cnt_freq(in_file)
   hufftree = create_huff_tree(freqlist)
   codes = create_code(hufftree)
   fout = open(out_file, 'w')
   with open(in_file, encoding='utf-8-sig') as fin:
         string = fin.read()
         for character in string:
            converted_code = codes[ord(character)]
            fout.write(converted_code)
   fin.close()
   fout.close()         

def huffman_decode(freqs, encoded_file, decode_file):
   pass

def tree_preord (node):
   pass

所以我遇到的麻烦是当我尝试运行generate_code函数时,print(chr(node.char),temp)生成以下内容:

a 0000
f 0001
b 001
c 01
d 1
a 000
f 001
b 01
c 1
a 00
f 01
b 1
a 0
f 1
a 
f 
b 
c 
d 
F.a 0000
f 0001
b 001
c 01
d 1
a 000
f 001
b 01
c 1
a 00
f 01
b 1
a 0
f 1
a 
f 
b 
c 
d 
F.

所以,正如你所看到的,对于前5行,该函数确实为每个字符获得了正确的代码......对于我的测试用例。但随后,它继续前进,最终搞砸了一切。 任何人都可以帮我这个功能???? 提前谢谢!!

0 个答案:

没有答案