以下是我作为初学者编写的、用于压缩十六进制数据的代码。我只是靠这段粗糙的代码勉强完成了数据压缩,但现在需要帮助来解码这种霍夫曼编码。 *
from heapq import heappush, heappop, heapify
from collections import defaultdict
import zlib
import sys
import csv
def encode(symb2freq):
    """Build a Huffman code table from a symbol -> weight mapping.

    Args:
        symb2freq: Mapping of each symbol to its weight (frequency).

    Returns:
        A list of ``[symbol, code]`` pairs, where ``code`` is a string of
        '0'/'1' characters. The list is sorted by code length first and by
        the pair itself second. An empty mapping yields an empty list; a
        mapping with exactly one symbol yields the one-bit code '0' for it.
    """
    if not symb2freq:
        # Nothing to encode; popping from an empty heap would raise IndexError.
        return []

    # Each heap entry is [total_weight, [sym, code], [sym, code], ...].
    heap = [[wt, [sym, ""]] for sym, wt in symb2freq.items()]
    if len(heap) == 1:
        # The merge loop never runs for a lone symbol, which would leave it
        # with a zero-length code that cannot be written or decoded.
        heap[0][1][1] = "0"
    heapify(heap)

    while len(heap) > 1:
        # Merge the two lightest subtrees: the lighter one takes the '0'
        # branch, the heavier one the '1' branch.
        lo = heappop(heap)
        hi = heappop(heap)
        for pair in lo[1:]:
            pair[1] = '0' + pair[1]
        for pair in hi[1:]:
            pair[1] = '1' + pair[1]
        heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])

    # Drop the accumulated weight and order the table by code length.
    return sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p))
# Read the whole input file as text. The context manager closes the handle
# right away instead of leaving it open until garbage collection.
with open('/Users/*****/Desktop/1data_cut.csv', 'r') as infile:
    txt = infile.read()

# Count how often each character occurs in the input.
# NOTE: collections.Counter(txt) would do the same, but the type of
# symb2freq is printed at the end, so it is kept as a defaultdict.
symb2freq = defaultdict(int)
for ch in txt:
    symb2freq[ch] += 1

huff = encode(symb2freq)

# NOTE: sys.getsizeof reports the shallow in-memory size of the Python
# object, not the number of characters or bytes of payload.
print('size:', sys.getsizeof(txt))
print("Symbol\tWeight\tHuffman Code")
data = []
for p in huff:
    print("%s\t%s\t%s" % (p[0], symb2freq[p[0]], p[1]))
    data.append(p)

# Write the code table as a single CSV row; every [symbol, code] pair is
# stored as one quoted field holding the string form of the list.
with open('/Users/****/Desktop/3data.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(data)

# Read the table back (closing the handle afterwards) and run it through a
# zlib compress/decompress round trip, printing the object size at each step.
with open('/Users/****/Desktop/3data.csv', 'r') as table_file:
    data1 = table_file.read()
print('size:', sys.getsizeof(data1))
data1 = data1.encode()
compressed = zlib.compress(data1)
print('size:', sys.getsizeof(compressed))
print(compressed)
decompressed = zlib.decompress(compressed)
print('size:', sys.getsizeof(decompressed))
c = decompressed.decode()
print('size:', sys.getsizeof(c))
print(c)
print(data)
print(type(symb2freq))
*
The output of the given script is as follows:
size: 32802
Symbol Weight Huffman Code
F 9273 10
0 3951 010
4 4983 111
8 2622 000
1 2013 0110
C 2522 1101
E 2139 0111
2 721 00101
3 739 00110
D 615 00100
5 466 001110
6 499 001111
7 546 110001
9 569 110010
A 580 110011
B 515 110000
size: 315
size: 133
b"x\x9cE\x8d9\x12\x800\x0c\x03{^\xc1\xa4q\x93\xc2\xe6\xa6\x0c\xd7'2\xfc\xff\x1bL\x12K\x94Z\xad\xec\x90\xe5\x91\xd8\x8b\xa9\xbc!\x86,Z\x922N\xb54\xf3\xb8\xd5V\xd1Z\x93i\x9f\xcdV\xe8\xb7\xf7\xc8C\x9b\xff\xc2\xe8\x80\x17.\x18\x003\x0c*\x0b\t\xae\xac\xfeVyw\x07\xe1*\x91\xc09\xb8*N\xf7\x01hh2\x86"
size: 299
size: 315
"['F', '10']","['0', '010']","['4', '111']","['8', '000']","['1', '0110']","['C', '1101']","['E', '0111']","['2', '00101']","['3', '00110']","['D', '00100']","['5', '001110']","['6', '001111']","['7', '110001']","['9', '110010']","['A', '110011']","['B', '110000']"
[['F', '10'], ['0', '010'], ['4', '111'], ['8', '000'], ['1', '0110'], ['C', '1101'], ['E', '0111'], ['2', '00101'], ['3', '00110'], ['D', '00100'], ['5', '001110'], ['6', '001111'], ['7', '110001'], ['9', '110010'], ['A', '110011'], ['B', '110000']]
<class 'collections.defaultdict'>
我需要对霍夫曼编码的输出进行解码,但经过多次尝试,仍然没有找到合适的脚本。