我正在实现 LZ77 压缩。目前我把整个文件作为单个字符串读入,然后对它进行压缩。有没有更好的办法?我的代码附在下面,请告诉我需要做哪些修改,才能让程序在读取大文件时也能快速完成压缩。
import fileinput
class Assign:
    """Boyer-Moore-Horspool substring searcher.

    Call ``pattern`` to set the needle and precompute its shift table,
    then ``find`` to locate the needle inside a text.
    """

    def __init__(self, data=""):
        # Start with an (empty) pattern so find() is usable even when
        # pattern() has not been called yet.
        self.pattern(data)

    def pattern(self, data):
        """Store *data* as the pattern and build its shift table.

        ``self.skip`` maps each symbol occurring in all but the last
        pattern position to the distance from its rightmost such
        occurrence to the end of the pattern.  Symbols missing from the
        table shift by the full pattern length (see ``find``).
        """
        self.m = len(data)
        # A sparse dict keyed by the symbol itself replaces the fixed
        # 256-entry table indexed by ord(): it is cheaper to build for
        # short patterns and is not limited to code points below 256.
        skip = {}
        for k in range(self.m - 1):
            skip[data[k]] = self.m - k - 1
        self.skip = skip
        self.data = data

    def find(self, text):
        """Return the index of the first occurrence of the pattern in
        *text*, or -1 when the pattern does not occur.

        An empty pattern matches at index 0.
        """
        n = len(text)
        m = self.m
        if m > n:
            return -1
        data = self.data
        skip = self.skip
        k = m - 1  # index in text aligned with the last pattern symbol
        while k < n:
            # Compare right-to-left from the current alignment.
            j = m - 1
            i = k
            while j >= 0 and text[i] == data[j]:
                j -= 1
                i -= 1
            if j == -1:
                return i + 1
            # Shift according to the text symbol under the pattern's end.
            k += skip.get(text[k], m)
        return -1
class LZ77:
    """LZ77-style encoder over an in-memory string.

    The search window is everything already encoded, i.e.
    ``stream[:position]``; it is not limited to a fixed size.
    """

    def __init__(self, data):
        self.position = 0                    # index of the next symbol to encode
        self.window = ""                     # already-encoded prefix of the stream
        self.stream = data
        self.streamSize = len(self.stream)   # symbols still to be encoded
        # Kept so existing users of the attribute keep working; Encode()
        # itself searches with the built-in str.find.
        self.search = Assign()

    def Encode(self):
        """Encode one token starting at the current position.

        Returns ``((B, L), c)``: ``L`` symbols are copied from ``B``
        positions back in the already-encoded text, followed by the
        literal symbol ``c``.  ``B`` is 0 when ``L`` is 0.  When nothing
        is left to encode, ``((0, 0), '')`` is returned and the state is
        left untouched.
        """
        if self.streamSize <= 0:
            return ((0, 0), '')

        stream = self.stream
        start = self.position
        match_at = 0
        i = 0
        for i in range(self.streamSize):
            # Look for the (i + 1)-symbol lookahead inside stream[:start].
            # Bounding find() avoids copying the window, and the leftmost
            # hit of a longer pattern can never lie before the leftmost
            # hit of its prefix, so the scan resumes from the last hit.
            hit = stream.find(stream[start:start + i + 1], match_at, start)
            if hit < 0:
                break
            match_at = hit

        # After the loop, i is the number of symbols copied from the
        # window and stream[start + i] is the literal that follows them.
        c = stream[start + i]
        B = start - match_at if i > 0 else 0
        L = i

        self.position += i + 1
        self.streamSize -= i + 1
        self.window = stream[:self.position]
        return ((B, L), c)

    def Encoder(self):
        """Encode the rest of the stream and return the tokens as a string.

        Each token is written as ``str(B) + str(L) + c`` with no
        separators.
        NOTE(review): without delimiters or fixed-width fields the
        numbers cannot be split apart again (B=1, L=12 and B=11, L=2 both
        give "112"), so this output cannot be decoded reliably.  The
        format is kept as-is for compatibility.
        """
        pieces = []
        while self.streamSize > 0:
            ((B, L), C) = self.Encode()
            pieces.append(str(B) + str(L) + C)
        return "".join(pieces)
def aiyoo(filename):
    """Compress the file *filename* with LZ77 into a user-chosen file.

    Prompts for the output file name, echoes *filename*, reads the whole
    input file, encodes it with ``LZ77(...).Encoder()`` and writes the
    resulting string to the output file.
    """
    # raw_input: this script targets Python 2.
    enter = raw_input("enter the filename to which the original file is to e compressed to")
    print(filename)

    # Read the input in one call instead of concatenating it line by line;
    # the with-block closes the file even if reading fails.
    with open(filename, 'rb') as fob:
        original = fob.read()

    stream = LZ77(original).Encoder()

    # Open the output only once there is something to write, so a missing
    # or unreadable input no longer leaves a truncated output file behind.
    with open(enter, 'wb') as fob1:
        fob1.write(stream)
提前致谢