以下是Python中众所周知的Rice编码(= Golomb代码M = 2^k
http://en.wikipedia.org/wiki/Golomb_coding)的实现,广泛用于压缩算法。
不幸的是它很慢。这种低速的原因是什么?(是StringIO的问题,还是因为数据是逐位/逐字节写入的?)
为了加速编码,您会建议使用什么?你会用哪些Cython技巧来提速?
import struct
import StringIO
def put_bit(f, b):
    """Append one bit ``b`` (0 or 1) to the shared bit accumulator,
    flushing a complete byte to stream ``f`` once eight bits are packed.

    Uses the module-level globals ``buff`` (the byte being assembled)
    and ``filled`` (how many bits of it are already used).
    """
    global buff, filled
    # Bits are packed MSB-first: the first bit of a byte lands at position 7.
    buff |= b << (7 - filled)
    if filled < 7:
        filled += 1
    else:
        # Byte is complete: emit it and start assembling a fresh one.
        f.write(struct.pack('B', buff))
        buff = 0
        filled = 0
def rice_code(f, x, k):
    """Rice-encode the non-negative integer ``x`` with parameter ``k``,
    emitting bits to ``f`` through put_bit: a unary quotient (q one-bits
    terminated by a zero bit) followed by the k low-order remainder bits,
    most significant first.
    """
    quotient = x // (1 << k)  # floor division: identical to py2 int '/'
    # Unary part.
    for _ in range(quotient):
        put_bit(f, 1)
    put_bit(f, 0)
    # Binary part: remainder bits k-1 .. 0.
    for shift in reversed(range(k)):
        put_bit(f, (x >> shift) & 1)
def compress(L, k):
    """Rice-encode every integer in ``L`` with parameter ``k`` and return
    the packed bit stream as a byte string.

    Resets the global bit accumulator, encodes each value, then pads the
    trailing byte with 1-bits so the stream ends on a byte boundary
    (when the data is already byte-aligned this appends one full 0xFF
    byte, matching the original padding loop).
    """
    global buff, filled
    out = StringIO.StringIO()
    buff = 0
    filled = 0
    # Encode all the numbers.
    for value in L:
        rice_code(out, value, k)
    # Pad the last byte with set bits (1111...).
    for _ in range(8 - filled):
        put_bit(out, 1)
    return out.getvalue()
if __name__ == '__main__':
    # Reference output for [1, 2, 3, 10] with k=3: three bytes
    # 0b00010010 0b00111001 0b01111111 (worked example from the French
    # Wikipedia page on Rice coding). The second print should match it.
    # NOTE: Python 2 print statements — this listing is Python 2 code.
    print struct.pack('BBB', 0b00010010, 0b00111001, 0b01111111) #see http://fr.wikipedia.org/wiki/Codage_de_Rice#Exemples
    print compress([1,2,3,10],k = 3)
PS:此问题是否应移至https://codereview.stackexchange.com/?
答案 0(得分:1)
在构建压缩结果时,我会使用C风格的缓冲区而不是StringIO,并且会尝试在编码循环中只使用C级的临时变量。我还注意到,可以把缓冲区预先初始化为全部置位('1'位),这样可以更快地编码较大的值,因为对于商的一元部分,你只需在输出缓冲区中跳过这些已置位的位。我按照这些思路重写了compress函数并测量了速度,我的版本似乎比原编码器快约十倍,但代码的可读性有所下降。
这是我的版本:
# Cython module prelude: C-level imports for raw-buffer string building.
cimport cpython.string
cimport libc.stdlib
cimport libc.string
import struct
# Size of the C scratch buffer; it is flushed into the Python result
# string each time the write position runs past its end.
cdef int BUFFER_SIZE = 4096
def compress(L, k):
    """Rice-encode every integer in L with parameter k; return the packed
    bits as a byte string (Python 2 str).

    Strategy: work in a C scratch buffer pre-filled with set bits (0xFF).
    The unary quotient part of each code is a run of 1-bits, so it is
    "written" by simply advancing the position/bit cursor over the pre-set
    bits; only the terminating 0 bit and the k remainder bits are stored
    explicitly. Whenever the cursor passes the end of the buffer, the full
    buffer is appended to `result` and re-filled with 0xFF.
    """
    result = ''
    cdef unsigned cvalue
    cdef char *position        # current write byte inside buff
    cdef int bit, nbit         # bit: index of the next bit to write (7 = MSB)
    cdef unsigned q, r         # quotient / remainder of the Rice split
    cdef unsigned ck = k
    cdef unsigned mask = (1 << ck) - 1
    cdef char *buff = <char *>libc.stdlib.malloc(BUFFER_SIZE)
    if buff is NULL:
        raise MemoryError
    try:
        # Initialize the buffer: space is assumed to contain all set bits.
        libc.string.memset(buff, 0xFF, BUFFER_SIZE)
        position = buff
        bit = 7
        for value in L:
            cvalue = value
            q = cvalue >> ck     # unary quotient length
            r = cvalue & mask    # k-bit remainder
            # Skip ahead over q pre-set one bits for the quotient.
            # q and 8 are C unsigneds, so / and % are C integer ops here.
            position += q / 8
            bit -= q % 8
            if bit < 0:
                bit += 8
                position += 1
            # If we have gone off the end of the buffer, flush full
            # BUFFER_SIZE chunks and reset the pointer. A loop, because a
            # large q can jump several whole buffers at once (the skipped
            # region is all 1s, which memset re-creates each time).
            while position - buff >= BUFFER_SIZE:
                block = cpython.string.PyString_FromStringAndSize(
                    buff, BUFFER_SIZE)
                result = result + block
                libc.string.memset(buff, 0xFF, BUFFER_SIZE)
                position = position - BUFFER_SIZE
            # Clear the (pre-set) bit to write the terminating 0 of the
            # unary part; XOR works because the bit is known to be 1.
            position[0] = position[0] ^ (1 << bit)
            if bit > 0:
                bit = bit - 1
            else:
                position += 1
                bit = 7
                # Check for buffer overflow after advancing a byte.
                if position - buff >= BUFFER_SIZE:
                    block = cpython.string.PyString_FromStringAndSize(
                        buff, BUFFER_SIZE)
                    result = result + block
                    libc.string.memset(buff, 0xFF, BUFFER_SIZE)
                    position = buff
            # Encode the remainder bits one by one, MSB first.
            for nbit in xrange(k - 1, -1, -1):
                # Clear the target bit, then OR in the remainder bit.
                position[0] = (position[0] & ~(1 << bit)) | \
                    (((r >> nbit) & 1) << bit)
                if bit > 0:
                    bit = bit - 1
                else:
                    position += 1
                    bit = 7
                    # Check for buffer overflow.
                    if position - buff >= BUFFER_SIZE:
                        block = cpython.string.PyString_FromStringAndSize(
                            buff, BUFFER_SIZE)
                        result = result + block
                        libc.string.memset(buff, 0xFF, BUFFER_SIZE)
                        position = buff
        # Advance past a partially used final byte so it is included in
        # the extracted slice (its unused low bits stay as padding 1s).
        if bit < 7:
            position = position + 1
        # Extract the used portion of the buffer.
        # NOTE(review): repeated `result = result + block` is quadratic in
        # the number of flushed chunks; a list + ''.join would avoid that.
        block = cpython.string.PyString_FromStringAndSize(
            buff, position - buff)
        result = result + block
        return result
    finally:
        libc.stdlib.free(buff)
def test():
    """Check compress() against the worked Rice-coding example
    (see http://fr.wikipedia.org/wiki/Codage_de_Rice#Exemples)."""
    expected = struct.pack('BBB', 0b00010010, 0b00111001, 0b01111111)
    actual = compress([1,2,3,10], k=3)
    assert expected == actual