我希望将长度可变(但位数可数)的位流编码为二进制字符串、数字或 Base64 编码字符串,并能再解码还原。流的最大长度约为 21 + 20 = 41 位,但也可能稍长一些,例如 43 或 45 位。
假设位将由某个数组表示。
# Round-trip requirement: decoding an encoded bit list must give back the same list.
# someEncoder / someDecoder are placeholders for the functions being asked for;
# they are not defined in this snippet.
bits = [1]
encoded = someEncoder(bits)
decoded = someDecoder(encoded)
assert bits == decoded
比特流可以更长,例如:
import random
def generateRandomBits(l):
    """Return a list of ``l`` random bits, each the integer 0 or 1.

    One ``l``-bit random integer is drawn and unpacked least-significant bit
    first, so element ``i`` of the result is bit ``i`` of that integer.
    """
    # An empty stream is a valid variable-length input; answer it directly
    # instead of asking getrandbits() for zero bits.
    if l == 0:
        return []
    bitsBytes = random.getrandbits(l)
    return [(bitsBytes >> i) & 1 for i in range(l)]
bits = generateRandomBits(21)
# [0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1]
请注意,这是一个长度可变的位列表,而且编码/解码必须是无损的:
# Bit lists that differ only in how many zeros they hold must encode differently.
# someEncoder is a placeholder for the requested encoder; it is not defined here.
a = someEncoder([0])
b = someEncoder([0, 0])
assert a != b
答案 0 :(得分:7)
要将一组表示位的整数(0 或 1)合并为单个数值(仍为整数),请使用位移:
# Fold the bits into one integer, most significant bit first.
result = 0
for bit in bits:
    result = result << 1 | bit
或者使用 reduce():
from functools import reduce  # reduce() is a builtin only on Python 2

# The initial value 0 leaves non-empty results unchanged and makes an empty
# bit list yield 0 instead of raising TypeError.
result = reduce(lambda n, b: n << 1 | b, bits, 0)
可以用 format(result, 'b') 将其转换为二进制字符串;还可以指定宽度并加上 0 前缀(例如 '021b'),以便用零填充到固定长度。
演示:
>>> bits = [0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1]
>>> reduce(lambda n, b: n << 1 | b, bits)
934809
>>> result = 0
>>> for bit in bits:
... result = result << 1 | bit
...
>>> result
934809
>>> format(result, '021b')
'011100100001110011001'
如果您需要直接转到二进制字符串,只需将这些位映射到字符串并加入:
>>> ''.join(map(str, bits))
'011100100001110011001'
可以使用 str.zfill() 在左侧补零到指定宽度:
>>> ''.join(map(str, bits)).zfill(64)
'0000000000000000000000000000000000000000000011100100001110011001'
答案 1 :(得分:5)
根据 @Martijn 的回答,我想你需要的是下面的代码。它的思路是在最高位额外编码一个前导 1,从而区分前导零个数不同的变长位流:
import unittest
import itertools
def encode(bits):
    '''Encode an iterable of 0/1 values as a positive integer.

    A sentinel 1 is placed above the first bit so that the length of the
    stream, including any leading zeros, survives the conversion.
    Example: [0,0,0] == 0b1000 == 8

    Raises ValueError if an element is neither 0 nor 1, because such a value
    would overwrite neighbouring bits and make the encoding lossy.
    '''
    # start with the sentinel 1.
    encoded = 1
    for bit in bits:
        if bit not in (0, 1):
            raise ValueError('bits must contain only 0 or 1')
        # shift current value left and OR in the new bit.
        encoded = encoded << 1 | bit
    return encoded
def decode(encoded):
    '''Decode a positive integer into a list of 0/1 values.

    The most significant bit only marks the length of the stream and is
    removed; the remaining binary digits are returned most significant first.
    Example: 137 = 0b10001001 = [0,0,0,1,0,0,1]

    Raises ValueError if encoded is less than 1, since such a value carries
    no length marker.
    '''
    if encoded < 1:
        raise ValueError('encoded must be > 0')
    # Drop the marker digit from the binary text before converting each digit.
    digits = format(encoded, 'b')[1:]
    return [1 if c == '1' else 0 for c in digits]
class Cases(unittest.TestCase):
    # Unit tests for the module-level encode() and decode() functions.
    # Comments are used instead of docstrings so that verbose unittest output
    # keeps printing the method names.

    def testEncodeZeros(self):
        # i zero bits encode to the marker bit shifted left i places.
        for i in range(100):
            bits = [0]*i
            self.assertEqual(encode(bits), 2**i)

    def testEncodeOnes(self):
        # i one bits plus the marker give i+1 set bits.
        for i in range(100):
            bits = [1]*i
            self.assertEqual(encode(bits), 2**(i+1)-1)

    def testDecodeZeros(self):
        # A lone marker bit at position i decodes to i zeros.
        for i in range(100):
            encoded = 2**i
            self.assertEqual(decode(encoded), [0]*i)

    def testDecodeOnes(self):
        # i+1 set bits decode to i ones once the marker is removed.
        for i in range(100):
            encoded = 2**(i+1)-1
            self.assertEqual(decode(encoded), [1]*i)

    def testEncodeDecode(self):
        # Exhaustive round trip over every bit pattern of length 0..9.
        for n in range(10):
            for bits in itertools.product([0, 1], repeat=n):
                self.assertEqual(decode(encode(bits)), list(bits))

    def testDecodeZero(self):
        # 0 has no marker bit, so it is rejected.
        self.assertRaises(ValueError, decode, 0)

    def testDecodeNegative(self):
        # Negative values are rejected as well.
        self.assertRaises(ValueError, decode, -1)
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
输出:
testDecodeNegative (__main__.Cases) ... ok
testDecodeOnes (__main__.Cases) ... ok
testDecodeZero (__main__.Cases) ... ok
testDecodeZeros (__main__.Cases) ... ok
testEncodeDecode (__main__.Cases) ... ok
testEncodeOnes (__main__.Cases) ... ok
testEncodeZeros (__main__.Cases) ... ok
----------------------------------------------------------------------
Ran 7 tests in 0.041s
OK
>>> encode([0,0,0,0])
16
>>> decode(16)
[0, 0, 0, 0]
>>> encode([])
1
>>> decode(1)
[]
>>> encode([1,0,0,1])
25
>>> decode(25)
[1, 0, 0, 1]
>>> bin(25)
'0b11001'
>>> encode([0,0,0,1,0,0,1])
137
>>> bin(137)
'0b10001001'
>>> decode(137)
[0, 0, 0, 1, 0, 0, 1]