我一直在用python创建自己的md5哈希程序。我最初使用在线deck of slides,然后从wikipedia page转换了一些伪代码。
为了处理位,我使用了python位数组库和struct将位分解为整数。
当我在一个空文件上运行代码时,它与正确的文件产生不同的结果。我希望有人能帮助我解决这个问题。
代码如下:
#!/usr/bin/env python3
from bitarray import bitarray
import math
import struct
with open('./hash_str.txt', 'r') as open_file:
hash_str = open_file.read()
# The following stuff is preprocessing padding
result = []
for x in hash_str:
result.append(format(ord(x), '08b'))
a = ''.join(result)
orig_len = len(a)# % (2**64)
if orig_len % 512 != 448:
a = a+'1'
while len(a) % 512 != 448:
a = a + '0'
b = bitarray(format(orig_len, '064b'))
a = bitarray(a) + b
wordA = bitarray()
wordA.frombytes(bytes.fromhex('01234567'))
wordB = bitarray()
wordB.frombytes(bytes.fromhex('89abcdef'))
wordC = bitarray()
wordC.frombytes(bytes.fromhex('fedcba98'))
wordD = bitarray()
wordD.frombytes(bytes.fromhex('76543210'))
K = [math.floor(abs(math.sin(i + 1)) * 2**32) for i in range(64)] # checked
s = [7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21]
def F(B, C, D):
return (B & C) | (~B & D)
def G(B, C, D):
return (B & D) | (C & ~D)
def H(B, C, D):
return B ^ C ^ D
def I(B, C, D):
return C ^ (B | ~D)
def remover(bitstr, tolen=32):
return bitstr[len(bitstr)-tolen:]
# Breaking everything up into words
for bigbyte in range(int(a.length() / 512)):
prechunk = a[bigbyte*512: (bigbyte+1)*512]
chunk = []
for tzt in range(16):
chunk.append(prechunk[tzt*32: (tzt+1)*32])
tmpA = wordA
tmpB = wordB
tmpC = wordC
tmpD = wordD
# Main Loop
for round in range(64):
print('round', round)
if round >= 0 and round <= 15:
cycler = F(tmpB, tmpC, tmpD)
sp_round = round
elif round >= 16 and round <= 31:
cycler = G(tmpB, tmpC, tmpD)
sp_round = (5*round + 1) % 16
elif round >= 32 and round <= 47:
cycler = H(tmpB, tmpC, tmpD)
sp_round = (3*round + 5) % 16
elif round >= 48 and round <= 63:
cycler = I(tmpB, tmpC, tmpD)
sp_round = (7*round) % 16
topad = [cycler.length(), tmpA.length()]
cycler = bitarray(remover(format(struct.unpack("<L", cycler.tobytes())[0] + struct.unpack("<L", tmpA.tobytes())[0] + K[round] + struct.unpack("<L", chunk[sp_round].tobytes())[0], '0'+str(max(topad))+'b')))
tmpA = tmpD
tmpD = tmpC
tmpC = tmpB
current = struct.unpack("<L", cycler.tobytes())[0]
toadd = (current << s[round]) | (current >> ((len(bin(current)[2:])*8) - s[round]))
tmpB = bitarray(remover(format(struct.unpack("<L", tmpB.tobytes())[0] + toadd, '0'+str(max([tmpB.length(), cycler.length()]))+'b')))
topad = [wordA.length(), tmpA.length()]
wordA = bitarray(remover(format(struct.unpack("<L", wordA.tobytes())[0] + struct.unpack("<L", tmpA.tobytes())[0], '0'+str(max(topad))+'b')))
topad = [wordB.length(), tmpB.length()]
wordB = bitarray(remover(format(struct.unpack("<L", wordB.tobytes())[0] + struct.unpack("<L", tmpB.tobytes())[0], '0'+str(max(topad))+'b')))
topad = [wordC.length(), tmpC.length()]
wordC = bitarray(remover(format(struct.unpack("<L", wordC.tobytes())[0] + struct.unpack("<L", tmpC.tobytes())[0], '0'+str(max(topad))+'b')))
topad = [wordD.length(), tmpD.length()]
wordD = bitarray(remover(format(struct.unpack("<L", wordD.tobytes())[0] + struct.unpack("<L", tmpD.tobytes())[0], '0'+str(max(topad))+'b')))
final = wordA+wordB+wordC+wordD
print(final.tobytes().hex())