Question

我一直在用python创建自己的md5哈希程序。我最初使用在线deck of slides，然后从wikipedia page转换了一些伪代码。

为了处理位，我使用了python位数组库和struct将位分解为整数。

当我在一个空文件上运行代码时，它与正确的文件产生不同的结果。我希望有人能帮助我解决这个问题。

代码如下：

#!/usr/bin/env python3
from bitarray import bitarray
import math
import struct

with open('./hash_str.txt', 'r') as open_file:
    hash_str = open_file.read()

#  The following stuff is preprocessing padding
result = []
for x in hash_str:
    result.append(format(ord(x), '08b'))
a = ''.join(result)
orig_len = len(a)# % (2**64)
if orig_len % 512 != 448:
    a = a+'1'
    while len(a) % 512 != 448:
        a = a + '0'

b = bitarray(format(orig_len, '064b'))

a = bitarray(a) + b

wordA = bitarray()
wordA.frombytes(bytes.fromhex('01234567'))
wordB = bitarray()
wordB.frombytes(bytes.fromhex('89abcdef'))
wordC = bitarray()
wordC.frombytes(bytes.fromhex('fedcba98'))
wordD = bitarray()
wordD.frombytes(bytes.fromhex('76543210'))

K = [math.floor(abs(math.sin(i + 1)) * 2**32) for i in range(64)] # checked
s = [7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
     5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
     4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
     6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21]

def F(B, C, D):
    return (B & C) | (~B & D)

def G(B, C, D):
    return (B & D) | (C & ~D)

def H(B, C, D):
    return B ^ C ^ D

def I(B, C, D):
    return C ^ (B | ~D)

def remover(bitstr, tolen=32):
    return bitstr[len(bitstr)-tolen:]

# Breaking everything up into words
for bigbyte in range(int(a.length() / 512)):
    prechunk = a[bigbyte*512: (bigbyte+1)*512]
    chunk = []
    for tzt in range(16):
        chunk.append(prechunk[tzt*32: (tzt+1)*32])

    tmpA = wordA
    tmpB = wordB
    tmpC = wordC
    tmpD = wordD

    # Main Loop
    for round in range(64):
        print('round', round)
        if round >= 0 and round <= 15:
            cycler = F(tmpB, tmpC, tmpD)
            sp_round = round
        elif round >= 16 and round <= 31:
            cycler = G(tmpB, tmpC, tmpD)
            sp_round = (5*round + 1) % 16
        elif round >= 32 and round <= 47:
            cycler = H(tmpB, tmpC, tmpD)
            sp_round = (3*round + 5) % 16
        elif round >= 48 and round <= 63:
            cycler = I(tmpB, tmpC, tmpD)
            sp_round = (7*round) % 16
        topad = [cycler.length(), tmpA.length()]
        cycler = bitarray(remover(format(struct.unpack("<L", cycler.tobytes())[0] + struct.unpack("<L", tmpA.tobytes())[0] + K[round] + struct.unpack("<L", chunk[sp_round].tobytes())[0], '0'+str(max(topad))+'b')))
        tmpA = tmpD
        tmpD = tmpC
        tmpC = tmpB
        current = struct.unpack("<L", cycler.tobytes())[0]
        toadd = (current << s[round]) | (current >> ((len(bin(current)[2:])*8) - s[round]))
        tmpB = bitarray(remover(format(struct.unpack("<L", tmpB.tobytes())[0] + toadd, '0'+str(max([tmpB.length(), cycler.length()]))+'b')))

    topad = [wordA.length(), tmpA.length()]
    wordA = bitarray(remover(format(struct.unpack("<L", wordA.tobytes())[0] + struct.unpack("<L", tmpA.tobytes())[0], '0'+str(max(topad))+'b')))
    topad = [wordB.length(), tmpB.length()]
    wordB = bitarray(remover(format(struct.unpack("<L", wordB.tobytes())[0] + struct.unpack("<L", tmpB.tobytes())[0], '0'+str(max(topad))+'b')))
    topad = [wordC.length(), tmpC.length()]
    wordC = bitarray(remover(format(struct.unpack("<L", wordC.tobytes())[0] + struct.unpack("<L", tmpC.tobytes())[0], '0'+str(max(topad))+'b')))
    topad = [wordD.length(), tmpD.length()]
    wordD = bitarray(remover(format(struct.unpack("<L", wordD.tobytes())[0] + struct.unpack("<L", tmpD.tobytes())[0], '0'+str(max(topad))+'b')))

final = wordA+wordB+wordC+wordD
print(final.tobytes().hex())

Python MD5程序产生不正确的结果

0 个答案: