Question

我目前有一个非常基础的MD5哈希实现（目前我唯一能调用的是 MD5(const char *)）。但是，它只适用于小文件：在32位系统上，我最多只能处理4GB或更小的文件。况且，我究竟为什么要把接近1GB的内容全部加载到内存中呢？;）这就引出了我的问题……

如何对大文件计算哈希？我注意到，OpenSSL（我将来某个时候会用到）在把文件分块加载到内存时，会使用MD5的“更新”（update）函数。那么，“更新”一个MD5哈希时究竟发生了什么？网上有没有实现这一做法的伪代码或示例？

P.S. 我是加密领域的新手。如果我对回答提出后续追问，还请见谅。我喜欢在走捷径之前先尝试困难的做法——这是最好的学习方式！;）

我的MD5头文件

    /* Public declarations for a small, self-contained MD5 implementation. */
    #ifndef MD5_H
    #define MD5_H

    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    #include <math.h>
    #include <ctype.h>

    /* Number of hexadecimal characters in a printable MD5 digest
       (isValidMd5 requires exactly this many characters). */
    #define MAX_MD5_HASH_LENGTH 32


    /* Overlays an unsigned word with four bytes so the word can be read or
       written byte by byte; the byte order seen through b[] is the host's. */
    typedef union uwb {
        unsigned w;
        unsigned char b[4];
    } WBunion;

    /* Four unsigned words; md5() keeps its running state and result in one. */
    typedef unsigned Digest[4];

    /* Per-round mixing functions. Each takes the four-word working array and
       combines elements 1..3 with bitwise operations; element 0 is not read. */
    unsigned f0( unsigned abcd[] );

    unsigned f1( unsigned abcd[] );

    unsigned f2( unsigned abcd[] );

    unsigned f3( unsigned abcd[] );

    /* Pointer to one of the mixing functions above. */
    typedef unsigned (*DgstFctn)(unsigned a[]);

    /* Fills k[0..63] with the additive constants used by md5() and returns k. */
    unsigned *calcKs( unsigned *k);
    /* Rotates v left by amt bits. */
    unsigned rol( unsigned v, short amt );
    /* Hashes the mlen bytes starting at msg. The returned pointer refers to
       four words held in storage owned by md5(); the next call overwrites it. */
    unsigned *md5( const char *msg, int mlen);

    /* Formats the four words at rawMd5 as lowercase hexadecimal text. The
       returned string lives in a static buffer that the next call overwrites. */
    char* convertRawMd5HashToString(unsigned* rawMd5);
    /* Returns 1 when md5String is exactly MAX_MD5_HASH_LENGTH hexadecimal
       digits (either case), otherwise 0. */
    int isValidMd5(const char* md5String);

    #endif

源文件

#include "md5.h"

unsigned *calcKs( unsigned *k)
{
    double s, pwr;
    int i;

    pwr = pow( 2, MAX_MD5_HASH_LENGTH);
    for (i=0; i<64; i++) {
        s = fabs(sin(1+i));
        k[i] = (unsigned)( s * pwr );
    }
    return k;
}

// Rotate the low 32 bits of v left by amt bits.
//
// amt is reduced modulo 32, so a rotation by 0 (or 32) returns v unchanged
// instead of performing an out-of-range shift. The result is always confined
// to 32 bits.
unsigned rol( unsigned v, short amt )
{
    const unsigned shift = (unsigned)amt & 31u;

    v &= 0xFFFFFFFFu;
    if (shift == 0u) {
        // Shifting right by the full word width would be undefined.
        return v;
    }
    return ((v << shift) | (v >> (32u - shift))) & 0xFFFFFFFFu;
}

unsigned *md5( const char *message, int messageLength) 
{
    static const Digest h0 = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
    static const DgstFctn ff[] = { &f0, &f1, &f2, &f3 };
    static const short M[] = { 1, 5, 3, 7 };
    static const short O[] = { 0, 1, 5, 0 };
    static const short rot0[] = { 7,12,17,22};
    static const short rot1[] = { 5, 9,14,20};
    static const short rot2[] = { 4,11,16,23};
    static const short rot3[] = { 6,10,15,21};
    static const short *rots[] = {rot0, rot1, rot2, rot3 };
    static unsigned kspace[64];
    static unsigned *k;

    static Digest h;
    Digest abcd;
    DgstFctn fctn;
    short m, o, g;
    unsigned f;
    short *rotn;
    union {
        unsigned w[16];
        char     b[64];
    }mm;
    int os = 0;
    int grp, grps, q, p;
    unsigned char *msg2;

    if (k==NULL) k= calcKs(kspace);

    for (q=0; q<4; q++) h[q] = h0[q];   // initialize
    {
        grps  = 1 + (messageLength+8)/64;
        msg2 = malloc( 64*grps);
        memcpy( msg2, message, messageLength);
        msg2[messageLength] = (unsigned char)0x80;  
        q = messageLength + 1;
        while (q < 64*grps){ msg2[q] = 0; q++ ; }
        {
            WBunion u;
            u.w = 8*messageLength;
            q -= 8;
            memcpy(msg2+q, &u.w, 4 );
        }
    }

    for (grp=0; grp<grps; grp++)
    {
        memcpy( mm.b, msg2+os, 64);
        for(q=0;q<4;q++) abcd[q] = h[q];
        for (p = 0; p<4; p++) {
            fctn = ff[p];
            rotn = rots[p];
            m = M[p]; o= O[p];
            for (q=0; q<16; q++) {
                g = (m*q + o) % 16;
                f = abcd[1] + rol( abcd[0]+ fctn(abcd) + k[q+16*p] + mm.w[g], rotn[q%4]);

                abcd[0] = abcd[3];
                abcd[3] = abcd[2];
                abcd[2] = abcd[1];
                abcd[1] = f;
            }
        }
        for (p=0; p<4; p++)
            h[p] += abcd[p];
        os += 64;
    }

    if( msg2 )
        free( msg2 );

    return h;
}

/*
 * Formats a raw four-word MD5 digest as 32 lowercase hexadecimal characters.
 *
 * Each word is emitted least-significant byte first, which is the byte order
 * in which MD5 digests are conventionally printed.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is overwritten by
 * the next call, or NULL when rawMd5 is NULL.
 */
char* convertRawMd5HashToString(unsigned* rawMd5)
{
    enum { DIGEST_WORDS = 4, BYTES_PER_WORD = 4 };
    static const char hexDigits[] = "0123456789abcdef";
    /* Two hex characters per byte, plus one for the terminator. */
    static char outputBuffer[DIGEST_WORDS * BYTES_PER_WORD * 2 + 1];
    char *out = outputBuffer;
    int j, k;

    if (rawMd5 == NULL) {
        return NULL;
    }

    for (j = 0; j < DIGEST_WORDS; j++) {
        for (k = 0; k < BYTES_PER_WORD; k++) {
            /* Shifting instead of overlaying bytes keeps the output the same
               regardless of host byte order. */
            unsigned byte = (rawMd5[j] >> (8 * k)) & 0xFFu;

            *out++ = hexDigits[byte >> 4];
            *out++ = hexDigits[byte & 0x0Fu];
        }
    }
    *out = '\0';

    return outputBuffer;
}

/* Round-0 mixer: each result bit comes from abcd[2] where the matching bit of
   abcd[1] is set, and from abcd[3] where it is clear. */
unsigned f0( unsigned abcd[] )
{
    const unsigned sel = abcd[1];
    const unsigned whenSet = abcd[2];
    const unsigned whenClear = abcd[3];

    return (sel & whenSet) | (~sel & whenClear);
}

/* Round-1 mixer: each result bit comes from abcd[1] where the matching bit of
   abcd[3] is set, and from abcd[2] where it is clear. */
unsigned f1( unsigned abcd[] )
{
    const unsigned sel = abcd[3];
    const unsigned whenSet = abcd[1];
    const unsigned whenClear = abcd[2];

    return (sel & whenSet) | (~sel & whenClear);
}

/* Round-2 mixer: bitwise exclusive-or of abcd[1], abcd[2] and abcd[3]. */
unsigned f2( unsigned abcd[] )
{
    unsigned parity = abcd[1];

    parity ^= abcd[2];
    parity ^= abcd[3];
    return parity;
}

/* Round-3 mixer: abcd[2] exclusive-or'd with (abcd[1] OR the complement of
   abcd[3]). */
unsigned f3( unsigned abcd[] )
{
    const unsigned b = abcd[1];
    const unsigned c = abcd[2];
    const unsigned notD = ~abcd[3];

    return c ^ (b | notD);
}

/*
 * Checks whether md5String looks like a printable MD5 digest.
 *
 * Returns 1 when the string is exactly MAX_MD5_HASH_LENGTH characters long
 * and every character is a hexadecimal digit (upper or lower case);
 * returns 0 otherwise, including when md5String is NULL.
 */
int isValidMd5(const char* md5String)
{
    if (md5String == NULL || strlen(md5String) != MAX_MD5_HASH_LENGTH) {
        return 0;
    }

    for (int i = 0; i < MAX_MD5_HASH_LENGTH; ++i) {
        /* <ctype.h> classifiers require a value representable as unsigned
           char; a plain char may be negative for bytes above 0x7F. */
        if (!isxdigit((unsigned char)md5String[i])) {
            return 0;
        }
    }

    return 1;
}

我无法找到原作者，但如果有人知道是谁写了这个片段的大部分内容，请告诉我。谢谢。：）

Answer 1

填充是在第一个for循环之后立即完成的，你需要把它推迟到数据结束时再做。这样你就可以把任意多的数据送入第二个for循环处理，直到数据结尾，然后再添加填充。这样还可以修改代码，使其不再需要复制数据。把它拆分为init、update和finalize三个函数，这应该不难实现。

当然，更好的办法是使用已经把功能拆分为init、update、finalize的现成实现。有趣的是，Apple Common Crypto是开源的，并且用C语言编写，可以去看一看。MD5的代码位于CommonDigestPriv.h。

更新MD5哈希？

我的MD5标题

源文件

1 个答案: