包裹的FileStream与文件创建了不同的哈希

时间:2020-01-16 10:17:10

标签: c# hash sha256

我需要读取一个大文件,删除一些空格字符,压缩内容并从中创建一个双SHA256哈希。为了避免将整个文件加载到内存中(这是不可能的,因为某些文件有数百MB)并避免性能瓶颈,我希望只读取一次整个文件。因此,我专门包装了FileStream并完成了工作。 DeflaterOutputStream来自SharpZipLib。

public class DeflateAndHashStream : Stream
{
    private readonly FileStream _input;
    private readonly MemoryStream _compressedFile;
    private readonly DeflaterOutputStream _deflate;

    public DeflateAndHashStream(FileStream input)
    {
        _input = input;
        _compressedFile = new MemoryStream();
        _deflate = new DeflaterOutputStream(_compressedFile);
    }

    public override void Flush()
    {
        throw new NotImplementedException();
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotImplementedException();
    }

    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        byte[] internalBuffer = new byte[buffer.Length];

        //read bufferbytes from the file
        int readedBytes = _input.Read(internalBuffer, 0, buffer.Length);

        //Are we done reading the file?
        if (readedBytes == 0) return 0;

        //remove whitespaces from internalBuffer
        //method takes bytearray and cut whitespace chars in place
        //returns the count of removed characters
        //so this line corrects the readed bytes
        readedBytes -= RemoveWhitespace(ref internalBuffer);

        //make the result available for stream chaining
        buffer = internalBuffer;
        //in parall compress the file into internal memorystream
        _deflate.Write(internalBuffer, 0, internalBuffer.Length);

        return readedBytes;
    }

    public byte[] GetCompressedData()
    {
        _deflate.Flush();
        _deflate.Finish();
        return _compressedFile.ToArray();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotImplementedException();
    }

    public override bool CanRead => _input.CanRead;
    public override bool CanSeek => _input.CanSeek;
    public override bool CanWrite => _input.CanWrite;
    public override long Length => _input.Length;
    public override long Position
    {
        get => _input.Position;
        set => _input.Position = value;
    }

    private int RemoveWhitespace(ref byte[] digest)
    {
        var output = new MemoryStream();
        int removedWhiteSpaces = 0;
        foreach (var actualByte in digest)
            switch (actualByte)
            {
                case 10:
                case 13:
                case 26:
                    // ignore this character
                    removedWhiteSpaces++;
                    break;

                default:
                    output.WriteByte(actualByte);
                    break;
            }

        digest = output.ToArray();
        return removedWhiteSpaces;
    }
}

我这样叫DeflateAndHashStream

public string[] CreateHashAndZipFile(string filePath)
    {
        if(!File.Exists(filePath)) throw new FileNotFoundException();

        string[] result = new string[2];

        using (FileStream fs = new FileStream(filePath, FileMode.Open))
        {
            using (DeflateAndHashStream defhash = new DeflateAndHashStream(fs))
            {
                using (SHA256 sha = new SHA256Managed())
                {
                    result[0] = Convert.ToBase64String(sha.ComputeHash(sha.ComputeHash(defhash)));
                    result[1] = Convert.ToBase64String(defhash.GetCompressedData());
                }
            }
        }

        return result;
    }

不幸的是,这会创建一个完全不同的散列,如下所示:

string test1;
using (FileStream fs = new FileStream("test.txt", FileMode.Open))
{
    using (SHA256Managed sha = new SHA256Managed())
    {
        test1 = Convert.ToBase64String(sha.ComputeHash(sha.ComputeHash(fs)));
    }
}

使用仅包含单个A(0x65)的文件test.txt

我在做什么错了?

0 个答案:

没有答案