我正在寻找散列大文件内容的解决方案(32位操作系统中的文件可能超过2GB)。那有什么简单的解决方案吗?或者只是按部分阅读并加载到缓冲区?
答案 0 :(得分:11)
Driis的解决方案听起来更灵活,但HashAlgorithm.ComputeHash
也会接受Stream
作为参数。
答案 1 :(得分:5)
使用TransformBlock
和TransformFinalBlock
逐块计算哈希值,因此您无需将整个文件读入内存。 (第一个链接中有一个很好的例子 - and another one in this previous question)。
答案 2 :(得分:1)
如果您选择使用TransformBlock
,则可以安全地忽略最后一个参数并将outputBuffer设置为null
。 TransformBlock将从输入复制到输出数组 - 但为什么你只想简单地复制位?
此外,所有mscorlib HashAlgorithms都可以正常工作,即块大小似乎不会影响哈希输出;以及是否在一个数组中传递数据然后通过更改inputOffset
或者通过传递较小的单独数组进行散列来散列数据并不重要。我使用以下代码验证了这一点:
(这只是稍长一点,所以人们可以自己验证HashAlgorithm
实施是否合理。
public static void Main() {
RandomNumberGenerator rnd = RandomNumberGenerator.Create();
byte[] input = new byte[20];
rnd.GetBytes(input);
Console.WriteLine("Input Data: " + BytesToStr(input));
var hashAlgoTypes = Assembly.GetAssembly(typeof(HashAlgorithm)).GetTypes()
.Where(t => typeof(HashAlgorithm).IsAssignableFrom(t) && !t.IsAbstract);
foreach (var hashType in hashAlgoTypes)
new AlgoTester(hashType).AssertOkFor(input.ToArray());
}
public static string BytesToStr(byte[] bytes) {
StringBuilder str = new StringBuilder();
for (int i = 0; i < bytes.Length; i++)
str.AppendFormat("{0:X2}", bytes[i]);
return str.ToString();
}
public class AlgoTester {
readonly byte[] key;
readonly Type type;
public AlgoTester(Type type) {
this.type=type;
if (typeof(KeyedHashAlgorithm).IsAssignableFrom(type))
using(var algo = (KeyedHashAlgorithm)Activator.CreateInstance(type))
key = algo.Key.ToArray();
}
public HashAlgorithm MakeAlgo() {
HashAlgorithm algo = (HashAlgorithm)Activator.CreateInstance(type);
if (key != null)
((KeyedHashAlgorithm)algo).Key = key;
return algo;
}
public byte[] GetHash(byte[] input) {
using(HashAlgorithm sha = MakeAlgo())
return sha.ComputeHash(input);
}
public byte[] GetHashOneBlock(byte[] input) {
using(HashAlgorithm sha = MakeAlgo()) {
sha.TransformFinalBlock(input, 0, input.Length);
return sha.Hash;
}
}
public byte[] GetHashMultiBlock(byte[] input, int size) {
using(HashAlgorithm sha = MakeAlgo()) {
int offset = 0;
while (input.Length - offset >= size)
offset += sha.TransformBlock(input, offset, size, input, offset);
sha.TransformFinalBlock(input, offset, input.Length - offset);
return sha.Hash;
}
}
public byte[] GetHashMultiBlockInChunks(byte[] input, int size) {
using(HashAlgorithm sha = MakeAlgo()) {
int offset = 0;
while (input.Length - offset >= size)
offset += sha.TransformBlock(input.Skip(offset).Take(size).ToArray()
, 0, size, null, -24124512);
sha.TransformFinalBlock(input.Skip(offset).ToArray(), 0
, input.Length - offset);
return sha.Hash;
}
}
public void AssertOkFor(byte[] data) {
var direct = GetHash(data);
var indirect = GetHashOneBlock(data);
var outcomes =
new[] { 1, 2, 3, 5, 10, 11, 19, 20, 21 }.SelectMany(i =>
new[]{
new{ Hash=GetHashMultiBlock(data,i), Name="ByMSDN"+i},
new{ Hash=GetHashMultiBlockInChunks(data,i), Name="InChunks"+i}
}).Concat(new[] { new { Hash = indirect, Name = "OneBlock" } })
.Where(result => !result.Hash.SequenceEqual(direct)).ToArray();
Console.Write("Testing: " + type);
if (outcomes.Any()) {
Console.WriteLine("not OK.");
Console.WriteLine(type.Name + " direct was: " + BytesToStr(direct));
} else Console.WriteLine(" OK.");
foreach (var outcome in outcomes)
Console.WriteLine(type.Name + " differs with: " + outcome.Name + " "
+ BytesToStr(outcome.Hash));
}
}