在我们的 F# 代码中,编译器自动生成的 GetHashCode 实现性能非常差,哈希冲突率也很高。这是 F# 的 GetHashCode 生成器本身存在问题,还是只是一个边缘情况?
open System
open System.Collections.Generic
/// Benchmarks one equality comparer against an array of keys.
///
/// Every element of `keys` is added to a `Dictionary` that uses comparer `e`
/// (pre-sized to the number of keys) while a stopwatch runs. Afterwards the
/// function counts how many keys yield a hash code that an earlier key already
/// produced, and prints `name`, the elapsed seconds and the fraction of keys
/// that collided.
///
/// `Dictionary.Add` raises on duplicate keys, so `keys` must be distinct
/// under `e`.
let check keys e name =
    let table = new Dictionary<_,_>(Array.length keys, e)
    let timer = System.Diagnostics.Stopwatch.StartNew()
    for key in keys do
        table.Add(key, 1.02)
    timer.Stop()
    // HashSet.Add returns false when the value is already present, i.e. when
    // this key's hash code has been produced by a previous key.
    let seenHashes = new HashSet<int>()
    let collisions =
        keys
        |> Array.filter (fun key -> not (seenHashes.Add(e.GetHashCode(key))))
        |> Array.length
    let collisionRate = double collisions / double keys.Length
    printfn "%s %f sec %f collisions" name timer.Elapsed.TotalSeconds collisionRate
/// A two-field value type used as the dictionary key in this benchmark.
/// It declares no equality or hashing members of its own.
[<Struct>]
type StructTuple<'T,'T2> =
    /// First component.
    val fst : 'T
    /// Second component.
    val snd : 'T2
    /// Builds a value from its two components.
    new (fst : 'T, snd : 'T2) = { fst = fst; snd = snd }
/// Endless sequence of pseudo-random keys.
///
/// A fresh, unseeded `Random` is created each time the sequence is enumerated.
/// The upper bound passed to `Random.Next` is exclusive, so the first component
/// lies in 0..3346861 and the second in 0..657. Duplicates can occur.
let bad_keys =
    seq {
        let rng = new Random()
        while true do
            // Draw the first component before the second, as the generator is stateful.
            let first = rng.Next(0, 3346862) |> uint32
            let second = rng.Next(0, 658) |> uint16
            yield StructTuple(first, second)
    }
/// Sequence of all keys in a regular grid, produced in order: for each second
/// component 0us..658us (outer loop), every first component 0u..3346862u
/// (inner loop), both bounds inclusive.
let good_keys =
    seq {
        for second in 0us .. 658us do
            for first in 0u .. 3346862u ->
                StructTuple(first, second)
    }
module CmpHelpers =
    /// Combines two hash codes as ((h1 <<< 5) + h1) ^^^ h2.
    /// In F# `+` binds tighter than `^^^`, so the addition happens before the
    /// XOR; the intermediate binding below just makes that grouping explicit.
    let inline combine (h1 : int) (h2 : int) =
        let spread = (h1 <<< 5) + h1
        spread ^^^ h2
/// Hand-written equality comparer for `StructTuple`.
///
/// Both fields are compared and hashed through `EqualityComparer<Object>.Default`
/// (so the field values are passed as `Object`), and the two field hashes are
/// merged with `CmpHelpers.combine`.
type StructTupleComparer<'T,'T2>() =
    let objComparer = EqualityComparer<Object>.Default
    interface IEqualityComparer<StructTuple<'T,'T2>> with
        /// True when both the first and the second components are equal.
        member __.Equals (a, b) =
            if objComparer.Equals(a.fst, b.fst) then objComparer.Equals(a.snd, b.snd)
            else false
        /// Hash of the first component combined with the hash of the second.
        member __.GetHashCode (x) =
            let firstHash = objComparer.GetHashCode(x.fst)
            let secondHash = objComparer.GetHashCode(x.snd)
            CmpHelpers.combine firstHash secondHash
/// Comparer that spells out, by hand, an equality/hash implementation built on
/// the F# core library's structural intrinsics. Judging by its name and the
/// "struct auto explicit" label it is given in `main`, it is meant to mirror
/// what the compiler generates for `StructTuple` (NOTE(review): confirm against
/// the actual generated code).
type AutoGeneratedStructTupleComparer<'T,'T2>() =
    let structural = LanguagePrimitives.GenericEqualityComparer
    interface IEqualityComparer<StructTuple<'T,'T2>> with
        /// Field-by-field equality via `GenericEqualityERIntrinsic`.
        member __.Equals (a : StructTuple<'T,'T2>, b : StructTuple<'T,'T2>) =
            let firstsEqual =
                LanguagePrimitives.HashCompare.GenericEqualityERIntrinsic<'T> a.fst b.fst
            // Short-circuit: the second field is only compared when the first matches.
            firstsEqual
            && LanguagePrimitives.HashCompare.GenericEqualityERIntrinsic<'T2> a.snd b.snd
        /// Folds the hash of `snd`, then the hash of `fst`, into an accumulator
        /// that starts at 0, using: acc' = C + (fieldHash + ((acc <<< 6) + (acc >>> 2))).
        member __.GetHashCode (x : StructTuple<'T,'T2>) =
            // 0x9E3779B9 read as a signed 32-bit integer.
            let mixConstant = -1640531527
            let seed = 0
            let sndHash =
                LanguagePrimitives.HashCompare.GenericHashWithComparerIntrinsic<'T2> structural x.snd
            let afterSnd = mixConstant + (sndHash + ((seed <<< 6) + (seed >>> 2)))
            let fstHash =
                LanguagePrimitives.HashCompare.GenericHashWithComparerIntrinsic<'T> structural x.fst
            mixConstant + (fstHash + ((afterSnd <<< 6) + (afterSnd >>> 2)))
/// Returns the distinct elements of `sq` as an array, de-duplicated by loading
/// them into a `HashSet` constructed without an explicit comparer.
let uniq (sq : seq<'a>) =
    let distinct = new HashSet<'a>(sq)
    Array.ofSeq distinct
/// Entry point: benchmarks the three comparers, first on the regular
/// ("good") keys and then on the random ("bad") keys, waits for a line on
/// standard input, and returns exit code 0.
[<EntryPoint>]
let main argv =
    let count = 15000000
    // Takes the first `count` keys from `source`, de-duplicates them, prints
    // the section title and runs every comparer over the same key array.
    let runSuite (title : string) (source : seq<StructTuple<uint32,uint16>>) =
        let keys = source |> Seq.take count |> uniq
        printfn "%s" title
        check keys (new StructTupleComparer<_,_>()) "struct custom"
        check keys HashIdentity.Structural "struct auto"
        check keys (new AutoGeneratedStructTupleComparer<_,_>()) "struct auto explicit"
    runSuite "good keys" good_keys
    runSuite "bad keys" bad_keys
    // Keep the console window open until the user presses Enter.
    Console.ReadLine() |> ignore
    0 // return an integer exit code
输出:
good keys
struct custom 1.506934 sec 0.000000 collisions
struct auto 4.832881 sec 0.776863 collisions
struct auto explicit 3.166931 sec 0.776863 collisions
bad keys
struct custom 3.631251 sec 0.061893 collisions
struct auto 10.340693 sec 0.777034 collisions
struct auto explicit 8.893612 sec 0.777034 collisions
答案 0 :(得分:3)
我并不精通编译器用来自动生成 Equals 和 GetHashCode 的整体算法,但在这个例子里,它生成的实现看起来确实不够理想。我一时也说不准:对于通用的自动生成实现来说这是否属于正常水平,或者是否存在能够可靠地自动生成接近最优实现的方法。
值得注意的是,如果只使用标准元组,自动生成的哈希和比较代码的冲突率和性能都与自定义实现相同。而使用最新的 F# 4.0 版本(这方面最近有一项显著的性能改进)时,自动生成的代码反而比自定义实现快得多。
我的测试结果:
// F# 3.1, struct tuples
good keys
custom 0.951254 sec 0.000000 collisions
auto 2.737166 sec 0.776863 collisions
bad keys
custom 2.923103 sec 0.061869 collisions
auto 7.706678 sec 0.777040 collisions
// F# 3.1, standard tuples
good keys
custom 0.995701 sec 0.000000 collisions
auto 0.965949 sec 0.000000 collisions
bad keys
custom 3.091821 sec 0.061869 collisions
auto 2.924721 sec 0.061869 collisions
// F# 4.0, standard tuples
good keys
custom 1.018672 sec 0.000000 collisions
auto 0.619066 sec 0.000000 collisions
bad keys
custom 3.082988 sec 0.061869 collisions
auto 1.829720 sec 0.061869 collisions
答案 1 :(得分:1)
已在 F# 的问题跟踪器中提交了该问题,并已被确认为 bug:https://github.com/fsharp/fsharp/issues/343