为什么Seq.init比带'for'的序列表达式慢?

时间:2017-04-29 13:03:23

标签: f# lazy-sequences

以下代码显示,使用包含for的序列表达式生成序列的速度比使用Seq.init生成相同序列快约五倍。

open System
let rand count =
    let rnd = Random() // if this value is not created all numbers are equal
    seq {for i in 0..(count - 1) -> rnd.NextDouble()}


/// Perhaps more "functional" than rand but slower
let rand2 count =
    let rnd = Random()
    let rnd2 (i: int) = rnd.NextDouble()
    Seq.init count rnd2

> rand 1000000 |> List.ofSeq |> List.head;;
Real: 00:00:00.092, CPU: 00:00:00.093, GC gen0: 3, gen1: 2, gen2: 0
val it : float = 0.1358240168

> rand2 1000000 |> List.ofSeq |> List.head;;
Real: 00:00:00.473, CPU: 00:00:00.484, GC gen0: 21, gen1: 20, gen2: 1
val it : float = 0.4128856414

问题:

1)速度差异的原因是什么?

2)在某种意义上,Seq.init替代方案是否比序列表达替代方法“更具功能性”?

3)两种替代方案在线程安全方面是否相同?

1 个答案:

答案 0 :(得分:15)

  1. 速度差异的原因是什么?

    Seq.init速度很慢,因为它使用的Seq.upto很慢。 Seq.upto速度很慢,主要是因为它为管道中的每个对象创建了一个Lazy实例。这也解释了GC压力。

    在Fsharp.Core的当前状态下,如果您需要性能Seq,则不是正确的选择。

    虽然合并了manostick PR,但这会改变。

    此外,甚至

    seq {for i in 0..(count - 1) -> rnd.NextDouble()}
    
    nessos或manostick改进Seq等管道相比,

    速度较慢。

  2. Seq.init替代方案在某种意义上“更具功能性”,而不是序列表达方式吗?

    序列表达式又称序列理解与数学中的集合理解有关。 IMO对他们都有“功能”。

  3. 这两种方案在线程安全方面是否相同?

    是的,因为它既不提供线程安全性。

  4. PS。 SeqLINQ缓慢的另一个原因是它们依赖于拉动管道。推管道更快。 Nessos和manofstick管道支持AFAICT并尽可能选择推送。

    P.S。我写了一个不同流水线的快速性能比较。结果不是用于隔离实际管道性能的列表,而不是创建列表的成本。我还改变了内部和外部迭代的次数,以便检测创建管道的开销:

    // A simplistic push-pipeline
    type Receiver<'T> = 'T -> bool
    type Stream<'T>   = Receiver<'T> -> unit
    
    module Stream =
      let inline init count gen : Stream<_> =
        fun r ->
          let rec loop i =
            if i < count && r (gen i) then
              loop (i + 1)
          loop 0
    
      let inline sum (s : Stream<_>) : _ =
        let mutable a = LanguagePrimitives.GenericZero<_>
        s (fun v -> a <- a + v; true)
        a
    
    let rnd = System.Random ()
    let gen = fun _ -> rnd.NextDouble ()
    
    let clock =
      let sw = System.Diagnostics.Stopwatch ()
      sw.Start ()
      fun () -> sw.ElapsedMilliseconds
    
    open System
    
    let timeIt n a = 
      let r                 = a ()  // Warm-up
    
      GC.Collect (2, GCCollectionMode.Forced, true)
    
      let inline cc g       = GC.CollectionCount g
      let bcc0, bcc1, bcc2  = cc 0, cc 1, cc 2
      let before            = clock ()
    
      for i = 1 to n do
        a () |> ignore
    
      let after             = clock ()
      let acc0, acc1, acc2  = cc 0, cc 1, cc 2
    
      after - before, acc0 - bcc0, acc1 - bcc1, acc2 - bcc2, r
    
    open System.Linq
    
    [<EntryPoint>]
    let main argv =
      let count = 10000000
      let outers =
        [|
          1000000
          10000
          100  
          1
        |]
    
      for outer in outers do
        let inner = count / outer
        let testCases = 
          [|
            "Push stream"       , fun ()  -> Stream.init inner gen                  |> Stream.sum
            "LINQ"              , fun ()  -> (Enumerable.Range (0, inner)).Select(gen).Sum()
            "Seq.init"          , fun ()  -> Seq.init    inner gen                  |> Seq.sum
            "Seq comprehension" , fun ()  -> seq { for i in 0..inner - 1 -> gen i } |> Seq.sum
            "Tail-recursion"    , fun ()  -> 
              let rec loop a i =
                if i < inner then
                  loop (a + gen i) (i + 1)
                else
                  a
              loop 0. 0
          |]
        printfn "Using outer = %A, inner = %A, total is: %A" outer inner count
        for nm, a in testCases do
          printfn "  Running test case: %A" nm
          let tm, cc0, cc1, cc2, r = timeIt outer a
          printfn "   it took %A ms (%A, %A, %A), result is: %A" tm cc0 cc1 cc2 r
      0
    

    结果如下(.NET 4.6.2,x64,Release):

    Using outer = 1000000, inner = 10, total is: 10000000
      Running test case: "Push stream"
      it took 145L ms (22, 0, 0), result is: 5.348407591
      Running test case: "LINQ"
      it took 296L ms (63, 0, 0), result is: 5.056716735
      Running test case: "Seq.init"
      it took 1490L ms (724, 0, 0), result is: 3.977087705
      Running test case: "Seq comprehension"
      it took 333L ms (66, 0, 0), result is: 5.208401204
      Running test case: "Tail-recursion"
      it took 109L ms (0, 0, 0), result is: 5.898073628
    Using outer = 10000, inner = 1000, total is: 10000000
      Running test case: "Push stream"
      it took 118L ms (0, 0, 0), result is: 510.943297
      Running test case: "LINQ"
      it took 210L ms (0, 0, 0), result is: 488.3970571
      Running test case: "Seq.init"
      it took 1411L ms (661, 0, 0), result is: 505.2632877
      Running test case: "Seq comprehension"
      it took 264L ms (0, 0, 0), result is: 502.1710107
      Running test case: "Tail-recursion"
      it took 101L ms (0, 0, 0), result is: 487.9451813
    Using outer = 100, inner = 100000, total is: 10000000
      Running test case: "Push stream"
      it took 118L ms (0, 0, 0), result is: 49850.99306
      Running test case: "LINQ"
      it took 202L ms (0, 0, 0), result is: 50113.06557
      Running test case: "Seq.init"
      it took 1398L ms (661, 0, 0), result is: 49923.14521
      Running test case: "Seq comprehension"
      it took 262L ms (0, 0, 0), result is: 50196.00191
      Running test case: "Tail-recursion"
      it took 98L ms (0, 0, 0), result is: 49878.16573
    Using outer = 1, inner = 10000000, total is: 10000000
      Running test case: "Push stream"
      it took 117L ms (0, 0, 0), result is: 5000088.583
      Running test case: "LINQ"
      it took 201L ms (0, 0, 0), result is: 5000569.657
      Running test case: "Seq.init"
      it took 1388L ms (661, 0, 0), result is: 5000169.339
      Running test case: "Seq comprehension"
      it took 260L ms (0, 0, 0), result is: 5000263.083
      Running test case: "Tail-recursion"
      it took 97L ms (0, 0, 0), result is: 4999990.197
    Press any key to continue . . .
    

    所以Seq.init做得最差,“Tail-recursion”(本质上是一个循环)在CPU性能和内存使用方面都做得最好。

    实际上生成随机数本身需要一些时间,所以如果我改用id,数字看起来像这样:

    Using outer = 1000000, inner = 10, total is: 10000000
      Running test case: "Push stream"
      it took 47L ms (22, 0, 0), result is: 45.0
      Running test case: "LINQ"
      it took 211L ms (63, 0, 0), result is: 45.0
      Running test case: "Seq.init"
      it took 1364L ms (724, 0, 0), result is: 45.0
      Running test case: "Seq comprehension"
      it took 241L ms (66, 0, 0), result is: 45.0
      Running test case: "Tail-recursion"
      it took 10L ms (0, 0, 0), result is: 45.0
    Using outer = 10000, inner = 1000, total is: 10000000
      Running test case: "Push stream"
      it took 28L ms (0, 0, 0), result is: 499500.0
      Running test case: "LINQ"
      it took 125L ms (0, 0, 0), result is: 499500.0
      Running test case: "Seq.init"
      it took 1285L ms (661, 0, 0), result is: 499500.0
      Running test case: "Seq comprehension"
      it took 170L ms (0, 0, 0), result is: 499500.0
      Running test case: "Tail-recursion"
      it took 8L ms (0, 0, 0), result is: 499500.0
    Using outer = 100, inner = 100000, total is: 10000000
      Running test case: "Push stream"
      it took 27L ms (0, 0, 0), result is: 4999950000.0
      Running test case: "LINQ"
      it took 121L ms (0, 0, 0), result is: 4999950000.0
      Running test case: "Seq.init"
      it took 1289L ms (661, 0, 0), result is: 4999950000.0
      Running test case: "Seq comprehension"
      it took 169L ms (0, 0, 0), result is: 4999950000.0
      Running test case: "Tail-recursion"
      it took 9L ms (0, 0, 0), result is: 4999950000.0
    Using outer = 1, inner = 10000000, total is: 10000000
      Running test case: "Push stream"
      it took 28L ms (0, 0, 0), result is: 4.9999995e+13
      Running test case: "LINQ"
      it took 121L ms (0, 0, 0), result is: 4.9999995e+13
      Running test case: "Seq.init"
      it took 1289L ms (661, 0, 0), result is: 4.9999995e+13
      Running test case: "Seq comprehension"
      it took 169L ms (0, 0, 0), result is: 4.9999995e+13
      Running test case: "Tail-recursion"
      it took 8L ms (0, 0, 0), result is: 4.9999995e+13