解决F#中的背包概率:性能

时间:2013-07-03 10:40:49

标签: performance optimization f# knapsack-problem

我发现了一篇文章:
Solving the 0-1 knapsack problem using continuation-passing style with memoization in F#

关于在F#中实现的背包问题。当我学习这门语言时,我发现这非常有趣,并试图对此进行一些研究。这是我制作的代码:

open System
open System.IO 
open System.Collections.Generic

let parseToTuple (line : string) =
    let parsedLine = line.Split(' ') |> Array.filter(not << String.IsNullOrWhiteSpace)         |> Array.map Int32.Parse
    (parsedLine.[0], parsedLine.[1])

let memoize f =
    let cache = Dictionary<_, _>()
    fun x ->
        if cache.ContainsKey(x)
            then cache.[x]
        else
            let res = f x
            cache.[x] <- res
            res

type Item =
    {
        Value : int
        Size  : int
    }  

type ContinuationBuilder() = 
    member b.Bind(x, f) = fun k -> x (fun x -> f x k)
    member b.Return x = fun k ->  k x
    member b.ReturnFrom x = x

let cont = ContinuationBuilder()

let set1 =
    [
        (4, 11)
        (8, 4)
        (10, 5)
        (15, 8)
        (4, 3)
    ]

let set2 =
    [
        (50, 341045); (1906, 4912); (41516, 99732); (23527, 56554); (559, 1818); (45136, 108372); (2625, 6750); (492, 1484)
        (1086, 3072); (5516, 13532); (4875, 12050); (7570, 18440); (4436, 10972); (620, 1940); (50897, 122094); (2129, 5558)
        (4265, 10630); (706, 2112); (2721, 6942); (16494, 39888); (29688, 71276); (3383, 8466); (2181, 5662); (96601, 231302)
        (1795, 4690); (7512, 18324); (1242, 3384); (2889, 7278); (2133, 5566); (103, 706); (4446, 10992); (11326, 27552)
        (3024, 7548); (217, 934); (13269, 32038); (281, 1062); (77174, 184848); (952, 2604); (15572, 37644); (566, 1832)
        (4103, 10306); (313, 1126); (14393, 34886); (1313, 3526); (348, 1196); (419, 1338); (246, 992); (445, 1390)
        (23552, 56804); (23552, 56804); (67, 634)
    ]

[<EntryPoint>] 
let main args =
    // prepare list of items from a file args.[0]
    let header, items = set1
                        |> function
                           | h::t -> h, t
                           | _    -> raise (Exception("Wrong data format"))

    let N, K = header
    printfn "N = %d, K = %d" N K
    let items = List.map (fun x -> {Value = fst x ; Size = snd x}) items |> Array.ofList

    let rec combinations =
        let innerSolver key =
            cont
                {
                    match key with
                    | (i, k) when i = 0 || k = 0        -> return 0
                    | (i, k) when items.[i-1].Size > k  -> return! combinations (i-1, k)
                    | (i, k)                            -> let item = items.[i-1]
                                                           let! v1 = combinations (i-1, k)
                                                           let! beforeItem = combinations (i-1, k-item.Size)
                                                           let v2 = beforeItem + item.Value
                                                           return max v1 v2
                }
        memoize innerSolver

    let res = combinations (N, K) id
    printfn "%d" res
    0

然而,这个实现的问题在于它的速度很慢(实际上我无法解决50个项目和容量大约300000的问题,这可以通过我在C#中的天真实现在不到1秒内解决)。 / p>

你能否告诉我,我是否在某个地方犯了错误?或者也许实现是正确的,这只是解决这个问题的低效方法。

2 个答案:

答案 0 :(得分:7)

当您天真地应用这样的通用记事本并使用延续传递时,记忆缓存中的值为延续,而不是常规的“最终”结果。因此,当你获得缓存命中时,你没有得到最终结果,你得到一些函数,它承诺在你调用它时计算结果。这个调用可能很昂贵,可能会调用各种其他延续,最终可能会再次点击memoization缓存等等。

有效地记忆延续传递函数,使得a)缓存完全生效和b)函数仍然是尾递归是非常困难的。阅读this讨论,并在您完全理解这一点后再回来。 ; - )

您链接的博客文章的作者正在使用更复杂,更不通用的记事本,该记事本专门针对该问题。不可否认,我还没有完全理解它(博客上的代码不完整/破碎,很难尝试),但我认为它的要点是它在缓存最终整数之前“强制”延续链结果

为了说明这一点,这里是您的代码的快速重构,它是完全独立的并且可以追溯相关信息:

open System
open System.Collections.Generic

let mutable cacheHits = 0
let mutable cacheMisses = 0

let memoize f =
    let cache = Dictionary<_, _>()
    fun x ->
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            printfn "Hit for %A - Result is %A" x v
            v
        | _ ->
            cacheMisses <- cacheMisses + 1
            printfn "Miss for %A" x
            let res = f x
            cache.[x] <- res
            res

type Item = { Value : int; Size  : int }  

type ContinuationBuilder() = 
    member b.Bind(x, f) = fun k -> x (fun x -> f x k)
    member b.Return x = fun k ->  k x
    member b.ReturnFrom x = x

let cont = ContinuationBuilder()

let genItems n = 
   [| for i = 1 to n do
         let size = i % 5
         let value = (size * i)
         yield { Value = value; Size = size }
   |]

let N, K = (5, 100)
printfn "N = %d, K = %d" N K

let items = genItems N

let rec combinations_cont =
    memoize (
     fun key ->
       cont {
                match key with
                | (0, _) | (_, 0)                   -> return 0
                | (i, k) when items.[i-1].Size > k  -> return! combinations_cont (i - 1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let! v1 = combinations_cont (i-1, k)
                                                       let! beforeItem = combinations_cont (i-1, k - item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       return max v1 v2
        }
    )

let res = combinations_cont (N, K) id
printfn "Answer: %d" res
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

let rec combinations_plain =
    memoize (
     fun key ->
                match key with
                | (i, k) when i = 0 || k = 0        -> 0
                | (i, k) when items.[i-1].Size > k  -> combinations_plain (i-1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let v1 = combinations_plain (i-1, k)
                                                       let beforeItem = combinations_plain (i-1, k-item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       max v1 v2
    )

cacheHits <- 0
cacheMisses <- 0

let res2 = combinations_plain (N, K)
printfn "Answer: %d" res2
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses

正如您所看到的,CPS版本是缓存延续(不是整数),并且在调用延续时会有很多额外的活动进行到底。

如果将问题大小提升到let (N, K) = (20, 100)(并删除记事本中的printfn语句),您将看到CPS版本最终执行超过100万次缓存查找,与普通版本相比只做几百个。

答案 1 :(得分:6)

在FSI中运行此代码:

open System
open System.Diagnostics
open System.Collections.Generic

let time f =
    System.GC.Collect()
    let sw = Stopwatch.StartNew()
    let r = f()
    sw.Stop()
    printfn "Took: %f" sw.Elapsed.TotalMilliseconds
    r

let mutable cacheHits = 0
let mutable cacheMisses = 0

let memoize f =
    let cache = Dictionary<_, _>()
    fun x ->
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            //printfn "Hit for %A - Result is %A" x v
            v
        | _ ->
            cacheMisses <- cacheMisses + 1
            //printfn "Miss for %A" x
            let res = f x
            cache.[x] <- res
            res

type Item = { Value : int; Size  : int }  

type ContinuationBuilder() = 
    member b.Bind(x, f) = fun k -> x (fun x -> f x k)
    member b.Return x = fun k ->  k x
    member b.ReturnFrom x = x

let cont = ContinuationBuilder()

let genItems n = 
    [| for i = 1 to n do
            let size = i % 5
            let value = (size * i)
            yield { Value = value; Size = size }
    |]

let N, K = (80, 400)
printfn "N = %d, K = %d" N K

let items = genItems N

//let rec combinations_cont =
//    memoize (
//     fun key ->
//       cont {
//                match key with
//                | (0, _) | (_, 0)                   -> return 0
//                | (i, k) when items.[i-1].Size > k  -> return! combinations_cont (i - 1, k) 
//                | (i, k)                            -> let item = items.[i-1]
//                                                       let! v1 = combinations_cont (i-1, k)
//                                                       let! beforeItem = combinations_cont (i-1, k - item.Size)
//                                                       let v2 = beforeItem + item.Value
//                                                       return max v1 v2
//        }
//    )
//
//
//cacheHits <- 0
//cacheMisses <- 0

//let res = time(fun () -> combinations_cont (N, K) id)
//printfn "Answer: %d" res
//printfn "Memo hits: %d" cacheHits
//printfn "Memo misses: %d" cacheMisses
//printfn ""

let rec combinations_plain =
    memoize (
        fun key ->
                match key with
                | (i, k) when i = 0 || k = 0        -> 0
                | (i, k) when items.[i-1].Size > k  -> combinations_plain (i-1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let v1 = combinations_plain (i-1, k)
                                                       let beforeItem = combinations_plain (i-1, k-item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       max v1 v2
    )

cacheHits <- 0
cacheMisses <- 0

printfn "combinations_plain"
let res2 = time (fun () -> combinations_plain (N, K))
printfn "Answer: %d" res2
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

let recursivelyMemoize f =
    let cache = Dictionary<_, _>()
    let rec memoizeAux x =
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            //printfn "Hit for %A - Result is %A" x v
            v
        | _ ->
            cacheMisses <- cacheMisses + 1
            //printfn "Miss for %A" x
            let res = f memoizeAux x
            cache.[x] <- res
            res
    memoizeAux

let combinations_plain2 =
    let combinations_plain2Aux combinations_plain2Aux key =
                match key with
                | (i, k) when i = 0 || k = 0        -> 0
                | (i, k) when items.[i-1].Size > k  -> combinations_plain2Aux (i-1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let v1 = combinations_plain2Aux (i-1, k)
                                                       let beforeItem = combinations_plain2Aux (i-1, k-item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       max v1 v2
    let memoized = recursivelyMemoize combinations_plain2Aux
    fun x -> memoized x

cacheHits <- 0
cacheMisses <- 0

printfn "combinations_plain2"
let res3 = time (fun () -> combinations_plain2 (N, K))
printfn "Answer: %d" res3
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

let recursivelyMemoizeCont f =
    let cache = Dictionary HashIdentity.Structural
    let rec memoizeAux x k =
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            //printfn "Hit for %A - Result is %A" x v
            k v
        | _ ->
            cacheMisses <- cacheMisses + 1
            //printfn "Miss for %A" x
            f memoizeAux x (fun y ->
                cache.[x] <- y
                k y)
    memoizeAux

let combinations_cont2 =
    let combinations_cont2Aux combinations_cont2Aux key =
        cont {
                match key with
                | (0, _) | (_, 0)                   -> return 0
                | (i, k) when items.[i-1].Size > k  -> return! combinations_cont2Aux (i - 1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let! v1 = combinations_cont2Aux (i-1, k)
                                                       let! beforeItem = combinations_cont2Aux (i-1, k - item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       return max v1 v2
        }
    let memoized = recursivelyMemoizeCont combinations_cont2Aux
    fun x -> memoized x id

cacheHits <- 0
cacheMisses <- 0

printfn "combinations_cont2"
let res4 = time (fun () -> combinations_cont2 (N, K))
printfn "Answer: %d" res4
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

我得到了这些结果:

N = 80, K = 400
combinations_plain
Took: 7.191000
Answer: 6480
Memo hits: 6231
Memo misses: 6552

combinations_plain2
Took: 6.310800
Answer: 6480
Memo hits: 6231
Memo misses: 6552

combinations_cont2
Took: 17.021200
Answer: 6480
Memo hits: 6231
Memo misses: 6552
  • combinations_plain来自拉特金的回答。
  • combinations_plain2明确公开递归memoization步骤。
  • combinations_cont2将递归memoization函数调整为一个记忆延续结果的函数。
  • combinations_cont2通过在继续之前拦截结果,然后将其传递给实际的继续。对相同密钥的后续调用提供了一个延续,并且这个延续是我们最初截获的答案。

这表明我们能够:

  1. 使用延续传递样式进行记忆。
  2. 为vanilla memoized版本实现类似( ish )的性能特征。
  3. 我希望这可以解决一些问题。对不起,我的博客代码片段不完整(我想我最近重新格式化时可能会丢失它)。