比“经典”更易变的序列

时间:2012-07-11 15:05:38

标签: f#

对于笛卡儿生产,有一个足够好的函数 - 序列,其定义如下:

let rec sequence = function 
  | []      -> Seq.singleton [] 
  | (l::ls) -> seq { for x in l do for xs in sequence ls do yield (x::xs) } 

但看看结果:

  
    

序列[[1..2]; [1..10000]] |> Seq.skip 1000 ;;     val it:seq = seq [[1; 1001]; [1; 1002]; [1; 1003]; [1; 1004]; ...]

  

正如我们所看到的,产品的第一个“坐标”变化非常缓慢,它将在第二个列表结束时更改值。

我编写了自己的序列如下(以下评论):

/// Sum of all producted indeces = n
let rec hyper'plane'indices indexsum maxlengths =
    match maxlengths with 
    | [x]        -> if indexsum < x then [[indexsum]] else []
    | (i::is)   -> [for x in [0 .. min indexsum (i-1)] do for xs in hyper'plane'indices (indexsum-x) is do yield (x::xs)]
    | []        -> [[]]

let finite'sequence = function
    | [] -> Seq.singleton []
    | ns -> 
        let ars = [ for n in ns -> Seq.toArray n ]
        let length'list = List.map Array.length ars
        let nmax = List.max length'list
        seq { 
            for n in [0 .. nmax] do 
            for ixs in hyper'plane'indices n length'list do 
                yield (List.map2 (fun (a:'a[]) i -> a.[i]) ars ixs) 
        } 

关键思想是将(两个)列表视为(两个)正交维度,其中每个元素在列表中由其索引标记。因此,我们可以通过超平面枚举笛卡尔积的每个部分中的每个元素来枚举所有元素(在2D情况下,这是一条线)。换句话说,想象一下excel的工作表,其中第一列包含从[1; 1]到[1; 10000]的值,第二列包含从[2; 1]到[2; 10000]的值。并且具有数字1的“超平面”是连接单元A2和单元B1的线。对于我们的例子

  
    

hyper'plane'indices 0 [2; 10000] ;; val it:int list list = [[0; 0]]
    hyper'plane'indices 1 [2; 10000] ;; val it:int list list = [[0; 1]; [1; 0]]
    hyper'plane'indices 2 [2; 10000] ;; val it:int list list = [[0; 2]; [1; 1]]
    hyper'plane'indices 3 [2; 10000] ;; val it:int list list = [[0; 3]; [1; 2]]
    hyper'plane'indices 4 [2; 10000] ;; val it:int list list = [[0; 4]; [1; 3]]

  

好吧,如果我们从给定列表中产生的indeces和数组,我们现在可以将序列定义为{平面0中的所有元素;比平面1中的所有元素......等等}并获得比原始序列更多的易失性函数。

但是 finite'sequence 结果非常贪婪。而现在的问题。我怎么能改进它?

祝贺亚历山大。 (抱歉英语不好)

1 个答案:

答案 0 :(得分:2)

您能解释一下究竟是什么问题 - 时间或空间复杂性或性能?你有特定的基准吗?我不知道如何在这里改进时间复杂度,但我编辑了一些代码以删除中间列表,这可能对内存分配行为有所帮助。

不要这样做:

for n in [0 .. nmax] do

请改为:

for n in 0 .. nmax do

以下是代码:

let rec hyper'plane'indices indexsum maxlengths =
    match maxlengths with
    | [] -> Seq.singleton []
    | [x] -> if indexsum < x then Seq.singleton [indexsum] else Seq.empty
    | i :: is ->
        seq {
            for x in 0 .. min indexsum (i - 1) do
                for xs in hyper'plane'indices (indexsum - x) is do
                    yield x :: xs
        }

let finite'sequence xs =
    match xs with
    | [] -> Seq.singleton []
    | ns -> 
        let ars = [ for n in ns -> Seq.toArray n ]
        let length'list = List.map Array.length ars
        let nmax = List.max length'list
        seq {
            for n in 0 .. nmax do
                for ixs in hyper'plane'indices n length'list do
                    yield List.map2 Array.get ars ixs
        }

这样做会更好吗?顺便说一下美丽的问题。

更新:也许您更有兴趣混合序列,而不是维护算法中的确切公式。这是一个Haskell代码,它公平地混合了有限数量的可能无限序列,其中公平性意味着每个输入元素都有一个包含它的输出序列的有限前缀。您在评论中提到您有一个难以概括为N维的2D增量解决方案,而Haskell代码正是如此:

merge :: [a] -> [a] -> [a]
merge [] y          = y
merge x []          = x
merge (x:xs) (y:ys) = x : y : merge xs ys

prod :: (a -> b -> c) -> [a] -> [b] -> [c]
prod _ [] _ = []
prod _ _ [] = []
prod f (x:xs) (y:ys) = f x y : a `merge` b `merge` prod f xs ys where
  a = [f x y | x <- xs] 
  b = [f x y | y <- ys]

prodN :: [[a]] -> [[a]]
prodN []     = [[]]
prodN (x:xs) = prod (:) x (prodN xs)

我还没有将它移植到F# - 它需要一些思考,因为序列与头/尾很不匹配。

更新2:

对F#进行了相当机械的翻译。

type Node<'T> =
    | Nil
    | Cons of 'T * Stream<'T>

and Stream<'T> = Lazy<Node<'T>>

let ( !! ) (x: Lazy<'T>) = x.Value
let ( !^ ) x = Lazy.CreateFromValue(x)

let rec merge (xs: Stream<'T>) (ys: Stream<'T>) : Stream<'T> =
    lazy
    match !!xs, !!ys with
    | Nil, r | r, Nil -> r
    | Cons (x, xs), Cons (y, ys) -> Cons (x, !^ (Cons (y, merge xs ys)))

let rec map (f: 'T1 -> 'T2) (xs: Stream<'T1>) : Stream<'T2> =
    lazy
    match !!xs with
    | Nil -> Nil
    | Cons (x, xs) -> Cons (f x, map f xs)

let ( ++ ) = merge

let rec prod f xs ys =
    lazy
    match !!xs, !!ys with
    | Nil, _ | _, Nil -> Nil
    | Cons (x, xs), Cons (y, ys) ->
        let a = map (fun x -> f x y) xs
        let b = map (fun y -> f x y) ys
        Cons (f x y, a ++ b ++ prod f xs ys)

let ofSeq (s: seq<'T>) =
    lazy
    let e = s.GetEnumerator()
    let rec loop () =
        lazy
        if e.MoveNext()
            then Cons (e.Current, loop ())
            else e.Dispose(); Nil
    !! (loop ())

let toSeq stream =
    stream
    |> Seq.unfold (fun stream ->
        match !!stream with
        | Nil -> None
        | Cons (x, xs) -> Some (x, xs))

let empty<'T> : Stream<'T> = !^ Nil
let cons x xs = !^ (Cons (x, xs))
let singleton x = cons x empty

let rec prodN (xs: Stream<Stream<'T>>) : Stream<Stream<'T>> =
    match !!xs with
    | Nil -> singleton empty
    | Cons (x, xs) -> prod cons x (prodN xs)

let test () =
    ofSeq [
        ofSeq [1; 2; 3]
        ofSeq [4; 5; 6]
        ofSeq [7; 8; 9]
    ]
    |> prodN
    |> toSeq
    |> Seq.iter (fun xs ->
        toSeq xs
        |> Seq.map string
        |> String.concat ", "
        |> stdout.WriteLine)