使用 F# 的 List 和 Seq 合并两个已排序的列表/序列。这些值是从辅助存储器上的两个文件中读取的,读取结果保存在两个序列中。为了便于测试,假设文件中存储的是整数。现在尝试用下面的代码合并这些整数,并按顺序打印出来:
/// Prints the merge of two ascending integer sequences on a single line,
/// each value followed by a space, then terminates the line.
/// A value that is at the head of both sequences at the same time is
/// printed once and consumed from both.
///
/// l1, l2: sequences that are expected to be sorted in ascending order.
///
/// Note: this version deliberately keeps the Seq.head / Seq.skip approach.
/// Every Seq.skip wraps the previous sequence, so reaching the current head
/// re-traverses everything skipped so far; the running time grows much
/// faster than linearly with the input length.
let rec printSortedSeq l1 l2 =
    match Seq.isEmpty l1, Seq.isEmpty l2 with
    | true, true ->
        printfn ""
    | true, false ->
        // Only the second sequence has elements left: drain it.
        printf "%d " (Seq.head l2)
        printSortedSeq l1 (Seq.skip 1 l2)
    | false, true ->
        // Only the first sequence has elements left: drain it.
        printf "%d " (Seq.head l1)
        printSortedSeq (Seq.skip 1 l1) Seq.empty
    | false, false ->
        // Read each head once instead of re-enumerating for every comparison.
        let h1 = Seq.head l1
        let h2 = Seq.head l2
        if h1 = h2 then
            printf "%d " h1
            printSortedSeq (Seq.skip 1 l1) (Seq.skip 1 l2)
        elif h1 < h2 then
            // Advance only the sequence whose head was printed; the other
            // head has not been emitted yet and must stay in place.
            printf "%d " h1
            printSortedSeq (Seq.skip 1 l1) l2
        else
            printf "%d " h2
            printSortedSeq l1 (Seq.skip 1 l2)
最初编写代码是为了合并两个排序列表:
/// Prints the merge of two ascending integer lists on a single line,
/// each value followed by a space, then terminates the line.
/// When both heads are equal the value is printed once and both lists
/// advance.
let rec printSortedList l1 l2 =
    match l1, l2 with
    | [], [] ->
        printfn ""
    | x :: xs, [] ->
        printf "%d " x
        printSortedList xs []
    | x :: xs, y :: ys when x = y ->
        printf "%d " x
        printSortedList xs ys
    | x :: xs, y :: _ when x < y ->
        printf "%d " x
        printSortedList xs l2
    | _, y :: ys ->
        // Either l1 is empty or its head is greater than y: emit y and
        // keep l1 untouched.
        printf "%d " y
        printSortedList l1 ys
两者的性能差距很大,列表明显更快。下面是在 FSI 中执行 #time;; 之后,用一些试验输入得到的计时结果。
// Sample inputs for the timing comparison: the same two ranges, once as
// lists and once as lazy sequences.
let x = [0..2..500]
let y = [1..2..100]
// Range expressions for sequences are written with an explicit `seq`
// builder; the bare `{ a..b }` form is deprecated.
let a = seq { 0..2..500 }
let b = seq { 1..2..100 }

// Timings reported by FSI after enabling #time.
printSortedList x y  // Real:00:00:00.012,CPU:00:00:00.015
printSortedSeq a b   // Real:00:00:00.504,CPU:00:00:00.515
问题是 - 有没有什么方法可以使用序列来加快速度?因为虽然列表要快得多,但由于提供输入的文件非常大(> 2 GB),因此它们不适合主内存,因此我将文件中的值作为延迟序列读取。在合并之前将它们转换为列表会使整个目的失效。
答案 0 :(得分:4)
Seq.skip 是一种反模式。请改用 F# PowerPack 中的 LazyList,或者使用枚举器(GetEnumerator ... MoveNext ... Current)来高效地遍历 Seq。可参见其他类似的问答(Q&A)。
答案 1 :(得分:4)
正如toyvo所提到的,使用有状态的枚举器可以大大简化:
/// Wraps an enumerator in a function that remembers the last value read.
/// Calling the result with `true` advances the enumerator and caches
/// `Some current` (or `None` once MoveNext reports no more elements);
/// calling it with `false` leaves the enumerator untouched.
/// In both cases the cached value is returned. Before the first advance
/// the cached value is `None`.
let mkStatefulEnum (e: IEnumerator<'T>) =
    // A ref cell is used because the returned closure must own the state.
    let cached = ref None
    fun move ->
        if move then
            cached.Value <- (if e.MoveNext() then Some e.Current else None)
        cached.Value
/// Lazily merges two sequences by repeatedly emitting the smaller of the
/// two current elements. Equal elements are both kept; on a tie the element
/// from `b` is emitted first. Each input is enumerated once per enumeration
/// of the result, and both enumerators are disposed when it finishes.
let merge (a: seq<'T>) (b: seq<'T>) =
    seq {
        use x = a.GetEnumerator()
        use y = b.GetEnumerator()
        let nextX = mkStatefulEnum x
        let nextY = mkStatefulEnum y
        // Emit a value taken from `a`: advance x, keep y's cached element.
        let takeLeft v = Some (v, (nextX true, nextY false))
        // Emit a value taken from `b`: keep x's cached element, advance y.
        let takeRight v = Some (v, (nextX false, nextY true))
        // The unfold state is the pair of current (cached) elements.
        let step current =
            match current with
            | None, None -> None
            | Some l, None -> takeLeft l
            | None, Some r -> takeRight r
            | Some l, Some r -> if l < r then takeLeft l else takeRight r
        yield! Seq.unfold step (nextX true, nextY true)
    }
答案 2 :(得分:3)
对于你的问题——F# 的序列操作是否本质上就比 List 慢——答案是否定的。你的序列代码由于反复重新遍历序列而以多项式时间运行,而列表代码则以线性时间运行。
顺便说明一下,完全可以在线性时间内合并两个已排序的序列。例如:
open System.Collections.Generic
/// Internal state of the merge performed by mergeSeqs.
type State<'T> =
    /// Nothing is being held; the next step advances both enumerators.
    | Neutral
    /// A value read from the first sequence is held back; the next step
    /// advances the second sequence.
    | Left of 'T
    /// A value read from the second sequence is held back; the next step
    /// advances the first sequence.
    | Right of 'T
    /// Entered after one input reported no more elements; remaining
    /// elements are emitted as they come.
    | Tail

/// Lazily merges two sequences by always emitting the smaller of the two
/// pending values, reading each input once per enumeration of the result.
/// Equal values are both kept; on a tie the value from `a` is emitted first.
/// Both enumerators are disposed when enumeration of the result finishes.
///
/// a, b: sequences expected to be sorted ascending under `compare`.
let mergeSeqs (a: seq<'T>) (b: seq<'T>) =
    // x comes from `a`, y from `b`. Emit the smaller one and hold the other.
    // `compare` only guarantees the SIGN of its result, so test `> 0`
    // rather than matching the literal 1.
    let cmp x y =
        if compare x y > 0 then Some (y, Left x)
        else Some (x, Right y)
    seq {
        use x = a.GetEnumerator()
        use y = b.GetEnumerator()
        let step st =
            match st with
            | Neutral ->
                match x.MoveNext(), y.MoveNext() with
                | true, true -> cmp x.Current y.Current
                | true, false -> Some (x.Current, Tail)
                | false, true -> Some (y.Current, Tail)
                | false, false -> None
            | Left v ->
                match y.MoveNext() with
                | true -> cmp v y.Current
                // `b` is exhausted: flush the held value, then let the
                // Neutral step pick up whatever `a` still has.
                | false -> Some (v, Neutral)
            | Right v ->
                match x.MoveNext() with
                | true -> cmp x.Current v
                // `a` is exhausted: flush the held value, then let the
                // Neutral step pick up whatever `b` still has.
                | false -> Some (v, Neutral)
            | Tail ->
                match x.MoveNext(), y.MoveNext() with
                | false, false -> None
                | true, _ -> Some (x.Current, Tail)
                | _, true -> Some (y.Current, Tail)
        yield! Seq.unfold step Neutral
    }
你还可以通过减少内存分配(consing)来进一步改进:设计一个带有类似 State<'T> 的可变状态的自定义 IEnumerator,并以它作为合并序列的基础。