我有这样一个函数:
/// Splits the lines of a file into groups separated by blank lines.
/// Takes the file content as a list of strings and returns a mutable
/// List<List<string>> holding one inner list per group.
let ProcessFile (allLines: string list) =
    let groups = new List<List<string>>()

    // Peels one group off the front of 'input', stores it, then recurses
    // on whatever follows the blank line(s) that terminate that group.
    let rec SplitFile (input: string list) =
        match input with
        | [] -> ()
        | _ ->
            // The leading run of non-empty lines is the next group. When
            // 'input' starts with an empty line this run is empty, and an
            // empty group is still recorded.
            let group = new List<string>(input.TakeWhile(fun line -> line <> ""))
            groups.Add(group)
            // Drop the lines just consumed, then the blank separator lines.
            let afterGroup = input.SkipWhile(fun line -> line <> "")
            let remaining = afterGroup.SkipWhile(fun line -> line = "")
            SplitFile (List.ofSeq remaining)

    SplitFile allLines
    groups
它接收以字符串列表形式给出的文件内容,把由空行分隔的每一组作为一个单独的列表,最终返回一个列表的列表。
我的问题是:有没有更好的实现方式,能让我得到类似 string list list 的结果,而不必使用 new List<List<string>>?因为现在的写法在我看来不够简洁。
答案 0 :(得分:5)
更惯用的解决方案可能是:
/// Lazily splits a list of lines into groups of consecutive non-empty
/// strings, using empty strings as separators. Each group is yielded as a
/// sequence taken from the front of the corresponding tail of the input.
let processFile xs =
    // Counts the non-empty strings at the front of the list. Returns the
    // count added to 'count', together with the tail that follows the first
    // empty string (or the empty list when the input runs out).
    let rec nonEmpties count = function
        | [] -> count, []
        | "" :: rest -> count, rest
        | _ :: rest -> nonEmpties (count + 1) rest

    // Skips empty strings and yields one group per run of non-empty strings.
    let rec loop lines =
        seq {
            match lines with
            | [] -> ()
            | "" :: rest -> yield! loop rest
            | _ ->
                let groupSize, remainder = nonEmpties 0 lines
                // The group is the first 'groupSize' elements of 'lines'.
                yield Seq.take groupSize lines
                yield! loop remainder
        }

    loop xs
其中嵌套的 nonEmpties
函数计算给定列表开头有多少个非空元素,并返回该计数以及最后一个非空元素之后的尾部列表;loop 函数则跳过空元素,并产生由非空元素组成的序列。
此解决方案的一些有趣特征:
完全尾递归,因此它可以处理任意长的非空字符串序列和非空字符串序列序列。
通过回头引用输入列表(而不是新建列表)来避免复制。
在以 1,000 个各含 1,000 个字符串的序列作为测试输入时,此解决方案比 yamen 的方案快 8 倍,比 Tomas 的方案快 50%。
这是一个更快的解决方案,首先将输入列表转换为数组,然后对数组索引起作用:
loop
在测试输入1,000个1000个字符串的序列时,此解决方案比yamen快34倍,比Tomas快6倍。
答案 1 :(得分:2)
您的代码在我看来非常易读,但递归地使用 TakeWhile
和 SkipWhile
效率相当低。下面是一个简单的函数式递归解决方案:
/// Splits 'allLines' into groups of consecutive non-empty lines, treating
/// empty strings as separators. Returns the groups in input order.
let ProcessFile (allLines: string list) =
    // 'groups' holds the finished groups, most recent first.
    // 'current' holds the lines of the group being built, in reverse order.
    let rec SplitFile input groups current =
        match input, current with
        // End of input with nothing pending: restore the order of the groups.
        | [], [] -> List.rev groups
        // End of input with a pending group: close it, then restore the order.
        | [], _ -> List.rev (List.rev current :: groups)
        // Empty line while no group is open: skip it.
        | "" :: rest, [] -> SplitFile rest groups []
        // Empty line closing an open group: store it and start afresh.
        | "" :: rest, _ -> SplitFile rest (List.rev current :: groups) []
        // Non-empty line: push it onto the group being built.
        | line :: rest, _ -> SplitFile rest groups (line :: current)

    SplitFile allLines [] []

ProcessFile ["a"; "b"; ""; ""; "c"; ""; "d"]
同样的代码也可以用 seq { ... }
来写,如下所示。我们仍然需要用累加器(current
)保存当前组的元素,但在遍历输入时,现在改用 yield
和 yield!
惰性地返回各个组:
/// Lazily splits 'allLines' into groups of consecutive non-empty lines,
/// treating empty strings as separators. Each group is yielded as a list
/// as soon as its terminating empty line (or the end of input) is reached.
let ProcessFile (allLines: string list) =
    // 'current' accumulates the lines of the open group in reverse order.
    let rec SplitFile input current =
        seq {
            match input, current with
            // End of input with nothing pending: nothing more to yield.
            | [], [] -> ()
            // End of input with a pending group: emit it.
            | [], _ -> yield List.rev current
            // Empty line while no group is open: skip it.
            | "" :: rest, [] -> yield! SplitFile rest []
            // Empty line closing an open group: emit it and start afresh.
            | "" :: rest, _ ->
                yield List.rev current
                yield! SplitFile rest []
            // Non-empty line: push it onto the open group.
            | line :: rest, _ -> yield! SplitFile rest (line :: current)
        }

    SplitFile allLines []
答案 2 :(得分:0)
就个人而言,我喜欢一行式(one-liner)的写法:
// Sample input: lines grouped by one or more empty strings.
let source = ["a"; "b"; ""; ""; "c"; ""; "d"]

// Pipeline steps, in order:
//   1. tag every line with a counter that goes up by one on each empty line
//      (the (0, "") seed is emitted by Seq.scan too and is dropped in step 2);
//   2. drop the empty lines;
//   3. group the remaining lines by their tag;
//   4. keep only the lines of each group, as a list;
//   5. collect the groups into a list.
source // any seq<string> can be piped in here
|> Seq.scan (fun (tag, _) line -> (if line = "" then tag + 1 else tag), line) (0, "")
|> Seq.filter (fun (_, line) -> line <> "")
|> Seq.groupBy fst
|> Seq.map (fun (_, tagged) -> tagged |> Seq.map snd |> List.ofSeq)
|> List.ofSeq
这里最大的问题是 Seq.groupBy
会将整个列表读入内存,不过您原本也是这样做的。有一些 groupBy
的实现只查看相邻的条目,这在这里就足够了,并且可以让您把文件作为 Seq
输入(例如,使用 File.ReadLines
而不是 File.ReadAllLines)。
答案 3 :(得分:0)
用普通的 List.fold 怎么样?
/// Splits 'lines' into groups of consecutive non-empty strings, treating
/// empty strings as separators, using a single List.fold.
///
/// Returns the groups in input order, each group in input order. Leading,
/// trailing and repeated empty lines never produce an empty group.
let processFile lines =
    // Fold state: the list of groups, most recent first. The head is the
    // group being built, in reverse order; an empty head means the previous
    // group was closed by an empty line and no new group has started yet.
    let step acc line =
        match acc, line with
        // Empty lines at the start of the file: nothing to close.
        | [], "" -> acc
        // First non-empty line: start the first group.
        | [], _ -> [ [ line ] ]
        // Consecutive empty lines: the previous group is already closed.
        | [] :: _, "" -> acc
        // Empty line after a group: put the group back in input order and
        // push an empty placeholder for the next one.
        | current :: finished, "" -> [] :: List.rev current :: finished
        // Non-empty line: add it to the group being built.
        | current :: finished, _ -> (line :: current) :: finished

    // The fold only reverses a group when an empty line closes it, so the
    // head still needs attention: drop a trailing empty placeholder, or put
    // the last (unclosed) group back in input order.
    let finish = function
        | [] -> []
        | [] :: finished -> finished
        | current :: finished -> List.rev current :: finished

    lines |> List.fold step [] |> finish |> List.rev