我需要在文本中提取相等字符的序列。
例如:
字符串"aaaBbbcccccccDaBBBzcc11211"
应该转换为字符串列表
["aaa";"B";"bb";"ccccccc";"D";"a";"BBB";"z";"cc";"11";"2";"11"]
。
直到现在,这是我的解决方案:
let groupSequences (text:string) =
let toString chars =
System.String(chars |> Array.ofList)
let rec groupSequencesRecursive acc chars = seq {
match (acc, chars) with
| [], c :: rest ->
yield! groupSequencesRecursive [c] rest
| _, c :: rest when acc.[0] <> c ->
yield (toString acc)
yield! groupSequencesRecursive [c] rest
| _, c :: rest when acc.[0] = c ->
yield! groupSequencesRecursive (c :: acc) rest
| _, [] ->
yield (toString acc)
| _ ->
yield ""
}
text
|> List.ofSeq
|> groupSequencesRecursive []
groupSequences "aaaBbbcccccccDaBBBzcc11211"
|> Seq.iter (fun x -> printfn "%s" x)
|> ignore
我是F#新手。
这个解决方案可以更好吗?
答案 0 :(得分:10)
这是一个完全通用实现:
let group xs =
let folder x = function
| [] -> [[x]]
| (h::t)::ta when h = x -> (x::h::t)::ta
| acc -> [x]::acc
Seq.foldBack folder xs []
此函数具有类型seq<'a> -> 'a list list when 'a : equality
,因此不仅适用于字符串,而且适用于任何(有限)元素序列,只要元素类型支持相等比较。
与OP中的输入字符串一起使用时,返回值不是完全的预期形状:
> group "aaaBbbcccccccDaBBBzcc11211";;
val it : char list list =
[['a'; 'a'; 'a']; ['B']; ['b'; 'b']; ['c'; 'c'; 'c'; 'c'; 'c'; 'c'; 'c'];
['D']; ['a']; ['B'; 'B'; 'B']; ['z']; ['c'; 'c']; ['1'; '1']; ['2'];
['1'; '1']]
而不是string list
,返回值为char list list
。您可以使用map
:
> group "aaaBbbcccccccDaBBBzcc11211" |> List.map (List.toArray >> System.String);;
val it : System.String list =
["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]
这利用了以String
作为输入的char[]
构造函数重载。
如最初所述,此实现是通用的,因此也可以与其他类型的列表一起使用;例如整数:
> group [1;1;2;2;2;3;4;4;3;3;3;0];;
val it : int list list = [[1; 1]; [2; 2; 2]; [3]; [4; 4]; [3; 3; 3]; [0]]
答案 1 :(得分:2)
groupby怎么样
"aaaBbbcccccccD"
|> Seq.groupBy id
|> Seq.map (snd >> Seq.toArray)
|> Seq.map (fun t -> new string (t))
如果您输入订单事项,这是一个有效的方法
"aaaBbbcccccccDaBBBzcc11211"
|> Seq.pairwise
|> Seq.toArray
|> Array.rev
|> Array.fold (fun (accum::tail) (ca,cb) -> if ca=cb then System.String.Concat(accum,string ca)::tail else string(ca)::accum::tail) (""::[])
答案 2 :(得分:0)
这个也是基于递归,虽然匹配可以通过较少数量的检查来消除。
let chop (txt:string) =
let rec chopInner txtArr (word: char[]) (res: List<string>) =
match txtArr with
| h::t when word.[0] = h -> chopInner t (Array.append word [|h|]) res
| h::t when word.[0] <> h ->
let newWord = word |> (fun s -> System.String s)
chopInner t [|h|] (List.append res [newWord])
| [] ->
let newWord = word |> (fun s -> System.String s)
(List.append res [newWord])
let lst = txt.ToCharArray() |> Array.toList
chopInner lst.Tail [|lst.Head|] []
结果如预期:
val text : string = "aaaBbbcccccccDaBBBzcc11211"
> chop text;;
val it : string list =
["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]
答案 3 :(得分:0)
当你折叠时,你需要携带前一个值和持有临时结果的累加器。先前的值被包装为第一次迭代的帐户选项。之后,最终结果被提取并反转。
"aaaBbbcccccccDaBBBzcc11211"
|> Seq.map string
|> Seq.fold (fun state ca ->
Some ca,
match state with
| Some cb, x::xs when ca = cb -> x + ca::xs
| _, xss -> ca::xss )
(None, [])
|> snd
|> List.rev
// val it : string list =
// ["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]
答案 4 :(得分:0)
有趣的是为什么每个人都在发布基于匹配的解决方案?为什么不进行简单的递归?
let rec groups i (s:string) =
let rec next j = if j = s.Length || s.[i] <> s.[j] then j else next(j+1)
if i = s.Length then []
else let j = next i in s.Substring(i, j - i) :: (groups j s)
"aaaBbbcccccccDaBBBzcc11211" |> groups 0
val it : string list = ["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]
答案 5 :(得分:0)
作为其他人在这里:
了解你的弃牌; - )
let someString = "aaaBbbcccccccDaBBBzcc11211"
let addLists state elem =
let (p, ls) = state
elem,
match p = elem, ls with
| _, [] -> [ elem.ToString() ]
| true, h :: t -> (elem.ToString() + h) :: t
| false, h :: t -> elem.ToString() :: ls
someString
|> Seq.fold addLists ((char)0, [])
|> snd
|> List.rev