在F#中切片/分组一系列相等的字符

时间:2016-01-28 00:45:01

标签: f# functional-programming

我需要在文本中提取相等字符的序列。

例如: 字符串"aaaBbbcccccccDaBBBzcc11211"应该转换为字符串列表 ["aaa";"B";"bb";"ccccccc";"D";"a";"BBB";"z";"cc";"11";"2";"11"]

到目前为止,这是我的解决方案:

/// Splits `text` into its maximal runs of identical consecutive characters,
/// in order of appearance, e.g. "aaBccc" -> seq ["aa"; "B"; "ccc"].
/// Returns a lazy `seq<string>`; an empty input produces an empty sequence.
let groupSequences (text:string) = 

    // Builds a string from a list of characters.
    let toString chars =
        System.String(chars |> Array.ofList)

    // `acc` holds the characters of the run currently being collected
    // (all equal, so their order is irrelevant); `chars` is the remaining input.
    let rec groupSequencesRecursive acc chars = seq {
        match acc, chars with
        | [], [] ->
            // Nothing collected and nothing left: emit no group at all
            // (previously an empty input produced a single "" group).
            ()
        | _, [] ->
            // Input exhausted: flush the last run.
            yield toString acc
        | [], c :: rest ->
            // First character starts the first run.
            yield! groupSequencesRecursive [c] rest
        | current :: _, c :: rest when current = c ->
            // Same character: extend the current run.
            yield! groupSequencesRecursive (c :: acc) rest
        | _, c :: rest ->
            // Different character: flush the finished run and start a new one.
            yield toString acc
            yield! groupSequencesRecursive [c] rest
    }

    text
    |> List.ofSeq
    |> groupSequencesRecursive []

// Print every run of the sample string on its own line.
for run in groupSequences "aaaBbbcccccccDaBBBzcc11211" do
    printfn "%s" run

我是F#新手。

这个解决方案可以更好吗?

6 个答案:

答案 0 :(得分:10)

这是一个完全通用实现:

/// Groups consecutive equal elements of `xs` into sub-lists, preserving order.
/// Works for any finite sequence whose element type supports equality.
let group xs =
    // Folding from the right: `x` either joins the run at the head of the
    // accumulator (when it equals that run's first element) or opens a new run.
    let prepend x acc =
        match acc with
        | (y :: _ as run) :: others when y = x -> (x :: run) :: others
        | _ -> [x] :: acc
    Seq.foldBack prepend xs []

此函数具有类型seq<'a> -> 'a list list when 'a : equality,因此不仅适用于字符串,而且适用于任何(有限)元素序列,只要元素类型支持相等比较。

将其用于OP中的输入字符串时,返回值的形状与预期并不完全一致:

> group "aaaBbbcccccccDaBBBzcc11211";;
val it : char list list =
  [['a'; 'a'; 'a']; ['B']; ['b'; 'b']; ['c'; 'c'; 'c'; 'c'; 'c'; 'c'; 'c'];
   ['D']; ['a']; ['B'; 'B'; 'B']; ['z']; ['c'; 'c']; ['1'; '1']; ['2'];
   ['1'; '1']]

返回值是char list list,而不是string list。您可以使用map轻松将其转换为字符串列表:

> group "aaaBbbcccccccDaBBBzcc11211" |> List.map (List.toArray >> System.String);;
val it : System.String list =
  ["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]

这利用了String的以char[]作为输入的构造函数重载。

如最初所述,此实现是通用的,因此也可以与其他类型的列表一起使用;例如整数:

> group [1;1;2;2;2;3;4;4;3;3;3;0];;
val it : int list list = [[1; 1]; [2; 2; 2]; [3]; [4; 4]; [3; 3; 3]; [0]]

答案 1 :(得分:2)

groupby怎么样

// Seq.groupBy collects ALL occurrences of each distinct character into one
// group (not only consecutive ones); each group is then turned into a string.
"aaaBbbcccccccD"
|> Seq.groupBy id
|> Seq.map (fun (_, chars) -> System.String(Seq.toArray chars))

如果输入中字符的顺序很重要,下面是一个可行的方法:

// Walk the characters from last to first, building the list of runs front-first
// so that no final reversal is needed.
// The earlier pairwise-based fold was seeded with "" and only ever added the
// first character of each pair, so the last character of the input was lost
// (e.g. "ab" produced ["a"; ""]). Folding over the characters themselves
// handles the last character, single-character input and empty input.
"aaaBbbcccccccDaBBBzcc11211"
|> Seq.toArray
|> Array.rev
|> Array.fold (fun runs c ->
    match runs with
    // Every stored run is non-empty, so indexing its first character is safe.
    | (run: string) :: rest when run.[0] = c -> (string c + run) :: rest
    | _ -> string c :: runs) []

答案 2 :(得分:0)

这个也是基于递归的,不过模式匹配所需的检查次数更少。

/// Splits `txt` into its maximal runs of identical consecutive characters,
/// returned in order as a `string list`. An empty string yields `[]`
/// (the previous version failed on it by taking the head of an empty list).
let chop (txt:string) =
    // `current`/`count` describe the run being collected; `res` holds the
    // finished runs in reverse order so each step is a constant-time cons.
    let rec chopInner txtArr (current: char) (count: int) (res: string list) =
        match txtArr with
        | h :: t when h = current -> chopInner t current (count + 1) res
        | h :: t -> chopInner t h 1 (System.String(current, count) :: res)
        | [] -> List.rev (System.String(current, count) :: res)

    match txt.ToCharArray() |> Array.toList with
    | [] -> []
    | first :: rest -> chopInner rest first 1 []

结果如预期:

val text : string = "aaaBbbcccccccDaBBBzcc11211"
> chop text;;
val it : string list =
  ["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]

答案 3 :(得分:0)

使用fold时,需要同时携带前一个值和保存临时结果的累加器。前一个值被包装为option,以便处理第一次迭代(此时还没有前一个值)。之后,取出最终结果并将其反转。

// Fold over one-character strings. The state pairs the previously seen
// character (None before the first one) with the runs built so far,
// most recent run first.
"aaaBbbcccccccDaBBBzcc11211"
|> Seq.map string
|> Seq.fold (fun (previous, runs) current ->
    let updated =
        match previous, runs with
        // Same character as before: extend the most recent run.
        | Some p, run :: rest when current = p -> run + current :: rest
        // First character, or a different one: start a new run.
        | _ -> current :: runs
    Some current, updated)
    (None, [])
|> snd 
|> List.rev
// val it : string list =
//   ["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]

答案 4 :(得分:0)

有趣的是为什么每个人都在发布基于匹配的解决方案?为什么不进行简单的递归?

/// Returns the runs of identical consecutive characters of `s`, starting at
/// index `i`, as a list of substrings. `groups 0 s` processes the whole string.
let rec groups i (s:string) =
  if i = s.Length then []
  else
    // Advance `stop` to the first index whose character differs from s.[i]
    // (or to the end of the string).
    let mutable stop = i
    while stop <> s.Length && s.[i] = s.[stop] do
      stop <- stop + 1
    s.Substring(i, stop - i) :: groups stop s

"aaaBbbcccccccDaBBBzcc11211" |> groups 0
val it : string list = ["aaa"; "B"; "bb"; "ccccccc"; "D"; "a"; "BBB"; "z"; "cc"; "11"; "2"; "11"]

答案 5 :(得分:0)

和这里的其他人一样:

了解你的fold(折叠);-)

// Sample input: the example string from the question.
let someString = "aaaBbbcccccccDaBBBzcc11211"

/// Folder for grouping runs of equal elements.
/// `state` is (previous element, runs so far with the most recent run first);
/// `elem` is the next element. Returns the new state: `elem` paired with the
/// updated runs, where `elem` either extends the most recent run or starts a new one.
let addLists state elem = 
    let (p, ls) = state
    // `string` works for any element type without a type annotation,
    // unlike a method call on a parameter whose type is not yet known.
    let text = string elem
    elem, 
    match ls with
    // All characters within a run are equal, so prepending is equivalent to appending.
    | h :: t when p = elem -> (text + h) :: t
    // No run yet, or a different element: start a new run.
    | _ -> text :: ls

// Fold addLists over the sample string, drop the "previous element" half of
// the final state, and reverse the runs into input order.
List.rev (snd (Seq.fold addLists (char 0, []) someString))