F#用牛津逗号连接字符串

时间:2019-04-06 11:36:01

标签: .net string collections f#

我想使用牛津(或串行)逗号将字符串集合合并为单个字符串。

给予

a

我想要

// Sample input: the four words used throughout as the running example.
let ss =
    [ "a"
      "b"
      "c"
      "d" ]

这就是我的想法。

"a, b, c, and d"

如何改善?

---编辑---
关于多次迭代序列的评论很重要。以下两种实现都避免了转换为数组。

如果我使用 seq,我会很喜欢:

let oxford (strings: seq<string>) =
  let ss = Seq.toArray strings
  match ss.Length with
  | 0 -> ""
  | 1 -> ss.[0]
  | 2 -> sprintf "%s and %s" ss.[0] ss.[1]
  | _ ->
    let allButLast = ss.[0 .. ss.Length - 2]
    let commaSeparated = System.String.Join(", ", allButLast)
    sprintf "%s, and %s" commaSeparated (Seq.last ss)

如果我使用 array,我还可以利用索引来避免 Last() 的迭代。

open System.Linq

let oxfordSeq (ss: seq<string>) =
  match ss.Count() with
  | 0 -> ""
  | 1 -> ss.First()
  | 2 -> sprintf "%s and %s" (ss.ElementAt(0)) (ss.ElementAt(1))
  | _ ->
    let allButLast = ss.Take(ss.Count() - 1)
    let commaSeparated = System.String.Join(", ", allButLast)
    sprintf "%s, and %s" commaSeparated (ss.Last())

---编辑---
看到来自 @CaringDev 的链接后,我认为下面这个写法很好:没有通配符分支,能处理 null,需要小心写对的索引操作更少,并且只在 Join() 方法中遍历数组一次。

/// Joins the words in `ss` into one string using an Oxford (serial) comma.
///
/// - no words   -> ""
/// - one word   -> that word
/// - two words  -> "a and b"
/// - three or more -> "a, b, and c"
///
/// Reading `ss.Length` on a null array raises NullReferenceException.
let oxfordArray (ss: string[]) =
  match ss.Length with
  | 0 -> ""
  | 1 -> ss.[0]
  | 2 -> sprintf "%s and %s" ss.[0] ss.[1]
  | _ ->
    // Everything except the final word is comma-separated; the final word is
    // then attached with ", and ".
    let allButLast = ss.[0 .. ss.Length - 2]
    let commaSeparated = System.String.Join(", ", allButLast)
    sprintf "%s, and %s" commaSeparated ss.[ss.Length - 1]

这一次也很不错,跳跃的次数更少:

/// Joins an array of words with an Oxford (serial) comma.
/// A null or empty array gives ""; one word is returned unchanged; two words
/// are joined with " and "; longer arrays are comma-separated with ", and "
/// before the final word.
let oxford = function
    | null | [||] -> ""
    | [| only |] -> only
    | [| first; second |] -> sprintf "%s and %s" first second
    | words ->
        let lastIndex = words.Length - 1
        // A segment views the leading words in place instead of copying them.
        let leading = System.ArraySegment(words, 0, lastIndex)
        let builder = System.Text.StringBuilder()
        builder
            .Append(System.String.Join(", ", leading))
            .Append(", and ")
            .Append(words.[lastIndex])
        |> ignore
        builder.ToString()

5 个答案:

答案 0 :(得分:5)

您可以直接查看列表,并在列表上使用模式匹配。也许可以对此进行改进,但是可以给出想法。

/// Joins `ss` with an Oxford (serial) comma, accumulating the result in `s`.
///
/// `s` is the text built so far; pass "" on the initial call.
/// `ss` is the list of words still to be appended.
///
/// With an empty accumulator: [] -> "", [a] -> "a", [a; b] -> "a and b",
/// and three or more words -> "a, b, and c".
let rec oxford (s:string) (ss:string list) =
    match ss with
    | [] -> s
    // Exactly two words and nothing accumulated yet: there is no earlier text
    // to separate from, so no comma is emitted at all.
    | [x;y] when String.length s = 0 -> sprintf "%s and %s" x y
    // The last two words of a longer list close with ", and ".
    | [x;y] -> sprintf "%s, %s, and %s" s x y
    // First word of the list: seed the accumulator without a leading ", ".
    | h::t when String.length s = 0 -> oxford h t
    | h::t -> oxford (sprintf "%s, %s" s h) t

它用更短的列表递归调用自身,并用逗号连接各项。当列表只剩 2 个元素时,改用 ", and" 收尾。那个 when 守卫不太理想,但我发现第一次调用时传入的是空字符串,需要它来避免结果以 ", " 开头。

编辑

因此,就我个人而言,我希望上面的选项适用于少量单词。但是,每次调用的字符串concat对于较大的数字而言效果都不理想。

// Collect the words together with their ", " / "and " separators into one flat
// list, then concatenate that list into the final string.
/// Joins `ss` with an Oxford (serial) comma.
/// [] -> "", [a] -> "a", [a; b] -> "a and b", three or more -> "a, b, and c".
let oxfordDrct (ss:string list) =
    match ss with
    | [] -> ""
    | [x] -> x
    // Two words take a plain " and " with no comma.
    | [x;y] -> sprintf "%s and %s" x y
    | _ ->
        // The length is only needed here, so short lists skip the extra scan.
        let lastIndex = List.length ss - 1
        ss
        // Every word but the last is followed by ", "; the last is preceded by
        // "and ", which together with the previous ", " forms ", and ".
        |> List.mapi (fun i s -> if i < lastIndex then [s; ", "] else ["and "; s])
        |> List.concat
        |> String.concat ""

// Recursive like the original, but threads a StringBuilder through the
// recursion instead of building a new string on every step.
/// Joins `xs` with an Oxford (serial) comma.
/// [] -> "", [a] -> "a", [a; b] -> "a and b", three or more -> "a, b, and c".
let oxfordSb (xs:string list) =
    // Appends the words that follow the first one, each preceded by its
    // separator: ", " in general and ", and " before the final word.
    let rec collect (s:System.Text.StringBuilder) (ss:string list) =
        match ss with
        | [] -> s
        | [last] -> s.Append(", and ").Append(last)
        | h::t -> collect (s.Append(", ").Append(h)) t
    match xs with
    | [] -> ""
    // Two words take a plain " and " with no comma.
    | [x;y] -> sprintf "%s and %s" x y
    // The first word is written before recursing, so "is this the first word?"
    // never has to be guessed from the builder's length (which is also 0 when
    // the first word is an empty string).
    | first::rest ->
        collect (System.Text.StringBuilder().Append(first)) rest |> string

这 2 个选项的性能与问题中的原始实现非常接近,并且所有这些选项都比上面递归拼接字符串(rec string)的版本更好。

答案 1 :(得分:3)

foldBack也有可能。通过这种方式连接字符串的性能并不出色,但是对于4个项目通常并不重要。

/// Joins `ws` into one string by folding from the last word back to the first.
///
/// The fold state is (text so far, separator to use next, separator after that).
/// The last word is added with "" after it, the word before it with ", and ",
/// and every earlier word with ", ". As a consequence a two-word input yields
/// "a, and b".
let oxfordify (ws : seq<string>) : string =
  // Puts `word` in front of the accumulated text using the current separator,
  // then promotes the pending separator and queues ", " behind it.
  let prepend (word : string) (joined, sep, pendingSep) =
    (word + sep + joined, pendingSep, ", ")
  match Seq.foldBack prepend ws ("", "", ", and ") with
  | (result, _, _) -> result

同事向我指出了使用无限序列表示分隔符的方向:

/// Infinite sequence of separators, listed from the end of the sentence
/// backwards: "" (after the last word), ", and " (between the last two words),
/// then ", " forever.
let separators =
  Seq.append [ ""; ", and " ] (Seq.initInfinite (fun _ -> ", "))

/// Joins `ws` by pairing the reversed words with `separators` and prepending
/// each word, followed by its separator, to the text built so far.
/// Pairing stops when the words run out, so only as many separators as there
/// are words get consumed.
let oxfordify (ws : seq<string>) : string =
  Seq.zip (Seq.rev ws) separators
  |> Seq.fold (fun acc (word, sep) -> word + sep + acc) ""

要获得更高性能的选择,您可以考虑以下内容:

module Details =
  module Loops =
    /// Appends `w` to `sb` and discards the builder that `Append` returns.
    let inline app (sb : System.Text.StringBuilder) (w : string) : unit =
      ignore (sb.Append w)

    /// Appends `ws.[i]` and every later word to `sb`, then returns the
    /// builder's contents. Index 0 gets no separator, the final index gets
    /// ", and ", and every other index gets ", ".
    let rec oxfordify (sb : System.Text.StringBuilder) (ws : _ array) i : string =
      if i >= ws.Length then
        // Past the end: everything has been written.
        sb.ToString ()
      else
        if i <> 0 then
          app sb (if i = ws.Length - 1 then ", and " else ", ")
        app sb ws.[i]
        oxfordify sb ws (i + 1)
open Details

/// Joins `ws` into one string by handing a fresh StringBuilder to
/// `Loops.oxfordify`, starting at index 0.
let oxfordify (ws : string array) : string =
  Loops.oxfordify (System.Text.StringBuilder ()) ws 0

答案 2 :(得分:2)

我的方法与您的差别不大。

/// Joins the words in `ss` with an Oxford (serial) comma:
/// no words -> "", one word -> that word, two words -> "a and b",
/// three or more -> "a, b, and c".
let oxford (ss: string array) =
    let count = ss.Length
    if count = 0 then ""
    elif count = 1 then ss.[0]
    elif count = 2 then sprintf "%s and %s" ss.[0] ss.[1]
    else
        let lastIndex = count - 1
        // Join the first `lastIndex` words; the final word is attached below.
        let leading = System.String.Join(", ", ss, 0, lastIndex)
        sprintf "%s, and %s" leading ss.[lastIndex]

除了您会多次遍历序列(数组转换 + 字符串连接 + Seq.last)这一点之外,我在您的代码中确实看不出任何问题。

这不是性能问题,因为我不认为会有人对元素个数超过个位数的序列调用此函数;但万一该序列带有副作用或计算成本高昂,您就会得到奇怪的结果。这就是我把输入改为数组的原因。

就可读性而言,您现有的写法已经很难再改进了:基本情况都被显式列举出来;最后一行 sprintf 带来的额外字符串分配也无关紧要(尤其是与直接递归产生的分配相比)。

答案 3 :(得分:1)

一种我尚未见过的方式:匹配列表的末尾。

/// Joins a list of words with an Oxford (serial) comma by reversing the list
/// first, so that the final word can be matched at the head.
let ox =
    // Receives the words in reverse order (last word first).
    let joinReversed reversed =
        match reversed with
        | [] -> ""
        | [only] -> only
        | [last; first] -> first + " and " + last
        | last :: earlier ->
            // Prefix the final word with "and ", restore the original order,
            // then comma-separate everything.
            ("and " + last) :: earlier |> List.rev |> String.concat ", "
    List.rev >> joinReversed
// val ox : (string list -> string)

// Example call. The space before the list matters: since F# 6 the form
// `f[x]` is reserved for indexing, so an application must be written `f [x]`.
ox [ "a"; "b"; "c"; "d" ]
// val it : string = "a, b, c, and d"

答案 4 :(得分:0)

另一个递归变体:

/// Joins a list of words with an Oxford (serial) comma.
/// [] -> "", [a] -> "a", [a; b] -> "a and b", three or more -> "a, b, and c".
/// Not tail-recursive: each word before the last three adds a stack frame.
let rec oxford l =
    match l with
    | [] -> ""
    | [x] -> x
    | [x; y] -> x + " and " + y
    // Longer lists must end in ", and ". Stopping the recursion at three words
    // keeps it from reaching the comma-less two-word case above.
    | [x; y; z] -> x + ", " + y + ", and " + z
    | head :: tail ->
        head + ", " + oxford tail