FParsec - 解析器序列

时间:2013-07-21 01:52:38

标签: parsing f# fparsec

假设我有如下一些文本:

a = "foobarbaz"
b = "foobar"
c = "foo"
d = "rubbish"
e = "foobazbar"

以及三个解析器foo、bar和baz,分别用于解析字符串'foo'、'bar'和'baz'。

我如何创建一个能够给我结果的解析器:

a = ["foo", "bar", "baz"]
b = ["foo", "bar"]
c = ["foo"]
d = []
e = ["foo"]

对上面的输入运行时?基本思路是:在构建列表的同时依次尝试每个解析器,直到某一个失败为止。我可以使用用户状态,但我想尽可能避免(我希望各个解析器本身不需要知道用户状态的存在)。

我得到的最接近的是下面的fooseq:

// One primitive parser per literal token.
let foo = pstring "foo"
let bar = pstring "bar"
let baz = pstring "baz"

// Fixed-length combinations. The Seq.ofN helpers (defined elsewhere) just
// take N arguments and create a sequence of them.
let foobar = pipe2 foo bar Seq.of2
let foobarbaz = pipe3 foo bar baz Seq.of3

// Alternatives are listed longest first; each one is wrapped in `attempt`
// before being handed to `choice`.
let fooseq =
  [foobarbaz; foobar; foo |>> Seq.of1]
  |> Seq.map attempt
  |> choice

在我看来,应该有更好的方法来做到这一点吧?

1 个答案:

答案 0 :(得分:5)

FParsec没有能够完全满足您需求的内置序列组合器,但您可以自己实现一个,如下例所示:

/// Applies the given parsers one after another and collects their results
/// into an array, stopping at the first parser that does not return `Ok`.
///
/// - An empty `parsers` sequence yields a parser that returns `[||]`.
/// - The result array holds the results of the leading parsers that returned
///   `Ok`; it is `[||]` when the very first parser does not return `Ok`.
/// - If the last parser run returned `Error` and the stream's `StateTag` is
///   the same as before that parser ran, the combined parser reports `Ok`;
///   otherwise the status of the last parser run is passed through unchanged.
let mySeq (parsers: seq<Parser<'t,'u>>) : Parser<'t[],'u> =
  // Materialise the sequence once so the parsers can be counted and indexed.
  let ps = Array.ofSeq parsers
  if ps.Length = 0 then preturn [||]
  else
    fun stream ->   
      // Snapshot of the stream's StateTag taken before each parser runs; it is
      // compared afterwards to see whether that parser changed the tag.
      let mutable stateTag = stream.StateTag
      let mutable reply = ps.[0] stream
      let mutable error = reply.Error
      let mutable myReply = Reply()
      if reply.Status <> Ok then myReply.Result <- [||]
      else
        // One slot per parser; the array is trimmed below if a parser fails.
        let mutable xs = Array.zeroCreate ps.Length
        xs.[0] <- reply.Result
        let mutable i = 1
        while i < ps.Length do                    
          stateTag <- stream.StateTag
          reply <- ps.[i] stream
          // If the tag changed while this parser ran, keep only its own error
          // messages; otherwise merge them with those accumulated so far.
          error <- if stateTag <> stream.StateTag then reply.Error
                   else mergeErrors error reply.Error
          if reply.Status = Ok then
            xs.[i] <- reply.Result
            i <- i + 1
          else // keep only the i results gathered so far, then exit the loop
            xs <- Array.sub xs 0 i
            // Moving i to the end makes the `while` condition false.
            i <- ps.Length 
        myReply.Result <- xs
      // An `Error` with an unchanged StateTag just ends the sequence and is
      // reported as `Ok`; any other status of the last parser is propagated.
      myReply.Status <- if reply.Status = Error && stateTag = stream.StateTag 
                        then Ok
                        else reply.Status
      myReply.Error <- error            
      myReply

使用mySeq组合器,您可以将fooSeq解析器表达为

// The foo/bar/baz sequence parser, built by passing the three parsers to mySeq.
let fooSeq = [foo; bar; baz] |> mySeq