任务是在一段文本中找到特定的键值对并解析它们。这些键值对可以按任意顺序出现。下面是我目前部分可行的尝试:
open FParsec
/// Shorthand for an FParsec parser whose user-state type is fixed to `unit`,
/// so the signatures in this file only need to name the result type.
type Parser<'a> = Parser<'a, unit>
/// State of a job as written after the "Status" key in the input.
type Status =
    /// The job is still in progress.
    | Running
    /// The job has finished.
    | Done
/// The record assembled from the three key/value entries found in the input.
type Job =
    {
        /// Integer parsed from the "Job id" entry.
        Id: int
        /// State parsed from the "Status" entry.
        Status: Status
        /// Integer parsed from the "Count" entry.
        Count: int
    }
/// Short alias for FParsec's `spaces` parser; the field parsers below use it
/// before each key and around the '=' sign.
let ws = spaces
/// Parses a `Job id = <int>` entry and yields the integer.
/// The key is matched case-insensitively; whitespace is accepted before the
/// key and on both sides of the '=' sign.
let jobId: Parser<int> =
    let key = ws >>. skipStringCI "Job id"
    let equalsSign = ws >>. skipChar '='
    key >>. equalsSign >>. ws >>. pint32
/// Parses a `Status = Running|Done` entry and yields the matching `Status`.
/// Both the key and the value are matched case-insensitively; whitespace is
/// accepted before the key and on both sides of the '=' sign.
let status: Parser<Status> =
    let key = ws >>. skipStringCI "Status"
    let equalsSign = ws >>. skipChar '='
    // The two alternatives are tried in this order: "Running", then "Done".
    let value =
        choice [
            stringCIReturn "Running" Running
            stringCIReturn "Done" Done
        ]
    key >>. equalsSign >>. ws >>. value
/// Parses a `Count = <int>` entry and yields the integer.
/// The key is matched case-insensitively; whitespace is accepted before the
/// key and on both sides of the '=' sign.
let count: Parser<int> =
    let key = ws >>. skipStringCI "Count"
    let equalsSign = ws >>. skipChar '='
    key >>. equalsSign >>. ws >>. pint32
/// Finds the "Job id", "Status" and "Count" entries, in exactly that order,
/// and combines their values into a `Job`.
/// Before each entry, up to 1000 characters of unrelated text are skipped.
let parse: Parser<Job> =
    // Advance to the next case-insensitive occurrence of `key` without
    // consuming it, so that the field parser can match the key itself.
    let seek key = skipCharsTillStringCI key false 1000
    pipe3
        (seek "Job id" >>. jobId)
        (seek "Status" >>. status)
        (seek "Count" >>. count)
        (fun idValue statusValue countValue ->
            { Id = idValue; Status = statusValue; Count = countValue })
[<EntryPoint>]
let main argv =
    // Sample input: three key/value entries separated by unrelated text.
    // The literal is kept flush-left because every character inside the
    // triple quotes is part of the input.
    let sample = """
Some irrelevant text.
Job id = 33
Some other text.
Status = Done
And another text.
Count = 10
Trailing text.
"""
    // Run the parser on the sample and print the FParsec result.
    sample |> run parse |> printfn "%A"
    0
(*
result:
Success: {Id = 33;
Status = Done;
Count = 10;}
*)
所以,它可以工作,但有两个问题:一是明显的重复(jobId 函数中的“Job id”和顶层解析器中的“Job id”,等等);二是它要求“Job id”、“Status”和“Count”必须按这个特定顺序出现,而这不符合需求。
我强烈地感觉到,应该存在一个更优雅的解决方案。
谢谢!
答案 0 :(得分:4)
第一个问题(重复)可以通过一次小的重构来解决。基本思想是把每个解析器包装到一个负责“跳过无关文本”的包装器中。注意,这段代码还远不够完美,我只是尽量让重构的改动最小。
/// Value parser for the "Job id" entry: a 32-bit integer.
/// The key and the '=' sign are handled by the wrapper that calls it.
let jobId: Parser<int> = pint32
/// Value parser for the "Status" entry: accepts "Running" or "Done",
/// ignoring case, and yields the matching `Status`.
/// The key and the '=' sign are handled by the wrapper that calls it.
let status: Parser<Status> =
    choice [
        stringCIReturn "Running" Running
        stringCIReturn "Done" Done
    ]
/// Value parser for the "Count" entry: a 32-bit integer.
/// The key and the '=' sign are handled by the wrapper that calls it.
let count: Parser<int> = pint32
/// Skips ahead to the next case-insensitive occurrence of `prefix` (giving up
/// after 1000 characters), consumes `prefix`, an '=' sign and the optional
/// whitespace around them, then runs `parser` on what follows.
let skipAndParse prefix parser =
    // Stop in front of the key without consuming it.
    let seekKey = skipCharsTillStringCI prefix false 1000
    let key = ws >>. skipStringCI prefix
    let equalsSign = ws >>. skipChar '='
    seekKey >>. key >>. equalsSign >>. ws >>. parser
/// Reads the "Job id", "Status" and "Count" entries, still in this fixed
/// order, and combines their values into a `Job`.
let parse: Parser<Job> =
    pipe3
        (skipAndParse "Job id" jobId)
        (skipAndParse "Status" status)
        (skipAndParse "Count" count)
        (fun idValue statusValue countValue ->
            { Id = idValue; Status = statusValue; Count = countValue })
第二个问题更复杂。如果您希望数据行可以按任意顺序出现,就必须考虑以下情况:某些数据行缺失,或者某个数据行重复出现。为了处理这些情况,您需要生成一个由已找到的数据行组成的 list,分析所需的数据是否全部存在,并决定如何处理可能出现的重复数据。
请注意,每个数据行的解析器都不能再带有“跳过”部分,因为它可能会在对应的解析器运行之前,就把包含有用信息的行跳过去。
/// Parses `prefix = <value>` at the current position: optional whitespace,
/// the key (case-insensitive), an '=' sign with optional whitespace around
/// it, then `parser` for the value. It does not search ahead for the key.
let skipAndParse2 prefix parser =
    let key = ws >>. skipStringCI prefix
    let equalsSign = ws >>. skipChar '='
    key >>. equalsSign >>. ws >>. parser
// A discriminated union that records which kind of line was found.
// Each parsed line becomes exactly one of these cases.
type Result =
// Integer value of a "Job id" line
| Id of int
// Value of a "Status" line
| Status of Status
// Integer value of a "Count" line
| Count of int
// Text of a line that matched none of the data-line parsers
| Irrelevant of string
// Combinator parser that classifies the input piece by piece and returns the
// list of everything it recognised, in input order.
let parse2 =
    // Data-line parsers. They are intentionally listed in a different order
    // than the lines appear in the sample.
    let countLine = skipAndParse2 "Count" count |>> Count
    let statusLine = skipAndParse2 "Status" status |>> Status
    let idLine = skipAndParse2 "Job id" jobId |>> Id
    // Fallback: swallow one whole line that none of the parsers above
    // accepted. The end-of-input guard is required because `restOfLine`
    // succeeds at the end of input without consuming anything, which would
    // otherwise make `many` loop forever (FParsec detects this and raises
    // an exception instead).
    let irrelevantLine = restOfLine true .>> notFollowedByEof |>> Irrelevant
    // Every alternative is wrapped in `attempt`, so a failed alternative
    // rewinds the input before the next one is tried.
    let anyLine =
        choice [
            attempt countLine
            attempt statusLine
            attempt idLine
            attempt irrelevantLine
        ]
    // On each iteration one alternative must succeed; the loop ends when
    // none does.
    many anyLine
运行代码:
// Sample input: three key/value lines separated by unrelated text.
// Each `\n\` writes a newline into the string and continues the literal on
// the next source line.
let sample = "
Some irrelevant text.\n\
Job id = 33\n\
Some other text.\n\
Status = Done\n\
And another text.\n\
Count = 10\n\
Trailing text.\n\
"
// Run the order-independent parser on the sample and print the result.
printfn "%A " (run parse2 sample)
将产生以下输出:
Success: [Irrelevant ""; Irrelevant "Some irrelevant text."; Id 33;
Irrelevant ""; Irrelevant "Some other text."; Status Done; Irrelevant "";
Irrelevant "And another text."; Count 10; Irrelevant ""]
进一步的处理需要过滤掉 Irrelevant 元素,检查是否有重复或缺失的项,然后构造 Job 记录或者报告错误。
更新:下面是一个简单的示例,它在后续处理中把 Result 隐藏起来,并返回 Job option:
// Naive record maker: builds a Job from the list of parsed lines.
//   arguments - the Result list produced by the line parser
//   returns   - Some Job when an Id, a Status and a Count are all present,
//               None if any of the three is missing
// Duplicate fields are ignored: the first occurrence of each one wins.
// Irrelevant entries need no separate filtering pass, because the lookups
// below simply do not match them.
let MakeJob arguments =
    // List.tryPick returns None for a missing field, so no exception has to
    // be raised and caught to detect it.
    let foundId = arguments |> List.tryPick (function Id x -> Some x | _ -> None)
    let foundStatus = arguments |> List.tryPick (function Status x -> Some x | _ -> None)
    let foundCount = arguments |> List.tryPick (function Count x -> Some x | _ -> None)
    match foundId, foundStatus, foundCount with
    | Some theId, Some theStatus, Some theCount ->
        Some { Id = theId; Status = theStatus; Count = theCount }
    | _ -> None
要使用它,只需在 parse2 的代码末尾添加下面这一行:
|>> MakeJob