'sepEndBy'无法捕获是否包含在'之间'

时间:2014-01-02 10:54:14

标签: f# fparsec

我想解析以下文字:

 WHERE
 ( AND
       ApplicationGroup.REFSTR = 5
       BV_1.Year = 2009
       BV_1.MonetaryCodeId = 'Commited'
       BV_3.Year = 2009
       BV_3.MonetaryCodeId = 'Commited'
       BV_4.Year = 2009
       BV_4.MonetaryCodeId = 'Commited
 )

我开始使用组合器获取条件列表:

let multiConditionWhereList : Parser<WhereCondition list, unit> =
        sepEndBy1 (ws >>. whereCondition) (newline)
        <?> "where condition list"

当我交出where-statement的条件列表(每行带一个=)时,我会在其Result中找到一个回复​​七个WhereConditions。状态为“好”。但错误列表包含“预期的换行符”ErrorMessage。

但每当我尝试使用以下形状的组合器在开头用oparator解析这种用圆括号包裹的列表时:

let multiConditionWhereClause : Parser<WhereStatement, unit> =
        pstringCI "where"
        .>> spaces 
        >>. between (pchar '(') (pchar ')') 
                    ( ws  >>. whereChainOperator .>> spaces1
                      .>>. multiConditionWhereList )
        |>> (fun (chainOp, conds) -> { Operator = chainOp; 
                                       SearchConditions = conds } )

我收到状态“错误”的回复。但是Error-List是空的以及结果。

所以我有点卡在这一点上。首先我不明白,为什么我的 multiConditionWhereList 中的 sepByEnd1 组合器会产生一个非空的错误列表,并期望最后的换行符。更重要的是,当我将它包装在一个中间语句中时,我不明白为什么没有捕获列表。

作为参考,我包括整套规则以及导致问题的规则的调用:

#light

#r "System.Xml.Linq.dll"
#r @"..\packages\FParsec.1.0.1\lib\net40-client\FParsecCS.dll"
#r @"..\packages\FParsec.1.0.1\lib\net40-client\FParsec.dll"

module Ast =    
    open System
    open System.Xml.Linq

    type AlfabetParseError (msg: string) =
              inherit Exception (msg)

    type FindStatement = 
            { TableReferences: TableReferences;}

    and TableReferences = 
            { PrimaryTableReference: TableReferenceWithAlias; JoinTableReferences: JoinTableReference list; }

    and TableReferenceWithAlias = 
            { Name: string; Alias: string }

    and JoinTableReference = 
            { JoinType:JoinType; TableReference: TableReferenceWithAlias; JoinCondition: JoinCondition; }

    and JoinType =
            | InnerJoin
            | OuterJoin
            | LeftJoin
            | RightJoin

    and JoinCondition = 
            { LeftHandSide: FieldReference; RightHandSide: FieldReference; }

    and WhereStatement = 
            { Operator: WhereOperator; SearchConditions: WhereCondition list }

    and WhereOperator = 
            | And
            | Or
            | Equal
            | Is
            | IsNot
            | Contains
            | Like
            | NoOp
    and WhereLeftHandSide =
            | FieldReferenceLH of FieldReference

    and WhereRightHandSide =
            | FieldReferenceRH of FieldReference
            | VariableReferenceRH of VariableReference
            | LiteralRH of Literal

    and WhereCondition =
            { LeftHandSide: WhereLeftHandSide; Operator: WhereOperator; RightHandSide: WhereRightHandSide; }

    and FieldReference =
            { FieldName: Identifier; TableName: Identifier }

    and VariableReference =
            { VariableName : Identifier; }

    and Literal = 
            | Str of string
            | Int of int
            | Hex of int
            | Bin of int
            | Float of float
            | Null 

    and Identifier = 
              Identifier of string  

    and QueryXml =
            { Doc : XDocument }  

module AlfabetQueryParser =
    open Ast
    open FParsec
    open System
    open System.Xml.Linq

    module Parsers =

        (* Utilities *)
        let toJoinType (str:string) = 
            match str.ToLowerInvariant() with
            | "innerjoin" -> InnerJoin
            | "outerjoin" -> OuterJoin
            | "leftjoin"  -> LeftJoin
            | "rightjoin" -> RightJoin
            | _           -> raise <| AlfabetParseError "Invalid join type"

        let toWhereOperator (str:string) = 
            match str.ToLowerInvariant() with
            | "and"       -> And
            | "or"        -> Or
            | "="         -> Equal
            | "is"        -> Is
            | "is not"    -> IsNot
            | "contains"  -> Contains
            | "like"      -> Like
            | _           -> raise <| AlfabetParseError "Invalid where operator type"

        (* Parsers *)
        let ws : Parser<string, unit> =
            manyChars (satisfy (fun c -> c = ' '))

        let ws1 : Parser<string, unit> =
            many1Chars (satisfy (fun c -> c = ' '))

        let identifier : Parser<string, unit> = 
            many1Chars (satisfy (fun(c) -> isDigit(c) || isAsciiLetter(c) || c.Equals('_')))

        let fieldReference : Parser<FieldReference, unit> =
            identifier 
            .>> pstring "." 
            .>>. identifier
            |>> (fun (tname, fname) -> {FieldName = Identifier(fname); 
                                        TableName = Identifier(tname) })

        let variableReference : Parser<VariableReference, unit> =
            pstring ":"
            >>. identifier
            |>> (fun vname -> { VariableName = Identifier(vname) })

        let numeralOrDecimal : Parser<Literal, unit> =
            numberLiteral NumberLiteralOptions.AllowFraction "number" 
            |>> fun num -> 
                    if num.IsInteger then Int(int num.String)
                    else Float(float num.String)

        let hexNumber : Parser<Literal, unit> =    
            pstring "#x" >>. many1SatisfyL isHex "hex digit"
            |>> fun hexStr -> 
                    Hex(System.Convert.ToInt32(hexStr, 16)) 

        let binaryNumber : Parser<Literal, unit> =    
            pstring "#b" >>. many1SatisfyL (fun c -> c = '0' || c = '1') "binary digit"
            |>> fun hexStr -> 
                    Bin(System.Convert.ToInt32(hexStr, 2))

        let numberLiteral : Parser<Literal, unit> =
            choiceL [numeralOrDecimal
                     hexNumber
                     binaryNumber]
                    "number literal"

        let strEscape = 
            pchar '\\' >>. pchar '\''

        let strInnard = 
            strEscape <|> noneOf "\'"

        let strInnards = 
            manyChars strInnard

        let strLiteral =  
            between (pchar '\'') (pchar '\'') strInnards
            |>> Str

        let literal : Parser<Literal, unit> = 
                (pstringCI "null" |>> (fun str -> Null))
            <|> numberLiteral
            <|> strLiteral

        let joinCondition : Parser<JoinCondition, unit> =
            spaces .>> pstring "ON" .>> spaces
            >>. fieldReference
            .>> spaces .>> pstring "=" .>> spaces
            .>>. fieldReference
            |>> (fun(lhs, rhs) -> { LeftHandSide = lhs; RightHandSide = rhs })

        let tableReferenceWithoutAlias : Parser<TableReferenceWithAlias, unit> =
            identifier
            |>> (fun (name) -> { Name = name; Alias = ""})

        let tableReferenceWithAlias : Parser<TableReferenceWithAlias, unit> =
            identifier
            .>> spaces .>> pstringCI "as" .>> spaces 
            .>>. identifier
            |>> (fun (name, alias) -> { Name = name; Alias = alias})

        let primaryTableReference : Parser<TableReferenceWithAlias, unit> =
            attempt tableReferenceWithAlias <|> tableReferenceWithoutAlias

        let joinTableReference : Parser<JoinTableReference, unit> =
            identifier
            .>> spaces 
            .>>. (attempt tableReferenceWithAlias <|> tableReferenceWithoutAlias)
            .>> spaces
            .>>. joinCondition
            |>> (fun ((joinTypeStr, tableRef), condition) -> { JoinType = toJoinType(joinTypeStr);
                                                               TableReference = tableRef; 
                                                               JoinCondition = condition } )

        let tableReferences : Parser<TableReferences, unit> =
            primaryTableReference
            .>> spaces
            .>>. many (joinTableReference .>> spaces)
            |>> (fun (pri, joinTables) -> { PrimaryTableReference = pri; 
                                            JoinTableReferences = joinTables; } )

        let whereConditionOperator : Parser<WhereOperator, unit> =
            choice [
                pstringCI "="
              ; pstringCI "is not"
              ; pstringCI "is"
              ; pstringCI "contains"
              ; pstringCI "like"
            ]
            |>> toWhereOperator


        let whereChainOperator : Parser<WhereOperator, unit> = 
            choice [
                pstringCI "and"
            ;   pstringCI "or"
            ]
            |>> toWhereOperator

        let whereCondition : Parser<WhereCondition, unit> =

            let leftHandSide : Parser<WhereLeftHandSide, unit> =
                fieldReference |>> FieldReferenceLH

            let rightHandSide : Parser<WhereRightHandSide, unit> =
                    (attempt fieldReference |>> FieldReferenceRH)  
                <|> (attempt variableReference |>> VariableReferenceRH)
                <|> (literal |>> LiteralRH)

            leftHandSide
            .>> ws1 .>>. whereConditionOperator .>> ws1
            .>>. rightHandSide
            |>> (fun((lhs, op), rhs) -> { LeftHandSide = lhs; 
                                          Operator = op; 
                                          RightHandSide = rhs })

        let singleConditionWhereClause : Parser<WhereStatement, unit> =
            pstringCI "where" .>> spaces
            >>. whereCondition
            |>> (fun (cond) -> { Operator = NoOp;
                                 SearchConditions = [ cond ] } );

        let multiConditionChainOperator : Parser<WhereOperator, unit> =
            pstring "(" .>> spaces >>. whereChainOperator .>> spaces
            <?> "where multi-condition operator"

        let multiConditionWhereList : Parser<WhereCondition list, unit> =
            sepEndBy1 (ws >>. whereCondition) (newline)
            <?> "where condition list"

        let multiConditionWhereClause : Parser<WhereStatement, unit> =
            pstringCI "where"
            .>> spaces 
            >>. between (pchar '(') (pchar ')') 
                        ( ws  >>. whereChainOperator .>> spaces1
                          .>>. multiConditionWhereList )
            |>> (fun (chainOp, conds) -> { Operator = chainOp; 
                                           SearchConditions = conds } )

        let whereClause : Parser<WhereStatement, unit> =
            (attempt multiConditionWhereClause)
            <|> singleConditionWhereClause

        let findStatement : Parser<FindStatement, unit> =
            spaces .>> pstringCI "find" .>> spaces
            >>. tableReferences
            |>> (fun (tableRef) -> { TableReferences = tableRef; } )

        let queryXml : Parser<QueryXml, unit> = 
            pstringCI "QUERY_XML" .>> newline
            >>. manyCharsTill anyChar eof
            |>> (fun (xmlStr) -> { Doc = XDocument.Parse(xmlStr) } )

    let parse input =  
        match run Parsers.findStatement input with
        | Success (x, _, _) -> x
        | Failure (x, _, _) -> raise <|  AlfabetParseError x


open FParsec

let input = @"WHERE
            ( AND
                ApplicationGroup.REFSTR CONTAINS  :BASE
                BV_1.Year = 2009
                BV_1.MonetaryCodeId = 'Commited'
                BV_3.Year = 2009
                BV_3.MonetaryCodeId = 'Commited'
                BV_4.Year = 2009
                BV_4.MonetaryCodeId = 'Commited'
            )"

let r = run AlfabetQueryParser.Parsers.multiConditionWhereClause input

1 个答案:

答案 0 :(得分:4)

FParsec无法为您的示例生成更多有用的错误消息的原因是您已使用satisfy原语定义了wsid解析器。由于您只指定了谓词函数,因此FParsec不知道如何描述预期的输入。 User's Guide解释了这个问题以及如何避免这个问题。在您的代码中,您可以例如使用satisfyLmany1SatisfyL作为定义。

修复wsid解析器之后,您会很快发现您的代码没有正确解析列表,因为空格分析是混乱的。在可能的情况下,您应始终将空格分析为尾随空格,而不是前导空格,因为这样可以避免回溯的需要。要根据上面给出的输入修复解析器,您可以例如替换

sepEndBy1 (ws >>. whereCondition) (newline)

sepEndBy1 (whereCondition .>> ws) (newline >>. ws)

multiConditionWhereList的定义中。

请注意,非空错误消息列表不一定意味着错误,因为FParsec通常会收集在流中当前位置应用的所有解析器的错误消息,即使解析器是“可选的” 。这可能是您看到“预期换行符”的原因,因为在该位置会接受换行符。