Python RPly:将多个不同的产生式规则分别分配给不同的函数

时间:2019-04-07 19:04:30

标签: python parsing ply

假设我有如下一段 python-rply 代码(摘自此处):

from rply import ParserGenerator, LexerGenerator
from rply.token import BaseBox

# Each lexer rule pairs a token name with the regular expression that
# recognizes it; the rules are registered in the order listed here.
lg = LexerGenerator()
for token_name, pattern in (
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
    ("NUMBER", r"\d+"),
):
    lg.add(token_name, pattern)

# Runs of whitespace are skipped instead of being turned into tokens.
lg.ignore(r"\s+")

# The first argument lists every token name the grammar may use.
# ``precedence`` is optional: a list of (associativity, tokens) tuples that
# fixes the order of operations and so resolves ambiguity; associativity
# must be one of "left", "right" or "nonassoc".
# ``cache_id`` is an optional string naming the cache entry to use. Caching
# should always be safe: RPly detects grammar changes on its own and
# refreshes the cache for you.
pg = ParserGenerator(
    ["NUMBER", "PLUS", "MINUS"],
    precedence=[("left", ["PLUS", "MINUS"])],
    cache_id="myparser",
)

@pg.production("main : expr")
def main(p):
    """Return the value of the top-level ``expr`` unchanged.

    ``p`` is a list holding one entry per symbol on the right-hand side of
    the grammar rule; this rule has a single symbol.
    """
    top_level_expr = p[0]
    return top_level_expr

@pg.production("expr : expr PLUS expr")
@pg.production("expr : expr MINUS expr")
def expr_op(p):
    """Evaluate a binary addition or subtraction.

    ``p`` holds the left operand, the operator token and the right operand,
    in that order. The result is wrapped in a new ``BoxInt``.

    Raises:
        AssertionError: if the operator token is neither PLUS nor MINUS.
    """
    left_box, operator, right_box = p[0], p[1], p[2]
    left_value = left_box.getint()
    right_value = right_box.getint()

    # One handler serves both productions, so dispatch on the token type.
    token_type = operator.gettokentype()
    if token_type == "PLUS":
        return BoxInt(left_value + right_value)
    if token_type == "MINUS":
        return BoxInt(left_value - right_value)
    raise AssertionError("This is impossible, abort the time machine!")

@pg.production("expr : NUMBER")
def expr_num(p):
    """Convert a NUMBER token's text to an int and wrap it in a ``BoxInt``."""
    digits = p[0].getstr()
    return BoxInt(int(digits))

# Build the concrete lexer and parser from their generators.
lexer = lg.build()
parser = pg.build()

class BoxInt(BaseBox):
    """Box holding the value produced by the grammar's production functions."""

    def __init__(self, value):
        """Store ``value`` as the boxed payload."""
        self.value = value

    def getint(self):
        """Return the stored value."""
        return self.value

这是一段简单的代码,当您输入以下内容时:

# parse() returns the BoxInt passed through by the "main" production;
# call getint() on it to obtain the plain integer result.
parser.parse(lexer.lex("1 + 3")).getint()

它会执行并给出结果 4。这段代码可以正常工作,但仍有改进空间。为加法和减法调用 @pg.production 的那部分代码写得不够好;我的意思是,如果要添加更多运算符,这个函数会变得非常拥挤。有没有一种好办法,把这一部分改写成不那么拥挤的版本,像下面这样:

@pg.production("expr : expr PLUS expr")
def plus(p):
    """Add the two operands of an ``expr PLUS expr`` production.

    Returns a new ``BoxInt`` holding the sum.

    Raises:
        AssertionError: if the middle token is not a PLUS token.
    """
    left_value = p[0].getint()
    right_value = p[2].getint()

    # Defensive guard: reject anything that is not a PLUS token.
    if p[1].gettokentype() != "PLUS":
        raise AssertionError("This is impossible, abort the time machine!")
    return BoxInt(left_value + right_value)

@pg.production("expr : expr MINUS expr")
def minus(p):
    """Subtract the right operand from the left in ``expr MINUS expr``.

    Returns a new ``BoxInt`` holding the difference.

    Raises:
        AssertionError: if the middle token is not a MINUS token.
    """
    left_value = p[0].getint()
    right_value = p[2].getint()

    # Defensive guard: reject anything that is not a MINUS token.
    if p[1].gettokentype() != "MINUS":
        raise AssertionError("This is impossible, abort the time machine!")
    return BoxInt(left_value - right_value)

注意:我使用的是rply,而不是ply,但是它们非常相似。

1 个答案:

答案 0 :(得分:1)

如果您把函数拆分开,让每个产生式都有自己的函数——这实际上是最佳实践——那么就完全没有必要检查运算符的 token 类型。您已经知道它是什么,因为解析器的逻辑保证了只有在匹配到对应产生式时才会调用该函数。

因此,您可以编写紧凑的代码(例如,加法函数的函数体只需一行 `return BoxInt(p[0].getint() + p[2].getint())`):

[  
    {  
        addresses=address,
        bestName=null,
        reasonListed=null,
        countryDetails=country,
        matchReAlert=null,
        phones=null,
        resultDate=0,
        acceptListID=acceptlist,
        bestNameScore=0,
        error=null,
        bestCountry=null,
        trueMatch=null,
        doBs=null,
        file={  
            published=3123123213,
            build=111,
            name=file1,
            id=456,
            type=txt,
            custom=true
        },
        entityDetails={  
            akAs=null,
            addresses={  
                entityAddress=[  
                    {  
                        stateProvinceDistrict=null,
                        country=India,
                        comments=null,
                        city=Hyderabad,
                        postalCode=500001,
                        street1=null,
                        id=0,
                        street2=null,
                        type=null
                    }
                ]
            },
            dateListed=null,
            comments=null,
            gender=MALE,
            listReferenceNumber=null,
            reasonListed=reason,
            entityType=IND,
            additionalInfo={  
                entityAdditionalInfo=[  
                    {  
                        comments=null,
                        id=0,
                        type=DOB,
                        value=12-12-1989
                    }
                ]
            },
            name=null,
            iDs=null,
            phones=null
        },
        entityName=In,
        falsePositive=null,
        gatewayOFACScreeningIndicatorMatch=null,
        previousResultID=null,
        conflicts=null,
        iDs=null,
        entityScore=0,
        id=456,
        addedToAcceptList=true,
        matchXML=null,
        secondaryOFACScreeningIndicatorMatch=null,
        entityUniqueID=null,
        autoFalsePositive=null,
        bestCountryScore=null,
        citizenships=null,
        checkSum=0,
        addressName=true,
        ofacInfo=null,
        bestAddressIsPartial=null,
        bestCountryType=null
    },
    {  
        addresses=address,
        bestName=null,
        reasonListed=null,
        countryDetails=country,
        matchReAlert=null,
        phones=null,
        resultDate=0,
        acceptListID=acceptlist,
        bestNameScore=0,
        error=null,
        bestCountry=null,
        trueMatch=null,
        doBs=null,
        file={  
            published=3123123213,
            build=111,
            name=file1,
            id=789,
            type=txt,
            custom=true
        },
        entityDetails={  
            akAs=null,
            addresses={  
                entityAddress=[  
                    {  
                        stateProvinceDistrict=null,
                        country=India,
                        comments=null,
                        city=Hyderabad,
                        postalCode=500001,
                        street1=null,
                        id=0,
                        street2=null,
                        type=null
                    }
                ]
            },
            dateListed=null,
            comments=null,
            gender=MALE,
            listReferenceNumber=null,
            reasonListed=reason,
            entityType=IND,
            additionalInfo={  
                entityAdditionalInfo=[  
                    {  
                        comments=null,
                        id=0,
                        type=DOB,
                        value=12-12-1989
                    }
                ]
            },
            name=null,
            iDs=null,
            phones=null
        },
        entityName=In,
        falsePositive=null,
        gatewayOFACScreeningIndicatorMatch=null,
        previousResultID=null,
        conflicts=null,
        iDs=null,
        entityScore=0,
        id=789,
        addedToAcceptList=true,
        matchXML=null,
        secondaryOFACScreeningIndicatorMatch=null,
        entityUniqueID=null,
        autoFalsePositive=null,
        bestCountryScore=null,
        citizenships=null,
        checkSum=0,
        addressName=true,
        ofacInfo=null,
        bestAddressIsPartial=null,
        bestCountryType=null
    },
    {  
        addresses=address,
        bestName=null,
        reasonListed=null,
        countryDetails=country,
        matchReAlert=null,
        phones=null,
        resultDate=0,
        acceptListID=acceptlist,
        bestNameScore=0,
        error=null,
        bestCountry=null,
        trueMatch=null,
        doBs=null,
        file={  
            published=3123123213,
            build=111,
            name=file1,
            id=123,
            type=txt,
            custom=true
        },
        entityDetails={  
            akAs=null,
            addresses={  
                entityAddress=[  
                    {  
                        stateProvinceDistrict=null,
                        country=India,
                        comments=null,
                        city=Hyderabad,
                        postalCode=500001,
                        street1=null,
                        id=0,
                        street2=null,
                        type=null
                    }
                ]
            },
            dateListed=null,
            comments=null,
            gender=MALE,
            listReferenceNumber=null,
            reasonListed=reason,
            entityType=IND,
            additionalInfo={  
                entityAdditionalInfo=[  
                    {  
                        comments=null,
                        id=0,
                        type=DOB,
                        value=12-12-1989
                    }
                ]
            },
            name=null,
            iDs=null,
            phones=null
        },
        entityName=In,
        falsePositive=null,
        gatewayOFACScreeningIndicatorMatch=null,
        previousResultID=null,
        conflicts=null,
        iDs=null,
        entityScore=0,
        id=123,
        addedToAcceptList=true,
        matchXML=null,
        secondaryOFACScreeningIndicatorMatch=null,
        entityUniqueID=null,
        autoFalsePositive=null,
        bestCountryScore=null,
        citizenships=null,
        checkSum=0,
        addressName=true,
        ofacInfo=null,
        bestAddressIsPartial=null,
        bestCountryType=null
    }
]