是否可以转储pyparsing对象的EBNF / BNF语法表?

时间:2016-09-14 19:26:05

标签: parsing pyparsing

前言:这可能是一个因为我不够了解而显得愚蠢的问题。

我用pyparsing库(并借助Stack Overflow上的帖子)编写了一个语法,用于解析由圆括号、花括号和方括号构成的嵌套表达式。我很好奇这个语法的产生式(production)写成语法表会是什么样子。我想知道是否有办法为任意的pyparsing上下文无关语法自动生成这样的语法表。

作为参考,下面是用pyparsing定义的语法(grammar):

def parse_nestings(string, only_curl=False):
    r"""
    Split a string into tagged blocks of nested and non-nested text.

    The text is parsed with a pyparsing grammar in which ``(``/``)``,
    ``[``/``]`` and ``{``/``}`` delimit nested groups, and any run of other
    printable characters (including spaces) is plain ``nonNested`` text.

    Args:
        string (str): text to parse. An empty string yields an empty list
            without importing the parsing dependencies.
        only_curl (bool): if True, the grammar body only contains the
            ``nonNested`` and curly-brace alternatives; parentheses and
            square brackets are then not matched by any alternative.
            Defaults to False.

    Returns:
        list: ``(tag, value)`` tuples. ``tag`` is one of ``'nonNested'``,
            ``'paren'``, ``'brak'``, ``'curl'`` or ``'ITEM'``. ``value`` is
            the matched text for leaf tokens, or a list of such tuples for a
            nested group (the delimiters themselves appear as ``'ITEM'``
            entries).

    References:
        http://stackoverflow.com/questions/4801403/pyparsing-nested-mutiple-opener-clo

    CommandLine:
        python -m utool.util_gridsearch parse_nestings:1 --show

    Example:
        >>> from utool.util_gridsearch import *  # NOQA
        >>> import utool as ut
        >>> string = r'lambda u: sign(u) * abs(u)**3.0 * greater(u, 0)'
        >>> parsed_blocks = parse_nestings(string)
        >>> recombined = recombine_nestings(parsed_blocks)
        >>> print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
        >>> print('recombined = %r' % (recombined,))
        >>> print('orig       = %r' % (string,))
        PARSED_BLOCKS = [
            ('nonNested', 'lambda u: sign'),
            ('paren', [('ITEM', '('), ('nonNested', 'u'), ('ITEM', ')')]),
            ('nonNested', '* abs'),
            ('paren', [('ITEM', '('), ('nonNested', 'u'), ('ITEM', ')')]),
            ('nonNested', '**3.0 * greater'),
            ('paren', [('ITEM', '('), ('nonNested', 'u, 0'), ('ITEM', ')')]),
        ]

    Example:
        >>> from utool.util_gridsearch import *  # NOQA
        >>> import utool as ut
        >>> string = r'\chapter{Identification \textbf{foobar} workflow}\label{chap:application}'
        >>> parsed_blocks = parse_nestings(string)
        >>> print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
        PARSED_BLOCKS = [
            ('nonNested', '\\chapter'),
            ('curl', [('ITEM', '{'), ('nonNested', 'Identification \\textbf'), ('curl', [('ITEM', '{'), ('nonNested', 'foobar'), ('ITEM', '}')]), ('nonNested', 'workflow'), ('ITEM', '}')]),
            ('nonNested', '\\label'),
            ('curl', [('ITEM', '{'), ('nonNested', 'chap:application'), ('ITEM', '}')]),
        ]
    """
    # Nothing to parse: answer immediately instead of importing the parsing
    # dependencies and building a grammar that would never be used.
    if not string:
        return []

    import utool as ut  # NOQA
    import pyparsing as pp

    def as_tagged(parent, doctag=None):
        """
        Convert parse results into a nested ``(tag, children)`` tuple.

        Args:
            parent (pp.ParseResults): results to convert.
            doctag (str): tag to use for ``parent``; when None the results
                name of ``parent`` is used, falling back to ``"ITEM"``.

        Returns:
            tuple: ``(tag, children)`` where ``children`` is a list holding
                ``(tag, text)`` for plain tokens and a recursive
                ``(tag, children)`` tuple for nested results.
        """
        # NOTE: this reads name-mangled private attributes of ParseResults,
        # so it is tied to the internals of the installed pyparsing version.
        # Maps the index stored with each named token to its results name;
        # the indices are compared against token positions below.
        namedItems = {
            v[1]: k
            for (k, vlist) in parent._ParseResults__tokdict.items()
            for v in vlist
        }
        parentTag = doctag
        if parentTag is None and parent._ParseResults__name:
            parentTag = parent._ParseResults__name
        if not parentTag:
            parentTag = "ITEM"
        out = []
        for i, res in enumerate(parent._ParseResults__toklist):
            if isinstance(res, pp.ParseResults):
                # Nested group: recurse, passing its name if it has one.
                child = as_tagged(res, namedItems.get(i))
            else:
                # Individual token: use its results name when available.
                resTag = namedItems.get(i) or "ITEM"
                child = (resTag, pp._ustr(res))
            out.append(child)
        return (parentTag, out)

    def combine_nested(opener, closer, content, name=None):
        r"""
        Build a grouped element matching ``opener content* closer``.

        Args:
            opener (str): opening delimiter, e.g. ``'('``.
            closer (str): closing delimiter, e.g. ``')'``.
            content (pp.ParserElement): element allowed between the
                delimiters (zero or more times).
            name (str): optional results name attached to the element.

        Returns:
            pp.ParserElement: the (optionally named) nested element.
        """
        nested = pp.Forward()
        # Delimiters are passed through ``ut.identity`` so they stay in the
        # output; swapping in ``pp.Suppress`` here would drop them instead.
        _NEST = ut.identity
        opener_ = _NEST(opener)
        closer_ = _NEST(closer)
        group = pp.Group(opener_ + pp.ZeroOrMore(content) + closer_)
        # ``<<`` may return None; in that case keep using the Forward
        # object that was just assigned to.
        assigned = nested << group
        if assigned is not None:
            nested = assigned
        if name is not None:
            nested = nested.setResultsName(name)
        assert nested is not None, 'cannot have a None return'
        return nested

    # Current Best Grammar
    nest_body = pp.Forward()
    nestedParens   = combine_nested('(', ')', content=nest_body, name='paren')
    nestedBrackets = combine_nested('[', ']', content=nest_body, name='brak')
    nestedCurlies  = combine_nested('{', '}', content=nest_body, name='curl')

    # Plain text is any printable character that is not a delimiter, plus
    # the space character.
    nonBracePrintables = ''.join(c for c in pp.printables if c not in '(){}[]') + ' '
    nonNested = pp.Word(nonBracePrintables).setResultsName('nonNested')
    nonNested = nonNested.leaveWhitespace()

    if only_curl:
        # TODO figure out how to chain |
        nest_body << (nonNested | nestedCurlies)
    else:
        nest_body << (nonNested | nestedParens | nestedBrackets | nestedCurlies)

    nest_body = nest_body.leaveWhitespace()
    parser = pp.ZeroOrMore(nest_body)

    debug_ = ut.VERBOSE

    tokens = parser.parseString(string)
    if debug_:
        print('string = %r' % (string,))
        print('tokens List: ' + ut.repr3(tokens.asList()))
        print('tokens XML: ' + tokens.asXML())
    # The top-level tag is discarded; only the list of child blocks is kept.
    parsed_blocks = as_tagged(tokens)[1]
    if debug_:
        print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
    return parsed_blocks

0 个答案:

没有答案