前言:这可能是一个很外行的愚蠢问题。
我用 pyparsing 库(并借助 Stack Overflow 上的一个帖子)编写了一个语法,用来解析由圆括号、花括号和方括号构成的嵌套表达式。我很好奇这个语法的产生式(productions)在语法表中会是什么样子。我想知道是否有办法为任意的 pyparsing 上下文无关语法自动生成这样的产生式。
作为参考,下面是这个 pyparsing 语法(grammar)的定义:
def parse_nestings(string, only_curl=False):
    r"""
    Split a string into a tree of tagged blocks based on nested brackets.

    The text is tokenized with a recursive pyparsing grammar. Runs of
    characters that are not brackets come back as ``('nonNested', text)``
    and each bracketed group comes back as ``(tag, children)``, where
    ``tag`` is ``'paren'``, ``'brak'`` or ``'curl'`` and ``children`` holds
    the opener and closer as ``('ITEM', char)`` entries around the parsed
    contents.

    Args:
        string (str): text to parse.
        only_curl (bool): if True the grammar body only contains the
            non-nested rule and the curly-brace rule; otherwise parens,
            square brackets and curly braces are all included.

    Returns:
        list: the tagged blocks in order of appearance; an empty list when
            ``string`` is empty.

    References:
        http://stackoverflow.com/questions/4801403/pyparsing-nested-mutiple-opener-clo

    CommandLine:
        python -m utool.util_gridsearch parse_nestings:1 --show

    Example:
        >>> from utool.util_gridsearch import *  # NOQA
        >>> import utool as ut
        >>> string = r'lambda u: sign(u) * abs(u)**3.0 * greater(u, 0)'
        >>> parsed_blocks = parse_nestings(string)
        >>> recombined = recombine_nestings(parsed_blocks)
        >>> print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
        >>> print('recombined = %r' % (recombined,))
        >>> print('orig = %r' % (string,))
        PARSED_BLOCKS = [
            ('nonNested', 'lambda u: sign'),
            ('paren', [('ITEM', '('), ('nonNested', 'u'), ('ITEM', ')')]),
            ('nonNested', '* abs'),
            ('paren', [('ITEM', '('), ('nonNested', 'u'), ('ITEM', ')')]),
            ('nonNested', '**3.0 * greater'),
            ('paren', [('ITEM', '('), ('nonNested', 'u, 0'), ('ITEM', ')')]),
        ]

    Example:
        >>> from utool.util_gridsearch import *  # NOQA
        >>> import utool as ut
        >>> string = r'\chapter{Identification \textbf{foobar} workflow}\label{chap:application}'
        >>> parsed_blocks = parse_nestings(string)
        >>> print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
        PARSED_BLOCKS = [
            ('nonNested', '\\chapter'),
            ('curl', [('ITEM', '{'), ('nonNested', 'Identification \\textbf'), ('curl', [('ITEM', '{'), ('nonNested', 'foobar'), ('ITEM', '}')]), ('nonNested', 'workflow'), ('ITEM', '}')]),
            ('nonNested', '\\label'),
            ('curl', [('ITEM', '{'), ('nonNested', 'chap:application'), ('ITEM', '}')]),
        ]
    """
    import utool as ut  # NOQA
    import pyparsing as pp

    if not string:
        # Nothing to parse; skip building the grammar entirely.
        return []

    # Use pyparsing's own string coercion when it is exposed, otherwise the
    # builtin (NOTE(review): believed to be absent in pyparsing 3.x - confirm).
    _ustr = getattr(pp, '_ustr', str)

    def _private(results, attr):
        """
        Read a private ParseResults attribute under either naming scheme.

        Tries the name-mangled form (``_ParseResults__<attr>``) first and
        then the single-underscore form (``_<attr>``).
        ``object.__getattribute__`` is used on purpose: ParseResults defines
        ``__getattr__`` and answers unknown names with an empty string, so
        plain ``getattr`` / ``hasattr`` cannot detect a missing attribute.
        """
        for candidate in ('_ParseResults__' + attr, '_' + attr):
            try:
                return object.__getattribute__(results, candidate)
            except AttributeError:
                continue
        raise AttributeError(
            'ParseResults has no private attribute %r' % (attr,))

    def as_tagged(parent, doctag=None):
        """
        Convert ParseResults into nested ``(tag, children)`` tuples.

        Tags come from results names where they are defined; anything
        without a name is tagged ``"ITEM"``.

        Args:
            parent (pp.ParseResults): results to convert.
            doctag (str): tag to use for ``parent`` instead of its own name.

        Returns:
            tuple: ``(tag, children)`` where each child is either another
                ``(tag, children)`` tuple or a ``(tag, text)`` leaf.
        """
        # Map token offset -> results name for every named item.
        named_items = {
            v[1]: k
            for k, vlist in _private(parent, 'tokdict').items()
            for v in vlist
        }
        parent_tag = doctag if doctag is not None else _private(parent, 'name')
        if not parent_tag:
            parent_tag = 'ITEM'

        children = []
        for idx, tok in enumerate(_private(parent, 'toklist')):
            tag = named_items.get(idx)
            if isinstance(tok, pp.ParseResults):
                children.append(as_tagged(tok, tag))
            else:
                # Individual token: fall back to the generic tag if unnamed.
                children.append((tag or 'ITEM', _ustr(tok)))
        return (parent_tag, children)

    def combine_nested(opener, closer, content, name=None):
        r"""
        Build the rule ``opener content* closer`` wrapped in a Group.

        Args:
            opener (str): opening delimiter, e.g. ``'('``.
            closer (str): closing delimiter, e.g. ``')'``.
            content (pp.ParserElement): rule matched between the delimiters.
            name (str): optional results name attached to the rule.

        Returns:
            pp.ParserElement: the (optionally named) rule.
        """
        rule = pp.Forward()
        # Delimiters are passed through ut.identity so they stay in the
        # output; using pp.Suppress here instead would wrap them.
        _NEST = ut.identity
        group = pp.Group(_NEST(opener) + pp.ZeroOrMore(content) + _NEST(closer))
        assigned = rule << group
        if assigned is None:
            # Guard for pyparsing versions where ``<<`` returns nothing.
            assigned = rule
        if name is not None:
            assigned = assigned.setResultsName(name)
        return assigned

    # Current Best Grammar
    nest_body = pp.Forward()
    nestedParens = combine_nested('(', ')', content=nest_body, name='paren')
    nestedBrackets = combine_nested('[', ']', content=nest_body, name='brak')
    nestedCurlies = combine_nested('{', '}', content=nest_body, name='curl')

    # Every printable character except the six bracket characters, plus space.
    nonBracePrintables = ''.join(
        c for c in pp.printables if c not in '(){}[]') + ' '
    nonNested = pp.Word(nonBracePrintables).setResultsName('nonNested')
    nonNested = nonNested.leaveWhitespace()

    # The alternation is parenthesized so that ``<<`` receives the whole
    # expression rather than only its first alternative.
    if only_curl:
        nest_body << (nonNested | nestedCurlies)
    else:
        nest_body << (nonNested | nestedParens | nestedBrackets | nestedCurlies)

    nest_body = nest_body.leaveWhitespace()
    parser = pp.ZeroOrMore(nest_body)

    debug_ = ut.VERBOSE
    tokens = parser.parseString(string)
    if debug_:
        print('string = %r' % (string,))
        print('tokens List: ' + ut.repr3(tokens.asList()))
        # Look asXML up on the class: instance lookup would hit
        # ParseResults.__getattr__ and yield '' when the method is missing.
        as_xml = getattr(type(tokens), 'asXML', None)
        if as_xml is not None:
            print('tokens XML: ' + as_xml(tokens))
    parsed_blocks = as_tagged(tokens)[1]
    if debug_:
        print('PARSED_BLOCKS = ' + ut.repr3(parsed_blocks, nl=1))
    return parsed_blocks