我正在开发一个名为 medly 的编译器,使用的是 ply(一个工作方式类似 lex 和 yacc 的工具),但我的语法有些问题。
这是词法分析器(lexer):
# ------------------------------------------------------------
# Lexico.py
# ------------------------------------------------------------
import ply.lex as lex
# Keyword table: maps every reserved word of the language to the token type
# reported for it.  Each token type is simply the upper-cased keyword.
# The keywords else, int, float, var, true and false are currently disabled.
_KEYWORDS = (
    'medly', 'main', 'print', 'if',
    'play', 'repeat', 'swap', 'aug',
    'wind', 'lira', 'key', 'type',
    'len', 'random', 'chorus', 'while',
)
reserved = dict((word, word.upper()) for word in _KEYWORDS)
# Token names exported to PLY (this list is always required).  It holds the
# punctuation, operator and literal tokens, followed by the token type of
# every keyword in ``reserved``.  'DIF' and 'PTO' are currently disabled.
tokens = [
    # grouping
    'LPAR', 'RPAR', 'LCOR', 'RCOR', 'LLLAVE', 'RLLAVE',
    # arithmetic
    'PLUS', 'MINUS', 'TIMES', 'DIV',
    # comparison / assignment / separators
    'MENOR', 'MAYOR', 'EQ', 'COMA', 'DP', 'PYC',
    # literals and identifiers
    'CTENOTA', 'CTEI', 'CTESTRING', 'ID',
    # remaining operators
    'TUNE', 'MAYOREQ', 'MENOREQ', 'EQS',
]
tokens.extend(reserved.values())
# Regular expression rules for the simple (string-defined) tokens.

# Grouping: parentheses, square brackets, braces.
t_LPAR    = r'\('
t_RPAR    = r'\)'
t_LCOR    = r'\['
t_RCOR    = r'\]'
t_LLLAVE  = r'\{'
t_RLLAVE  = r'\}'

# Arithmetic operators.
t_PLUS    = r'\+'
t_MINUS   = r'\-'
t_TIMES   = r'\*'
t_DIV     = r'/'

# Single-character comparison operators ('!' / DIF is disabled).
t_MENOR   = r'\<'
t_MAYOR   = r'\>'
#t_DIF = r'\!'

# '=' plus the separators: comma, colon, semicolon ('.' / PTO is disabled).
t_EQ      = r'\='
t_COMA    = r'\,'
#t_PTO = r'\.'
t_DP      = r'\:'
t_PYC     = r'\;'

# '^' operator and the two-character comparison operators.
t_TUNE    = r'\^'
t_MAYOREQ = r'\>='
t_MENOREQ = r'\<='
t_EQS     = r'\=='
# Note constant: a digit, a letter A-G and a digit 0-5 (for example 3C4),
# or a single '$'.  The docstring below is the regex PLY matches with.
def t_CTENOTA(t):
    r'[0-9][A-G][0-5]|[$]'
    # Look the lexeme up in the keyword table; default to CTENOTA.
    token_type = reserved.get(t.value, 'CTENOTA')
    t.type = token_type
    return t
# Integer constant: replaces the matched digit string with its int value.
# If the conversion fails, a message is printed and the value becomes 0.
def t_CTEI(t):
    r'\d+'
    digits = t.value
    try:
        t.value = int(digits)
    except ValueError:
        print("Linea %d: El numero %s esta muy grande!!" % (t.lineno, digits))
        t.value = 0
    return t
# Identifier: an underscore, then a letter or underscore, then any number of
# letters, digits or underscores.  Keywords found in ``reserved`` take their
# own token type; everything else is reported as ID.
def t_ID(t):
    r'_[a-zA-Z_][a-zA-Z0-9_]*'
    lexeme = t.value
    t.type = reserved[lexeme] if lexeme in reserved else 'ID'
    return t
# Bare word made of letters, digits, underscores and dots.  A word listed in
# ``reserved`` is reported with its keyword token type, anything else as
# CTESTRING.
def t_CTESTRING(t):
    r'[a-zA-Z0-9_.]+'
    word = t.value
    t.type = reserved[word] if word in reserved else 'CTESTRING'
    return t
# Line tracking: advance the lexer's line counter by the number of newline
# characters matched.  Returns nothing, so no token is produced.
def t_newline(t):
    r'\n+'
    newline_count = len(t.value)
    t.lexer.lineno += newline_count
# Characters skipped between tokens: the space and the tab.
t_ignore = " \t"
# Lexing error handler: report the offending character, then skip exactly
# one character so scanning can continue.
def t_error(t):
    bad_char = t.value[0]
    print("Illegal character '%s'" % bad_char)
    t.lexer.skip(1)
import ply.lex as lex
# Build the lexer at import time: the parser module imports this file for
# its token list, so the lexer has to exist as soon as the module is loaded.
lexer = lex.lex()

# Smoke test: tokenize the word 'medly' and print every token.  Guarded so it
# only runs when this file is executed directly; without the guard the loop
# also ran (and printed a token) every time the parser imported this module.
if __name__ == '__main__':
    lexer.input('medly')
    while True:
        tok = lexer.token()
        if not tok:
            break
        print(tok)
这是解析器:
# Yacc example
#from compiler import ast
import ply.yacc as yacc
import decimal
from compiler import ast
# Get the token map from the lexer. This is required.
from Lexico import tokens
# Helper: build an ast.Assign node that stores ``right`` into ``left``.
#   left  -- an ast.Name (single target) or an ast.Tuple whose children are
#            all ast.Name nodes (tuple target)
#   right -- the node for the assigned value
# Raises SyntaxError for any other kind of target.
def Assign(left, right):
    if isinstance(left, ast.Name):
        # Single assignment on the left-hand side.
        target = ast.AssName(left.name, 'OP_ASSIGN')
        return ast.Assign([target], right)

    if not isinstance(left, ast.Tuple):
        raise SyntaxError("Can't do that yet")

    # Tuple target: every element must be a plain name.
    names = []
    for child in left.getChildren():
        if not isinstance(child, ast.Name):
            raise SyntaxError("that assignment not supported")
        names.append(child.name)
    targets = [ast.AssName(name, 'OP_ASSIGN') for name in names]
    return ast.Assign([ast.AssTuple(targets)], right)
# Start rule.  A program is the MEDLY keyword, a colon, an optional chorus
# and then the solo.  The result is chorus + solo when both are present,
# otherwise just the solo.
# NOTE(review): '+' is applied to whatever p_chorus and p_solo produced;
# confirm those values actually support addition.
def p_programa(p):
    '''programa : MEDLY DP chorus solo
                | MEDLY DP solo'''
    size = len(p)
    if size == 5:
        p[0] = p[3] + p[4]
    elif size == 4:
        p[0] = p[3]
# Main section: MAIN, an identifier, then the statements.  Only the
# statements are kept as the rule's value.
def p_solo(p):
    '''solo : MAIN ID notas'''
    statements = p[3]
    p[0] = statements
# Chorus definition: CHORUS, a name, a parameter list and a brace-delimited
# body.  Produces an ast.Function node.
# NOTE(review): the positional arguments presumably follow compiler.ast's
# Function(decorators, name, argnames, defaults, flags, doc, code) -- confirm.
def p_chorus(p):
    'chorus : CHORUS ID param LLLAVE notas RLLAVE'
    name = p[2]
    params = tuple(p[3])
    body = p[5]
    p[0] = ast.Function(None, name, params, (), 0, None, body)
# Parameter list: '()' yields an empty list, '( n2 )' yields the value of n2.
def p_param(p):
    '''param : LPAR RPAR
             | LPAR n2 RPAR'''
    p[0] = [] if len(p) == 3 else p[2]
# Comma-separated list of note constants; the value is a flat Python list.
# The base case wraps the single note in a list.  The recursive case appends
# the new note: the original did ``list + str``, which raises TypeError as
# soon as a second note is parsed.
def p_n2(p):
    '''n2 : CTENOTA
          | n2 COMA CTENOTA'''
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
# Statement sequence; the value is an ast.Stmt holding every statement seen.
# The original passed a bare node to ast.Stmt and, in the recursive case,
# kept only the newest statement, discarding everything parsed before it.
# Here the first statement starts a one-element list and each later one is
# appended to the nodes collected so far.
def p_notas(p):
    '''notas : notas estatuto
             | estatuto'''
    if len(p) == 3:
        p[0] = ast.Stmt(p[1].nodes + [p[2]])
    else:
        p[0] = ast.Stmt([p[1]])
# Conditional: IF, a condition, a colon and the guarded statements.  Builds
# an ast.If with a single (test, body) pair and None as the second argument.
def p_if(p):
    'if : IF condicion DP notas'
    test = p[2]
    body = p[4]
    p[0] = ast.If([(test, body)], None)
# Assignment statement: identifier, '=', a value and a semicolon.  The value
# of the rule is the assigned value.
# The original also listed 'ID EQ lista PYC'.  Since 'music' already derives
# 'lista', that alternative accepted nothing new and produced the same value,
# but it left the parser generator with two ways to handle a list there.
def p_asignacion(p):
    '''asignacion : ID EQ music PYC'''
    p[0] = p[3]
# Bracketed note list: '[' n2 ']'.  The value is whatever n2 produced.
def p_lista(p):
    'lista : LCOR n2 RCOR'
    items = p[2]
    p[0] = items
# A statement is exactly one of the alternatives below; its value is passed
# through unchanged.
# NOTE(review): the 'if', 'while', 'len', 'print' and 'random' rules defined
# in this file are not alternatives here, so in the grammar visible here they
# cannot be reached from 'programa' -- confirm whether that is intended.
def p_estatuto(p):
    '''estatuto : asignacion
                | condicion
                | play
                | repeat
                | augment
                | swap
                | tune'''
    statement = p[1]
    p[0] = statement
# Loop: WHILE '(' expresion ')' notas.  Builds an ast.While from the test
# expression and the loop body, with None as the third argument.
# The production has five symbols, so the test is p[3] and the body is p[5].
# The original used p[4] (the closing parenthesis) and p[6], which is out of
# range and raised IndexError.
def p_while(p):
    'while : WHILE LPAR expresion RPAR notas'
    test = p[3]
    body = p[5]
    p[0] = ast.While(test, body, None)
# Length call: LEN '(' music ')' ';'.  The value is the music argument.
def p_len(p):
    'len : LEN LPAR music RPAR PYC'
    argument = p[3]
    p[0] = argument
# Print statement in two forms:
#   PRINT '(' ':' CTESTRING ')' ';'  -> value is the string (p[4])
#   PRINT '(' music2 ')' ';'         -> value is the music2 value (p[3])
# The original only handled the second form, so the string form silently
# produced None.
def p_print(p):
    '''print : PRINT LPAR DP CTESTRING RPAR PYC
             | PRINT LPAR music2 RPAR PYC'''
    if len(p) == 6:
        p[0] = p[3]
    else:
        p[0] = p[4]
# Comma-separated sequence of music3 items; the value is a Python list with
# one entry per item, in source order.
# The original tested ``len(p) == 3``, which neither alternative can produce
# (the lengths are 2 and 4), so every item after the first was dropped.  It
# also combined items with '+', which fails for mixed item types; items are
# therefore collected into a list instead.
def p_music2(p):
    '''music2 : music3
              | music3 COMA music2'''
    if len(p) == 4:
        p[0] = [p[1]] + p[3]
    else:
        p[0] = [p[1]]
# Wrapper rule: a music3 is a music; the value is passed through.
def p_music3(p):
    'music3 : music'
    value = p[1]
    p[0] = value
# Wrapper rule: a condicion is a comparacion; the value is passed through.
def p_condicion(p):
    "condicion : comparacion"
    comparison = p[1]
    p[0] = comparison
# Build the ast.Compare node for ``left < right``.
# ``operands`` is a (left, right) pair passed as one positional argument.
# The pair is unpacked in the body because tuple parameters in the signature
# are a Python-2-only syntax (removed by PEP 3113).
def comparaMenor(operands):
    left, right = operands
    return ast.Compare(left, [('<', right),])
# Build the ast.Compare node for ``left > right``.
# ``operands`` is a (left, right) pair passed as one positional argument.
# The pair is unpacked in the body because tuple parameters in the signature
# are a Python-2-only syntax (removed by PEP 3113).
def comparaMayor(operands):
    left, right = operands
    return ast.Compare(left, [('>', right),])
# Build the ast.Compare node for ``left == right``.
# ``operands`` is a (left, right) pair passed as one positional argument.
# The pair is unpacked in the body because tuple parameters in the signature
# are a Python-2-only syntax (removed by PEP 3113).
def comparaIgual(operands):
    left, right = operands
    return ast.Compare(left, [('==', right),])
# Dispatch table for binary operators: operator lexeme -> callable that takes
# one (left, right) pair and returns the AST node.
binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": comparaMenor,
    ">": comparaMayor,
    "==": comparaIgual,
    # The 'comparacion' grammar reduces on the EQ token, whose lexeme is '=',
    # so the lookup raised KeyError without this entry.
    # NOTE(review): treating '=' as equality here is an assumption -- confirm,
    # or switch the grammar rule to EQS instead.
    "=": comparaIgual,
}
# Dispatch table for prefix operators: operator lexeme -> AST node class
# called with the single operand.
unary_ops = dict([
    ("+", ast.UnaryAdd),
    ("-", ast.UnarySub),
])
# Operator precedence table read by ply.yacc, listed from lowest to highest
# binding strength; every level is left-associative.
precedence = (
    ('left', 'EQ', 'MAYOR', 'MENOR'),   # comparison operators
    ('left', 'PLUS', 'MINUS'),          # additive operators
    ('left', 'TIMES', 'DIV'),           # multiplicative operators
)
# Arithmetic / comparison expression over integer constants.
#   three symbols -> binary operator, dispatched through ``binary_ops`` with
#                    the (left, right) pair as a single argument
#   two symbols   -> prefix operator, dispatched through ``unary_ops``
#   otherwise     -> the integer constant itself
def p_comparacion(p):
    """comparacion : comparacion PLUS comparacion
                   | comparacion MINUS comparacion
                   | comparacion TIMES comparacion
                   | comparacion DIV comparacion
                   | comparacion MENOR comparacion
                   | comparacion EQ comparacion
                   | comparacion MAYOR comparacion
                   | PLUS comparacion
                   | MINUS comparacion
                   | CTEI"""
    size = len(p)
    if size == 4:
        build = binary_ops[p[2]]
        p[0] = build((p[1], p[3]))
    elif size == 3:
        build = unary_ops[p[1]]
        p[0] = build(p[2])
    else:
        p[0] = p[1]
# Tune statement: music '^' CTENOTA ';'.  The value is the music operand;
# the note after '^' is not part of the result.
def p_tune(p):
    '''tune : music TUNE CTENOTA PYC'''
    target = p[1]
    p[0] = target
# Swap statement: SWAP '(' music ',' CTENOTA '/' CTENOTA ')' ';'.  The value
# is the music argument; the two notes are not part of the result.
def p_swap(p):
    'swap : SWAP LPAR music COMA CTENOTA DIV CTENOTA RPAR PYC'
    target = p[3]
    p[0] = target
# Relational expression between two music operands.  The value is the two
# operands combined with '+'.
# NOTE(review): the operator token p[2] is not used in the result, and '+'
# requires both operands to be of compatible types -- confirm this is the
# intended value.
def p_expresion(p):
    '''expresion : music MENOR music
                 | music MAYOR music
                 | music EQ music
                 | music MAYOREQ music
                 | music MENOREQ music
                 | music EQS music'''
    left = p[1]
    right = p[3]
    p[0] = left + right
# A music operand is a bracketed list, an identifier, a note constant or an
# integer constant; its value is passed through unchanged.
def p_music(p):
    '''music : lista
             | ID
             | CTENOTA
             | CTEI'''
    operand = p[1]
    p[0] = operand
# Repeat statement: REPEAT '(' music2 ':' CTEI ':' TYPE ')' ';'.  The value
# is the music2 argument.
# The original production ended in a second LPAR, so a statement could only
# parse with an unbalanced '(' before the semicolon; the closing token is now
# RPAR.
# NOTE(review): assumes the intended syntax closes the parenthesis -- confirm.
def p_repeat(p):
    'repeat : REPEAT LPAR music2 DP CTEI DP TYPE RPAR PYC'
    p[0] = p[3]
# Play statement: PLAY '(' music2 ':' <LIRA | KEY | WIND> ')' ';'.  The value
# is the music2 argument.
# Every alternative of the original ended in a second LPAR, so a statement
# could only parse with an unbalanced '(' before the semicolon; the closing
# token is now RPAR.
# NOTE(review): assumes the intended syntax closes the parenthesis -- confirm.
def p_play(p):
    '''play : PLAY LPAR music2 DP LIRA RPAR PYC
            | PLAY LPAR music2 DP KEY RPAR PYC
            | PLAY LPAR music2 DP WIND RPAR PYC'''
    p[0] = p[3]
# Random call: RANDOM '(' CTEI ')' ';'.  The value is the integer argument.
# The original had no body, so the rule always produced None; it now passes
# its argument through, as p_len does for its own argument.
def p_random(p):
    'random : RANDOM LPAR CTEI RPAR PYC'
    p[0] = p[3]
# Augment statement; the value is the music argument.
# NOTE(review): the production has one LPAR but two RPAR tokens, so the
# parentheses are unbalanced -- confirm the intended syntax.
def p_augument(p):
    'augment : AUG LPAR music RPAR CTENOTA COMA CTEI TIMES CTEI RPAR PYC'
    target = p[3]
    p[0] = target
# Syntax error handler: prints a fixed message.  The argument ``p`` is not
# inspected.
def p_error(p):
    message = "Syntax error in input!"
    print(message)
# Build the parser from the p_* rules defined in this module.
parser = yacc.yacc()

# Interactive loop: read one line per prompt and parse it.  Empty lines are
# skipped, and end-of-input (EOF) terminates the loop.
while True:
    try:
        s = raw_input('medly > ')
    except EOFError:
        break
    if s:
        result = parser.parse(s)
我的问题是:
解析器函数定义中被标出的注释(即文档字符串)部分是否重要?
def p_param(p):
'''param : LPAR RPAR <------ this part
| LPAR n2 RPAR'''
if len(p) == 3:
p[0] = []
else:
p[0] = p[2]
答案 0 :(得分:1)
评论中已经说过了,不过还是给这个问题一个正式的答案:
是的,文档字符串很重要。Ply 会读取并解析文档字符串,以获取构建解析器所需的语法规则(对于以函数形式定义的标记,则是其正则表达式)。
另见documentation,尤其是这部分:
每个语法规则都由Python函数定义,其中该函数的docstring包含适当的无上下文语法规范。