关于ply的语法,注释(文档字符串)部分是否重要?

时间:2014-05-01 22:51:52

标签: parsing python-2.7 compiler-construction ply

我正在开发一个名为medly的编译器,我正在使用ply,一个像lex & yacc一样工作的工具,但我的语法有些问题。

这是词法分析器:

# ------------------------------------------------------------
# Lexico.py
# ------------------------------------------------------------
import ply.lex as lex
# Reserved words of the medly language, mapped to their token type names.
# The lexer rules t_CTENOTA, t_ID and t_CTESTRING look the matched text up
# in this table; the values are also appended to `tokens`.
reserved = {
    'medly' : 'MEDLY',
    'main' : 'MAIN',
    'print' : 'PRINT',
    'if' : 'IF',
#   'else' : 'ELSE',
#   'int' : 'INT',
#   'float' : 'FLOAT',
#   'var' : 'VAR',
    'play' : 'PLAY',
    'repeat' : 'REPEAT',
    'swap' : 'SWAP',
    'aug' : 'AUG',
#   'true' : 'TRUE',
#   'false' : 'FALSE',
    'wind' : 'WIND',
    'lira' : 'LIRA',
    'key' : 'KEY',
    'type' : 'TYPE',
    'len' : 'LEN',
    'random' : 'RANDOM',
    'chorus' : 'CHORUS',
    'while' : 'WHILE',
}
# List of token names.   This is always required
# The trailing comments show the text each punctuation token matches,
# taken from the t_* definitions in this file.
tokens = [
    'LPAR',       # (
    'RPAR',       # )
    'LCOR',       # [
    'RCOR',       # ]
    'LLLAVE',     # {
    'RLLAVE',     # }
    'PLUS',       # +
    'MINUS',      # -
    'TIMES',      # *
    'DIV',        # /
    'MENOR',      # <
    'MAYOR',      # >
#   'DIF',
    'EQ',         # =
    'COMA',       # ,
#   'PTO',
    'DP',         # :
    'PYC',        # ;
    'CTENOTA',    # note constant, see t_CTENOTA
    'CTEI',       # integer constant, see t_CTEI
    'CTESTRING',  # see t_CTESTRING
    'ID',         # see t_ID
    'TUNE',       # ^
    'MAYOREQ',    # >=
    'MENOREQ',    # <=
    'EQS',        # ==

] + list(reserved.values())     # plus one token type per reserved word


# Regular expression rules for simple tokens
# Each t_NAME string below is the regular expression for token NAME.
# NOTE(review): per the PLY documentation, string-defined rules are tried in
# order of decreasing regex length, which is what lets '>=' / '<=' / '=='
# win over '>' / '<' / '=' -- confirm before editing these patterns.
t_LPAR      = r'\('
t_RPAR      = r'\)'
t_LCOR      = r'\['
t_RCOR      = r'\]'
t_LLLAVE    = r'\{'
t_RLLAVE    = r'\}'
t_PLUS      = r'\+'
t_MINUS     = r'\-'
t_TIMES     = r'\*'
t_DIV       = r'/'
t_MENOR     = r'\<'
t_MAYOR     = r'\>'
#t_DIF      = r'\!'
t_EQ        = r'\='
t_COMA      = r'\,'
#t_PTO      = r'\.'
t_DP        = r'\:'
t_PYC       = r'\;'
t_TUNE      = r'\^'
t_MAYOREQ   = r'\>='
t_MENOREQ   = r'\<='
t_EQS       = r'\=='

# A regular expression rule with some action code

def t_CTENOTA(t):
    r'[0-9][A-G][0-5]|[$]'
    # The docstring is the token regex (a digit, a letter A-G and a digit
    # 0-5, or a single '$'); it must stay verbatim.
    # The matched text is retyped through `reserved`, defaulting to CTENOTA.
    kind = reserved.get(t.value, 'CTENOTA')
    t.type = kind
    return t

def t_CTEI(t):
    r'\d+'
    # The docstring is the token regex (one or more digits); keep it verbatim.
    # Converts the matched digits to an int in place and returns the token.
    # If int() rejects the text, a diagnostic with the token's line number is
    # printed and the value falls back to 0.
    try:
        t.value = int(t.value)
    except ValueError:
        # Parenthesised single-argument print: same output on Python 2.7,
        # and valid syntax on Python 3.
        print("Linea %d: El numero %s esta muy grande!!" % (t.lineno, t.value))
        t.value = 0
    return t


def t_ID(t):
    r'_[a-zA-Z_][a-zA-Z0-9_]*'
    # The docstring is the token regex: an underscore, then a letter or
    # underscore, then any run of letters, digits and underscores.
    # The matched text is retyped through `reserved`, defaulting to ID.
    kind = reserved.get(t.value, 'ID')
    t.type = kind
    return t

def t_CTESTRING(t):
    r'[a-zA-Z0-9_.]+'
    # The docstring is the token regex: a run of letters, digits, '_' and '.'.
    # Words found in `reserved` are emitted with their keyword token type;
    # everything else stays a CTESTRING.
    kind = reserved.get(t.value, 'CTESTRING')
    t.type = kind
    return t

# Line tracking: each newline in the matched run advances the lexer's
# line counter by one. Nothing is returned, so no token is produced.
def t_newline(t):
    r'\n+'
    newline_count = len(t.value)
    t.lexer.lineno += newline_count

# A string containing ignored characters (spaces and tabs).
# Newlines are not listed here; they are handled by t_newline.
t_ignore  = ' \t'

# Error handling rule
def t_error(t):
    # t.value holds the input starting at the offending character, so only
    # its first character is reported; the lexer then skips that character.
    # Parenthesised single-argument print: same output on Python 2.7, and
    # valid syntax on Python 3.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

import ply.lex as lex
# Build the lexer at import time; the parser module imports this file.
lexer = lex.lex()

# Smoke test: tokenize a sample input and dump the tokens.
# It runs only when this file is executed directly, so importing the module
# (e.g. `from Lexico import tokens`) no longer prints debug output.
if __name__ == '__main__':
    lexer.input('medly')
    while True:
        tok = lexer.token()
        if not tok:
            break
        print(tok)

这是解析器:

# Yacc example
#from compiler import ast
import ply.yacc as yacc
import decimal
from compiler import ast

# Get the token map from the lexer.  This is required.
from Lexico import tokens

# Helper function
def Assign(left, right):
    """Build an ``ast.Assign`` node that binds ``right`` to ``left``.

    ``left`` may be an ``ast.Name`` (single target) or an ``ast.Tuple``
    whose children are all ``ast.Name`` nodes (tuple target).

    Raises:
        SyntaxError: if ``left`` is a tuple containing a non-Name child,
            or is neither a Name nor a Tuple.
    """
    if isinstance(left, ast.Name):
        # Single assignment on left
        target = ast.AssName(left.name, 'OP_ASSIGN')
        return ast.Assign([target], right)

    if not isinstance(left, ast.Tuple):
        raise SyntaxError("Can't do that yet")

    # Tuple target: every element must be a plain Name node.
    targets = []
    for element in left.getChildren():
        if not isinstance(element, ast.Name):
            raise SyntaxError("that assignment not supported")
        targets.append(ast.AssName(element.name, 'OP_ASSIGN'))
    return ast.Assign([ast.AssTuple(targets)], right)



def p_programa(p):
    '''programa : MEDLY DP chorus solo
                | MEDLY DP solo'''
    # The docstring is the grammar rule; keep it verbatim.
    # With a chorus (len 5) the result is chorus + solo; without one (len 4)
    # it is the solo value alone.
    size = len(p)
    if size == 5:
        chorus_value, solo_value = p[3], p[4]
        p[0] = chorus_value + solo_value
    elif size == 4:
        p[0] = p[3]

def p_solo(p):
    '''solo : MAIN ID notas'''
    # The docstring is the grammar rule; keep it verbatim.
    # The value of `solo` is the value of its `notas`; MAIN and ID are dropped.
    body = p[3]
    p[0] = body


def p_chorus(p):
    'chorus : CHORUS ID param LLLAVE notas RLLAVE'
    # The docstring is the grammar rule; keep it verbatim.
    # Builds an ast.Function from the ID text, the parameter list (as a
    # tuple) and the `notas` body.
    name, params, body = p[2], p[3], p[5]
    p[0] = ast.Function(None, name, tuple(params), (), 0, None, body)

def p_param(p):
    '''param : LPAR  RPAR
             | LPAR n2 RPAR'''
    # The docstring is the grammar rule; keep it verbatim.
    # Empty parentheses yield an empty list; otherwise the n2 value is used.
    p[0] = [] if len(p) == 3 else p[2]

def p_n2(p):
    ''' n2 : CTENOTA
           | n2 COMA CTENOTA '''
    # The docstring is the grammar rule; keep it verbatim.
    # Produces a list of note constants. In the recursive alternative p[1]
    # is the list built so far and p[3] is a single CTENOTA value, so it is
    # wrapped in a list before concatenating (list + str raises TypeError).
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

def p_notas(p):
    '''notas : notas estatuto
             | estatuto'''
    # The docstring is the grammar rule; keep it verbatim.
    # Produces an ast.Stmt holding every statement seen so far.
    # p[1] of the recursive alternative is always a Stmt built by this rule,
    # so its node list is extended with the new statement rather than
    # being replaced by it.
    if len(p) == 3:
        p[0] = ast.Stmt(p[1].nodes + [p[2]])
    else:
        # Stmt takes a list of nodes, so the single statement is wrapped.
        p[0] = ast.Stmt([p[1]])

def p_if(p):
    'if : IF condicion DP notas'
    # The docstring is the grammar rule; keep it verbatim.
    # One (condition, body) pair and no else branch.
    condition, body = p[2], p[4]
    p[0] = ast.If([(condition, body)], None)

def p_asignacion(p):
    '''asignacion : ID EQ lista PYC
                  | ID EQ music PYC '''
    # The docstring is the grammar rule; keep it verbatim.
    # Only the right-hand side value is kept; the ID on the left is not used.
    assigned_value = p[3]
    p[0] = assigned_value

def p_lista(p):
    'lista : LCOR n2 RCOR'
    # The docstring is the grammar rule; keep it verbatim.
    # The brackets are dropped; the value is whatever n2 produced.
    items = p[2]
    p[0] = items

def p_estatuto(p):
    '''estatuto : asignacion
                | condicion
                | play
                | repeat
                | augment
                | swap
                | tune '''
    # The docstring is the grammar rule; keep it verbatim.
    # Pass-through: a statement's value is that of the alternative matched.
    # NOTE(review): the `if`, `while`, `len`, `print` and `random` rules
    # defined in this file are not listed as alternatives here.
    statement = p[1]
    p[0] = statement

def p_while(p):
    'while : WHILE LPAR expresion RPAR notas'
    # The docstring is the grammar rule; keep it verbatim.
    # Symbol positions: p[1]=WHILE p[2]=LPAR p[3]=expresion p[4]=RPAR
    # p[5]=notas. The loop test is therefore p[3] and the body p[5]
    # (p[4] is the closing parenthesis and p[6] does not exist).
    p[0] = ast.While(p[3], p[5], None)

def p_len(p):
    'len : LEN LPAR music RPAR PYC'
    # The docstring is the grammar rule; keep it verbatim.
    # The value is the `music` argument itself; no length is computed here.
    argument = p[3]
    p[0] = argument

def p_print(p):
    '''print : PRINT LPAR DP CTESTRING RPAR PYC
             | PRINT LPAR music2 RPAR PYC '''
    # The docstring is the grammar rule; keep it verbatim.
    # The value is the thing being printed. The string form has six symbols
    # (len(p) == 7) with the CTESTRING at p[4]; the music2 form has five
    # (len(p) == 6) with the argument at p[3]. Previously the string form
    # assigned nothing, leaving the value None.
    if len(p) == 7:
        p[0] = p[4]
    elif len(p) == 6:
        p[0] = p[3]

def p_music2(p):
    '''music2 : music3
              | music3 COMA music2 '''
    # The docstring is the grammar rule; keep it verbatim.
    # The recursive alternative has three symbols, so len(p) is 4 (never 3);
    # testing for 3 made this branch unreachable and dropped the tail.
    # NOTE(review): the two values are combined with `+`, so both operands
    # must support it with each other (e.g. two lists) -- confirm the value
    # types produced by `music`.
    if len(p) == 4:
        p[0] = p[1] + p[3]
    else:
        p[0] = p[1]

def p_music3(p):
    'music3 : music'
    # The docstring is the grammar rule; keep it verbatim.
    # Pass-through of the `music` value.
    value = p[1]
    p[0] = value

def p_condicion(p):
    "condicion : comparacion"
    # The docstring is the grammar rule; keep it verbatim.
    # Pass-through of the `comparacion` value.
    comparison = p[1]
    p[0] = comparison

def comparaMenor(operands):
    """Build an ``ast.Compare`` node for ``left < right``.

    ``operands`` is a ``(left, right)`` pair, passed as one argument.
    It is unpacked in the body because tuple parameters in the signature
    are Python-2-only syntax.
    """
    left, right = operands
    return ast.Compare(left, [('<', right),])
def comparaMayor(operands):
    """Build an ``ast.Compare`` node for ``left > right``.

    ``operands`` is a ``(left, right)`` pair, passed as one argument.
    It is unpacked in the body because tuple parameters in the signature
    are Python-2-only syntax.
    """
    left, right = operands
    return ast.Compare(left, [('>', right),])
def comparaIgual(operands):
    """Build an ``ast.Compare`` node for ``left == right``.

    ``operands`` is a ``(left, right)`` pair, passed as one argument.
    It is unpacked in the body because tuple parameters in the signature
    are Python-2-only syntax.
    """
    left, right = operands
    return ast.Compare(left, [('==', right),])

# Maps the text of a binary operator token to a callable that builds the
# AST node from a (left, right) pair. p_comparacion indexes this table with
# the operator token's value.
binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": comparaMenor,
    ">": comparaMayor,
    # The `comparacion EQ comparacion` alternative uses the EQ token, whose
    # text is '=' (t_EQ); without this entry that alternative raised KeyError.
    "=": comparaIgual,
    "==": comparaIgual,
}
# Maps the text of a unary sign token to the AST node class applied to the
# operand; p_comparacion uses it for the `PLUS comparacion` and
# `MINUS comparacion` alternatives.
unary_ops = {
    "+": ast.UnaryAdd,
    "-": ast.UnarySub,
    }
# Operator precedence/associativity table read by ply.yacc.
# NOTE(review): per the PLY documentation, rows are listed from lowest to
# highest precedence, so TIMES/DIV bind tighter than PLUS/MINUS, which bind
# tighter than the comparison operators -- confirm.
precedence = (
    ("left", "EQ", "MAYOR", "MENOR"),
    ("left", "PLUS", "MINUS"),
    ("left", "TIMES", "DIV"),
    )


def p_comparacion(p):
    """comparacion : comparacion PLUS comparacion
                   | comparacion MINUS comparacion
                   | comparacion TIMES comparacion
                   | comparacion DIV comparacion
                   | comparacion MENOR comparacion
                   | comparacion EQ comparacion
                   | comparacion MAYOR comparacion
                   | PLUS comparacion
                   | MINUS comparacion
                   | CTEI"""
    # The docstring is the grammar rule; keep it verbatim.
    # Dispatch on the number of symbols matched: three for a binary
    # operation, two for a signed operand, one for a bare integer.
    arity = len(p)
    if arity == 4:
        build_binary = binary_ops[p[2]]
        p[0] = build_binary((p[1], p[3]))
    elif arity == 3:
        build_unary = unary_ops[p[1]]
        p[0] = build_unary(p[2])
    else:
        p[0] = p[1]



def p_tune(p):
    '''tune : music TUNE CTENOTA PYC'''
    # The docstring is the grammar rule; keep it verbatim.
    # The value is the `music` operand; the CTENOTA after '^' is not used.
    target = p[1]
    p[0] = target

def p_swap(p):
    'swap : SWAP LPAR music COMA CTENOTA DIV CTENOTA RPAR PYC'
    # The docstring is the grammar rule; keep it verbatim.
    # The value is the `music` argument; the two CTENOTA operands are not used.
    target = p[3]
    p[0] = target

def p_expresion(p):
    '''expresion : music MENOR music
                 | music MAYOR music
                 | music EQ music 
                 | music MAYOREQ music
                 | music MENOREQ music
                 | music EQS music'''
    # The docstring is the grammar rule; keep it verbatim.
    # The two operand values are combined with `+`; the operator token at
    # p[2] is not consulted.
    left, right = p[1], p[3]
    p[0] = left + right

def p_music(p):
    '''music : lista
             | ID
             | CTENOTA
             | CTEI '''
    # The docstring is the grammar rule; keep it verbatim.
    # Pass-through of whichever alternative matched.
    value = p[1]
    p[0] = value

def p_repeat(p):
    'repeat : REPEAT LPAR music2 DP CTEI DP TYPE LPAR PYC '
    # The docstring is the grammar rule; keep it verbatim.
    # NOTE(review): the rule ends with `TYPE LPAR PYC`, i.e. a second opening
    # parenthesis and no RPAR -- confirm this is intended.
    # The value is the music2 argument; the CTEI and TYPE parts are not used.
    target = p[3]
    p[0] = target


def p_play(p):
    '''play : PLAY LPAR music2 DP LIRA LPAR PYC 
            | PLAY LPAR music2 DP KEY LPAR PYC
            | PLAY LPAR music2 DP WIND LPAR PYC'''
    # The docstring is the grammar rule; keep it verbatim.
    # NOTE(review): each alternative ends with `LPAR PYC`, i.e. a second
    # opening parenthesis and no RPAR -- confirm this is intended.
    # The value is the music2 argument; the LIRA/KEY/WIND keyword is not used.
    target = p[3]
    p[0] = target

def p_random(p):
    'random : RANDOM LPAR CTEI RPAR PYC'
    # The docstring above is the grammar rule. There is no action code, so
    # this function never assigns p[0].

def p_augument(p):
    'augment : AUG LPAR music RPAR CTENOTA COMA CTEI TIMES CTEI RPAR PYC'
    # The docstring is the grammar rule (nonterminal `augment`); keep it
    # verbatim.
    # The value is the `music` argument; the remaining operands are not used.
    target = p[3]
    p[0] = target

def p_error(p):
    # Syntax-error hook: prints a fixed message; the argument `p` is not
    # inspected.
    # Parenthesised single-argument print: same output on Python 2.7, and
    # valid syntax on Python 3.
    print("Syntax error in input!")

# Build the parser from the p_* rules defined in this module.
parser = yacc.yacc()

# Interactive loop: read one line per prompt, parse every non-empty line,
# and stop at end of input.
while True:
    try:
        s = raw_input('medly > ')
    except EOFError:
        break
    if s:
        result = parser.parse(s)

我的问题是:

解析器定义的注释部分是否重要?

def p_param(p):   

    '''param : LPAR  RPAR                       <------ this part
         | LPAR n2 RPAR'''
    # len(p) == 3 is the empty `LPAR RPAR` form; otherwise p[2] is the n2 value.
    if len(p) == 3:
        p[0] = []
    else:
        p[0] = p[2]

1 个答案:

答案 0 :(得分:1)

评论中已经有人提到过,但为了让这个问题有一个正式的答案:

是的,文档字符串很重要。Ply会读取并解析文档字符串,从中获取构建解析器所需的语法规则(对于以函数形式定义的标记,则是其正则表达式)。

另见documentation,尤其是这部分:

  

每个语法规则都由Python函数定义,其中该函数的docstring包含适当的无上下文语法规范。