我正在使用 PLY 解析包含 for 结构的 C 程序。我的问题是:解析器似乎只接受某些形式的 for 循环;尽管我已经为其他形式编写了相应的产生式,这些形式仍然会报语法错误。
代码:
import ply.lex as lex
import ply.yacc as yacc
# Token names that have a pattern of their own. Keyword tokens are appended
# from `reserved` below. TYPE and BINARY_OP are declared but have no pattern
# in this section.
tokens = [
    'HASH', 'HEADER_FILE', 'FLOW_OPEN', 'FLOW_CLOSE', 'SEMI_COLON',
    'TYPE', 'SMALL_OPEN', 'SMALL_CLOSE', 'IDENTIFIER', 'COLON',
    'ASSIGNMENT_OP', 'UNARY_OP', 'BINARY_OP', 'LITERAL', 'LOGICAL_OP',
]

# Keyword text -> token name. Keywords are lexed with the identifier pattern
# and re-tagged afterwards, so they need no t_* pattern of their own.
reserved = {
    'include': 'INCLUDE',
    'main': 'MAIN',
    'int': 'INT',
    'void': 'VOID',
    'for': 'FOR',
    'switch': 'SWITCH',
    'case': 'CASE',
    'default': 'DEFAULT',
    'break': 'BREAK',
}
tokens += list(reserved.values())

# Simple (string) token patterns. PLY tries string patterns in order of
# decreasing pattern length, so HEADER_FILE is tried before LOGICAL_OP even
# though both can start with '<'.
t_HASH = r'\#'
t_HEADER_FILE = r'<stdio\.h>'   # '.' escaped so only the literal file name matches
t_FLOW_OPEN = r'\{'
t_FLOW_CLOSE = r'\}'
t_SMALL_OPEN = r'\('
t_SMALL_CLOSE = r'\)'
t_SEMI_COLON = r';'
t_ASSIGNMENT_OP = r'='
t_UNARY_OP = r'\+\+|--'         # exactly '++' or '--'; a lone '+' is not a unary op
t_LITERAL = r'[0-9]+'
t_LOGICAL_OP = r'\<|\>'
t_COLON = r':'                  # was an annotation (`t_COLON: ...`), which assigned nothing
def t_check_reserved(t):
    r'[a-zA-Z][a-zA-Z]*'
    # Keywords and identifiers share one pattern; a word found in `reserved`
    # takes its keyword token type, anything else is a plain IDENTIFIER.
    t.type = reserved.get(t.value, 'IDENTIFIER')
    return t
def t_error(token):
    # Lexer error hook: `token.value` holds the remaining unmatched input, so
    # only its first character is the offending one.
    print(f'Illegal character: {token.value[0]}')
    # Step over the bad character; PLY raises a LexError if the error hook
    # leaves the scan position unchanged.
    token.lexer.skip(1)
def t_whitespace(t):
    r'\s+'
    # `\s+` also consumes newlines, so line counting has to happen here;
    # otherwise syntax-error messages would always report the first line.
    t.lexer.lineno += t.value.count('\n')
    # Returning nothing discards the token.
def t_COMMENT(t):
    r'(//.*)|(/\*(.|\n)*?\*/)'
    # First alternative: any line comment (covers `//`, `///` and `//!`).
    # Second alternative: a block comment, matched non-greedily so it stops at
    # the first `*/` and may span several lines.
    # Keep the line counter accurate across multi-line block comments.
    t.lexer.lineno += t.value.count('\n')
    # Returning nothing discards the comment.
def t_newline(t):
    r'\n+'
    # Advance the lexer's line counter by the number of newlines matched.
    # NOTE(review): t_whitespace is defined earlier with r'\s+', which also
    # matches newlines; PLY tries function rules in definition order, so this
    # rule may never fire.
    newline_count = len(t.value)
    t.lexer.lineno = t.lexer.lineno + newline_count
# Build the lexer from the `tokens` list and the t_* rules defined in this module.
lexer = lex.lex()
# Building the parser
# Top-level rule: a program is a header followed by a body.
# PLY uses the first grammar rule defined as the start symbol unless told otherwise.
def p_expression_start(p):
    'expression : header body'
    print('Derivation complete!')
# Header: a single `#include` of the header-file token.
def p_header(p):
    'header : HASH INCLUDE HEADER_FILE'
    print('Deriving rule header')
# Body: the `main` signature followed by its brace-delimited block.
def p_body(p):
    'body : main rest'
    print('Deriving rule body')
# Signature of main: a return type, the `main` keyword and an empty parameter list.
def p_main(p):
    'main : type MAIN SMALL_OPEN SMALL_CLOSE'
    print('Deriving rule main')
# Type specifier: `int` or `void`. No action is taken on reduction.
def p_type(p):
    '''type : INT
            | VOID
    '''
# Block: a statement list enclosed in braces.
def p_rest(p):
    'rest : FLOW_OPEN st FLOW_CLOSE'
    print('Deriving rule rest')
# Statement list: identifier statements and declarations, optionally ending in
# a for loop, or nothing at all (`end`).
# NOTE(review): the `for` alternative is not followed by `st`, so this grammar
# accepts no further statements after a for loop within the same block.
def p_st(p):
    '''
    st : IDENTIFIER SEMI_COLON st
       | type IDENTIFIER SEMI_COLON st
       | for
       | end
    '''
    print('Deriving rule st')
# For loop: `for ( declarative ; num_expression ; update_expression ) { st }`.
# The braces around the loop body are mandatory in this grammar.
def p_for(p):
    '''
    for : FOR SMALL_OPEN declarative SEMI_COLON num_expression SEMI_COLON update_expression SMALL_CLOSE FLOW_OPEN st FLOW_CLOSE
    '''
    print('Deriving rule for')
# Loop initialiser: assigns an identifier or a literal to an identifier.
# No action is taken on reduction.
def p_declarative(p):
    '''
    declarative : IDENTIFIER ASSIGNMENT_OP IDENTIFIER
                | IDENTIFIER ASSIGNMENT_OP LITERAL
    '''
# Loop condition: a bare literal, a bare identifier, or a conditional expression.
# No action is taken on reduction.
def p_num_expression(p):
    '''
    num_expression : LITERAL
                   | IDENTIFIER
                   | conditional_expression
    '''
# Comparison of an identifier against a literal or another identifier, or
# nothing at all. The last alternative is written as a bare `|` (an empty
# right-hand side) instead of naming a separate `empty` nonterminal, so this
# rule does not depend on such a rule being defined elsewhere.
# No action is taken on reduction.
def p_conditional_expression(p):
    '''
    conditional_expression : IDENTIFIER LOGICAL_OP LITERAL
                           | IDENTIFIER LOGICAL_OP IDENTIFIER
                           |
    '''
# Loop update: an identifier followed by `++`/`--`, or nothing at all. The
# empty alternative is written as a bare `|` (an empty right-hand side) instead
# of naming a separate `empty` nonterminal, so this rule does not depend on
# such a rule being defined elsewhere.
# No action is taken on reduction.
def p_update_expression(p):
    '''
    update_expression : IDENTIFIER UNARY_OP
                      |
    '''
# Empty production: terminates a statement list.
def p_end(p):  # Empty production
    '''end :'''
    print('Deriving rule end')
def p_error(p):
    """Report a syntax error.

    `p` is the offending token, or None when the input ended before the
    grammar was satisfied.
    """
    if p is None:
        token = "end of file"
    else:
        token = f"{p.type}({p.value}) on line {p.lineno}"
    print(f"Syntax error: Unexpected {token}")
# Build the LALR parser from the p_* rules in this module; debug=True asks
# PLY to emit its debugging output for the generated tables.
parser = yacc.yacc(method='LALR', debug=True)

# Read the whole source file and parse it in a single call. The grammar
# describes a complete program, so it cannot be fed to the parser one line
# at a time.
with open(r'forparsing.txt', 'r') as file:
    content = file.read()
parser.parse(content)
该程序失败的一些情况是:1. 当我在 for 循环(即 for(init_exp; num_exp; update_exp))的 num_exp 部分给出标识符或字面量时,解析会失败,即使我在语法规则中明确列出了这些情况:
def p_num_expression(p):
'''
num_expression : LITERAL
| IDENTIFIER
| conditional_expression
'''
此外,只有当 conditional_expression 部分形如 a > 10(IDENTIFIER LOGICAL_OP LITERAL)时,C 程序才能被成功解析,尽管该非终结符的语法规则为:
def p_conditional_expression(p):
'''
conditional_expression : IDENTIFIER LOGICAL_OP LITERAL
| IDENTIFIER LOGICAL_OP IDENTIFIER
| empty
'''
也就是说,3 种可能的推导中只有一种被接受;使用另外两种形式中的任何一种都会导致语法错误。
编辑:这是我收到语法错误的输入之一:
# include <stdio.h>
void main()
{
for(a=15;1;a++)
{
}
}
这不应该失败,因为非终结符 num_expression 的产生式之一就是 num_expression -> LITERAL。但是,如果我把其中的 1 替换为一个条件表达式(例如 a > 10),推导就能完成。
我编写的语法是否存在根本性的错误,导致语法分析器似乎未考虑某些规则?