我尝试使用LEX和YACC创建一个语法分析器,它与C ++非常相似...... 这部分是LEX部分:
%{
#include <stdio.h>
#include "y.tab.h"
%}
%x comment
%x comment_line
%%
"//" {BEGIN comment_line;}
<comment_line>\n {BEGIN 0;}
<comment_line>. ;
"/*" {BEGIN comment;}
<comment>"*/" {BEGIN 0;}
<comment>. ;
"bool"|"const"|"int"|"float"|"char"|"string" {return TIP;}
"void" {return TIP_VOID;}
"class" {return TIP_CLASS;}
\'.\' {return VAL_CHAR;}
\".*\" {return VAL_STRING;}
"start_program" {return BGIN;}
"end_program" {return END;}
"start_func" {return START_FUNC;}
"end_func" {return END_FUNC;}
"start_class" {return START_CLASS;}
"end_class" {return END_CLASS;}
"if" {return IF;}
"then" {return THEN;}
"end_then" {return END_THEN;}
"else" {return ELSE;}
"end_else" {return END_ELSE;}
"while" {return WHILE;}
"do" {return DO;};
"end_while" {return END_WHILE;}
"for" {return FOR;}
"end_for" {return END_FOR;}
"return" {return RETURN;}
"true"|"false" {return VAL_BOOL;}
"and"|"or" {return OPERATOR_BOOL;}
"<"|">"|"<="|"=>" {return OPERATOR_COMPARISON;}
"=="|"!=" {return OPERATOR_INEQUALITY;}
"<-" {return ASSIGN;}
[_a-zA-Z][_a-zA-Z0-9]* {return ID;}
0|[1-9][0-9]* {return NR_INT_U;}
-?0|([1-9][0-9]*) {return NR_INT;}
-?(0|[1-9][0-9]*)","[0-9]* {return NR_FLOAT;}
[ \t] ;
\n {yylineno++;}
. { return yytext[0];}
虽然这部分是YACC部分:
%{
#include <stdio.h>
extern FILE* yyin;
extern char* yytext;
extern int yylineno;
%}
%token ID TIP BGIN END ASSIGN NR NR_FLOAT NR_INT NR_INT_U TIP_VOID START_FUNC END_FUNC RETURN TIP_CLASS START_CLASS END_CLASS OPERATOR_COMPARISON OPERATOR_INEQUALITY VAL_BOOL IF THEN END_THEN ELSE END_ELSE DO WHILE END_WHILE FOR END_FOR VAL_CHAR VAL_STRING OPERATOR_BOOL
%start progr
%%
progr: declarations bloc {printf("Syntacticaly correct program\n");}
;
declarations : declaration ';'
| declarations declaration ';'
;
declaration : variable_declaration
| function body_function
| class_declaration
;
class_declaration : TIP_CLASS ID START_CLASS declarations END_CLASS
;
variable_declaration : TIP ID
| TIP ID ASSIGN value_asign
| TIP ID vector_declaration
| TIP_CLASS ID ID
;
value_asign: NR_INT_U
| VAL_BOOL
| NR_INT
| NR_FLOAT
| VAL_CHAR
| VAL_STRING
| ID '(' list_call ')'
| ID '.' ID '(' list_call ')'
;
/*functions declaration */
function : TIP ID '(' parameter_list ')'
| TIP ID '(' ')'
| TIP_VOID ID '(' parameter_list ')'
| TIP_VOID ID '(' ')'
;
body_function: START_FUNC list_function END_FUNC
;
list_function : RETURN expression ';'
| list_function RETURN expression ';'
| statement ';'
| list_function statement ';'
| bloc_logic_function
| list bloc_logic_function
;
bloc_logic_function: IF expression_logic
THEN list_function END_THEN
ELSE list_function END_ELSE
| WHILE expression_logic
list_function
END_WHILE
| FOR statement WHILE expression_logic DO statement
list_function
END_FOR
;
/*declaration of multidimensional arrays*/
vector_declaration : '[' NR_INT_U ']'
| vector_declaration '[' NR_INT_U ']'
;
vector : '[' expression ']'
| vector '[' expression ']'
;
parameter_list : param
| parameter_list ',' param
;
param : TIP ID
;
/* bloc */
bloc : BGIN list END
;
/* list of instructions */
list : bloc_logic
| list bloc_logic
| statement ';'
| list statement ';'
;
bloc_logic: IF expression_logic
THEN list END_THEN
ELSE list END_ELSE
| WHILE expression_logic
list
END_WHILE
| FOR statement WHILE expression_logic DO statement
list
END_FOR
;
/* instruction */
statement: variable_declaration
| ID ASSIGN ID
| ID ASSIGN NR_INT_U
| ID ASSIGN NR_INT
| ID ASSIGN NR_FLOAT
| ID ASSIGN VAL_BOOL
| ID ASSIGN expression
| ID vector ASSIGN expression
| ID '(' list_call ')'
| ID '.' ID '(' list_call ')'
;
expression: simbol
| expression '+' expression
| '('expression '+' expression ')'
| expression '-' expression
| '('expression '-' expression ')'
| expression '*' expression
| '('expression '*' expression ')'
| expression ':' expression
| '('expression ':' expression ')'
;
simbol: ID
| '-' ID
| '-' ID vector
| ID vector
| NR_INT_U
| NR_INT
| NR_FLOAT
| ID '(' list_call ')'
| ID '.' ID '(' list_call ')'
;
expression_logic : logical_condition
| '(' logical_condition ')'
| expression_logic OPERATOR_BOOL expression_logic
| '(' expression_logic OPERATOR_BOOL expression_logic ')'
;
logical_condition : VAL_BOOL OPERATOR_INEQUALITY VAL_BOOL
| VAL_BOOL OPERATOR_INEQUALITY expression
| expression OPERATOR_COMPARISON expression
| expression OPERATOR_INEQUALITY expression
| expression OPERATOR_INEQUALITY VAL_BOOL
;
list_call : expression
| list_call ',' expression
| VAL_STRING
| list_call ',' VAL_STRING
| VAL_CHAR
| list_call ',' VAL_CHAR
| VAL_BOOL
| list_call ',' VAL_BOOL
| /*empty*/
;
%%
int yyerror(char * s){
printf("Error: %s at line:%d\n",s,yylineno);
}
int main(int argc, char** argv){
yyin=fopen(argv[1],"r");
yyparse();
}
这是我尝试运行它的例子:
int factorial(int n)
start_func
if n==0
then
return 1;
end_then
else
return n*factorial(n-1);
end_else
end_func;
class book
start_class
int code;
string name;
float price;
void setPrice(float x)
start_func
price <- x;
end_func;
string getNume()
start_func
return name;
end_func;
end_class;
start_program
class book python;
float y<-1,37;
string auxiliar;
python.setPrice(y);
auxiliar<-getNume();
int v[50];
int j<-0;
for i<-0 while i<5 do i<-i+1
v[i]<-j+1;
end_for
end_program
我正在尝试使用这些说明在Putty上运行它:
lex tema.l
yacc -d tema.y 2>/dev/null
gcc y.tab.c lex.yy.c -ll
./a.out <prog2.txt
我似乎无法找到问题...我已经搜索了每一行代码,但是我在屏幕上显示的只是我在第一行显示错误,这是没有意义的,因为我可以'看到任何错误......也许我在这里失明了......你怎么看?
答案 0 :(得分:0)
显然你在第一行有回车或换文或其他一些奇怪的字符。您的空白和行尾规则过于严格。尝试
[ \t\f]+ ;
"\r\n"|"\n" { yylineno++; }
第一行将换页作为空格处理,添加+
使得效率更高。第二行将\r\n
和\n
作为行终止符处理。
此外,你需要解决所有这些问题:
"bool"|"const"|"int"|"float"|"char"|"string" {return TIP;}
"true"|"false" {return VAL_BOOL;}
"and"|"or" {return OPERATOR_BOOL;}
"<"|">"|"<="|"=>" {return OPERATOR_COMPARISON;}
"=="|"!=" {return OPERATOR_INEQUALITY;}
不要&#39;优化&#39;像这样。您在这里丢失了信息,您将无法生成正确的代码。此外,通过合并and
和or
,您已经拒绝了自己有机会获得正确的优先权。让解析器完成解析器的工作。这样做,适当调整你的语法和%token
声明:
"bool" {return TIP_BOOL;}
"const" {return TIP_CONST;}
"int" {return TIP_INT;}
"float" {return TIP_FLOAT;}
"char" {return TIP_CHAR;}
"string" {return TIP_STRING;}
"true" {return VAL_TRUE;}
"false" {return VAL_FALSE;}
"and" { return OPERATOR_AND; }
"or" {return OPERATOR_OR;}
"<=" { return OPERATOR_LE; }
"=>" {return OPERATOR_GE;}
"==" {return OPERATOR_EQUALITY;}
"!=" {return OPERATOR_INEQUALITY;}
并注意,您根本不必处理<
,>
,=
,+
等等:它们由
. { return yytext[0];}
因此您可以直接在语法中使用'<'
,'>'
,'='
等。