我正在尝试为编译器生成中间代码。我在macOS上使用flex-bison。 当我尝试输入时,出现语法错误,并且我不知道该语法错误来自何处。
这是我的词法:
%{
#include<stdbool.h>
#include <stdlib.h>
#include <string.h>
//#include "LinkedList.h"
#include "parser.tab.h"
char* tempString;
%}
%option yylineno
whole 0|[1-9][0-9]*
real {whole}\.(([0-9])|([0-9][0-9]*[1-9]))
divisionop [/]
noSymbol [^ \t\n\/\+\-\*\:\;\,\(\)\.]
error {noSymbol}+
%%
[ \t\n\r]+ ;
\#[-+]?{whole}/[\ :\,\;\+\-\/\*\)\n\r]|".GT."|".GE."|".EQ."|".NE."|".LT."|".LE."|":=" {
char* string = (char*) malloc(strlen(yytext));
strcpy(string, yytext + 1);
string[strlen(string)] = 0;
//printf("Found integer : %d\n", atoi(string));
//push(string,INTEGER,yylineno);
return INTEGER;}
\#[-+]?{real}/[\ :\,\;\+\-\/\*\)\n\r]|".GT."|".GE."|".EQ."|".NE."|".LT."|".LE."|":=" {
char* string = (char*) malloc(strlen(yytext));
strcpy(string, yytext + 1);
string[strlen(string)] = 0;
//printf("Found real:%s\n",string);
//push(string,REAL,0);
return REALNUM;
}
[a-zA-Z][0-9][0-9a-zA-Z]* {
//printf("Found identifier:%s\n",yytext);
//yylval.str = yytext;
//printf("****lvalue is:%s\n",yylval.str);
//push(yytext,IDENTIFIER,yylineno);
tempString = (char*) malloc(strlen(yytext));
strcpy(tempString, yytext);
return IDENTIFIER;
}
Program {
//printf("Found program :%s\n",yytext);
return PROGRAM;
}
Int {
//printf("Found int :%s\n",yytext);
return INT;
}
Real {
//printf("Found real :%s\n",yytext);
return REAL;
}
Bool {
//printf("Found bool :%s\n",yytext);
return BOOL;
}
Procedure {
//printf("Found real :%s\n",yytext);
return PROCEDURE;
}
Function {
//printf("Found function :%s\n",yytext);
return FUNCTION;
}
Begin {
//printf("Found begin :%s\n",yytext);
return BEGINN;
}
End {
//printf("Found end :%s\n",yytext);
return END;
}
If {
//printf("Found if :%s\n",yytext);
return IF;
}
Then {
//printf("Found then :%s\n",yytext);
return THEN;
}
Else {
//printf("Found else :%s\n",yytext);
return ELSE;
}
While {
//printf("Found while :%s\n",yytext);
return WHILE;
}
Do {
//printf("Found do :%s\n",yytext);
return DO;
}
For {
//printf("Found for :%s\n",yytext);
return FOR;
}
To {
//printf("Found to :%s\n",yytext);
return TO;
}
Downto {
//printf("Found downto :%s\n",yytext);
return DOWNTO;
}
Case {
//printf("Found case :%s\n",yytext);
return CASE;
}
Return {
//printf("Found return :%s\n",yytext);
return RETURN;
}
"And Then" {
//printf("Found andthen :%s\n",yytext);
//push(yytext,ANDTHEN,yylineno);
return ANDTHEN;
}
"Or Else" {
//printf("Found andthen :%s\n",yytext);
//push(yytext,ORELSE,yylineno);
return ORELSE;
}
"+" {
//printf("Found plus :%s\n",yytext);
return PLUS;
}
\- {
//printf("Found minus :%s\n",yytext);
return MINUS;
}
\* {
//printf("Found multiply :%s\n",yytext);
return MULTIPLY;
}
{divisionop} {
//printf("Found division :%s\n",yytext);
//push(yytext,DIVISION,yylineno);
return DIVISION;
}
".GT." {
//printf("Found greaterthan :%s\n",yytext);
return GREATERTHAN;
}
".GE." {
//printf("Found greaterequal :%s\n",yytext);
return GREATEREQUAL;
}
\.NE\. {
//printf("Found notequal :%s\n",yytext);
return NOTEQUAL;
}
\.EQ\. {
//printf("Found EQUAL :%s\n",yytext);
return EQUAL;
}
\.LT\. {
//printf("Found lessthan :%s\n",yytext);
return LESSTHAN;
}
\.LE\. {
//printf("Found lessequal :%s\n",yytext);
return LESSEQUAL;
}
[,] {
//printf("Found comma :%s\n",yytext);
return COMMA;
}
":=" {
//printf("Found declare :%s\n",yytext);
return DECLARE;
}
[:] {
//printf("Found semicolon :%s\n",yytext);
return COLON;
}
[;] {
//printf("Found semicolon :%s\n",yytext);
return SEMICOLON;
}
\( {
//printf("Found oppar :%s\n",yytext);
return OPPAR;
}
\) {
//printf("Found cppar :%s\n",yytext);
return CPAR;
}
False {
//printf("Found false :%s\n",yytext);
return FALSE;
}
True {
//printf("Found true :%s\n",yytext);
return TRUE;
}
{error} {
printf("Error on : **<< %s >>**\n", yytext);
//yymore();
}
\. {
printf("Error on : **<< illegal use of \" %s \" >>**\n", yytext);
//yymore();
}
%%
int yywrap(){
return 1;
}
//int main(){
// yyin = fopen("input2P.txt", "r");
// initLinkedList(table);
// while(yylex());
// printLinkedList();
// return 0;
//}
这是我的解析器。
%{
#include<stdio.h>
#include "stack.h"
//#include "LinkedList.h"
//#include "symbol_table.h"
int currentType;
char* returnType;
extern FILE* yyin;
extern char* tempString;
struct Stack* tblptrStack;
char* new_temp(char *c) {
string name("t");
name += to_string(num);
num++;
char *what = (char *) malloc(sizeof(char) * 100);
strcpy(what, name.c_str());
symbol_table_insert(what, c);
strcpy(what, symbol_table_lookup(what).id.c_str());
return what;
}
int yylex();
void yyerror(char* error);
struct SymbolTable* secondArg(struct SymbolTable* a,struct SymbolTable* b );
%}
%union{
char *str;
struct {
int quad;
int is_boolean;
char *place;
char *code;
char *type;
} eval;
}
}
%start program
%token INTEGER
%token ZERO
%token REALNUM
%token PROGRAM
%token INT
%token REAL
%token BOOL
%token PROCEDURE
%token FUNCTION
%token BEGINN
%token END
%token IF
%token THEN
%token ELSE
%token WHILE
%token DO
%token FOR
%token TO
%token DOWNTO
%token CASE
%token RETURN
%token ANDTHEN
%token ORELSE
%token IDENTIFIER
%token PLUS
%token MINUS
%token MULTIPLY
%token DIVISION
%token GREATERTHAN
%token GREATEREQUAL
%token NOTEQUAL
%token EQUAL
%token LESSTHAN
%token LESSEQUAL
%token COMMA
%token SEMICOLON
%token COLON
%token DECLARE
%token OPPAR
%token CPAR
%token FALSE
%token TRUE
%token ERROR
%type <eval> exp
%left COMMA
%left INT BOOL REAL
%left COLON
%left IF_PREC
%left ELSE
%left ANDTHEN ORELSE
%left PLUS MINUS
%left MULTIPLY DIVISION
%left GREATERTHAN GREATEREQUAL NOTEQUAL EQUAL LESSTHAN LESSEQUAL
%%
program:
PROGRAM IDENTIFIER M SEMICOLON declist block SEMICOLON {
printf("\nin program\n"); allSymbolTablePrint( pop(tblptrStack) );
}
| PROGRAM IDENTIFIER M SEMICOLON block SEMICOLON {
printf("\nin program\n"); allSymbolTablePrint( pop(tblptrStack) );
}
;
M:
{
struct SymbolTable* t = mkTable( NULL , "program");
push(t, tblptrStack);
}
;
declist:
dec
| declist dec
;
dec :
vardec {printf("var deccc");}
| procdec
| funcdec
;
type :
INT {
currentType=4;
returnType = (char*) malloc(strlen("INT"));
strcpy(returnType, "INT");
printf("this is int");
}
| REAL {
currentType=8;
returnType = (char*) malloc(strlen("REAL"));
strcpy(returnType, "REAL");
}
|BOOL {
currentType=1;
returnType = (char*) malloc(strlen("BOOL"));
strcpy(returnType, "BOOL");
}
;
iddec :
IDENTIFIER { enterVar( tempString, currentType , 0 , top(tblptrStack) ); printf("a variable entered:%s\n",tempString); }
| IDENTIFIER { enterVar( tempString, currentType , 0 , top(tblptrStack) ); printf("a variable entered:%s\n",tempString); } DECLARE {
printf("this is declare eeee");
} exp
;
idlist :
iddec {printf("this is idlist iddec");}
| idlist COMMA iddec
;
vardec :
type idlist SEMICOLON
{printf("vardec");}
;
procdec :
PROCEDURE IDENTIFIER NP OPPAR paramdecs CPAR declist block SEMICOLON {
char* tmpc = top(tblptrStack)->name;
struct SymbolTable* tmpt = pop(tblptrStack);
enterProcFunc( tmpc , 0 , "NULL" , tmpt , top(tblptrStack) );
}
| PROCEDURE IDENTIFIER NP OPPAR paramdecs CPAR block SEMICOLON {
char* tmpc = top(tblptrStack)->name;
struct SymbolTable* tmpt = pop(tblptrStack);
enterProcFunc( tmpc , 0 , "NULL" , tmpt , top(tblptrStack) );
}
;
NP:
{
printf("inside NP\t tempString:%s\n",tempString);
struct SymbolTable* t = mkTable( top(tblptrStack) , tempString );
push(t, tblptrStack);
}
;
funcdec :
FUNCTION IDENTIFIER FN OPPAR paramdecs CPAR COLON type {
char* tmpc = top(tblptrStack)->name;
struct SymbolTable* tmpt = pop(tblptrStack);
enterProcFunc(tmpc , -1 , returnType , tmpt , top(tblptrStack) );
} declist block SEMICOLON
| FUNCTION IDENTIFIER FN OPPAR paramdecs CPAR COLON type {
char* tmpc = top(tblptrStack)->name;
struct SymbolTable* tmpt = pop(tblptrStack);
enterProcFunc(tmpc , -1 , returnType , tmpt , top(tblptrStack) );
} block SEMICOLON
;
FN:
{
printf("inside FN\t tempString:%s\n",tempString);
struct SymbolTable* t = mkTable( top(tblptrStack) , tempString );
push(t, tblptrStack);
}
;
paramdecs :
paramdec { printf("paramdecs"); }
| paramdecs SEMICOLON paramdec { printf("paramdecs\n"); }
| { printf("paramdecs\n"); }
;
paramdec :
type paramlist
;
paramlist :
IDENTIFIER
| paramlist COMMA IDENTIFIER
;
block :
BEGINN stmtlist END
| stmt
;
stmtlist :
stmt
| stmtlist SEMICOLON stmt
;
lvalue :
IDENTIFIER
;
stmt :
lvalue DECLARE exp
| IF exp THEN block %prec IF_PREC
| IF exp THEN block ELSE block %prec ELSE
| WHILE exp DO block
| FOR lvalue DECLARE exp TO exp DO block
| FOR lvalue DECLARE exp DOWNTO exp DO block
| CASE exp caseelement END
| RETURN exp
| exp {printf(stmt exp);}
;
exp :
exp ANDTHEN exp
| exp ORELSE exp
| exp PLUS exp {
printf("Rule \t\t mathlogicExpression -> mathlogicExpression KW_PLUS mathlogicExpression\n");
$$.place = new_temp($1.type);
$$.type = $1.type;
printf($1.place, $3.place, "+", $$.place);
};
| exp MINUS exp
| exp MULTIPLY exp { printf(" RULE FOR MULTIPLY\n");}
| exp DIVISION exp
| OPPAR exp CPAR
| boolexp relop boolexp
| INTEGER {printf("this is integerrrr");}
| REALNUM
| TRUE
| FALSE
| lvalue
| IDENTIFIER OPPAR explist CPAR
;
boolexp:
OPPAR exp CPAR
| INTEGER
| REALNUM
| TRUE
| FALSE
| lvalue
| IDENTIFIER OPPAR explist CPAR
;
caseelement :
INTEGER COLON block SEMICOLON
| caseelement INTEGER COLON block SEMICOLON
;
explist :
exp
| explist COMMA exp
|
;
relop :
GREATERTHAN
| GREATEREQUAL
| NOTEQUAL
| EQUAL
| LESSTHAN
| LESSEQUAL
;
%%
struct SymbolTable* popTop(struct Stack* b ){
pop(b);
top(b);
}
int main(){
//printf("hi1");
tblptrStack=createStack();
tblptrStack->top = NULL;
//printf("hi2");
yyin = fopen("input3.txt", "r");
//printf("hi3");
yyparse();
return 0;
}
void yyerror(char* error){
printf("Error : %s\n", error);
}
例如,我尝试输入以下内容:
Program p1Main;
Int i1, i2:=#-23;
Begin
i1 := i1 + i2;
End
;
我收到此错误
a variable entered:i1
a variable entered:i2
Error : syntax error
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.
我尝试放入printf
来找出此语法错误的来源,但未得到任何结果。我已经看到了其他类似问题,并尝试了解决方案,但没有成功。如果您知道该如何解决,请帮助我。
答案 0 :(得分:6)
您收到语法错误,因为您的输入程序在语法上不正确并且解析器正常工作。
(修整后的)语法将程序显示为:
program: PROGRAM IDENTIFIER M SEMICOLON declist block SEMICOLON
,它显示为一个块:
block :
BEGINN stmtlist END
| stmt
;
,随后的声明列表为:
stmtlist :
stmt
| stmtlist SEMICOLON stmt
;
在这里,您将看到SEMICOLON是语句分隔符而不是终止符。这意味着在结束之前永远不会有SEMICOLON。但是您这样做:
Begin
i1 := i1 + i2;
End
因此,您必须确定哪种语言才是正确的解释;您的语法或输入程序。初学者经常犯此错误,这需要经验丰富的程序员或老师才能发现几秒钟。你怎么能用野牛学到这个?我将在...后面添加该详细信息。
您怎么能自己找到这个?好吧,遵循rules of Stack Overflow将是一个好的开始。您尚未将代码示例减少到解释问题所需的最低限度。您只是将“整个问题”转交给您的问题,只是放弃了并将其移交给我们。
好学生如何简化这个问题。您可能要做的一件事就是简化您对解析器的测试输入,并尝试越来越简单的输入程序。例如,问自己“错误是否在声明或块中发生?”。 “如果我使用一个没有声明的简单程序会怎样?”使用这种形式的推论,您可以推断出故障位于块中的某个位置。您可能已经尝试过与BLOCK和语句列表中的不同规则匹配的示例,以查看哪些替代方法有效,哪些无效,最终找到导致问题的规则。
然后,您可以创建一个较小的语法,词法分析器和测试程序,以粘贴到Stack Overflow中,该程序解释了特定语法规则失败的特定问题,并在此时寻求帮助。但是,通常在创建较小的示例的过程中,问题和解决方案就会显示出来。
查找问题根源的一种更强大,更编程的方式是使用内置在bison中的调试功能。 Bison具有一种测试模式,该模式可以通过使用-t参数运行bison并在运行时设置yydebug变量来启用。这将生成详细的解析跟踪,将准确显示导致错误的输入符号和规则。我将在...之后的下一期中向您展示产生的结果。
好。现在,在您粘贴的代码出现一些问题之后,我得到了所需的输出。您已将无效代码错误地粘贴到问题中。在parser.y的第41行上还有一个额外的“}”,我不得不删除它,但我们没有您的stack.h。我必须删除所有语义操作才能显示语法调试。无论如何,这样做并启用了bison调试后,我们得到了以下内容(在Mac上):
briantompsett:~ cssbct$ gcc parser.tab.c lex.yy.c -ll -o parser
briantompsett:~ cssbct$ ./parser
Starting parse
Entering state 0
Reading a token: Program p1Main;
Int i1, i2:=#-23;
Begin
i1 := i1 + i2;
End
;Next token is token PROGRAM ()
Shifting token PROGRAM ()
Entering state 1
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 3
Reducing stack by rule 3 (line 108):
-> $$ = nterm M ()
Stack now 0 1 3
Entering state 5
Reading a token: Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 6
Reading a token: Next token is token INT ()
Shifting token INT ()
Entering state 9
Reducing stack by rule 9 (line 122):
$1 = token INT ()
-> $$ = nterm type ()
Stack now 0 1 3 5 6
Entering state 26
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 50
Reading a token: Next token is token COMMA ()
Reducing stack by rule 12 (line 133):
$1 = token IDENTIFIER ()
-> $$ = nterm iddec ()
Stack now 0 1 3 5 6 26
Entering state 51
Reducing stack by rule 15 (line 139):
$1 = nterm iddec ()
-> $$ = nterm idlist ()
Stack now 0 1 3 5 6 26
Entering state 52
Next token is token COMMA ()
Shifting token COMMA ()
Entering state 82
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 50
Reading a token: Next token is token DECLARE ()
Shifting token DECLARE ()
Entering state 81
Reducing stack by rule 13 (line 134):
-> $$ = nterm @1 ()
Stack now 0 1 3 5 6 26 52 82 50 81
Entering state 110
Reading a token: Next token is token INTEGER ()
Shifting token INTEGER ()
Entering state 7
Reading a token: Next token is token SEMICOLON ()
Reducing stack by rule 52 (line 206):
$1 = token INTEGER ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 26 52 82 50 81 110
Entering state 124
Next token is token SEMICOLON ()
Reducing stack by rule 14 (line 134):
$1 = token IDENTIFIER ()
$2 = token DECLARE ()
$3 = nterm @1 ()
$4 = nterm exp ()
-> $$ = nterm iddec ()
Stack now 0 1 3 5 6 26 52 82
Entering state 111
Reducing stack by rule 16 (line 140):
$1 = nterm idlist ()
$2 = token COMMA ()
$3 = nterm iddec ()
-> $$ = nterm idlist ()
Stack now 0 1 3 5 6 26
Entering state 52
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 83
Reducing stack by rule 17 (line 143):
$1 = nterm type ()
$2 = nterm idlist ()
$3 = token SEMICOLON ()
-> $$ = nterm vardec ()
Stack now 0 1 3 5 6
Entering state 27
Reducing stack by rule 6 (line 117):
$1 = nterm vardec ()
-> $$ = nterm dec ()
Stack now 0 1 3 5 6
Entering state 25
Reducing stack by rule 4 (line 113):
$1 = nterm dec ()
-> $$ = nterm declist ()
Stack now 0 1 3 5 6
Entering state 24
Reading a token: this is idlist iddecthis is declare eeeethis is integerrrrvardecvar decccNext token is token BEGINN ()
Shifting token BEGINN ()
Entering state 14
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token DECLARE ()
Reducing stack by rule 34 (line 184):
$1 = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14
Entering state 31
Next token is token DECLARE ()
Shifting token DECLARE ()
Entering state 54
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token PLUS ()
Reducing stack by rule 34 (line 184):
$1 = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 39
Next token is token PLUS ()
Reducing stack by rule 56 (line 210):
$1 = nterm lvalue ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 84
Next token is token PLUS ()
Shifting token PLUS ()
Entering state 57
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token SEMICOLON ()
Reducing stack by rule 34 (line 184):
$1 = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14 31 54 84 57
Entering state 39
Next token is token SEMICOLON ()
Reducing stack by rule 56 (line 210):
$1 = nterm lvalue ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54 84 57
Entering state 87
Next token is token SEMICOLON ()
Reducing stack by rule 46 (line 200):
$1 = nterm exp ()
$2 = token PLUS ()
$3 = nterm exp ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 84
Next token is token SEMICOLON ()
Reducing stack by rule 35 (line 187):
$1 = nterm lvalue ()
$2 = token DECLARE ()
$3 = nterm exp ()
-> $$ = nterm stmt ()
Stack now 0 1 3 5 6 24 14
Entering state 38
Reducing stack by rule 32 (line 180):
$1 = nterm stmt ()
-> $$ = nterm stmtlist ()
Stack now 0 1 3 5 6 24 14
Entering state 37
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 71
Reading a token: Next token is token END ()
Error : syntax error
Error: popping token SEMICOLON ()
Stack now 0 1 3 5 6 24 14 37
Error: popping nterm stmtlist ()
Stack now 0 1 3 5 6 24 14
Error: popping token BEGINN ()
Stack now 0 1 3 5 6 24
Error: popping nterm declist ()
Stack now 0 1 3 5 6
Error: popping token SEMICOLON ()
Stack now 0 1 3 5
Error: popping nterm M ()
Stack now 0 1 3
Error: popping token IDENTIFIER ()
Stack now 0 1
Error: popping token PROGRAM ()
Stack now 0
Cleanup: discarding lookahead token END ()
Stack now 0
您将看到,现在它在END之前在SEMICOLON处停止了,如该部分的轨迹所示:
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 71
Reading a token: Next token is token END ()
Error : syntax error
Error: popping token SEMICOLON ()
这就是将来应该调试的方式。