使用此语法在flex / bison中获取语法错误

时间:2019-05-15 06:44:23

标签: c compiler-construction grammar bison flex-lexer

我正在尝试为编译器生成中间代码。我在macOS上使用flex-bison。 当我尝试输入时,出现语法错误,并且我不知道该语法错误来自何处。

这是我的词法:

%{
#include<stdbool.h>
#include <stdlib.h>
#include <string.h>
//#include "LinkedList.h"
#include "parser.tab.h"
char* tempString;
%}
%option yylineno

whole   0|[1-9][0-9]*
real    {whole}\.(([0-9])|([0-9][0-9]*[1-9]))
divisionop [/]
noSymbol    [^ \t\n\/\+\-\*\:\;\,\(\)\.]
error   {noSymbol}+

%%


[ \t\n\r]+  ;



\#[-+]?{whole}/[\ :\,\;\+\-\/\*\)\n\r]|".GT."|".GE."|".EQ."|".NE."|".LT."|".LE."|":="   {

    char* string = (char*) malloc(strlen(yytext));
    strcpy(string, yytext + 1);
    string[strlen(string)] = 0;
    //printf("Found integer : %d\n", atoi(string));
    //push(string,INTEGER,yylineno);
    return INTEGER;}


\#[-+]?{real}/[\ :\,\;\+\-\/\*\)\n\r]|".GT."|".GE."|".EQ."|".NE."|".LT."|".LE."|":="    {
    char* string = (char*) malloc(strlen(yytext));
    strcpy(string, yytext + 1);
    string[strlen(string)] = 0;
    //printf("Found real:%s\n",string);
    //push(string,REAL,0);
    return REALNUM;
}

[a-zA-Z][0-9][0-9a-zA-Z]*   {
    //printf("Found identifier:%s\n",yytext);
    //yylval.str = yytext;
    //printf("****lvalue is:%s\n",yylval.str);
    //push(yytext,IDENTIFIER,yylineno);
    tempString = (char*) malloc(strlen(yytext));
    strcpy(tempString, yytext);
    return IDENTIFIER;
}

Program {
    //printf("Found program :%s\n",yytext);
    return PROGRAM;

}

Int {
    //printf("Found int :%s\n",yytext);
    return INT;

}

Real    {
    //printf("Found real :%s\n",yytext);
    return REAL;

}

Bool    {
    //printf("Found bool :%s\n",yytext);
    return BOOL;

}

Procedure   {
    //printf("Found real :%s\n",yytext);
    return PROCEDURE;

}

Function    {
    //printf("Found function :%s\n",yytext);
    return FUNCTION;

}

Begin   {
    //printf("Found begin :%s\n",yytext);
    return BEGINN;

}

End {
    //printf("Found end :%s\n",yytext);
    return END;

}

If  {
    //printf("Found if :%s\n",yytext);
    return IF;
}

Then {
    //printf("Found then :%s\n",yytext);
    return THEN;

}

Else {
    //printf("Found else :%s\n",yytext);
    return ELSE;
}

While {
    //printf("Found while :%s\n",yytext);
    return WHILE;
}

Do {
    //printf("Found do :%s\n",yytext);
    return DO;
}

For {
    //printf("Found for :%s\n",yytext);
    return FOR;
}

To {
    //printf("Found to :%s\n",yytext);
    return TO;
}

Downto {
    //printf("Found downto :%s\n",yytext);
    return DOWNTO;
}

Case {
    //printf("Found case :%s\n",yytext);
    return CASE;
}

Return {
    //printf("Found return :%s\n",yytext);
    return RETURN;
}

"And Then" {
    //printf("Found andthen :%s\n",yytext);
    //push(yytext,ANDTHEN,yylineno);
    return ANDTHEN;
}

"Or Else" {
    //printf("Found andthen :%s\n",yytext);
    //push(yytext,ORELSE,yylineno);
    return ORELSE;
}

"+" {
    //printf("Found plus :%s\n",yytext);
    return PLUS;
}

\-  {
    //printf("Found minus :%s\n",yytext);
    return MINUS;

}

\*  {
    //printf("Found multiply :%s\n",yytext);
    return MULTIPLY;
}

{divisionop}    {
    //printf("Found division :%s\n",yytext);
    //push(yytext,DIVISION,yylineno);
    return DIVISION;
}

".GT."  {
    //printf("Found greaterthan :%s\n",yytext);
    return GREATERTHAN;
}

".GE."  {
    //printf("Found greaterequal :%s\n",yytext);
    return GREATEREQUAL;
}

\.NE\.  {
    //printf("Found notequal :%s\n",yytext);
    return NOTEQUAL;
}

\.EQ\.  {
    //printf("Found EQUAL :%s\n",yytext);
    return EQUAL;
}

\.LT\.  {
    //printf("Found lessthan :%s\n",yytext);
    return LESSTHAN;
}

\.LE\.  {
    //printf("Found lessequal :%s\n",yytext);
    return LESSEQUAL;
}

[,] {
    //printf("Found comma :%s\n",yytext);
    return COMMA;
}

":="    {
    //printf("Found declare :%s\n",yytext);
    return DECLARE;
}

[:] {
    //printf("Found semicolon :%s\n",yytext);
    return COLON;
}

[;] {
    //printf("Found semicolon :%s\n",yytext);
    return SEMICOLON;
}


\( {
    //printf("Found oppar :%s\n",yytext);

    return OPPAR;

}

\)  {
    //printf("Found cppar :%s\n",yytext);
    return CPAR;
}

False   {
    //printf("Found false :%s\n",yytext);
    return FALSE;
}

True    {
    //printf("Found true :%s\n",yytext);
    return TRUE;
}

{error} {
    printf("Error on : **<<  %s  >>**\n", yytext);
    //yymore();
}
\.  {
    printf("Error on : **<<  illegal use of \" %s \" >>**\n", yytext);
    //yymore();
}

%%


int yywrap(){
    return 1;
}

//int main(){
//  yyin = fopen("input2P.txt", "r");
//  initLinkedList(table);
//  while(yylex());
//  printLinkedList();
//  return 0;
//}

这是我的解析器。

%{
#include<stdio.h>
#include "stack.h"
//#include "LinkedList.h"
//#include "symbol_table.h"

int currentType;
char* returnType;

extern FILE* yyin;
extern char* tempString;
struct Stack* tblptrStack;

char* new_temp(char *c) {
    string name("t");
    name += to_string(num);
    num++;
    char *what = (char *) malloc(sizeof(char) * 100);
    strcpy(what, name.c_str());
    symbol_table_insert(what, c);
    strcpy(what, symbol_table_lookup(what).id.c_str());
    return what;
}


int yylex();
void yyerror(char* error);
struct SymbolTable* secondArg(struct SymbolTable* a,struct SymbolTable* b );
%}

%union{
  char *str;
   struct {
        int quad;
        int is_boolean;
        char *place;
        char *code;
        char *type;
   } eval;
}

}

%start program
%token INTEGER
%token ZERO
%token REALNUM
%token PROGRAM
%token INT
%token REAL
%token BOOL
%token PROCEDURE
%token FUNCTION
%token BEGINN
%token END
%token IF
%token THEN
%token ELSE
%token WHILE
%token DO
%token FOR
%token TO
%token DOWNTO
%token CASE
%token RETURN
%token ANDTHEN
%token ORELSE
%token IDENTIFIER
%token PLUS
%token MINUS
%token MULTIPLY
%token DIVISION
%token GREATERTHAN
%token GREATEREQUAL
%token NOTEQUAL
%token EQUAL
%token LESSTHAN
%token LESSEQUAL
%token COMMA
%token SEMICOLON
%token COLON
%token DECLARE
%token OPPAR
%token CPAR
%token FALSE
%token TRUE
%token ERROR
%type <eval> exp

%left COMMA
%left INT BOOL REAL

%left COLON
%left IF_PREC   
%left ELSE
%left ANDTHEN ORELSE
%left PLUS MINUS
%left MULTIPLY DIVISION
%left GREATERTHAN GREATEREQUAL NOTEQUAL EQUAL LESSTHAN LESSEQUAL

%%

program:
    PROGRAM IDENTIFIER M SEMICOLON declist block SEMICOLON {
        printf("\nin program\n"); allSymbolTablePrint( pop(tblptrStack) ); 
    }
    | PROGRAM IDENTIFIER M SEMICOLON block SEMICOLON {
        printf("\nin program\n"); allSymbolTablePrint( pop(tblptrStack) ); 
    }
    ;
M:
    { 
        struct SymbolTable* t = mkTable( NULL , "program");
        push(t, tblptrStack);
    }
;
declist:
    dec
    | declist dec 
    ;
dec : 
    vardec {printf("var deccc");}
    | procdec
    | funcdec
    ;
type : 
    INT {
        currentType=4;
        returnType = (char*) malloc(strlen("INT"));
        strcpy(returnType, "INT");
        printf("this is int");
    }
    | REAL {
        currentType=8;
        returnType = (char*) malloc(strlen("REAL"));
        strcpy(returnType, "REAL");
    }
    |BOOL {
        currentType=1;
        returnType = (char*) malloc(strlen("BOOL"));
        strcpy(returnType, "BOOL");
    }
    ;
iddec : 
    IDENTIFIER { enterVar( tempString, currentType , 0 , top(tblptrStack) ); printf("a variable entered:%s\n",tempString); }
    | IDENTIFIER { enterVar( tempString, currentType , 0 , top(tblptrStack) ); printf("a variable entered:%s\n",tempString); } DECLARE {
    printf("this is declare eeee");
    } exp 
    ;
idlist : 
    iddec {printf("this is idlist iddec");}
    | idlist COMMA iddec
    ;
vardec :
    type idlist SEMICOLON 
    {printf("vardec");}
    ;
procdec :
    PROCEDURE IDENTIFIER NP OPPAR paramdecs CPAR declist block SEMICOLON {
        char* tmpc = top(tblptrStack)->name;
        struct SymbolTable* tmpt = pop(tblptrStack);
        enterProcFunc( tmpc , 0 , "NULL" ,  tmpt , top(tblptrStack) ); 
    }
    | PROCEDURE IDENTIFIER NP OPPAR paramdecs CPAR block SEMICOLON { 
        char* tmpc = top(tblptrStack)->name;
        struct SymbolTable* tmpt = pop(tblptrStack);
        enterProcFunc( tmpc , 0 , "NULL" ,  tmpt , top(tblptrStack) ); 
    }
    ;
NP:
    {
        printf("inside NP\t tempString:%s\n",tempString);
        struct SymbolTable* t = mkTable( top(tblptrStack) , tempString );
        push(t, tblptrStack);
    }
    ;
funcdec :
    FUNCTION IDENTIFIER FN OPPAR paramdecs CPAR COLON type {
        char* tmpc = top(tblptrStack)->name;
        struct SymbolTable* tmpt = pop(tblptrStack);
        enterProcFunc(tmpc , -1 , returnType ,  tmpt , top(tblptrStack) );
    } declist block SEMICOLON
    | FUNCTION IDENTIFIER FN OPPAR paramdecs CPAR COLON type {
        char* tmpc = top(tblptrStack)->name;
        struct SymbolTable* tmpt = pop(tblptrStack);
        enterProcFunc(tmpc , -1 , returnType ,  tmpt , top(tblptrStack) );
    } block SEMICOLON
    ;
FN:
    {
        printf("inside FN\t tempString:%s\n",tempString);
        struct SymbolTable* t = mkTable( top(tblptrStack) , tempString );
        push(t, tblptrStack);
    }
    ;
paramdecs :
    paramdec { printf("paramdecs"); }
    | paramdecs SEMICOLON paramdec { printf("paramdecs\n"); }
    | { printf("paramdecs\n"); }
    ;
paramdec : 
    type paramlist
    ;
paramlist :
    IDENTIFIER 
    | paramlist COMMA IDENTIFIER
    ;
block : 
    BEGINN stmtlist END
    | stmt 
    ;
stmtlist : 
    stmt 
    | stmtlist SEMICOLON stmt 
    ;
lvalue : 
    IDENTIFIER
    ;
stmt :
    lvalue DECLARE exp 
    | IF exp THEN block %prec IF_PREC
    | IF exp THEN block ELSE block %prec ELSE
    | WHILE exp DO block 
    | FOR lvalue DECLARE exp TO exp DO block 
    | FOR lvalue DECLARE exp DOWNTO exp DO block 
    | CASE exp caseelement END 
    | RETURN exp 
    | exp {printf(stmt exp);}
    ;
exp : 
    exp ANDTHEN exp 
    | exp ORELSE exp  
    | exp PLUS exp   {
        printf("Rule  \t\t mathlogicExpression -> mathlogicExpression KW_PLUS mathlogicExpression\n");
        $$.place = new_temp($1.type);
        $$.type = $1.type;
        printf($1.place, $3.place, "+", $$.place);
    };
    | exp MINUS exp 
    | exp MULTIPLY exp { printf(" RULE FOR MULTIPLY\n");}
    | exp DIVISION exp 
    | OPPAR exp CPAR 
    | boolexp relop boolexp
    | INTEGER {printf("this is integerrrr");}
    | REALNUM 
    | TRUE
    | FALSE 
    | lvalue 
    | IDENTIFIER OPPAR explist CPAR 
    ;
boolexp:
    OPPAR exp CPAR 
    | INTEGER 
    | REALNUM 
    | TRUE
    | FALSE 
    | lvalue 
    | IDENTIFIER OPPAR explist CPAR 
    ;
caseelement : 
    INTEGER COLON block SEMICOLON
    | caseelement INTEGER COLON block SEMICOLON
    ;
explist :
    exp 
    | explist COMMA exp  
    |
    ;
relop :
    GREATERTHAN
    | GREATEREQUAL
    | NOTEQUAL
    | EQUAL
    | LESSTHAN
    | LESSEQUAL
    ;


%%

struct SymbolTable* popTop(struct Stack* b ){
    pop(b);
    top(b);
}

int main(){
    //printf("hi1");
    tblptrStack=createStack();
    tblptrStack->top = NULL;
    //printf("hi2");
    yyin = fopen("input3.txt", "r");
    //printf("hi3");
    yyparse();
    return 0;
}

void yyerror(char* error){
    printf("Error : %s\n", error);
} 

例如,我尝试输入以下内容:

Program p1Main;

    Int i1, i2:=#-23;   
Begin
    i1 := i1 + i2;

End
;

我收到此错误

a variable entered:i1
a variable entered:i2
Error : syntax error
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.

我尝试放入printf来找出此语法错误的来源,但未得到任何结果。我已经看到了其他类似问题,并尝试了解决方案,但没有成功。如果您知道该如何解决,请帮助我。

1 个答案:

答案 0 :(得分:6)

您收到语法错误,因为您的输入程序在语法上不正确并且解析器正常工作。

(修整后的)语法将程序显示为:

program:    PROGRAM IDENTIFIER M SEMICOLON declist block SEMICOLON

,它显示为一个块:

block : 
    BEGINN stmtlist END
    | stmt 
    ;

,随后的声明列表为:

stmtlist : 
    stmt 
    | stmtlist SEMICOLON stmt 
    ;

在这里,您将看到SEMICOLON是语句分隔符而不是终止符。这意味着在结束之前永远不会有SEMICOLON。但是您这样做:

 Begin
    i1 := i1 + i2;

End

因此,您必须确定哪种语言才是正确的解释;您的语法或输入程序。初学者经常犯此错误,这需要经验丰富的程序员或老师才能发现几秒钟。你怎么能用野牛学到这个?我将在...后面添加该详细信息。


您怎么能自己找到这个?好吧,遵循rules of Stack Overflow将是一个好的开始。您尚未将代码示例减少到解释问题所需的最低限度。您只是将“整个问题”转交给您的问题,只是放弃了并将其移交给我们。

好学生如何简化这个问题。您可能要做的一件事就是简化您对解析器的测试输入,并尝试越来越简单的输入程序。例如,问自己“错误是否在声明或块中发生?”。 “如果我使用一个没有声明的简单程序会怎样?”使用这种形式的推论,您可以推断出故障位于块中的某个位置。您可能已经尝试过与BLOCK和语句列表中的不同规则匹配的示例,以查看哪些替代方法有效,哪些无效,最终找到导致问题的规则。

然后,您可以创建一个较小的语法,词法分析器和测试程序,以粘贴到Stack Overflow中,该程序解释了特定语法规则失败的特定问题,并在此时寻求帮助。但是,通常在创建较小的示例的过程中,问题和解决方案就会显示出来。

查找问题根源的一种更强大,更编程的方式是使用内置在bison中的调试功能。 Bison具有一种测试模式,该模式可以通过使用-t参数运行bison并在运行时设置yydebug变量来启用。这将生成详细的解析跟踪,将准确显示导致错误的输入符号和规则。我将在...之后的下一期中向您展示产生的结果。


好。现在,在您粘贴的代码出现一些问题之后,我得到了所需的输出。您已将无效代码错误地粘贴到问题中。在parser.y的第41行上还有一个额外的“}”,我不得不删除它,但我们没有您的stack.h。我必须删除所有语义操作才能显示语法调试。无论如何,这样做并启用了bison调试后,我们得到了以下内容(在Mac上):

briantompsett:~ cssbct$ gcc parser.tab.c lex.yy.c -ll -o parser
briantompsett:~ cssbct$ ./parser
Starting parse
Entering state 0
Reading a token: Program p1Main;

    Int i1, i2:=#-23;   
Begin
    i1 := i1 + i2;

End
;Next token is token PROGRAM ()
Shifting token PROGRAM ()
Entering state 1
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 3
Reducing stack by rule 3 (line 108):
-> $$ = nterm M ()
Stack now 0 1 3
Entering state 5
Reading a token: Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 6
Reading a token: Next token is token INT ()
Shifting token INT ()
Entering state 9
Reducing stack by rule 9 (line 122):
   $1 = token INT ()
-> $$ = nterm type ()
Stack now 0 1 3 5 6
Entering state 26
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 50
Reading a token: Next token is token COMMA ()
Reducing stack by rule 12 (line 133):
   $1 = token IDENTIFIER ()
-> $$ = nterm iddec ()
Stack now 0 1 3 5 6 26
Entering state 51
Reducing stack by rule 15 (line 139):
   $1 = nterm iddec ()
-> $$ = nterm idlist ()
Stack now 0 1 3 5 6 26
Entering state 52
Next token is token COMMA ()
Shifting token COMMA ()
Entering state 82
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 50
Reading a token: Next token is token DECLARE ()
Shifting token DECLARE ()
Entering state 81
Reducing stack by rule 13 (line 134):
-> $$ = nterm @1 ()
Stack now 0 1 3 5 6 26 52 82 50 81
Entering state 110
Reading a token: Next token is token INTEGER ()
Shifting token INTEGER ()
Entering state 7
Reading a token: Next token is token SEMICOLON ()
Reducing stack by rule 52 (line 206):
   $1 = token INTEGER ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 26 52 82 50 81 110
Entering state 124
Next token is token SEMICOLON ()
Reducing stack by rule 14 (line 134):
   $1 = token IDENTIFIER ()
   $2 = token DECLARE ()
   $3 = nterm @1 ()
   $4 = nterm exp ()
-> $$ = nterm iddec ()
Stack now 0 1 3 5 6 26 52 82
Entering state 111
Reducing stack by rule 16 (line 140):
   $1 = nterm idlist ()
   $2 = token COMMA ()
   $3 = nterm iddec ()
-> $$ = nterm idlist ()
Stack now 0 1 3 5 6 26
Entering state 52
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 83
Reducing stack by rule 17 (line 143):
   $1 = nterm type ()
   $2 = nterm idlist ()
   $3 = token SEMICOLON ()
-> $$ = nterm vardec ()
Stack now 0 1 3 5 6
Entering state 27
Reducing stack by rule 6 (line 117):
   $1 = nterm vardec ()
-> $$ = nterm dec ()
Stack now 0 1 3 5 6
Entering state 25
Reducing stack by rule 4 (line 113):
   $1 = nterm dec ()
-> $$ = nterm declist ()
Stack now 0 1 3 5 6
Entering state 24
Reading a token: this is idlist iddecthis is declare eeeethis is integerrrrvardecvar decccNext token is token BEGINN ()
Shifting token BEGINN ()
Entering state 14
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token DECLARE ()
Reducing stack by rule 34 (line 184):
   $1 = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14
Entering state 31
Next token is token DECLARE ()
Shifting token DECLARE ()
Entering state 54
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token PLUS ()
Reducing stack by rule 34 (line 184):
   $1 = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 39
Next token is token PLUS ()
Reducing stack by rule 56 (line 210):
   $1 = nterm lvalue ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 84
Next token is token PLUS ()
Shifting token PLUS ()
Entering state 57
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token SEMICOLON ()
Reducing stack by rule 34 (line 184):
   $1 = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14 31 54 84 57
Entering state 39
Next token is token SEMICOLON ()
Reducing stack by rule 56 (line 210):
   $1 = nterm lvalue ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54 84 57
Entering state 87
Next token is token SEMICOLON ()
Reducing stack by rule 46 (line 200):
   $1 = nterm exp ()
   $2 = token PLUS ()
   $3 = nterm exp ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 84
Next token is token SEMICOLON ()
Reducing stack by rule 35 (line 187):
   $1 = nterm lvalue ()
   $2 = token DECLARE ()
   $3 = nterm exp ()
-> $$ = nterm stmt ()
Stack now 0 1 3 5 6 24 14
Entering state 38
Reducing stack by rule 32 (line 180):
   $1 = nterm stmt ()
-> $$ = nterm stmtlist ()
Stack now 0 1 3 5 6 24 14
Entering state 37
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 71
Reading a token: Next token is token END ()
Error : syntax error
Error: popping token SEMICOLON ()
Stack now 0 1 3 5 6 24 14 37
Error: popping nterm stmtlist ()
Stack now 0 1 3 5 6 24 14
Error: popping token BEGINN ()
Stack now 0 1 3 5 6 24
Error: popping nterm declist ()
Stack now 0 1 3 5 6
Error: popping token SEMICOLON ()
Stack now 0 1 3 5
Error: popping nterm M ()
Stack now 0 1 3
Error: popping token IDENTIFIER ()
Stack now 0 1
Error: popping token PROGRAM ()
Stack now 0
Cleanup: discarding lookahead token END ()
Stack now 0

您将看到,现在它在END之前在SEMICOLON处停止了,如该部分的轨迹所示:

Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 71
Reading a token: Next token is token END ()
Error : syntax error
Error: popping token SEMICOLON ()

这就是将来应该调试的方式。