如何调试我的flex / bison语法?

时间:2018-06-12 09:12:15

标签: parsing bison flex-lexer

这是一个非常愚蠢的问题。据我所知(afaik),语法规则没有错误,但它没有给出正确的输出。我一直盯着它看,却始终找不出错误在哪里。

我可以使用哪些工具来帮助我查看解析过程中发生的情况?我尝试过插入跟踪代码,但工作量很大,而且似乎并没有什么帮助。

parser.y

%{
#include<stdio.h>
#include<stdlib.h>  
#include<string.h>
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"

int yyparse(void);
int yylex(void);
extern char* yytext;
extern FILE * yyin;
extern int tableSize;

FILE *logout;
extern int line_count;
extern char *arr[100];
extern char *final_arr[100];

SymbolTable *table;

/* Error callback for the generated parser: writes the message s,
   followed by a newline, to stderr. */
void yyerror (const char *s)
{
    fprintf(stderr, "%s\n", s);
}

%}

%union {
    class SymbolInfo* sym;
    char *s;
    float f;
}

%error-verbose
%verbose
%token COMMA INT ID SEMICOLON FLOAT VOID LCURL RCURL RETURN NOT IF FOR WHILE PRINTLN LPAREN RPAREN
%token CONST_INT CONST_FLOAT LTHIRD RTHIRD 
%token ADDOP MULOP INCOP DECOP RELOP LOGICOP ASSIGNOP

%token <f> DOUBLE
//%expect 1

%precedence THEN
%precedence ELSE

%left "<" ">" "<=" ">=" "=" "!="
%left "+" "-"
%left "*" "/"
%left UMINUS 


%%

start : program     {   printf("start -> program\n");
                        fprintf(logout,"%d : start ->  program\n",line_count);
                    }
      ;

program : program unit {
                            printf("program -> program unit\n");
                            fprintf(logout,"%d : program -> program unit\n\n",line_count);
                            for(int j = 0; final_arr[j] != NULL; j++)
                            {
                                fprintf(logout,"%s",final_arr[j]);
                            }
                                fprintf(logout,"\n\n");
                        }
        | unit          {
                            printf("program -> unit\n");
                            fprintf(logout,"%d : program -> unit\n\n",line_count);
                            for(int j = 0; final_arr[j] != NULL; j++)
                            {
                                fprintf(logout,"%s",final_arr[j]);
                            }
                                fprintf(logout,"\n\n");

                        }
        ;

unit : var_dec  {
                    printf("unit -> var_dec\n");
                    fprintf(logout,"%d : unit -> var_dec\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

                }
                |func_declaration {

                fprintf(logout,"%d : unit -> func_declaration\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");
                }
                |func_definition {

                fprintf(logout,"%d : unit -> func_definition\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

                }
                ;

     ;

func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON {
                /* Prototype with parameters: trace the reduction on stdout,
                   then write the rule followed by the strings currently
                   held in arr to the log. */
                printf("func_declaration -> type_specifier id LPAREN parameter_list RPAREN SEMICOLON\n");
                fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
                for(int j = 0; arr[j] != NULL; j++)
                {
                    fprintf(logout,"%s",arr[j]);
                }
                fprintf(logout,"\n\n");
        }
        | type_specifier ID LPAREN RPAREN SEMICOLON {
                /* Prototype without parameters. The log line names this
                   alternative, which has no parameter_list, so the log
                   agrees with the trace printed on stdout. */
                printf("func_declaration -> type_specifier id LPAREN RPAREN SEMICOLON\n");
                fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN RPAREN SEMICOLON\n\n", line_count);
                for(int j = 0; arr[j] != NULL; j++)
                {
                    fprintf(logout,"%s",arr[j]);
                }
                fprintf(logout,"\n\n");
        }
        ;

func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement {
                printf("func_definition -> type_specifier ID LPAREN parameter_list RPAREN compound_statement\n");
                fprintf(logout,"%d : func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement\n\n", line_count); 

        }
        | type_specifier ID LPAREN RPAREN compound_statement {
                printf("func_definition -> type_specifier id LPAREN RPAREN compound_statement\n");
                fprintf(logout,"%d : func_definition : type_specifier ID LPAREN RPAREN compound_statement\n\n", line_count);    

        }
        ;               


parameter_list  : parameter_list COMMA type_specifier ID {

                printf("parameter_list -> parameter_list COMMA type_specifier ID\n");
                fprintf(logout,"%d : parameter_list  : parameter_list COMMA type_specifier ID\n\n", line_count);    
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

        }
        | parameter_list COMMA type_specifier {
                printf("parameter_list -> parameter_list COMMA type_specifier\n");
                fprintf(logout,"%d : parameter_list  : parameter_list COMMA type_specifier\n\n", line_count);   

        }
        | type_specifier ID {
                printf("parameter_list -> type_specifier ID\n");
                fprintf(logout,"%d : parameter_list : type_specifier ID\n\n", line_count);  
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");
        }
        | type_specifier {
                printf("parameter_list -> type_specifier\n");
                fprintf(logout,"%d :  parameter_list : type_specifier \n\n", line_count);   

        }
        ;


compound_statement : LCURL statements RCURL {
    printf("compound_statement -> LCURL statements RCURL\n");
    fprintf(logout,"compound_statement : LCURL statements RCURL\n\n");
}
            | LCURL RCURL
            ;

var_dec: type_specifier declaration_list SEMICOLON {

                    printf("var_dec -> type_specifier declaration_list SEMICOLON \n");
                    fprintf(logout,"%d : var_dec: type_specifier declaration_list SEMICOLON \n\n", line_count);

                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

            }
        ;            

type_specifier : INT    {printf("type_specifier -> INT\n");
                            fprintf(logout,"%d : type_specifier-> INT\n\n%s\n\n", line_count,yytext);
                        }
               | FLOAT  {printf("type_specifier ->FLOAT\n");
                            fprintf(logout,"%d : type_specifier-> FLOAT\n\n%s\n\n",line_count, yytext);

                        }
               | VOID   {printf("type_specifier -> VOID\n");
                            fprintf(logout,"%d : type_specifier-> VOID\n\n%s\n\n",line_count, yytext);

                         }
               ;        

declaration_list : declaration_list COMMA ID {

                        printf("declaration_list -> declaration_list COMMA ID\n");  
                        fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID\n\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");
                       }
                 | declaration_list COMMA ID LTHIRD CONST_INT RTHIRD {

                        printf("declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n");      
                        fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");

                        }
                 |ID    {
                        printf("declaration_list -> ID\n");
                        fprintf(logout,"%d : declaration_list -> ID\n\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");
                        }
                 |ID LTHIRD CONST_INT RTHIRD {

                        printf("declaration_list -> ID LTHIRD CONST_INT RTHIRD\n"); 
                        fprintf(logout,"%d : declaration_list -> ID LTHIRD CONST_INT RTHIRD\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");

                        }
                 ;  

statements : statement {
    printf("statements -> statement\n");
    fprintf(logout,"%d : statements : statement\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
       | statements statement
       ;

statement : var_dec
      | expression_statement
      | compound_statement
      | FOR LPAREN expression_statement expression_statement expression RPAREN statement
      | IF LPAREN expression RPAREN statement
      | WHILE LPAREN expression RPAREN statement
      | PRINTLN LPAREN ID RPAREN SEMICOLON
      | RETURN expression SEMICOLON  {
            printf("statement -> RETURN expression SEMICOLON\n");
            fprintf(logout,"%d : statement : RETURN expression SEMICOLON\n\n",line_count);
            fprintf(logout, "%s\n\n",yytext);
      }
      ;

expression_statement    : SEMICOLON         
            | expression SEMICOLON 
            ;

variable : ID   {
                    printf("variable -> ID\n");
                    fprintf(logout,"%d : variable : ID\n\n",line_count);
                    fprintf(logout, "%s\n\n",yytext);
}   
     | ID LTHIRD expression RTHIRD 
     ;

 expression : logic_expression  {
        printf("expression -> logic_expression\n");
        fprintf(logout,"%d : expression : logic_expression\n\n",line_count);
        fprintf(logout, "%s\n\n",yytext);
 }
       | variable ASSIGNOP logic_expression     
       ;

logic_expression : rel_expression   
         | rel_expression LOGICOP rel_expression    
         ;

rel_expression  : simple_expression {
    printf("rel_expression  -> simple_expression \n");
    fprintf(logout,"%d : rel_expression : simple_expression\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
        | simple_expression RELOP simple_expression 
        ;

simple_expression : term {
    printf("simple_expression -> term\n");
    fprintf(logout,"%d : simple_expression : term \n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
} 
          | simple_expression ADDOP term {
            printf("simple_expression -> simple_expression ADDOP term\n");
            fprintf(logout,"simple_expression : simple_expression ADDOP term \n\n");
            fprintf(logout, "%s\n\n",yytext);
          }
          ;

term :  unary_expression {
                printf("term -> unary_expression\n");
                fprintf(logout,"%d : term : unary_expression\n\n",line_count);
                fprintf(logout, "%s\n\n",yytext);
            }
     |  term MULOP unary_expression
     ;

unary_expression : ADDOP unary_expression  
         | NOT unary_expression 
         | factor {
            printf("unary_expression -> factor\n");
            fprintf(logout,"%d : unary_expression : factor\n\n",line_count);
            fprintf(logout, "%s\n\n",yytext);
         }
         ;

factor  : variable {
    printf("factor -> variable\n");
    fprintf(logout,"%d : factor : variable\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
    | ID LPAREN argument_list RPAREN
    | LPAREN expression RPAREN
    | CONST_INT 
    | CONST_FLOAT
    | variable INCOP 
    | variable DECOP
    ;

argument_list : arguments
              |
              ;

arguments : arguments COMMA logic_expression
          | logic_expression
          ;




%%

/*
 * Entry point: parses the file named by argv[1] and writes the reduction
 * log to log.txt.
 *
 * Returns 0 once yyparse() has run (its result is not inspected). Exits
 * with status 1 when no input path is given or when either the input file
 * or log.txt cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp;

    /* argv[1] is only valid when a path was actually supplied. */
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <input-file>\n", argc > 0 ? argv[0] : "parser");
        exit(1);
    }

    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        /* logout has not been opened yet, so report on stderr instead. */
        fprintf(stderr, "cannot open file\n");
        exit(1);
    }

    logout = fopen("log.txt", "w");
    if (logout == NULL)
    {
        /* Every grammar action writes to logout; parsing cannot proceed. */
        fprintf(stderr, "cannot open log.txt\n");
        fclose(fp);
        exit(1);
    }

    yyin = fp;
    yyparse();

    fclose(fp);
    fclose(logout);
    return 0;
}

input.txt中

int var(int a, int b){
return a+b;

}

输出我得到:

type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
rel_expression  -> simple_expression 
expression -> logic_expression
syntax error, unexpected ID, expecting SEMICOLON

预期输出为:

type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term

variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression : simple_expression ADDOP term
rel_expression  -> simple_expression 
logic_expression : rel_expression
expression -> logic_expression
statement : RETURN expression SEMICOLON
statements : statement
compound_statement : LCURL statements RCURL
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement
unit : func_definition
program : program unit
start : program

添加flex文件以防万一

%option noyywrap

%{

#include<stdlib.h>
#include<stdio.h>
#include "y.tab.h"
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"

void yyerror (char *);
extern YYSTYPE yylval;  
extern SymbolTable *table;
extern FILE *logout;
char *arr[100];
char *final_arr[100];

int k; //final_arr count
int i = 0; //arr count
int line_count = 1;

%}


id [a-z]*
DOUBLE (([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+)) 
newline \n

%%

{newline} {
        arr[i] = "\n",final_arr[k] = arr[i];
        i++; k++;
        line_count++;
    }

[ \t]+  {}
(([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+))  {
                        yylval.f = atof(yytext);
                        return DOUBLE;
                    }

"int" {
        /* A type keyword restarts the arr buffer: zero every slot and
           begin again at index 0. The fill value is the integer 0 because
           memset takes an int byte value, not a pointer constant. */
        memset(arr, 0, sizeof(arr)); i = 0;
        arr[i] = "int ";
        final_arr[k] = "int ";
        i++; k++;
        return INT;
    }
"float" {
        /* Same reset as for "int", then record the keyword text. */
        memset(arr, 0, sizeof(arr)); i = 0;
        arr[i] = "float "; final_arr[k] = "float ";
        i++; k++;
        return FLOAT;
    }
"void"  {
        /* Same reset as for "int", then record the keyword text. */
        memset(arr, 0, sizeof(arr)); i = 0;
        arr[i] = "void "; final_arr[k] = "void ";
        i++; k++;
        return VOID;
    }   


";" {
        arr[i] = ";";final_arr[k] = ";";
        i++; k++;
        return SEMICOLON;}
"," {
        arr[i] = ","; final_arr[k] = ",";
        i++; k++;
        return COMMA;
    }
"(" {
        arr[i] = "(";final_arr[k] = "(";
        i++; k++;
        return LPAREN;}
")" {
        arr[i] = ")";final_arr[k] = ")";
        i++; k++;
        return RPAREN;}
"{" {return LCURL;}
"}" {return RCURL;}

{id}    {
        yylval.s = strdup(yytext);
        arr[i] = strdup(yytext); final_arr[k] = strdup(yytext);
        k++; i++;
        for(int j = 1; arr[j] != NULL; j++)
        {
            //fprintf(logout,"%s", arr[j]);
            //fprintf(logout,"arr [%d] %s\n ",j,arr[j]);
        }
        //fprintf(logout,"\n\n");
        return ID;

        }

%%                          

1 个答案:

答案 0 :(得分:4)

您似乎花费了大量精力去实现一种跟踪解析器内部行为的方法,但收效甚微,因为此处的问题只是词法分析器(lexer)中缺少一条关键字规则。

使用flex和bison的内置调试功能会更好。然后你的语法和词法分析器会更简单,更容易阅读,并且调试输出会更完整(并且可以让你通过状态表跟踪行为)。

这是一个快速摘要。真的很快。

  1. 将 --debug 添加到您的 bison 命令中。这将使 bison 生成用于跟踪解析过程的代码。(如果想少打几个字,可以使用 -t(代表 trace),这是 POSIX 标准的命令行选项,在 yacc、byacc、btyacc 等工具中也应该可用。)

  2. 假设您的 main 函数位于 .y 文件中,请在 main 的开头添加以下三行:

    #ifdef YYDEBUG
      yydebug = 1;
    #endif
    

    对于额外的奖励积分,您可以根据某些命令行标记进行此分配。

    一旦这样做,您将收到以下跟踪输出:

    ... snip ... Pick up the trace at the ) at the end of the parameter list
    Reading a token: Next token is token RPAREN ()
    Shifting token RPAREN ()
    Entering state 28
    Reading a token: Next token is token LCURL ()
    Shifting token LCURL ()
    Entering state 25
    Reading a token: Next token is token ID ()
    Shifting token ID ()
    Entering state 44
    Reading a token: Next token is token ID ()
    ... snip ...
    

    请注意,在左大括号之后连续返回了两个 ID,分别对应于标记 return 和 a。

  3. 您还可以使用 flex --debug(或 -d)在 flex 中启用跟踪。这会使扫描器为每个被接受的标记生成如下形式的输出行(此外还有一些其他行):

    --accepting rule at line 85 ("return")
    

    不幸的是,您需要对照源代码来核对行号,但在这种情况下,您可能已经注意到上面这一行与下面这一行之间的相似性:

    --accepting rule at line 85 ("b")
    

    为了简化今后的调试,值得养成这样的习惯:把扫描器写成可以独立于解析器单独编译的形式。然后,您可以借助 -lfl 库中提供的 main() 实现,单独编译并测试您的扫描器。

  4. 参考资料和更多调试信息: