BISON + FLEX语法 - 为什么令牌被连接在一起

时间:2017-04-16 20:58:12

标签: compiler-construction bison flex-lexer yacc lex

我想了解为什么BISON会根据以下规则连接两个令牌

stmt:
  declaration                 { ... }
  | assignment                { ... }
  | exp                       { ... }
  | ID ';'  <-- this rule     { ...       
                                fprintf(stderr, "\n my id is '%s'", $1);
                                ... 

查看下面的输出就能明白我的意思。我运行解析器,然后向程序输入字符 ab;。根据我的 Bison 语法,这应该被解析为一个 ID 后跟一个 ';'。在某种程度上,实际发生的情况也确实如此。

但是,当我尝试使用规则 ID ';' 的 $1 变量时,程序输出给我的是 ab;,而不是 ab

运行程序

ab

语法

ab;                                   <-- this my input to the program

#TOKEN 294[ID] yytext -> ab
Next token is token "identifier" (1.1: )
Shifting token "identifier" (1.1: )
Entering state 5
Reading a token:
#TOKEN 59[] yytext -> ;
Next token is token ';' (1.1: )
Shifting token ';' (1.1: )
Entering state 16
Reducing stack by rule 6 (line 133):
   $1 = token "identifier" (1.1: )     <-- first token which is 'ab'
   $2 = token ';' (1.1: )              <-- second token which is ';'

[stmt] 4:
 my id is 'ab;'                        <-- the issue! This should be 'ab' not 'ab;'   
ERROR: No such ID ab; found
-> $$ = nterm stmt (1.1: )
Stack now 0 1
Entering state 10
Reducing stack by rule 2 (line 126):
   $1 = nterm prog (1.1: )
   $2 = nterm stmt (1.1: )
-> $$ = nterm prog (1.1: )
Stack now 0
Entering state 1
Reading a token:

语法(Bison 语法文件)

%{
#include <stdio.h>
#include <string>
#include <map>
#include <math.h>
#include "noname-parse.h"
#include "noname-types.h"

extern int yylex(void);
extern void yyerror(const char *error_msg);
extern void division_by_zero(YYLTYPE &yylloc);


std::map<std::string, symrec*> symbol_table;
std::map<std::string, symrec*>::iterator symbol_table_it;
%}

//////////////////////////////////////////////////
///////////* Bison declarations.  *///////////////
//////////////////////////////////////////////////

/* Semantic value type shared by tokens and nonterminals. */
%union {

  char* id_v;       /* value of ID tokens (%token <id_v> ID) */
  double double_v;  /* value of DOUBLE tokens */
  long long_v;      /* value of LONG tokens */

  symrecv symrecv;  /* value of assignment, declaration, exp and stmt */
  char* error_msg;  /* NOTE(review): not referenced by any %token/%type visible here */
};

%{

  /* Reports whether `key` has an entry in symbol_table.
     Side effect: the lookup result is stored in the file-level iterator
     symbol_table_it (end() when the key is absent). */
  bool symbol_exist(const char* key) {
    symbol_table_it = symbol_table.find(std::string(key));
    return !(symbol_table_it == symbol_table.end());
  }

  /* Stores `symrecv` in symbol_table under `key`, replacing any entry that
     is already registered under the same key. */
  void symbol_insert(const char* key, symrecv symrecv) {
    symbol_table[std::string(key)] = symrecv;
  }

  /* Returns the record stored under `key`, or NULL when the key is unknown.
     Uses find() rather than operator[] so that a failed lookup does not
     insert an empty entry into symbol_table (which would make a later
     symbol_exist() call report the key as defined). */
  symrecv symbol_retrieve(const char* key) {
    std::map<std::string, symrec*>::iterator found =
        symbol_table.find(std::string(key));
    if (found == symbol_table.end()) {
      return NULL;
    }
    return found->second;
  }

  /* Writes the value of `sym` to stderr, formatted according to sym->type.
     Types other than TYPE_LONG and TYPE_DOUBLE produce a
     "print not implemented" notice instead of a value. */
  void print_stmt(symrecv sym) {

    if (sym->type == TYPE_LONG) {
      fprintf(stderr, "%d", sym->value.intv);
      return;
    }

    if (sym->type == TYPE_DOUBLE) {
      fprintf(stderr, "%lf", sym->value.doublev);
      return;
    }

    fprintf(stderr, "print not implemented for type %d", sym->type);
  }
%}

/* Tokens without a semantic value; the quoted string is the alias Bison
   uses when it names the token in messages and traces. */
%token LINE_BREAK            "line_break"             
// %token ';'              "stmt_sep"           
%token LETTER                "letter"         
%token DIGIT                 "digit"         
%token DIGITS                "digits"         
%token DARROW                "darrow"         
%token ELSE                  "else"       
%token FALSE                 "false"         
%token IF                    "if"     
%token IN                    "in"     
%token LET                   "let"       
%token LOOP                  "loop"       
%token THEN                  "then"       
%token WHILE                 "while"         
%token BREAK                 "break"         
%token CASE                  "case"       
%token NEW                   "new"       
%token NOT                   "not"       
%token TRUE                  "true"       
%token NEWLINE               "newline"           
%token NOTNEWLINE            "notnewline"             
%token WHITESPACE            "whitespace"             
%token LE                    "le"     
%token ASSIGN                "assign"         
%token NULLCH                "nullch"         
%token BACKSLASH             "backslash"             
%token STAR                  "star"       
%token NOTSTAR               "notstar"           
%token LEFTPAREN             "leftparen"             
%token NOTLEFTPAREN          "notleftparen"               
%token RIGHTPAREN            "rightparen"             
%token NOTRIGHTPAREN         "notrightparen"                 
%token LINE_COMMENT          "line_comment"               
%token START_COMMENT         "start_comment"                 
%token END_COMMENT           "end_comment"               
%token QUOTES                "quotes"         
%token ERROR                 "error"

/* Tokens that carry a semantic value; <...> names the %union member used. */
%token <id_v> ID             "identifier"
%token <double_v> DOUBLE     "double"
%token <long_v> LONG         "long"
/* Nonterminals whose semantic value is a symrecv. */
%type  <symrecv> assignment  "assignment"
%type  <symrecv> declaration "declaration"
%type  <symrecv> exp         "expression"
%type  <symrecv> stmt        "statement"

/* Precedence levels, lowest first: each line binds tighter than the lines
   above it, so NEG (declared last) has the highest precedence here. */
%left '-' '+'
%left '*' '/'
%left LET ID 
%right '^'        /* exponentiation */
%precedence NEG   /* negation--unary minus */

/* Parsing starts at the prog nonterminal. */
%start prog

%% 

//////////////////////////////////////////////////
///////////* The grammar follows. *///////////////
//////////////////////////////////////////////////

/* A program is a possibly empty sequence of statements (left-recursive).
   Neither alternative has an action. */
prog:
  %empty
  | prog stmt
;

/* A statement: a declaration, an assignment, a bare expression, or a lone
   identifier followed by ';'. The first three alternatives print the value
   of their child through print_stmt(); the identifier form looks the name up
   in the symbol table and echoes its value.

   NOTE: $1 of the ID alternative is the char* the lexer stored in
   yylval.id_v. It must point at storage that survives the next yylex() call
   (e.g. a strdup of yytext); if it aliases yytext it will show the text of
   later tokens as well, such as "ab;" instead of "ab". */
stmt:
  declaration        { fprintf(stderr, "\n[stmt] 2: "); print_stmt($1); }
  | assignment       { fprintf(stderr, "\n[stmt] 3: "); print_stmt($1); }
  | exp              { fprintf(stderr, "\n[stmt] 1: "); print_stmt($1); }
  | ID ';'           { fprintf(stderr, "\n[stmt] 4: "); 

    fprintf(stderr, "\n my id is '%s'", $1);

    /* calloc: every field has a defined (zero) value even on the error path. */
    $$ = (symrec *) calloc (1, sizeof (symrec));

    if (!symbol_exist($1)) {

      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "No such ID %s found", $1);
      yyerror(buf);

    } else {

      symrecv found = symbol_retrieve($1);

      $$->name = $1;
      $$->type = found->type;
      $$->value.doublev = found->value.doublev;
      printf("\nID %s -> %lf", $1, $$->value.doublev);
    }
  }
  | error            { printf("%d:%d", @1.first_column, @1.last_column); }
;

/* Assignment statements.
     ID ASSIGN exp ';'      assigns to an identifier that must already exist;
     LET ID ASSIGN exp ';'  defines a new identifier that must not exist yet.
   On success the new record is stored in the symbol table under the
   identifier and becomes the value of the rule. On failure yyerror() is
   called and the rule's value is a zeroed record. */
assignment:
  ID ASSIGN exp ';' {

    /* calloc: stmt passes $$ to print_stmt() even after an error, so the
       record must never hold uninitialised fields. */
    $$ = (symrec *) calloc (1, sizeof (symrec));

    if (!symbol_exist($1)) {

      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "No such ID %s found", $1);
      yyerror(buf);

    } else {

      $$->name = $1;
      $$->type = $3->type;
      $$->value.doublev = $3->value.doublev;
      symbol_insert($1, $$);
      // printf("\nID %s -> %lf", $1, $$->value.doublev);
      printf("\n[assignment]");
    }
  }
  | LET ID ASSIGN exp ';' {

    $$ = (symrec *) calloc (1, sizeof (symrec));

    if (symbol_exist($2)) {

      char buf[1024];
      snprintf(buf, sizeof buf, "Cannot redefine ID %s", $2);
      yyerror(buf);

    } else {

      $$->name = $2;
      $$->type = $4->type;
      $$->value.doublev = $4->value.doublev;
      symbol_insert($2, $$);
      // printf("\nID %s -> %lf", $1, $$->value.doublev);
      printf("\n[assignment]");
    }
  }
;

/* LET ID ';' declares a new identifier without giving it a value.
   Redeclaring an existing identifier is reported through yyerror(). */
declaration:
  LET ID ';' {

    /* calloc: no type or value is assigned for a bare declaration, and stmt
       passes $$ to print_stmt(), so the fields must at least be zeroed. */
    $$ = (symrec *) calloc (1, sizeof (symrec));

    if (symbol_exist($2)) {

      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "Cannot redefine ID %s", $2);
      yyerror(buf);

    } else {

      $$->name = $2;
      // $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
      symbol_insert($2, $$);
      // $$->value.doublev = symbol_table_it->second->value.doublev;
      // printf("\nID %s -> %lf", $1, $$->value.doublev);
      printf("\n[declaration]");
    }
  }
;

/* Arithmetic expressions. Every alternative builds a fresh anonymous symrec
   (name "__annon") holding the result and logs the operation to stdout.
   A binary result is TYPE_DOUBLE if either operand is, otherwise it takes
   the left operand's type ('^' always takes the left operand's type).

   NOTE(review): LONG literals store value.intv, while all arithmetic below
   reads and writes value.doublev -- confirm against the layout of symrec in
   noname-types.h that this gives the intended result for integers. */
exp:
  LONG {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = TYPE_LONG;
    $$->value.intv = $1;
    printf("\nexp %ld", $1);
  }
  | DOUBLE {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = TYPE_DOUBLE;
    $$->value.doublev = $1;
    printf("\nexp %lf", $1);
  }
  | exp '+' exp        {
      // $$ = $1 + $3;
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
      $$->value.doublev = $1->value.doublev + $3->value.doublev;
      printf("\nexp + exp %lf %lf", $1->value.doublev, $3->value.doublev);
    }
  | exp '-' exp        {
      // $$ = $1 - $3;
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
      $$->value.doublev = $1->value.doublev - $3->value.doublev;
      printf("\nexp - exp %lf %lf", $1->value.doublev, $3->value.doublev);
    }
  | exp '*' exp        {
      // $$ = $1 * $3;
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
      $$->value.doublev = $1->value.doublev * $3->value.doublev;
      printf("\nexp * exp %lf %lf", $1->value.doublev, $3->value.doublev);
    }
  | exp '/' exp {
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;

      if ($3->value.doublev) {
        // $$ = $1 / $3;
        $$->value.doublev = $1->value.doublev / $3->value.doublev;
      } else {
        // Division by zero: keep the dividend as the result and report
        // the location of the divisor.
        $$->value.doublev = $1->value.doublev;
        division_by_zero(@3);
      }
      printf("\nexp / exp %lf %lf", $1->value.doublev, $3->value.doublev);
    }
  | '-' exp  %prec NEG {
      /**
        * %prec gives the rule "'-' exp" the precedence of NEG instead of
        * that of binary '-'. NEG is declared last in this file, so unary
        * minus has the highest precedence here.
        */
      // $$ = -($2->value.doublev);
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $2->type;
      $$->value.doublev = -$2->value.doublev;
      // Label fixed: this is unary minus, not exponentiation.
      printf("\n- exp %lf", $2->value.doublev);
    }
  | exp '^' exp        {
      //$$ = pow($1->value.doublev, $3->value.doublev);
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $1->type;
      $$->value.doublev = pow($1->value.doublev, $3->value.doublev);
      printf("\nexp ^ exp %lf %lf", $1->value.doublev, $3->value.doublev);
    }
  | '(' exp ')'        {
      // $$ = $2->value.doublev;
      $$ = (symrec *) malloc (sizeof (symrec));
      $$->name = (char*) "__annon";
      $$->type = $2->type;
      $$->value.doublev = $2->value.doublev;
      printf("\n(exp) %lf", $2->value.doublev);
    }
  | error                 {
      // Give the failed expression a defined, zeroed record: rules that
      // consume an exp (e.g. stmt) dereference its value.
      $$ = (symrec *) calloc (1, sizeof (symrec));
      $$->name = (char*) "__annon";
      printf("\nERROR on exp rule");
    }
  ;
%%

1 个答案:

答案 0 :(得分:1)

这个 flex 动作(action)不正确:

  yylval.id_v = yytext;

yytext指向扫描器(词法分析器)的内部工作缓冲区。每次调用扫描器时,其内容都会发生变化。因此,如果要保留构成令牌的字符串,则必须将字符串复制到您自己的存储中,例如使用strdup。 (完成后,不要忘记释放已分配的存储空间。)