我想了解为什么 Bison 在下面这条规则中会把两个记号(token)连接在一起:
stmt:
declaration { ... }
| assignment { ... }
| exp { ... }
| ID ';' <-- this rule { ...
fprintf(stderr, "\n my id is '%s'", $1);
...
查看下面的输出就能明白我的意思。我运行解析器,然后向程序输入字符 ab;。
按照我的 Bison 语法,这应该被解析为一个 ID 后跟一个 ';'。在某种程度上,实际情况也确实如此。
但是,当我在规则 ID ';' 中使用 $1 变量时,程序输出给我的却是 ab;,而不是 ab。
运行程序时的输出(语法文件见后文):
ab; <-- this my input to the program
#TOKEN 294[ID] yytext -> ab
Next token is token "identifier" (1.1: )
Shifting token "identifier" (1.1: )
Entering state 5
Reading a token:
#TOKEN 59[] yytext -> ;
Next token is token ';' (1.1: )
Shifting token ';' (1.1: )
Entering state 16
Reducing stack by rule 6 (line 133):
$1 = token "identifier" (1.1: ) <-- first token which is 'ab'
$2 = token ';' (1.1: ) <-- second token which is ';'
[stmt] 4:
my id is 'ab;' <-- the issue! This should be 'ab' not 'ab;'
ERROR: No such ID ab; found
-> $$ = nterm stmt (1.1: )
Stack now 0 1
Entering state 10
Reducing stack by rule 2 (line 126):
$1 = nterm prog (1.1: )
$2 = nterm stmt (1.1: )
-> $$ = nterm prog (1.1: )
Stack now 0
Entering state 1
Reading a token:
语法(以下是 Bison 语法文件;词法分析器的代码未在此列出):
%{
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <map>
#include <string>
#include "noname-parse.h"
#include "noname-types.h"
extern int yylex(void);
extern void yyerror(const char *error_msg);
extern void division_by_zero(YYLTYPE &yylloc);
std::map<std::string, symrec*> symbol_table;
std::map<std::string, symrec*>::iterator symbol_table_it;
%}
//////////////////////////////////////////////////
///////////* Bison declarations. *///////////////
//////////////////////////////////////////////////
/* Semantic value type for tokens and nonterminals. Each member is selected by
 * the matching <tag> in the %token / %type declarations of this file. */
%union {
/* Identifier text; tag of the ID token. */
char* id_v;
/* Tag of the DOUBLE token. */
double double_v;
/* Tag of the LONG token. */
long long_v;
/* Tag of the assignment, declaration, exp and stmt nonterminals. */
symrecv symrecv;
/* Not referenced by any declaration or rule visible in this file. */
char* error_msg;
};
%{
/*
 * Reports whether `key` is present in the global symbol table.
 *
 * Side effect: the file-level iterator `symbol_table_it` is left pointing at
 * the matching entry, or at symbol_table.end() when there is no match.
 */
bool symbol_exist(const char* key) {
  const std::string lookup_name(key);
  symbol_table_it = symbol_table.find(lookup_name);
  if (symbol_table_it == symbol_table.end()) {
    return false;
  }
  return true;
}
/*
 * Stores `symrecv` in the global symbol table under `key`, replacing whatever
 * record was previously registered for that key.
 */
void symbol_insert(const char* key, symrecv symrecv) {
  symbol_table[std::string(key)] = symrecv;
}
/*
 * Returns the record registered under `key`, or a null record when the key is
 * not in the symbol table.
 *
 * The lookup uses find() rather than operator[]: operator[] would insert a
 * null entry for an unknown key, and symbol_exist() would then report that
 * key as defined.
 */
symrecv symbol_retrieve(const char* key) {
  const std::map<std::string, symrec*>::iterator found =
      symbol_table.find(std::string(key));
  symrec* record = NULL;
  if (found != symbol_table.end()) {
    record = found->second;
  }
  return record;
}
/*
 * Writes the value of `sym` to stderr according to its type tag:
 * TYPE_LONG prints value.intv, TYPE_DOUBLE prints value.doublev, and any other
 * tag prints a "not implemented" notice containing the tag itself.
 *
 * NOTE(review): value.intv is printed with "%d" although the grammar assigns
 * it from a `long` token value -- confirm the declared type of intv.
 */
void print_stmt(symrecv sym) {
  if (sym->type == TYPE_LONG) {
    fprintf(stderr, "%d", sym->value.intv);
    return;
  }
  if (sym->type == TYPE_DOUBLE) {
    fprintf(stderr, "%lf", sym->value.doublev);
    return;
  }
  fprintf(stderr, "print not implemented for type %d", sym->type);
}
%}
/* Tokens that carry no semantic value. The quoted string is the alias Bison
 * shows for the token in diagnostics and debug traces. */
%token LINE_BREAK "line_break"
// %token ';' "stmt_sep"
%token LETTER "letter"
%token DIGIT "digit"
%token DIGITS "digits"
%token DARROW "darrow"
%token ELSE "else"
%token FALSE "false"
%token IF "if"
%token IN "in"
%token LET "let"
%token LOOP "loop"
%token THEN "then"
%token WHILE "while"
%token BREAK "break"
%token CASE "case"
%token NEW "new"
%token NOT "not"
%token TRUE "true"
%token NEWLINE "newline"
%token NOTNEWLINE "notnewline"
%token WHITESPACE "whitespace"
%token LE "le"
%token ASSIGN "assign"
%token NULLCH "nullch"
%token BACKSLASH "backslash"
%token STAR "star"
%token NOTSTAR "notstar"
%token LEFTPAREN "leftparen"
%token NOTLEFTPAREN "notleftparen"
%token RIGHTPAREN "rightparen"
%token NOTRIGHTPAREN "notrightparen"
%token LINE_COMMENT "line_comment"
%token START_COMMENT "start_comment"
%token END_COMMENT "end_comment"
%token QUOTES "quotes"
%token ERROR "error"
/* Tokens with a semantic value; <tag> names the %union member that holds it. */
%token <id_v> ID "identifier"
%token <double_v> DOUBLE "double"
%token <long_v> LONG "long"
/* Nonterminals whose semantic value is a symbol record. */
%type <symrecv> assignment "assignment"
%type <symrecv> declaration "declaration"
%type <symrecv> exp "expression"
%type <symrecv> stmt "statement"
/* Operator precedence, lowest first; symbols on one line share a level. */
%left '-' '+'
%left '*' '/'
/* NOTE(review): LET and ID are not used as operators by any rule in this
 * file; the reason for giving them a precedence is not apparent -- confirm. */
%left LET ID
%right '^' /* exponentiation */
/* NOTE(review): NEG is declared after '^', so unary minus gets the highest
 * precedence and -2^2 groups as (-2)^2 -- confirm this is intended. */
%precedence NEG /* negation--unary minus */
/* Start symbol of the grammar. */
%start prog
%%
//////////////////////////////////////////////////
///////////* The grammar follows. *///////////////
//////////////////////////////////////////////////
/* prog: the start symbol; a possibly empty, left-recursive list of stmt.
 * Neither alternative has an action. */
prog:
%empty
| prog stmt
;
/*
 * stmt: one top-level statement.
 *
 *   declaration | assignment | exp
 *       Trace the alternative number to stderr and print the record produced
 *       by the sub-rule; $$ keeps Bison's default value ($1).
 *   ID ';'
 *       Looks the identifier up in the symbol table. When it exists, $$ gets
 *       the identifier name and a copy of the stored value.doublev; otherwise
 *       the failure is reported through yyerror().
 *   error
 *       Prints the column range of the erroneous input.
 *
 * NOTE: $1 of the ID alternative is the char* the scanner stored in
 * yylval.id_v. This action runs only after ';' has been scanned as well, so
 * the scanner has to hand over its own copy of the text; a pointer into
 * yytext would by then show the text of the later token.
 */
stmt:
  declaration { fprintf(stderr, "\n[stmt] 2: "); print_stmt($1); }
| assignment  { fprintf(stderr, "\n[stmt] 3: "); print_stmt($1); }
| exp         { fprintf(stderr, "\n[stmt] 1: "); print_stmt($1); }
| ID ';' {
    fprintf(stderr, "\n[stmt] 4: ");
    fprintf(stderr, "\n my id is '%s'", $1);
    /* calloc: every field has a defined value even when the lookup fails. */
    $$ = (symrec *) calloc (1, sizeof (symrec));
    if (!symbol_exist($1)) {
      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "No such ID %s found", $1);
      yyerror(buf);
    } else {
      $$->name = $1;
      $$->value.doublev = symbol_retrieve($1)->value.doublev;
      printf("\nID %s -> %lf", $1, $$->value.doublev);
    }
  }
| error { printf("%d:%d", @1.first_column, @1.last_column); }
;
/*
 * assignment: binds the value of an expression to an identifier.
 *
 *   ID ASSIGN exp ';'
 *       The identifier must already be in the symbol table; otherwise
 *       "No such ID ..." is reported through yyerror().
 *   LET ID ASSIGN exp ';'
 *       The identifier must not be in the symbol table yet; otherwise
 *       "Cannot redefine ID ..." is reported through yyerror().
 *
 * On success $$ is a new record holding the identifier name plus the type and
 * value.doublev of the expression, and that record is stored in the table.
 * The record is allocated with calloc because the stmt rule passes $$ to
 * print_stmt() even on the error path, where no field is assigned.
 *
 * NOTE(review): only value.doublev is copied from the expression, while LONG
 * literals are stored in value.intv -- confirm against the symrec layout.
 */
assignment:
  ID ASSIGN exp ';' {
    $$ = (symrec *) calloc (1, sizeof (symrec));
    if (!symbol_exist($1)) {
      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "No such ID %s found", $1);
      yyerror(buf);
    } else {
      $$->name = $1;
      $$->type = $3->type;
      $$->value.doublev = $3->value.doublev;
      symbol_insert($1, $$);
      printf("\n[assignment]");
    }
  }
| LET ID ASSIGN exp ';' {
    $$ = (symrec *) calloc (1, sizeof (symrec));
    if (symbol_exist($2)) {
      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "Cannot redefine ID %s", $2);
      yyerror(buf);
    } else {
      $$->name = $2;
      $$->type = $4->type;
      $$->value.doublev = $4->value.doublev;
      symbol_insert($2, $$);
      printf("\n[assignment]");
    }
  }
;
/*
 * declaration: LET ID ';' introduces an identifier without a value.
 *
 * When the identifier is already in the symbol table, "Cannot redefine ID ..."
 * is reported through yyerror(). Otherwise a new record carrying only the
 * identifier name is stored in the table.
 *
 * The record is allocated with calloc: no alternative assigns its type or
 * value, yet the stmt rule passes $$ to print_stmt(), which reads the type.
 */
declaration:
  LET ID ';' {
    $$ = (symrec *) calloc (1, sizeof (symrec));
    if (symbol_exist($2)) {
      char buf[1024];
      /* Bounded write: the grammar puts no limit on identifier length. */
      snprintf(buf, sizeof buf, "Cannot redefine ID %s", $2);
      yyerror(buf);
    } else {
      $$->name = $2;
      symbol_insert($2, $$);
      printf("\n[declaration]");
    }
  }
;
/*
 * exp: arithmetic expressions over LONG and DOUBLE literals.
 *
 * Every alternative produces a newly allocated record named "__annon" and
 * traces its operands to stdout. For + - * / the result type is TYPE_DOUBLE
 * when either operand is TYPE_DOUBLE, otherwise the type of the left operand.
 * Unary minus and parentheses keep the operand type; '^' keeps the type of the
 * left operand. Division by zero yields the left operand and calls
 * division_by_zero() with the location of the divisor.
 *
 * The error alternative also builds a record (TYPE_DOUBLE, 0). Without it $$
 * would keep the unrelated semantic value of the error token, and the rules
 * that use this exp would dereference it as a record.
 *
 * NOTE(review): LONG literals store only value.intv, while every arithmetic
 * alternative reads and writes value.doublev -- confirm against the symrec
 * layout whether integer operands are handled as intended.
 */
exp:
  LONG {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = TYPE_LONG;
    $$->value.intv = $1;
    printf("\nexp %ld", $1);
  }
| DOUBLE {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = TYPE_DOUBLE;
    $$->value.doublev = $1;
    printf("\nexp %lf", $1);
  }
| exp '+' exp {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
    $$->value.doublev = $1->value.doublev + $3->value.doublev;
    printf("\nexp + exp %lf %lf", $1->value.doublev, $3->value.doublev);
  }
| exp '-' exp {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
    $$->value.doublev = $1->value.doublev - $3->value.doublev;
    printf("\nexp - exp %lf %lf", $1->value.doublev, $3->value.doublev);
  }
| exp '*' exp {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
    $$->value.doublev = $1->value.doublev * $3->value.doublev;
    printf("\nexp * exp %lf %lf", $1->value.doublev, $3->value.doublev);
  }
| exp '/' exp {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type;
    if ($3->value.doublev) {
      $$->value.doublev = $1->value.doublev / $3->value.doublev;
    } else {
      /* Zero divisor: keep the dividend as the result and report it. */
      $$->value.doublev = $1->value.doublev;
      division_by_zero(@3);
    }
    printf("\nexp / exp %lf %lf", $1->value.doublev, $3->value.doublev);
  }
| '-' exp %prec NEG {
    /* %prec gives this rule the precedence of NEG instead of that of '-'. */
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $2->type;
    $$->value.doublev = -$2->value.doublev;
    /* Trace label corrected: this alternative used to print "exp ^ exp". */
    printf("\n- exp %lf", $2->value.doublev);
  }
| exp '^' exp {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $1->type;
    $$->value.doublev = pow($1->value.doublev, $3->value.doublev);
    printf("\nexp ^ exp %lf %lf", $1->value.doublev, $3->value.doublev);
  }
| '(' exp ')' {
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = $2->type;
    $$->value.doublev = $2->value.doublev;
    printf("\n(exp) %lf", $2->value.doublev);
  }
| error {
    printf("\nERROR on exp rule");
    /* Give the recovered expression a well-defined record. */
    $$ = (symrec *) malloc (sizeof (symrec));
    $$->name = (char*) "__annon";
    $$->type = TYPE_DOUBLE;
    $$->value.doublev = 0;
  }
;
%%
答案 0 :(得分:1)
这个 flex 动作不正确:
yylval.id_v = yytext;
yytext 指向扫描器内部的工作缓冲区,每次调用扫描器时其内容都会改变。因此,如果要保留构成记号的字符串,就必须把它复制到自己的存储空间中,例如使用 strdup。(用完之后,不要忘记释放所分配的存储空间。)