首先,我把语法文件(grammar.y)和词法文件(scanner.l)附在这里以供参考:
grammar.y
%{
#include <stdio.h>
extern int yylineno;
int yylex ();
int yyerror ();
//extern char* yytext;
%}
%union{
int integer;
float flt;
char *str;
}
%token <str> IDENTIFIER
%token <flt> CONSTANTF
%token <integer> CONSTANTI
%token LEFT_BRACKET RIGHT_BRACKET
%token EQ INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP
%token SUB_ASSIGN MUL_ASSIGN ADD_ASSIGN
%token TYPE_NAME
%token INT FLOAT VOID
%token IF ELSE WHILE RETURN FOR
%start program
%%
primary_expression
: IDENTIFIER {printf("use identifier: %s, length is %d\n", $1, strlen($1));}
| CONSTANTI {printf("use constant Int: %d\n", $1);}
| CONSTANTF {printf("use constant Float: %f\n", $1);}
| LEFT_BRACKET expression RIGHT_BRACKET
| IDENTIFIER LEFT_BRACKET RIGHT_BRACKET {printf("non-param methodCall identifier is: %s\n", $1);}
| IDENTIFIER LEFT_BRACKET argument_expression_list RIGHT_BRACKET {printf("param methodCall identifier is: %s\n", $1);}
| IDENTIFIER INC_OP
| IDENTIFIER DEC_OP
;
postfix_expression
: primary_expression
| postfix_expression '[' expression ']'
;
argument_expression_list
: expression
| argument_expression_list ',' expression
;
unary_expression
: postfix_expression
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator unary_expression
;
unary_operator
: '-'
;
multiplicative_expression
: unary_expression
| multiplicative_expression '*' unary_expression
| multiplicative_expression '/' unary_expression
;
additive_expression
: multiplicative_expression
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;
comparison_expression
: additive_expression
| additive_expression '<' additive_expression
| additive_expression '>' additive_expression
| additive_expression LE_OP additive_expression
| additive_expression GE_OP additive_expression
| additive_expression EQ_OP additive_expression
| additive_expression NE_OP additive_expression
;
expression
: unary_expression assignment_operator comparison_expression
| comparison_expression
;
assignment_operator
: EQ
| MUL_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
;
declaration
: type_name declarator_list ';'
;
declarator_list
: declarator
| declarator_list ',' declarator
;
type_name
: VOID
| INT
| FLOAT
;
declarator
: IDENTIFIER {printf("declare an identifer: %s\n",$1);}
| LEFT_BRACKET declarator RIGHT_BRACKET
| declarator '[' CONSTANTI ']'
| declarator '[' ']'
| declarator LEFT_BRACKET parameter_list RIGHT_BRACKET
| declarator LEFT_BRACKET RIGHT_BRACKET
;
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;
parameter_declaration
: type_name declarator
;
statement
: compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
compound_statement
: '{' '}'
| '{' statement_list '}'
| '{' declaration_list statement_list '}'
;
declaration_list
: declaration
| declaration_list declaration
;
statement_list
: statement
| statement_list statement
;
expression_statement
: ';'
| expression ';'
;
selection_statement
: IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement
| FOR '(' expression_statement expression_statement expression ')' statement
;
iteration_statement
: WHILE '(' expression ')' statement
;
jump_statement
: RETURN ';'
| RETURN expression ';'
;
program
: external_declaration
| program external_declaration
;
external_declaration
: function_definition
| declaration
;
function_definition
: type_name declarator compound_statement
;
%%
#include <stdio.h>
#include <string.h>
extern char yytext[];
extern int column;
extern int yylineno;
extern FILE *yyin;
char *file_name = NULL;
/*
 * Error callback for the parser.
 *
 * Writes one diagnostic line to stderr in the form
 * "<file>:<line>:<column>: <message>", using the globals file_name,
 * yylineno and column for the location and `s` for the message.
 * Always returns 0.
 */
int yyerror (char *s)
{
    /* Flush stdout before the diagnostic is written to stderr. */
    fflush (stdout);

    fprintf (stderr,
             "%s:%d:%d: %s\n",
             file_name, yylineno, column, s);

    return 0;
}
/*
 * Entry point: parse the single source file named on the command line.
 *
 * Returns 1 on a usage error, when the file cannot be opened, or when
 * yyparse() reports a failure; returns 0 only when parsing succeeded.
 */
int main (int argc, char *argv[]) {
    FILE *input;
    int status;

    if (argc != 2) {
        fprintf (stderr, "%s: error: no input file\n", *argv);
        return 1;
    }

    input = fopen (argv[1], "r");
    if (!input) {
        fprintf (stderr, "%s: Could not open %s\n", *argv, argv[1]);
        return 1;
    }

    /* argv[1] stays valid for the whole run, so yyerror() can use it
       directly; no heap copy (and no matching free) is needed. */
    file_name = argv[1];
    yyin = input;

    /* yyparse() returns non-zero when parsing fails; report that to the
       caller instead of always exiting with 0. */
    status = (yyparse () != 0);

    fclose (input);
    return status;
}
scanner.l
%{
#include <stdio.h>
#include <string.h>
#include "grammar.tab.h"
void count();
int comment ();
int check_type ();
%}
D[0-9]
L[a-zA-Z_]
%option yylineno
%%
"/*" { comment(); }
"//".* { count(); }
"float" { count(); return(FLOAT); }
"if" { count(); return(IF); }
"else" { count(); return(ELSE); }
"int" { count(); return(INT); }
"return" { count(); return(RETURN); }
"void" { count(); return(VOID); }
"while" { count(); return(WHILE); }
"for" { count(); return(FOR); }
[$]?{L}({L}|{D})* { count(); yylval.str=yytext; return(IDENTIFIER); printf("recognize identifier");}
{D}+ { count(); yylval.integer=atoi(yytext); return(CONSTANTI); }
{D}+"."{D}* { count(); yylval.flt=atof(yytext); return(CONSTANTF); }
"(" {count();return(LEFT_BRACKET);}
")" {count();return(RIGHT_BRACKET);}
"=" {count();return(EQ);}
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
";" { count(); return(';'); }
"{" { count(); return('{'); }
"}" { count(); return('}'); }
"," { count(); return(','); }
"/" { count(); return('/'); }
"[" { count(); return('['); }
"]" { count(); return(']'); }
"." { count(); return('.'); }
"!" { count(); return('!'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
[ \t\v\n\f] { count(); }
. { /* ignore bad characters */ }
%%
/*
 * End-of-input hook called by the scanner.
 * Returning 1 tells it there is no further input to switch to.
 */
int yywrap(void) {
    return 1;
}
/*
 * Consume the body of a block comment. The scanner calls this after the
 * opening delimiter has been matched; characters are read with input()
 * until the closing star-slash pair has been consumed.
 *
 * The scan also stops at end of input, so an unterminated comment cannot
 * loop forever. Always returns 0.
 */
int comment(void) {
    int prev = 0;   /* previously read character; 0 before the first read */
    int c;

    /* Keep the character in an int so EOF is representable. Both EOF and 0
       are treated as end of input, since the value input() returns there
       has differed between flex versions. */
    while ((c = input()) != EOF && c != 0) {
        if (prev == '*' && c == '/')
            break;
        prev = c;
    }
    return 0;
}
int column = 0;
/*
 * Advance the global `column` counter across the text currently held in
 * yytext: a newline resets it to 0, a tab moves it to the next multiple
 * of 8, and any other character adds 1.
 */
void count() {
    const char *p;

    for (p = yytext; *p != '\0'; p++) {
        switch (*p) {
        case '\n':
            column = 0;
            break;
        case '\t':
            column += 8 - (column % 8);
            break;
        default:
            column++;
            break;
        }
    }
}
Makefile:
# Build the `parse` executable from grammar.y and scanner.l.
LEX=lex
YACC=yacc
CFLAGS=-Wall
CC=gcc

# These targets are commands, not files.
.PHONY: all clean

all: parse

# Compile and link both generated C sources in a single step.
parse: grammar.c scanner.c
	$(CC) $(CFLAGS) -o $@ $^

# Generate the parser; this also writes grammar.tab.h, which scanner.l includes.
grammar.c: grammar.y
	$(YACC) -d -o $@ --defines=grammar.tab.h $^

# Generate a scanner source from any .l file.
%.c: %.l
	$(LEX) -o $@ $^

# Remove every file the build produces.
clean:
	rm -f parse grammar.c grammar.tab.h scanner.c
=============================================== ===============================
问题在于: 当我解析输入文件,如:
int a;
int fhu;
float fs;
int drive(float te){
int b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
我得到了以下输出:
declare an identifer: a
declare an identifer: fhu
declare an identifer: fs
declare an identifer: drive
declare an identifer: te
declare an identifer: b
use identifier: b=, length is 2
use constant Int: 1
use identifier: fhu =, length is 5
use identifier: fs), length is 3
param methodCall identifier is: drive(fs)
use identifier: fs =, length is 4
use constant Float: 0.400000
use constant Int: 0
我很困惑:为什么识别出的标识符变成了“b=”、“fhu =”和“fs)”,末尾多出了意外的字符 '='、'=' 和 ')'?可以看到,声明语句中的标识符识别正确,但函数体语句中的标识符却识别错误。
它们使用的是同一条词法规则,为什么结果却不同?
有人能帮我解决这个问题吗?我是 yacc 新手,任何建议都会有帮助,非常欢迎!拜托了,谢谢!
答案 0 :(得分:2)
正如我在第一条评论中提到的,问题在于没有保存词法扫描器返回的字符串的副本。我能够按描述重现该问题(使用的是 Mac OS X 10.9.1 Mavericks 而不是 Ubuntu,所以它不是特定于平台的)。
“简单”的修复是:
[$]?{L}({L}|{D})* { count(); yylval.str=strdup(yytext); printf("recognize identifier (%s)\n", yylval.str); return(IDENTIFIER);}
其中有三处改动:
1. 把 printf() 移到 return 之前,这样它才会被执行。
2. 让 printf() 打印出标识符字符串。
3. 使用 strdup() 复制字符串!
“简单”一词加了引号(两次),因为一旦分配了内存,马上就要面对“这块内存在哪里释放”的问题,而目前的答案是“哪里都不释放——直到程序退出”,这不太可能是合适的长期方案。因此,你需要检查 IDENTIFIER 记号返回的标识符是如何被使用的,以确保内存最终被释放。不过这个修复能让你回到正轨。
在示例文件上运行parse
的输出:
recognize identifier (a)
declare an identifer: a
recognize identifier (fhu)
declare an identifer: fhu
recognize identifier (fs)
declare an identifer: fs
recognize identifier (drive)
declare an identifer: drive
recognize identifier (te)
declare an identifer: te
recognize identifier (b)
declare an identifer: b
recognize identifier (b)
use identifier: b, length is 1
use constant Int: 1
recognize identifier (fhu)
use identifier: fhu, length is 3
recognize identifier (drive)
recognize identifier (fs)
use identifier: fs, length is 2
param methodCall identifier is: drive
recognize identifier (fs)
use identifier: fs, length is 2
use constant Float: 0.400000
use constant Int: 0
我最后想弄清楚的一点是:当代码直接把 yytext 赋给 yylval.str 时,为什么语法分析器拿到的是“b=”而不是“b”?在语法分析的过程中,yytext 是如何变化的?
尝试在代码中加入下面的内容——特别是 grammar.y 中的 push_identifier() 和 dump_identifiers()——并分别配合 scanner.l 的“带 strdup()”和“不带 strdup()”两个版本来运行。
%{
#include <stdio.h>
#include <string.h>
extern int yylineno;
int yylex(void);
int yyerror(char *str);
static void push_identifier(char *str);
//extern char* yytext;
%}
%expect 1
%union{
int integer;
float flt;
char *str;
}
%token <str> IDENTIFIER
%token <flt> CONSTANTF
%token <integer> CONSTANTI
%token LEFT_BRACKET RIGHT_BRACKET
%token EQ INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP
%token SUB_ASSIGN MUL_ASSIGN ADD_ASSIGN
%token TYPE_NAME
%token INT FLOAT VOID
%token IF ELSE WHILE RETURN FOR
%start program
%%
primary_expression
: IDENTIFIER {printf("use identifier: %s, length is %zu\n", $1, strlen($1)); push_identifier($1);}
| CONSTANTI {printf("use constant Int: %d\n", $1);}
| CONSTANTF {printf("use constant Float: %f\n", $1);}
| LEFT_BRACKET expression RIGHT_BRACKET
| IDENTIFIER LEFT_BRACKET RIGHT_BRACKET {printf("non-param methodCall identifier is: %s\n", $1); push_identifier($1);}
| IDENTIFIER LEFT_BRACKET argument_expression_list RIGHT_BRACKET {printf("param methodCall identifier is: %s\n", $1); push_identifier($1);}
| IDENTIFIER INC_OP { push_identifier($1); }
| IDENTIFIER DEC_OP { push_identifier($1); }
;
postfix_expression
: primary_expression
| postfix_expression '[' expression ']'
;
argument_expression_list
: expression
| argument_expression_list ',' expression
;
unary_expression
: postfix_expression
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator unary_expression
;
unary_operator
: '-'
;
multiplicative_expression
: unary_expression
| multiplicative_expression '*' unary_expression
| multiplicative_expression '/' unary_expression
;
additive_expression
: multiplicative_expression
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;
comparison_expression
: additive_expression
| additive_expression '<' additive_expression
| additive_expression '>' additive_expression
| additive_expression LE_OP additive_expression
| additive_expression GE_OP additive_expression
| additive_expression EQ_OP additive_expression
| additive_expression NE_OP additive_expression
;
expression
: unary_expression assignment_operator comparison_expression
| comparison_expression
;
assignment_operator
: EQ
| MUL_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
;
declaration
: type_name declarator_list ';'
;
declarator_list
: declarator
| declarator_list ',' declarator
;
type_name
: VOID
| INT
| FLOAT
;
declarator
: IDENTIFIER {printf("declare an identifer: %s\n",$1); push_identifier($1); }
| LEFT_BRACKET declarator RIGHT_BRACKET
| declarator '[' CONSTANTI ']'
| declarator '[' ']'
| declarator LEFT_BRACKET parameter_list RIGHT_BRACKET
| declarator LEFT_BRACKET RIGHT_BRACKET
;
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;
parameter_declaration
: type_name declarator
;
statement
: compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
compound_statement
: '{' '}'
| '{' statement_list '}'
| '{' declaration_list statement_list '}'
;
declaration_list
: declaration
| declaration_list declaration
;
statement_list
: statement
| statement_list statement
;
expression_statement
: ';'
| expression ';'
;
selection_statement
: IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement
| FOR '(' expression_statement expression_statement expression ')' statement
;
iteration_statement
: WHILE '(' expression ')' statement
;
jump_statement
: RETURN ';'
| RETURN expression ';'
;
program
: external_declaration
| program external_declaration
;
external_declaration
: function_definition
| declaration
;
function_definition
: type_name declarator compound_statement
;
%%
#include <stdio.h>
#include <string.h>
extern char yytext[];
extern int column;
extern int yylineno;
extern FILE *yyin;
char *file_name = NULL;
/*
 * Error callback for the parser.
 *
 * Emits "<file>:<line>:<column>: <message>" on stderr, taking the location
 * from the globals file_name, yylineno and column and the message from `s`.
 * Always returns 0.
 */
int yyerror(char *s)
{
    const int result = 0;

    /* Flush stdout before the diagnostic is written to stderr. */
    fflush(stdout);
    fprintf(stderr, "%s:%d:%d: %s\n",
            file_name, yylineno, column, s);

    return result;
}
/* Capacity of the identifier table used for debugging. */
enum { MAX_IDENTIFIERS = 20 };

/* Recorded identifier pointers, in the order they were pushed. */
static char *list[MAX_IDENTIFIERS];
/* Number of entries of `list` currently in use. */
static int sp = 0;

/*
 * Record the pointer `str` in the next free slot of `list`.
 *
 * Only the pointer is stored; the string is deliberately not copied, so
 * later changes to the memory it points at show up when the table is
 * printed. When the table is full, the identifier is dropped and a
 * message is written to stderr instead of writing past the array.
 */
static void push_identifier(char *str)
{
    if (sp >= MAX_IDENTIFIERS)
    {
        fprintf(stderr, "push_identifier: table full (%d entries), identifier dropped\n",
                MAX_IDENTIFIERS);
        return;
    }
    list[sp++] = str;
}
/*
 * Print the identifier table to stdout: a header line with the number of
 * entries (sp), then one line per entry showing its index and the string
 * the stored pointer currently refers to.
 */
static void dump_identifiers(void)
{
    int idx = 0;

    printf("Identifiers (%d):\n", sp);
    while (idx < sp)
    {
        printf("[%2d] = <<%s>>\n", idx, list[idx]);
        idx++;
    }
}
/*
 * Entry point: parse the single source file named on the command line,
 * then print the identifiers recorded during the parse.
 *
 * Returns 1 on a usage error, when the file cannot be opened, or when
 * yyparse() reports a failure; returns 0 only when parsing succeeded.
 */
int main(int argc, char *argv[])
{
    FILE *input;
    int status;

    if (argc != 2)
    {
        fprintf(stderr, "%s: error: no input file\n", *argv);
        return 1;
    }

    input = fopen(argv[1], "r");
    if (!input)
    {
        fprintf(stderr, "%s: Could not open %s\n", *argv, argv[1]);
        return 1;
    }

    /* argv[1] stays valid for the whole run, so yyerror() can use it
       directly; no heap copy (and no matching free) is needed. */
    file_name = argv[1];
    yyin = input;

    /* yyparse() returns non-zero when parsing fails; report that to the
       caller instead of always exiting with 0. */
    status = (yyparse() != 0);

    /* The table is printed even after a failed parse. */
    dump_identifiers();

    fclose(input);
    return status;
}
使用“带 strdup()”的版本时,输出的结尾是:
use constant Int: 0
Identifiers (11):
[ 0] = <<a>>
[ 1] = <<fhu>>
[ 2] = <<fs>>
[ 3] = <<drive>>
[ 4] = <<te>>
[ 5] = <<b>>
[ 6] = <<b>>
[ 7] = <<fhu>>
[ 8] = <<fs>>
[ 9] = <<drive>>
[10] = <<fs>>
使用“不带 strdup()”的版本时,输出的结尾是:
use constant Int: 0
Identifiers (11):
[ 0] = <<a;
int fhu;
float fs;
int drive(float te){
int b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 1] = <<fhu;
float fs;
int drive(float te){
int b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 2] = <<fs;
int drive(float te){
int b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 3] = <<drive(float te){
int b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 4] = <<te){
int b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 5] = <<b;
b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 6] = <<b=1;
fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 7] = <<fhu = drive(fs);
fs = 0.4;
return 0;
}
>>
[ 8] = <<fs);
fs = 0.4;
return 0;
}
>>
[ 9] = <<drive(fs);
fs = 0.4;
return 0;
}
>>
[10] = <<fs = 0.4;
return 0;
}
>>
换句话说,扫描器中的代码在为自己的目的重复使用 yytext 所指向的缓冲区。以上是解析结束时的状态;我不确定解析过程中具体发生了什么——要弄清楚这一点,需要在每次调用 push_identifier() 时都调用一次 dump_identifiers()。在 push_identifier() 和 dump_identifiers() 中打印指针地址也可能很有启发性。