因此,基本上,我只是想在lex中忽略注释,完全不把注释传递给yacc。但由于某些原因,当输入中有多行注释时,我的解析器会把注释内容打印出来,而此时它本不应该打印任何内容。
老实说,我不确定发生了什么。这里本不应该有任何输出。为什么注释会被打印出来?是我的lex文件中的语法有错误吗?
这是我的lex文件:
%{
/*
 * Token codes are not defined in this .l file; they come from y.tab.h,
 * where the numbering starts at 257.
 */
#include "y.tab.h"

/*
 * Scanner mode as recorded by the rules below:
 * 0 = normal, 1 = inside a comment, 2 = inside a string literal,
 * 3 = inside a character literal.
 */
int lex_state = 0;

/* Number of the input line currently being scanned; starts at 1. */
int input_line_no = 1;

/* Fixed-size text buffers; no rule visible in this file writes to them. */
char full_line[1000];
char the_tokens[1000];
%}
whitespace [ \t]
number [0-9]
letter [A-Za-z]
alfanum [A-Za-z0-9_]
intcon {number}+
id {letter}{alfanum}*
anything .
%option noyywrap
/*
 * Start conditions for comments, literal strings and literal chars.
 * All three are exclusive: while the scanner is in one of them, only the
 * rules tagged with that condition are active. With an inclusive
 * condition the untagged token rules stay active as well, so inside a
 * string a longer match such as a keyword or identifier would win over
 * the one-character string rule and be returned as an ordinary token.
 */
%x string_in char_in
%x COMMENT
%%
/*identify comment*/
^[\t]*"{".*"}"[\t]*\n ;
^[\t]*"/*" {lex_state = 1; BEGIN COMMENT;}
^[\t]*"/*".*"*/"[\t]*\n ;
<COMMENT>"*/"[\t]*\n {lex_state=0; BEGIN 0;}
<COMMENT>"*/" {lex_state=0; BEGIN 0;}
<COMMENT>\n ;
<COMMENT>.\n ;
"extern"    { /* reserved words: each returns its own token code */ return EXTERN; }
"if"        { return IF; }
"else"      { return ELSE; }
"for"       { /* FOR, WHILE and RETURN are used by the grammar, so the scanner must produce them */ return FOR; }
"while"     { return WHILE; }
"return"    { return RETURN; }
"void"      { return VOID; }
"char"      { return CHAR; }
"int"       { return INT; }
[\n]        { /* a newline outside comments and literals advances the line counter */ input_line_no++; }
<INITIAL>\"         { /* opening quote: enter string mode; no token is returned yet */
                      lex_state = 2;
                      BEGIN(string_in);
                    }
<string_in>[^"\n]+  { /* body of the literal: consumed in one piece and discarded */ }
<string_in>\n       { /* a newline inside the literal still counts as a line */
                      input_line_no++;
                    }
<string_in>\"       { /* closing quote: the whole literal, even an empty one, is reported
                       * as exactly one STRINGCON, which is what the grammar expects */
                      lex_state = 0;
                      BEGIN(INITIAL);
                      return(STRINGCON);
                    }
<INITIAL>\'     { /* opening quote of a character literal */
                  lex_state = 3;
                  BEGIN(char_in);
                }
<char_in>[^']   { /* one ordinary character; the action has to begin on the pattern's own line */
                  return(CHARCON);
                }
<char_in>\\n    { /* the two-character escape \n; wins over the rule above by being longer */
                  return(CHARCON);
                }
<char_in>\\0    { /* the two-character escape \0 */
                  return(CHARCON);
                }
<char_in>\'     { /* closing quote: back to normal scanning */
                  lex_state = 0;
                  BEGIN(INITIAL);
                }
{whitespace}    { /* blanks and tabs are skipped */ }
{intcon}        { /* integer constant: one or more digits */ return(INTCON); }
{id}            { /* identifier: a letter followed by letters, digits or underscores */ return ID; }
"=="            { /* comparison and logical operators */ return EQUALS; }
"!="            { return NOTEQU; }
">="            { return GREEQU; }
"<="            { return LESEQU; }
">"             { return GREATE; }
"<"             { return LESSTH; }
"&&"            { return ANDCOM; }
"||"            { return ORCOMP; }
"!"             { return ABANG; }
";"             { /* punctuation and brackets */ return SEMIC; }
","             { return COMMA; }
"("             { return LPAR; }
")"             { return RPAR; }
"["             { return LBRAC; }
"]"             { return RBRAC; }
"{"             { return LCURL; }
"}"             { return RCURL; }
"+"             { /* arithmetic operators and assignment */ return ADD; }
"-"             { return SUB; }
"*"             { return MUL; }
"/"             { return DIV; }
"="             { return EQUAL; }
{anything}      { /* any character that no earlier rule recognises is reported as OTHER */
                  return(OTHER);
                }
%%
这是我的yacc文件:
%{
#include <stdio.h>

/*
 * Debug tracing: with YDEBUG defined, Y_DEBUG_PRINT(x) prints the string x;
 * without it the macro expands to nothing.
 */
#define YDEBUG
#ifndef YDEBUG
#define Y_DEBUG_PRINT(x)
#else
#define Y_DEBUG_PRINT(x) printf("Yout %s \n ",x)
#endif

int yydebug = 0;

/*
 * Objects defined in the lex file. Each declaration has to agree with the
 * definition there in both name and type: the scanner defines two char
 * arrays, the_tokens and full_line, so they are declared as arrays here
 * (declaring an array object as a pointer is not a compatible declaration).
 */
extern char the_tokens[];   /* buffer the author uses to read token text from lex */
extern int input_line_no;   /* current input line number */
extern char full_line[];    /* text of the full current line */
extern int lex_state;       /* scanner mode, examined after parsing in main() */

/* Functions that the generated parser and the rule actions call before their definitions. */
int yylex(void);
int yyerror(const char *s);
int warn(char *s);
%}
/* Terminal symbols supplied by the scanner. */
%token STRINGCON CHARCON INTCON EQUALS NOTEQU GREEQU LESEQU GREATE LESSTH
%token ANDCOM ORCOMP SEMIC COMMA LPAR RPAR LBRAC RBRAC LCURL RCURL ABANG
%token EQUAL ADD SUB MUL DIV ID EXTERN FOR WHILE RETURN IF ELSE
%token VOID CHAR INT OTHER
/* Operator precedence and associativity; in yacc each later line binds tighter than the lines before it. */
%left ORCOMP
%left ANDCOM
%left EQUALS NOTEQU
%left LESSTH GREATE LESEQU GREEQU
%left ADD SUB
%left MUL DIV
/* UMINUS appears in no %token line; the grammar refers to it only through %prec on the unary-minus rule. */
%right UMINUS
%right ABANG
/* Entry symbol of the grammar. */
%start prog
%%
/* A program: nothing at all, or a declaration (closed by ';') or a function, followed by more of the same. */
prog
    : /* empty */
    | dcl SEMIC prog2
    | Function prog2
    ;

/* Remainder of the program; has the same alternatives as prog. */
prog2
    : /* empty */
    | dcl SEMIC prog2
    | Function prog2
    ;

/* A declaration: a variable list, or a function prototype optionally preceded by EXTERN and/or a return type. */
dcl
    : VAR_list
    | ID LPAR Param_types RPAR dcl2
    | EXTERN ID LPAR Param_types RPAR dcl2
    | EXTERN Type ID LPAR Param_types RPAR dcl2
    | EXTERN VOID ID LPAR Param_types RPAR dcl2
    | Type ID LPAR Param_types RPAR dcl2
    | VOID ID LPAR Param_types RPAR dcl2
    ;

/* Further comma-separated prototypes inside the same declaration. */
dcl2
    : /* empty */
    | COMMA ID LPAR Param_types RPAR dcl2
    ;

/* A function definition: header plus a body in curly braces, optionally preceded by VOID or a type. */
Function
    : Functionhead LCURL Functionbody RCURL
    | VOID Functionhead LCURL Functionbody RCURL
    | Type Functionhead LCURL Functionbody RCURL
    ;

Functionhead
    : ID LPAR Param_types RPAR
    ;

/* A body is either empty or a variable list followed by a statement list. */
Functionbody
    : /* empty */
    | VAR_list STMT_list
    ;

/* Parameter list: the single word VOID, or one or more typed parameters. */
Param_types
    : VOID
    | Param_types1
    ;

Param_types1
    : Param_type1
    | Param_types1 COMMA Param_type1
    ;

/* One parameter: a type, a name and an optional "[]" suffix. */
Param_type1
    : Type ID Param_type11
    ;

Param_type11
    : /* empty */
    | LBRAC RBRAC
    ;

/* Variable declaration: a type followed by one or more comma-separated declarators. */
VAR_list
    : Type VAR_list2
    ;

VAR_list2
    : var_decl
    | var_decl COMMA VAR_list2
    ;

/* A declarator: a plain name, or a name with a constant array size. */
var_decl
    : ID
    | ID LBRAC INTCON RBRAC
    ;

Type
    : CHAR
    | INT
    ;
/* A statement list is one or more statements. */
STMT_list
    : STMT2
    ;

STMT2
    : STMT
    | STMT STMT2
    ;

STMT
    /* if, with and without else */
    : IF LPAR Expr RPAR STMT
    | IF LPAR Expr RPAR STMT ELSE STMT
    /* error form "if cond)": opening parenthesis absent */
    | IF Expr RPAR STMT ELSE STMT               { warn("STMT-IF: missing LPAR"); }
    /* error form "if (cond": closing parenthesis absent */
    | IF LPAR Expr STMT ELSE STMT               { warn("STMT-IF: missing RPAR"); }
    /* error form with two consecutive else keywords */
    | IF LPAR Expr STMT ELSE ELSE STMT          { warn(":too many elses"); }

    | WHILE LPAR Expr RPAR STMT

    /* for loops: every combination of present / absent init, condition and update */
    /* for(c=0;c<1;c++) */
    | FOR LPAR Assign SEMIC Expr SEMIC Assign RPAR STMT
    /* for(;c<1;c++) */
    | FOR LPAR SEMIC Expr SEMIC Assign RPAR STMT
    /* for(;;c++) */
    | FOR LPAR SEMIC SEMIC Assign RPAR STMT
    /* for(;;) */
    | FOR LPAR SEMIC SEMIC RPAR STMT
    /* for(c=0;;) */
    | FOR LPAR Assign SEMIC SEMIC RPAR STMT
    /* for(c=0;c<1;) */
    | FOR LPAR Assign SEMIC Expr SEMIC RPAR STMT
    /* for(c=0;;c++) */
    | FOR LPAR Assign SEMIC SEMIC Assign RPAR STMT
    /* for(;c<1;) */
    | FOR LPAR SEMIC Expr SEMIC RPAR STMT
    /* error form "for()" */
    | FOR LPAR RPAR STMT                        { warn("STMT-FOR: empty statement"); }
    /* error form "for(;;;)" */
    | FOR LPAR SEMIC SEMIC SEMIC RPAR           { warn("STMT-FOR: too many semicolons"); }
    /* error form "for;;)" */
    | FOR SEMIC SEMIC RPAR STMT                 { warn("STMT-FOR: missing LPAR"); }
    /* error form "for(;;" */
    | FOR LPAR SEMIC SEMIC STMT                 { warn("STMT-FOR: missing RPAR"); }

    /* return, with and without a value */
    | RETURN Expr SEMIC
    | RETURN SEMIC
    /* error form: bare "return" */
    | RETURN                                    { warn("STMT-Return:missing semicolon"); }

    | Assign SEMIC

    /* function call used as a statement */
    | ID LPAR RPAR SEMIC
    | ID LPAR Expr Expr2 RPAR SEMIC
    /* error form: call without the closing semicolon */
    | ID LPAR Expr Expr2 RPAR                   { warn(":missing semicolon"); }

    /* compound statement, empty block, empty statement */
    | LCURL STMT2 RCURL
    | LCURL RCURL
    | SEMIC
    ;
/*
 * Assignment to a variable or to an array element.
 *
 * There is deliberately no alternative that derives Assign from Assign
 * itself: such a rule is a derivation cycle, it only adds parser conflicts,
 * and the action attached to it cannot serve as a "missing semicolon"
 * diagnostic. An assignment that is not followed by what its context
 * requires is reported through the normal syntax-error path (yyerror).
 */
Assign
    : ID Assign1 EQUAL Expr
    ;

/* Optional array subscript on the assignment target, with error forms for malformed brackets. */
Assign1
    : /* empty: plain variable */
    | LBRAC Expr RBRAC
    | LBRAC Expr error      { warn("Assign1: missing RBRAC"); }
    | error Expr RBRAC      { warn("Assign1: missing LBRAC"); }
    | LBRAC error RBRAC     { warn("Assign1: Invalid array index"); }
    ;
/*
 * Expressions.
 *
 * Every binary operator token is written directly in its own alternative.
 * yacc takes a rule's precedence from the last terminal in that rule, so an
 * alternative of the shape "Expr <nonterminal> Expr" has no precedence at
 * all and the %left declarations would never be applied to it.
 *
 * Array element access is produced only through Array; listing the same
 * right-hand side under Expr as well gives the parser two ways to reduce it.
 */
Expr
    : SUB Expr %prec UMINUS
    | ABANG Expr
    /* arithmetic */
    | Expr ADD Expr
    | Expr SUB Expr
    | Expr MUL Expr
    | Expr DIV Expr
    /* relational */
    | Expr EQUALS Expr
    | Expr NOTEQU Expr
    | Expr LESEQU Expr
    | Expr GREEQU Expr
    | Expr GREATE Expr
    | Expr LESSTH Expr
    /* logical */
    | Expr ANDCOM Expr
    | Expr ORCOMP Expr
    /* primaries */
    | ID
    | ID LPAR RPAR
    | ID LPAR Expr Expr2 RPAR
    | LPAR Expr RPAR
    | INTCON
    | CHARCON
    | STRINGCON
    | Array
    | error                 { warn("Expr: invalid expression "); }
    ;

/*
 * Remaining arguments of a call: zero or more ", Expr" groups. The empty
 * alternative ends the list, so a separate "COMMA Expr" alternative is not
 * needed (it would be a second derivation of the same input).
 */
Expr2
    : /* empty */
    | COMMA Expr Expr2
    ;

Array
    : ID LBRAC Expr RBRAC
    | ID error RBRAC        { warn( "Array: invalid array expression"); }
    ;
%%
/*
 * Program entry point: runs the parser once, then reports an input that
 * ended while the scanner was still inside a comment or a literal.
 *
 * lex_state is set by the scanner: 1 inside a comment, 2 inside a string
 * literal, 3 inside a character literal, 0 otherwise.
 *
 * Returns the value returned by yyparse().
 */
int main(void)
{
    int result = yyparse();

    if (lex_state == 1) {
        yyerror("End of file within a comment");
    } else if (lex_state == 2) {
        yyerror("End of file within a string");
    } else if (lex_state == 3) {
        /* the scanner also records character-literal mode, so report it like the other two */
        yyerror("End of file within a character constant");
    }
    return result;
}
/*
 * End-of-input hook in the lex/yacc naming convention; always returns 1.
 * NOTE(review): the lex file sets %option noyywrap, so the generated scanner
 * may never call this function - confirm before relying on it.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
/*
 * Error reporter: writes the message s, followed by the current input line
 * number, to stderr.
 *
 * The return type is spelled out (implicit int is not valid in C99 and
 * later) and a value is actually returned.
 *
 * Returns 0.
 */
int yyerror(const char *s)
{
    fprintf(stderr, "%s on line %d\n", s, input_line_no);
    return 0;
}
/*
 * Writes the warning text s, followed by a newline, to stderr.
 * Called from the grammar's error alternatives.
 *
 * The return type is spelled out (implicit int is not valid in C99 and
 * later) and a value is actually returned.
 *
 * Returns 0.
 */
int warn(char *s)
{
    fprintf(stderr, "%s\n", s);
    return 0;
}
这是我要运行的测试:
/* function definitions interspersed with global declarations and
function prototypes */
void a( void ), b(int x), c(int x, int y, int z);
int a1( void ), b1(int x), c1(int x, char y, char z, int w);
int x, y[10], z;
int x0, y0, z0[20];
void foo0( void ) {}
void foo1( int x ) {}
char u0, u1[10];
char a2( void ), b2(char x), c2(char x, char y, char z, int w);
extern int a3( void ), b3(int x), c3(int x, char y, char z, int w);
extern char a4( void ), b4(char x), c4(char x, char y, char z, int w);
void foo2( int x, int y, int z ) {}
int foo3( int x[], char y, int z[], char w[] ) {}
int x1, x2[100], x3, x4, x5[1000];
int b5(int x[]), c5(int x, char y[], char z, int w[], int u[], int v);
char b6(char x[]), c6(char x, char y[], char z[], int w);
char foo4( int x[], char y, int z[], char w[] ) {}
extern int a7( void ), b7(int x[]), c7(int x[], char y, char z[], int w[]);
extern char a8( void ), b8(char x[]), c8(char x, char y[], char z, int w[]);
我尝试过重写处理注释的规则,但似乎没有任何效果,也不知道还需要做些什么。任何帮助将不胜感激,谢谢!
答案 0 :(得分:0)
(F)lex自动添加默认的后备规则
<*>.|\n ECHO;
在规则集的末尾,因此规则无法识别的任何字符都将打印在标准输出上。这就是您所看到的。
这种行为很少是您在解析器中想要的,所以我几乎总是以下面这一行作为flex文件的开头:
%option nodefault
[注1]
这将取消默认的后备规则,并且如果某些输入可能会用到该规则,flex会给出警告。不幸的是,警告消息并不能清楚地说明哪些输入可能无法匹配;但如果您忽略警告并使用生成的扫描器,它会在运行时对无法匹配的输入产生致命错误。
在这种情况下,很明显是在 COMMENT 起始条件下,注释的内容没有被任何规则匹配。也许您本想在第四条规则中使用 .|\n ?不过那样会使第三条规则变得多余。
实际上,我通常使用:
%option nodefault noinput nounput noyywrap 8bit yylineno
noinput 和 nounput 用于抑制关于未使用函数的编译器警告(因为我通常不使用这些函数);noyywrap 免去了提供 yywrap 的需要,这样flex在遇到输入结束时会立即返回输入结束标记;而 yylineno 让flex跟踪行号,这对生成错误消息很有用。
8bit 本来就是默认值;但如果您要求生成“快速”扫描器,默认值就会改变,此时若输入包含大于127的字符代码,将产生未定义的行为。我发现很难对快速表选项进行计时测试,因此尽管我通常不使用这些选项(它们不会加快速度,并且会使表更大),但谨慎起见,还是应当考虑到其他人可能想使用它们的情况。
答案 1 :(得分:0)
由于多种原因,您的块注释模式基本上是错误的...
通常,对于块注释,词法模式是这样的:
"/*"                { BEGIN(COMMENT); }
<COMMENT>{
    [^*/]+          { /* skip a run of characters other than '*' and '/' */ }
    "*"+"/"         { BEGIN(INITIAL); }
    [*/]            { /* a single '*' or '/' that does not close the comment */ }
}