Bison:解析结束时出现语法错误

时间:2015-06-14 16:43:28

标签: syntax-error bison

您好,这是我的迷你编程语言的bison语法文件:

    %{
 /* Bison prologue: C declarations used by the parser code and the epilogue. */
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
 #include "projectbison.tab.h"

 /* Error callback; defined in the epilogue of this file. */
 void yyerror(char const *);


 /* Scanner input/output streams; main() assigns them before parsing. */
 extern FILE *yyin;
 extern FILE *yyout;

 extern int yylval;
 extern int yyparse(void);
 /* Line counter; defined in the scanner file and incremented by its "\n" rule. */
 extern int n;
 /* Number of times yyerror() has been called. */
 int errNum = 0;
 /* Not referenced anywhere else in the code shown. */
 int forNum = 0;
%}

/* Precedence and associativity. Bison gives operators declared on later
   lines higher precedence, so as written the comparison operators rank above
   MULT/DIV/MOD, which rank above PLUS/MINUS.
   NOTE(review): confirm that this ordering is intended. */
%left PLUS MINUS
%left MULT DIV MOD
%nonassoc EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL


/* Terminal symbols expected from the scanner.
   NOTE(review): no rule in the scanner file shown returns DO1. */
%token INTEGER BOOLEAN STRING VOID 
%token ID
%token AND 
%token BEGINP 
%token ENDP
%token EXTERN 
%token COMMA
%token EQ
%token RETURN1
%token IF1 ELSE1 WHILE1 FOR1 DO1 
%token LOR LAND LNOT
%token TRUE FALSE
%token EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token LB1 RB1             
%token LCB1 RCB1
%token SEMIC
%token NEWLINE
%token PLUS MINUS 
%token MULT DIV MOD
%token DIGIT STRING1
/* The grammar's start symbol. */
%start program

%%


/*50*/
/* A whole program: extern prototypes, the `VOID ID LB1 RB1` header line,
   definitions, then the BEGINP ... ENDP command field. */
program : external-decl program-header defin-field command-field 
        ;
/* Zero or more extern prototypes (left-recursive list, may be empty). */
external-decl : external-decl external-prototype 
              | 
              ;
/* One extern prototype; the trailing NEWLINE token is mandatory. */
external-prototype : EXTERN prototype-func NEWLINE             
                   ;
/* Header of the main program; the NEWLINE token after RB1 is mandatory. */
program-header : VOID ID LB1 RB1 NEWLINE    

              ;
/* Zero or more definitions (left-recursive list, may be empty). */
defin-field : defin-field definition
             | 
             ;
definition : variable-defin
           | func-defin
           | prototype-func
           ;
/* Variable declaration: type, identifier list, SEMIC, optional NEWLINE. */
variable-defin : data-type var-list SEMIC newline 

             ;
data-type : INTEGER               
          | BOOLEAN
          | STRING
          ;
/* One or more IDs separated by COMMA (right-recursive through extra-ids). */
var-list : ID extra-ids 
         ;
extra-ids : COMMA var-list
      | 
      ;
/* Function definition: header, local definitions, then a command field. */
func-defin : func-header defin-field  command-field
            ;
/* Function prototype: header followed by SEMIC. */
prototype-func : func-header SEMIC   

              ;
/* Return type, name and parenthesised parameter list, then optional NEWLINE. */
func-header : data-type ID LB1 lists RB1 newline 
            ;
/* Formal parameter list; may be empty. */
lists: list-typ-param
    | 
    ;
/* One or more formal parameters separated by COMMA. */
list-typ-param : typical-param typical-params 
             ;
typical-params : COMMA list-typ-param  
        | 
        ;
/* One formal parameter; AND is the token the scanner returns for "&". */
typical-param : data-type AND ID
        ;
/* Body of a program or function: BEGINP, a command list, an optional NEWLINE,
   ENDP, and a final optional NEWLINE. `newline` matches at most one NEWLINE
   token, so at most one NEWLINE token is accepted after ENDP. */
command-field : BEGINP  commands newline ENDP newline
    ;
/* Possibly empty command list; each command is preceded by an optional
   NEWLINE. */
commands : commands newline command 
    | 
    ;
command : simple-command SEMIC 
        | struct-command
        | complex-command

       ;
/* Braced command (LCB1/RCB1 are "{" and "}" in the scanner); it wraps exactly
   one command. */
complex-command : LCB1 newline command newline RCB1  
                ;
struct-command  : if-command
                | while-command
                | for-command
                ;
simple-command : assign
              | func-call
              | return-command
              | null-command
              ;
/* else-clause has no empty alternative, so every IF1 must be followed by an
   ELSE1 branch. */
if-command : IF1 LB1 gen-expr RB1 newline command else-clause
          ;
else-clause: ELSE1 newline command   
            ;
/* NOTE(review): the body is written RCB1 command LCB1, i.e. "}" command "{"
   with the scanner's token mapping -- the reverse of complex-command; confirm.
   DO1 is also not returned by any scanner rule shown. */
while-command : WHILE1 LB1 gen-expr RB1 DO1 newline RCB1 command LCB1 

             ;
/* NOTE(review): same RCB1 ... LCB1 ordering as while-command; confirm. */
for-command : FOR1 LB1 conditions RB1 newline RCB1 command LCB1  

           ;
/* Three optional expressions, each followed by SEMIC (including the last). */
conditions : condition SEMIC condition SEMIC condition SEMIC
       ;
condition : gen-expr
      | 
      ;
assign : ID EQ gen-expr   
       ;
/* Function call with one or more arguments, or with none. */
func-call  : ID LB1 real-params-list RB1
          | ID LB1 RB1
          ;
real-params-list : real-param real-params
                 ;
real-params : COMMA real-param real-params 
        | 
        ;
real-param : gen-expr
         ;
return-command : RETURN1 gen-expr
              ;
/* Empty command: lets `simple-command SEMIC` match a lone SEMIC. */
null-command : 
            ;
/* Expression rules. Each binary level is split into an optional
   "left operand + operator" prefix and a right operand; for example gen-expr
   is either `gen-term` or `gen-expr LOR gen-term`. */
gen-expr : gen-terms gen-term
         ;
gen-terms : gen-expr LOR    
      | 
          ;

/* Same shape one level down, with LAND as the operator. */
gen-term : gen-factors gen-factor
         ;
gen-factors : gen-term LAND  
        | 
        ;
/* Optional LNOT prefix. */
gen-factor : LNOT first-gen-factor  
              |  first-gen-factor   
              ;
/* A simple expression, optionally followed by one comparison. */
first-gen-factor : simple-expr comparison
                 | simple-expr
                 ;
comparison : compare-operator simple-expr
            ;
compare-operator : EQUAL         
                 | NEQUAL
                 | LESS
                 | GREATER
                 | LEQUAL
                 | GEQUAL
                 ;
/* Additive level: `simple-term`, or `simple-expr (PLUS|MINUS) simple-term`. */
simple-expr : expresion simple-term
    ;
expresion : simple-expr PLUS    
    |simple-expr MINUS
    | 
    ;
/* Multiplicative level: `simple-parag`, or
   `simple-term (MULT|DIV|MOD) simple-parag`. */
simple-term : mul-expr simple-parag
            ;
mul-expr: simple-term MULT
    | simple-term DIV
    | simple-term MOD
    | 
    ;
/* Operand with an optional leading MINUS. */
simple-parag : simple-prot-oros
             | MINUS simple-prot-oros
             ;
/* Primary operand: identifier, constant, call, or parenthesised expression. */
simple-prot-oros : ID
                 | constant
                 | func-call
                 | LB1 gen-expr RB1
                 ;
constant : DIGIT
         | STRING1
         | TRUE
         | FALSE
         ;
/* Optional newline: exactly one NEWLINE token or nothing. */
newline:NEWLINE
    | 
    ;



%%

/*
 * Parser error callback.
 *
 * Writes msg followed by a newline to stderr and increments the global
 * errNum counter, which main() reports after parsing.
 */
void yyerror(char const *msg)
{
    fprintf(stderr, "%s\n", msg);
    errNum += 1;
}
/*
 * Program entry point.
 *
 * Parses the file named by the first command-line argument, or stdin when no
 * argument is given, and prints the outcome and the error count on stdout.
 *
 * Returns EXIT_SUCCESS when yyparse() returns 0, and EXIT_FAILURE when the
 * parse fails or when the input or output file cannot be opened or closed.
 */
int main(int argc, char **argv)
{
    FILE *input = NULL;
    FILE *output = NULL;

    if (argc > 1) {
        input = fopen(argv[1], "r");
        if (input == NULL) {
            /* Fail loudly: a flex scanner whose yyin is NULL silently falls
               back to reading stdin. */
            perror(argv[1]);
            return EXIT_FAILURE;
        }
        yyin = input;
    } else {
        yyin = stdin;
        /* yyout is redirected to the file "output" only when reading from
           stdin; with a file argument it is left untouched. */
        output = fopen("output", "w");
        if (output == NULL) {
            perror("output");
            return EXIT_FAILURE;
        }
        yyout = output;
    }

    int status = yyparse();
    if (status == 0) {
        printf("Done parsing\n");
    } else {
        /* NOTE(review): message appears to be transliterated Greek for
           "there is an error at line"; n is the scanner's line counter. */
        printf("Yparxei lathos sti grammi: %d\n", n);
    }

    printf("Estimated number of errors: %d\n", errNum);

    if (input != NULL) {
        fclose(input);
    }
    if (output != NULL && fclose(output) != 0) {
        /* fclose flushes buffered output, so a failure here means data loss. */
        perror("output");
        status = 1;
    }

    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

对于这样一个简单的输入:

void main()
integer k;
boolean l;
begin
aek=32;
end

我得到以下内容:

$ ./MyParser.exe file2.txt
void , id ,left bracket , right bracket
integer , id ,semicolon
boolean , id ,semicolon
BEGIN PROGRAM
id ,equals , digit ,semicolon
END PROGRAM
syntax error
Yparxei lathos sti grammi: 8
Estimated number of errors: 1

无论我怎样修改输入文件,最后总会得到一个语法错误……为什么会出现这个错误,我该怎么办?先行致谢!下面附上flex文件,以备有人需要:

%{
#include "projectbison.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Line counter: starts at 1 and is incremented by the "\n" rule. */
int n=1;
%}
%option noyywrap 

digit   [0-9]+
id      [a-zA-Z][a-zA-Z0-9]*



%%

  /* Most rules print a trace of the token they matched and return the
     token code to the parser. */
"(" {printf("left bracket , "); return LB1;}
")" {printf("right bracket\n"); return RB1;}
"{" {printf("left curly bracket , "); return LCB1;}
"}" {printf("right curly bracket\n"); return RCB1;}
"==" {printf("isotita ,"); return EQUAL;}
"!=" {printf("diafora ,"); return NEQUAL;}
"<" {printf("less_than ,"); return LESS;}
">" {printf("greater_than ,"); return GREATER;}
"<=" {printf("less_eq ,"); return LEQUAL;}
">=" {printf("greater_eq ,"); return GEQUAL;}
"||" {printf("lor\n"); return LOR;}
"&&" {printf("land\n"); return LAND;}
"&" {printf("and ,"); return AND;}
"!" {printf("lnot ,"); return LNOT;}
"+" {printf("plus ,"); return PLUS; }
"-" {printf("minus ,"); return MINUS;}
"*" {printf("multiply ,"); return MULT;}
"/" {printf("division ,"); return DIV;}
"%" {printf("mod ,"); return MOD;}
";" {printf("semicolon \n"); return SEMIC;}
"=" {printf("equals , "); return EQ;}
"," {printf("comma ,"); return COMMA;}
  /* Counts the line and passes the newline to the parser as NEWLINE. */
"\n" {n++; return NEWLINE;}
  /* Keyword rules are listed before {id}, so a keyword wins when both match
     the same length of text. */
void {printf("void ,"); return VOID;}
return {printf("return ,"); return RETURN1;}
extern {printf("extern\n"); return EXTERN;}
integer {printf("integer ,"); return INTEGER;}
boolean {printf("boolean ,"); return BOOLEAN;}
string {printf("string ,"); return STRING;}
begin {printf("BEGIN PROGRAM\n"); return BEGINP;}
end {printf("END PROGRAM\n"); return ENDP;}
for {printf("for\n"); return FOR1;}
true {printf("true ,"); return TRUE;}
false {printf("false ,"); return FALSE;}
if {printf("if\n"); return IF1; }
else {printf("else\n"); return ELSE1; }
while {printf("while\n"); return WHILE1;}
{id} {printf("id ,"); return ID;}
{digit}  {printf("digit ,"); return DIGIT;}
  /* Wins only for runs that start with a digit and contain a letter
     (e.g. 12ab); {id} and {digit} take every other alphanumeric run. */
[a-zA-Z0-9]+  {return STRING1;}
  /* NOTE(review): this pattern is a literal backquote, not `.`, so it is not
     a catch-all despite the comment in its action; there is also no rule for
     spaces or tabs. */
` {/*catchcall*/ printf("Mystery character %s\n", yytext); }
  /* At end of input: returns the character code '\n' once, then 0 on the
     next call. Note this is '\n', not the NEWLINE token that the "\n" rule
     returns. */
<<EOF>> { static int once = 0; return once++ ? 0 : '\n'; }

%%

1 个答案:

答案 0 :(得分:0)

您的扫描器(scanner)几乎必然会在输入结束时发出两个换行符:一个来自输入本身的换行符,另一个来自您捕获<<EOF>>的规则(注意该规则返回的是字符'\n',而不是语法中声明的NEWLINE记号)。但是,您的语法似乎不接受多余的换行符,因此第二个换行符会触发语法错误。

最简单的解决方案是删除<<EOF>>规则:末尾没有换行符的文本文件非常罕见,把它们当作语法错误处理是完全合理的。更通用的解决方案是,在所有预期出现换行符的位置允许任意数量的换行符,例如定义:

newlines: '\n' | newlines '\n';

(对单字符记号直接使用字符本身,可以让您的语法更具可读性,并简化您的扫描器。不过这是题外话。)

您或许还应该考虑是否真的需要强制要求换行符:您的语法似乎使用;作为语句终止符,这使得换行符变得多余(除风格方面的考虑外)。把换行符从语法中去掉(并在扫描器中像其他空白字符一样忽略它们)同样可以简化您的代码。