语言解析器Bison

时间:2016-04-13 16:22:22

标签: c parsing compiler-construction bison flex-lexer

我有一个用于 C 语言的词法分析器和解析器,但我不明白为什么对给定的输入会报语法错误。

这是词法分析器

   %option yylineno
%{
/*
 * Scanner for a small C-like language.
 *
 * Every rule prints a trace line to stdout. Rules whose action ends in
 * `return` also hand a token code (declared in y.tab.h) to the parser;
 * rules without a `return` only print, and scanning simply continues,
 * so the parser never receives anything for that text.
 *
 * Most rules store *yytext (the first character of the lexeme) in yylval.
 *
 * NOTE(review): the %option line above is indented; flex copies indented
 * definition-section text verbatim into the generated C file -- confirm
 * the indentation is only a paste artifact.
 * NOTE(review): atoi() is used below but <stdlib.h> is not included here
 * (unless y.tab.h pulls it in) -- confirm.
 */
#include "y.tab.h"
#include <stdio.h>

%}

    /* DIGIT is already "one or more digits" and ID already a whole identifier; the rules below apply + to them again. */
DIGIT   [0-9]+
ID  [a-zA-Z][a-zA-Z0-9_]*

%%
    /* Comment patterns: both only print a message, nothing is returned to the parser. */
"/*"([^\*\n\r]*) {printf("Non-terminated comments, Line Number: %d\n",yylineno);}
[/*]+[/*][a-zA-Z0-9]*[*/]+[*/]  {printf("<Comment, %s, %d>\n", yytext,yylineno);}
    /* Keywords. begin, end, procedure, function, main and printf are traced but NOT returned. */
"if"                    {printf("<keyword, %s, %d>\n", yytext,yylineno); yylval = *yytext; return IF ;}
"else"                  {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return ELSE;}
"for"                   {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return FOR;}
"begin"                 {printf("<keyword, %s, %d>\n", yytext,yylineno ); }
"end"                   {printf("<keyword, %s, %d>\n", yytext,yylineno ); }
"procedure"             {printf("<keyword, %s, %d>\n", yytext,yylineno ); }
"function"              {printf("<keyword, %s, %d>\n", yytext,yylineno ); }
"void"                  {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return VOID;}
    /* "main" has no return: the parser gets no token for it, so `void main(` reaches the parser as VOID followed directly by OPENINGROUND. */
"main"                  {printf("<keyword, %s, %d>\n", yytext,yylineno ); }
"int"                   {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return INT;}
"float"                 {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return FLOAT;}
"printf"                {printf("<keyword, %s, %d>\n", yytext,yylineno ); }
"while"                 {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return WHILE;}
"char"                  {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return CHAR;}
"switch"                {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return SWITCH;}
"case"                  {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return CASE;}
"default"               {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return DEFAULT;}
"break"                 {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return BREAK;}
"do"                    {printf("<keyword, %s, %d>\n", yytext,yylineno );yylval = *yytext; return DO;}
"continue"              {printf("<keyword, %s, %d>\n", yytext,yylineno ); yylval = *yytext;return CONTINUE;}
"return"                {printf("<keyword, %s, %d>\n", yytext,yylineno ); yylval = *yytext;return RETURN;}
    /* Operators and punctuation. */
"+"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return ADD;}
"-"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return SUBTRACT;}
"*"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return MULTIPLY;}
"/"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return DIVIDE;}
"<"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return LESSTHAN;}
">"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return GREATERTHAN;}
"++"                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return INCREMENT;}
"--"                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return DECREMENT;}
"="                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return ASSIGN;}
"=="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return ISEQUALTO;}
">="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return GREATERTHANEQUALTO;}
"<="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return LESSTHANEQUALTO;}
"%"                 {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return MODULUS;}
"!="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return ISNOTEQUALTO;}
"+="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return ADDANDASSIGN;}
"-="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return SUBTRACTANDASSIGN;}
"*="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return MULTIPLYANDASSIGN;}
"/="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return DIVIDEANDASSIGN;}
"%="                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return MODULUSANDASSIGN;}
    /* NOTE(review): the conditional operator is matched only as the two-character lexeme "?:"; there is no rule for a lone "?". */
"?:"                    {printf("<operator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return CONDITIONALOPERATOR;}
","                 {printf("<Comma, %s, %d>\n", yytext,yylineno );yylval = *yytext;return COMMA;}
";"                 {printf("<Terminator, %s, %d>\n", yytext,yylineno );yylval = *yytext;return ENDOFSTATEMENT;}
":"                 {printf("<Colon, %s, %d>\n", yytext,yylineno );yylval = *yytext;return COLON;}
"("                 {printf("<OpeningRoundBracket, %s, %d>\n", yytext,yylineno );return OPENINGROUND;}
")"                 {printf("<ClosingRoundBracket, %s, %d>\n", yytext,yylineno );return CLOSINGROUND;}
"{"                 {printf("<OpeningCurlyBracket, %s, %d>\n", yytext,yylineno );yylval = *yytext;return OPENINGBRACE;}
"}"                 {printf("<ClosingCurlyBracket, %s, %d>\n", yytext,yylineno );yylval = *yytext;return CLOSINGBRACE;}
"["                 {printf("<OpeningSquareBracket, %s, %d>\n", yytext,yylineno );yylval = *yytext;return OPENINGSQUARE;}
"]"                     {printf("<ClosingSquareBracket, %s, %d>\n", yytext,yylineno );yylval = *yytext;return CLOSINGSQUARE;}
    /* One or more '*' directly followed by an identifier: traced as a pointer, nothing is returned. */
[*]+{ID}+               {printf("<Pointer, %s, %d>\n", yytext,yylineno);}
    /* Literals. NOTE(review): the floating-point rule returns INTEGER with atoi(yytext) as its value, and the string rule returns REAL with atoi(yytext) -- confirm the intended token codes and values. */
{DIGIT}+                    {printf("<INTEGER, %s, %d>\n", yytext,yylineno);yylval = atoi(yytext);return INTEGER;}
{DIGIT}+"."{DIGIT}*             {printf("<FLOAT, %s, %d>\n", yytext,yylineno);yylval = atoi(yytext);return INTEGER ;}
["]+{ID}*[ ]*[:]*[;]*[(]*[)]*{ID}*["]+  {printf("<STRING, %s, %d>\n", yytext,yylineno);yylval = atoi(yytext);return REAL ;}
    /* Character constants: yylval is *yytext, i.e. the opening quote character. The over-long form is only reported. */
[']{ID}[']              {printf("<CHAR, %s, %d>\n", yytext,yylineno);yylval = *yytext;return CHARACTER ;}
[']{ID}{ID}+[']             {printf("CHAR constant too long: %s, Line Number: %d\n", yytext,yylineno);}
{ID}+                       {printf("<Identifier, %s, %d>\n", yytext,yylineno ); yylval = *yytext; return IDENTIFIER ;}
    /* Whitespace returns nothing. The blank rule's action is an empty statement followed by a block, so it also prints "Space". */
" "                 ;{printf("Space");}/* eat up whitespace */
"\t"                    ;/* eat up whitespace */
"\n"                ;/* eat up whitespace */
    /* Anything else is reported as an undefined symbol and dropped. */
"#"                 {printf("Undefined Symbol: %s, Line Number: %d>\n", yytext,yylineno );}
"~"                 {printf("Undefined Symbol: %s, Line Number: %d>\n", yytext,yylineno );}
.                       printf( "Undefined Symbol: %s, %d\n",yytext,yylineno );
%%


/*
 * End-of-input hook for the scanner.
 * Always returns 1; per the flex manual a non-zero result means there is
 * no further input to switch to.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

解析器.y文件

%{
#include <stdio.h>
#include <stdlib.h>
/* Error callback used by the parser; defined after the grammar with a char * parameter. */
void yyerror();

/* Output stream: opened on "output.txt" in main() and written by the PrimaryExpression action. */
FILE *f;
%} 

/*
 * Token codes returned by the scanner.
 * No %union or per-token type is declared, so every semantic value has
 * the default type (int).
 */
/* Keywords */
%token IF 
%token ELSE 
%token FOR 
%token VOID 
%token INT 
%token FLOAT 
%token WHILE 
%token CHAR 
%token SWITCH 
%token CASE 
%token DEFAULT 
%token BREAK
%token DO
%token CONTINUE
%token RETURN
/* Operators */
%token ADD
%token SUBTRACT
%token MULTIPLY
%token DIVIDE
%token MODULUS
%token LESSTHAN
%token GREATERTHAN
%token INCREMENT
%token DECREMENT
%token ASSIGN
%token ISEQUALTO
%token GREATERTHANEQUALTO
%token LESSTHANEQUALTO
%token ISNOTEQUALTO
%token ADDANDASSIGN
%token SUBTRACTANDASSIGN
%token MULTIPLYANDASSIGN
%token DIVIDEANDASSIGN
%token MODULUSANDASSIGN
%token CONDITIONALOPERATOR
/* Punctuation */
%token ENDOFSTATEMENT
%token COMMA
%token COLON
%token OPENINGROUND
%token CLOSINGROUND
%token OPENINGSQUARE
%token CLOSINGSQUARE
%token OPENINGBRACE
%token CLOSINGBRACE
/* Literals and identifiers */
%token INTEGER
%token REAL
%token CHARACTER
%token IDENTIFIER
/* Parsing starts at TransitionUnit. */
%start TransitionUnit
%%

/*
 * Declarations and statements.
 *
 * The rules are written in a left-factored style: a rule named X_a (or
 * X_b, X_c) holds the tail of X, and repetition is expressed through
 * right recursion with an empty alternative.
 */

/* One or more external declarations. */
TransitionUnit : ExternalDeclaration TransitionUnit_a;

TransitionUnit_a : ExternalDeclaration TransitionUnit_a  
| /* NULL */
;

/* Either a function definition or a declaration terminated by ENDOFSTATEMENT. */
ExternalDeclaration : FunctionDefinition
|Declaration ENDOFSTATEMENT
;

/* [TypeSpecifier] Declarator [DeclarationList] CompoundStatement. */
FunctionDefinition : TypeSpecifier Declarator FunctionDefinition_a
|Declarator FunctionDefinition_b
;

FunctionDefinition_a : DeclarationList CompoundStatement
                        |CompoundStatement;

FunctionDefinition_b : DeclarationList CompoundStatement
                    |CompoundStatement
;

TypeSpecifier : VOID  
|CHAR 
|INT 
|FLOAT
;

/* A declarator always needs an IDENTIFIER, optionally preceded by MULTIPLY tokens. */
Declarator : DirectDeclarator 
            |    Pointer DirectDeclarator;

DirectDeclarator : IDENTIFIER DirectDeclarator_a;

/* Optional suffixes: a parenthesised part or a bracketed part, each of which may repeat. */
DirectDeclarator_a : OPENINGROUND DirectDeclarator_b 
                    |OPENINGSQUARE DirectDeclarator_c
                    |/* NULL */
                    ;

DirectDeclarator_b : Declarator CLOSINGROUND DirectDeclarator_a
                    |ParameterList CLOSINGROUND DirectDeclarator_a 
                    |CLOSINGROUND DirectDeclarator_a
                    ;

/* NOTE(review): INT is the token for the keyword `int`, not the INTEGER literal -- presumably INTEGER was meant for an array size; confirm. */
DirectDeclarator_c : CLOSINGSQUARE DirectDeclarator_a
|INT CLOSINGSQUARE DirectDeclarator_a
;

/* One or more MULTIPLY tokens. */
Pointer : MULTIPLY Pointer_a
;

Pointer_a : Pointer 
|/* NULL */
;

/* One or more declarations, each terminated by ENDOFSTATEMENT. */
DeclarationList : Declaration ENDOFSTATEMENT DeclarationList_a
;

DeclarationList_a : Declaration ENDOFSTATEMENT DeclarationList_a 
|/* NULL */
;

/* A type specifier followed by an optional comma-separated list of init-declarators. */
Declaration : TypeSpecifier Declaration_a;

Declaration_a : InitDeclaratorList 
|/* NULL */
;

InitDeclaratorList : InitDeclarator InitDeclaratorList_a
;

InitDeclaratorList_a : COMMA InitDeclarator InitDeclaratorList_a 
|/* NULL */
;

/* Declarator with an optional `ASSIGN Initializer`. */
InitDeclarator : Declarator InitDeclarator_a
;

InitDeclarator_a : ASSIGN Initializer 
|/* NULL */
;

/* An initializer is a constant or a braced list; the braced list may end with a trailing COMMA. */
Initializer : Constant
| OPENINGBRACE InitializerList Initializer_a
;

Initializer_a : CLOSINGBRACE
|COMMA CLOSINGBRACE
;

InitializerList : Initializer InitializerList_a
;

InitializerList_a : COMMA Initializer InitializerList_a 
|/* NULL */
;

/* One or more comma-separated parameters; each is a type specifier with an optional declarator. */
ParameterList : ParameterDeclaration ParameterList_a;

ParameterList_a : COMMA ParameterDeclaration ParameterList_a 
|/* NULL */
;

ParameterDeclaration : TypeSpecifier ParameterDeclaration_a;

ParameterDeclaration_a : Declarator 
|/* NULL */;

/* A braced block: optional declarations first, then optional statements. */
CompoundStatement : OPENINGBRACE CompoundStatement_a;

CompoundStatement_a : CLOSINGBRACE
|StatementList CLOSINGBRACE
|DeclarationList CompoundStatement_b
;

CompoundStatement_b : StatementList CLOSINGBRACE
|CLOSINGBRACE
;

/* One or more statements. */
StatementList : Statement StatementList_a;

StatementList_a : Statement StatementList_a 
|/* NULL */
;

Statement : LabeledStatement
|CompoundStatement
|ExpressionStatement
|SelectionStatement
|IterationStatement
|JumpStatement;

/* Only `case` and `default` labels exist; there is no identifier label. */
LabeledStatement : CASE Constant COLON Statement
|DEFAULT COLON Statement;


/* An optional expression followed by ENDOFSTATEMENT. */
ExpressionStatement : ENDOFSTATEMENT
|Expression ENDOFSTATEMENT
;

/* `if` with an optional `else`, and `switch`. */
SelectionStatement : IF OPENINGROUND Expression CLOSINGROUND Statement SelectionStatement_a
|SWITCH OPENINGROUND Expression CLOSINGROUND Statement;

SelectionStatement_a : ELSE Statement 
|/* NULL */
;

/* NOTE(review): RETURN always requires an Expression here; a bare `return;` has no alternative -- confirm this is intended. */
JumpStatement : CONTINUE ENDOFSTATEMENT
|BREAK ENDOFSTATEMENT
|RETURN JumpStatement_a
;

JumpStatement_a : Expression ENDOFSTATEMENT;

/* In the FOR form all three expressions are mandatory. */
IterationStatement : WHILE OPENINGROUND Expression CLOSINGROUND Statement
|DO Statement WHILE OPENINGROUND Expression CLOSINGROUND ENDOFSTATEMENT
|FOR OPENINGROUND Expression ENDOFSTATEMENT Expression ENDOFSTATEMENT Expression CLOSINGROUND Statement
;

/*
 * Expressions.
 *
 * Each precedence level X is an operand followed by a tail rule X_a; the
 * tail is either empty or `operator operand X_a`.
 *
 * No tail expects a newline token: the scanner discards newlines and
 * never returns '\n', so an alternative that required one could never be
 * completed and every `==`, `<`, `+`, `*`, `[...]` or multi-argument call
 * would end in a syntax error.
 */

/* NOTE(review): CONDITIONALOPERATOR is the scanner's token for the
 * two-character lexeme "?:", so `a ? b : c` as written in C does not
 * produce it -- confirm the intended lexing of the conditional operator. */
ConditionalExpression : EqualityExpression ConditionalExpression_a;

ConditionalExpression_a : CONDITIONALOPERATOR Expression COLON ConditionalExpression
| /* empty */
;

/* Comma-separated list of assignment expressions. */
Expression : AssignmentExpression Expression_a;

Expression_a : COMMA AssignmentExpression Expression_a
| /* empty */
;

AssignmentExpression : ConditionalExpression
| UnaryExpression AssignmentOperator AssignmentExpression
;

/* == and != */
EqualityExpression : RelationalExpression EqualityExpression_a;

EqualityExpression_a : ISEQUALTO RelationalExpression EqualityExpression_a
| ISNOTEQUALTO RelationalExpression EqualityExpression_a
| /* empty */
;

/* <, >, <= and >= */
RelationalExpression : AdditiveExpression RelationalExpression_a;

RelationalExpression_a : LESSTHAN AdditiveExpression RelationalExpression_a
| GREATERTHAN AdditiveExpression RelationalExpression_a
| LESSTHANEQUALTO AdditiveExpression RelationalExpression_a
| GREATERTHANEQUALTO AdditiveExpression RelationalExpression_a
| /* empty */
;

/* + and - */
AdditiveExpression : MultiplicativeExpression AdditiveExpression_a;

AdditiveExpression_a : ADD MultiplicativeExpression AdditiveExpression_a
| SUBTRACT MultiplicativeExpression AdditiveExpression_a
| /* empty */
;

/* *, / and % */
MultiplicativeExpression : UnaryExpression MultiplicativeExpression_a;

MultiplicativeExpression_a : MULTIPLY UnaryExpression MultiplicativeExpression_a
| DIVIDE UnaryExpression MultiplicativeExpression_a
| MODULUS UnaryExpression MultiplicativeExpression_a
| /* empty */
;

AssignmentOperator : ASSIGN
| ADDANDASSIGN
| SUBTRACTANDASSIGN
| MULTIPLYANDASSIGN
| DIVIDEANDASSIGN
| MODULUSANDASSIGN
;

Constant : INTEGER
| CHARACTER
| REAL
;

/* Prefix ++ / -- or a postfix expression. */
UnaryExpression : PostFixExpression
| INCREMENT UnaryExpression
| DECREMENT UnaryExpression
;

/* A primary expression followed by any number of subscripts, calls and postfix ++ / --. */
PostFixExpression : PrimaryExpression PostFixExpression_a;

PostFixExpression_a : OPENINGSQUARE Expression CLOSINGSQUARE PostFixExpression_a
| OPENINGROUND PostFixExpression_b
| INCREMENT PostFixExpression_a
| DECREMENT PostFixExpression_a
| /* empty */
;

/* Remainder of a call after the opening parenthesis: optional arguments, then CLOSINGROUND. */
PostFixExpression_b : ArgumentExpressionList CLOSINGROUND PostFixExpression_a
| CLOSINGROUND PostFixExpression_a
;

/* One or more comma-separated arguments; each argument is an AdditiveExpression. */
ArgumentExpressionList : AdditiveExpression ArgumentExpressionList_a;

ArgumentExpressionList_a : COMMA AdditiveExpression ArgumentExpressionList_a
| /* empty */
;

/*
 * The semantic value of IDENTIFIER is an int holding the first character
 * of the name (the scanner stores *yytext), so it is printed with %c;
 * handing that int to %s is undefined behaviour. $1 is used instead of
 * the global yylval so the value belongs to this IDENTIFIER regardless
 * of any lookahead. Nothing is written if the output file is not open.
 */
PrimaryExpression : IDENTIFIER { if (f) fprintf(f, "Got an %c", $1); }
| Constant
;
%%

/*
 * Parser error callback.
 * Writes the message `s` followed by the scanner's current line number
 * to stdout; no trailing newline is emitted.
 */
void yyerror(char *s)
{
    extern int yylineno;            /* line counter maintained by the scanner */
    const int line = yylineno;

    fprintf(stdout, "%s at line no. %d", s, line);
}

/*
 * Entry point: parses "input.txt" and writes the parser's output to
 * "output.txt".
 *
 * Returns EXIT_SUCCESS when yyparse() reports success (0), and
 * EXIT_FAILURE when either file cannot be opened or the parse fails.
 */
int main(){
    extern FILE *yyin;
    int status;

    yyin = fopen("input.txt","r");
    if (yyin == NULL) {
        perror("input.txt");
        return EXIT_FAILURE;
    }

    f = fopen("output.txt","w");
    if (f == NULL) {
        /* Without this check the fprintf() calls below would write through a null stream. */
        perror("output.txt");
        fclose(yyin);
        return EXIT_FAILURE;
    }

    fprintf(f,"Adha Hogaya");
    status = yyparse();

    /* Close both streams so buffered output reaches output.txt. */
    fclose(f);
    fclose(yyin);

    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

要解析的输入文件

int i;
int a;
void main(){
}

代码解析正常,直到 '(' 之后报告语法错误。我不明白问题出在哪里,如能提供一些帮助,不胜感激。

代码通过以下命令编译

flex 'filename'.l  
bison -dy 'filename'.y  
gcc lex.yy.c y.tab.c -o 'filename'.exe

1 个答案:

答案 0 :(得分:1)

您的扫描器会忽略"关键字" main(因为该规则的动作不返回任何内容)。

"main"                  {printf("<keyword, %s, %d>\n", yytext,yylineno ); }

我不清楚为什么你希望main成为关键字,因为你的语法似乎不允许名字不是标识符的函数。

说实话,扫描器定义中充满了问题,其中大部分与此问题无关。但是你真的应该看看一些示例扫描器定义,并(重新)阅读 flex 手册。首先,建议您直接用单字符本身作为记号(用 '{' 而不是 OPENINGBRACE),这样可以使您的扫描器和解析器更具可读性。另外,您需要仔细检查一些扫描器模式,尤其是注释模式和字符串/字符字面量模式。您的 ID 不是单个字符,因此在其他模式中使用 {ID}+ 肯定是错误的(并且会导致 flex 发出警告)。

flex 和 bison 都提供调试跟踪功能,这比在源代码中塞满 printf 语句要容易得多。如果您使用了这些跟踪,就会立即看到"关键字" main 没有被发送给解析器。请参阅 flex 手册和 bison 手册中的调试部分。

您的语法文件看起来像是在尝试编写适合LL(1)(递归下降)解析器的语法。这是完全没必要的 - bison / yacc是一个LALR(1)解析器生成器,它可以很好地处理左递归 - 并且使你的语法更不易读。考虑一下简单的区别:

Prototype:     '(' ')'
         |     '(' ParameterList ')'
ParameterList
         :     ParameterDeclaration
         |     ParameterList ',' ParameterDeclaration;
ParameterDeclaration
         :     TypeSpecifier
         |     TypeSpecifier Declarator

最后,您需要修复语义值类型;yylval = *yytext 很少有用。(没有必要为没有语义值的记号设置 yylval。)默认的 int 类型不允许您将字符串(标识符或字符串字面量)从扫描器传递给解析器,而这些字符串在解析器中很可能是有用的。