使用flex和Bison的解析器

时间:2010-12-17 04:59:05

标签: bison

我正在使用flex和bison来实现解析器。我遇到的一个问题是,如何将单独的flex文件的标记提供给解析器。当我尝试编译parser.tab.c时,它抱怨"undefined reference to yylex"。 然后我尝试在编译时设置-d选项并将头文件包含到flex文件中。

我只想知道使用flex和bison的正确步骤(包括用于编译和运行的相关命令)。我使用的是gcc编译器。

非常感谢!

/* Token Scanner for C- language */

%{

#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
extern YYSTYPE yylval;



%}



/* Character classes and the basic lexeme shapes built from them. */
digit                            [0-9]
letter                           [a-zA-Z]
NUM                              {digit}+
ID                               {letter}+
KEY_WORD                         else|if|int|return|void|while

/* Arithmetic, relational and assignment operators. */
PLUS_OPERATOR                    "+"
MINUS_OPERATOR                   "-"
MUL_OPERATOR                     "*"
DIV_OPERATOR                     "/"
LESS_THAN_OPERATOR               "<"
LESS_THAN_OR_EQUAL_OPER          "<="
GREATER_THAN_OPERATOR            ">"
GREATER_THAN_OR_EQUAL_OPERATOR   ">="
EQUAL_OPERATOR                   "="
OBJ_EQUAL_OPERATOR               "=="
NOT_EQUAL_OPERATOR               "!="

/* Punctuation and brackets.  RIGHT_BRAC-SYMBOL really is spelled with a
   hyphen; the rules section refers to it by that exact name, so the
   spelling is kept here. */
COMMA_SYMBOL                     ","
SEMI_COLON_SYMBOL                ";"
LEFT_BRAC_SYMBOL                 "("
RIGHT_BRAC-SYMBOL                ")"
LEFT_SQUARE_BRAC_SYMBOL          "["
RIGHT_SQUARE_BRAC_SYMBOL         "]"
LEFT_CURLY_BRAC_SYMBOL           "{"
RIGHT_CURLY_BRAC_SYMBOL          "}"
LEFT_COMMENT                     "/*"
RIGHT_COMMENT                    "*/"

/* Keywords, one definition per reserved word. */
ELSE                             "else"
IF                               "if"
INT                              "int"
RETURN                           "return"
VOID                             "void"
WHILE                            "while"

/* Any operator or punctuation lexeme (not referenced by the rules below). */
SYMBOL                           "+"|"-"|"*"|"/"|"<"|"<="|">"|">="|"=="|"!="|"="|";"|","|"("|")"|"{"|"}"|"["|"]"|"/*"|"*/"

WHITESPACE                       [ \t\n]+
/* NOTE(review): COMMENT is not referenced by the rules below; the scanner
   currently hands the comment delimiters to the parser as separate tokens. */
COMMENT                          "/*"(.)*({WHITESPACE})*(.)*"*/"

/* Malformed lexemes: digits, letters and keywords run together. */
ERROR1                           {NUM}(({ID}|{KEY_WORD})|{NUM})+
ERROR2                           {ID}(({NUM}|{KEY_WORD})|{ID})+
ERROR3                           {KEY_WORD}(({ID}|{NUM})|{KEY_WORD})+

/* The alternatives must be brace-expanded; without the braces this
   definition would only match the literal text ERROR1, ERROR2 or ERROR3. */
ERROR                            {ERROR1}|{ERROR2}|{ERROR3}


%%


{NUM}                              { /* integer literal */ return NUM; }

{ELSE}                             { /* keyword rules precede the ID rule so they win equal-length matches */ return ELSE; }
{IF}                               { return IF; }
{INT}                              { return INT; }
{RETURN}                           { return RETURN; }
{VOID}                             { return VOID; }
{WHILE}                            { return WHILE; }

{ID}                               { /* identifier made of letters only */ return ID; }

{PLUS_OPERATOR}                    { return PLUS_OPERATOR; }
{MINUS_OPERATOR}                   { return MINUS_OPERATOR; }
{MUL_OPERATOR}                     { return MUL_OPERATOR; }
{DIV_OPERATOR}                     { return DIV_OPERATOR; }

{LESS_THAN_OPERATOR}               { return LESS_THAN_OPERATOR; }
{LESS_THAN_OR_EQUAL_OPER}          { return LESS_THAN_OR_EQUAL_OPER; }
{GREATER_THAN_OPERATOR}            { return GREATER_THAN_OPERATOR; }
{GREATER_THAN_OR_EQUAL_OPERATOR}   { return GREATER_THAN_OR_EQUAL_OPERATOR; }

{EQUAL_OPERATOR}                   { return EQUAL_OPERATOR; }
{OBJ_EQUAL_OPERATOR}               { /* "==" has its own token; it used to be reported as NOT_EQUAL_OPERATOR */ return OBJ_EQUAL_OPERATOR; }
{NOT_EQUAL_OPERATOR}               { return NOT_EQUAL_OPERATOR; }

{COMMA_SYMBOL}                     { return COMMA_SYMBOL; }
{SEMI_COLON_SYMBOL}                { return SEMI_COLON_SYMBOL; }

{LEFT_BRAC_SYMBOL}                 { return LEFT_BRAC_SYMBOL; }
{RIGHT_BRAC-SYMBOL}                { return RIGHT_BRAC_SYMBOL; }
{LEFT_SQUARE_BRAC_SYMBOL}          { return LEFT_SQUARE_BRAC_SYMBOL; }
{RIGHT_SQUARE_BRAC_SYMBOL}         { return RIGHT_SQUARE_BRAC_SYMBOL; }
{LEFT_CURLY_BRAC_SYMBOL}           { return LEFT_CURLY_BRAC_SYMBOL; }
{RIGHT_CURLY_BRAC_SYMBOL}          { return RIGHT_CURLY_BRAC_SYMBOL; }

{LEFT_COMMENT}                     { return LEFT_COMMENT; }
{RIGHT_COMMENT}                    { return RIGHT_COMMENT; }

{WHITESPACE}                       { /* blanks, tabs and newlines are skipped; the pattern must start in column 0 to be read as a rule */ }

{ERROR}                            { /* report the malformed lexeme instead of dropping it silently, then keep scanning */
                                     fprintf(stderr, "lexical error: malformed token \"%s\"\n", yytext);
                                   }
%%

#ifdef SCANNER_STANDALONE
/*
 * Stand-alone driver for exercising the scanner without the parser.
 *
 * It is compiled only when SCANNER_STANDALONE is defined (for example with
 * gcc -DSCANNER_STANDALONE), because the parser source defines its own
 * main() and linking both definitions fails with a duplicate symbol.
 *
 * argv[1], when present, names the file to scan; otherwise standard input
 * is scanned.  Returns EXIT_FAILURE if the file cannot be opened and
 * EXIT_SUCCESS once the whole input has been scanned.
 */
int main(int argc, char **argv)
{
    ++argv, --argc;  /* skip over program name */

    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            perror(argv[0]);
            return EXIT_FAILURE;
        }
    } else {
        yyin = stdin;
    }

    /* yylex() returns after each token, so keep calling it until it
       reports end of input by returning 0. */
    while (yylex() != 0)
        ;

    return EXIT_SUCCESS;
}
#endif /* SCANNER_STANDALONE */


/*
 * End-of-input hook called by the generated scanner.
 * Always returns 1, which flex treats as "no further input to scan".
 */
int yywrap(void)
{
    return 1;
}

解析器:

%{
#include <stdio.h>
#include <ctype.h>
#define YYDEBUG 1   /* compile the yydebug tracing code into the parser */

/*
 * The generated yyparse() calls yylex() and yyerror() before any definition
 * of them is visible in this translation unit.  Without these prototypes the
 * calls are implicit function declarations, which C99 and later reject.
 * yylex() is supplied by the flex-generated scanner; yyerror() is defined in
 * the epilogue of this file.
 */
int yylex(void);
int yyerror(char *s);

%}

/* Terminal symbols.  They are declared in the same order as before, so the
   numeric codes written to the generated header do not change. */
%token ID NUM
%token PLUS_OPERATOR MINUS_OPERATOR MUL_OPERATOR DIV_OPERATOR
%token LESS_THAN_OPERATOR LESS_THAN_OR_EQUAL_OPER
%token GREATER_THAN_OPERATOR GREATER_THAN_OR_EQUAL_OPERATOR
%token EQUAL_OPERATOR OBJ_EQUAL_OPERATOR NOT_EQUAL_OPERATOR
%token COMMA_SYMBOL SEMI_COLON_SYMBOL
%token LEFT_BRAC_SYMBOL RIGHT_BRAC_SYMBOL
%token LEFT_SQUARE_BRAC_SYMBOL RIGHT_SQUARE_BRAC_SYMBOL
%token LEFT_CURLY_BRAC_SYMBOL RIGHT_CURLY_BRAC_SYMBOL
%token LEFT_COMMENT RIGHT_COMMENT
%token ELSE IF INT RETURN VOID WHILE

/* Exactly one shift/reduce conflict is expected.
   NOTE(review): presumably the if/else ambiguity in selection_stmt - confirm
   with the bison report. */
%expect 1


%%


/* Start symbol: a program is a non-empty list of declarations. */
program
    : declaration_list
    ;

/* Left-recursive list; the trace message fires only for the first element. */
declaration_list
    : declaration_list declaration
    | declaration
        { printf("njuwandusanduansduasdsdsdsa"); }
    ;

/* A declaration is a variable or a function; only the function alternative
   carries the trace message. */
declaration
    : var_declaration
    | fun_declaration
        { printf("njuwandusanduansduasdsdsdsa");}
    ;

/* Variable declaration: either a scalar "type ID ;" or an array
   "type ID [ NUM ] ;".  Both forms end with a semicolon; the array form
   used to require a comma, so an array declaration written with a
   semicolon was rejected. */
var_declaration : type_specifier ID SEMI_COLON_SYMBOL
    | type_specifier ID LEFT_SQUARE_BRAC_SYMBOL NUM RIGHT_SQUARE_BRAC_SYMBOL SEMI_COLON_SYMBOL       { printf("njuwandusanduansduasdsdsdsa"); }
;

/* Type names; only the VOID alternative carries the trace message. */
type_specifier
    : INT
    | VOID
        { printf("njuwandusanduansduasdsdsdsa");}
    ;

/* Function definition: return type, name, parenthesised parameters, body. */
fun_declaration
    : type_specifier ID LEFT_BRAC_SYMBOL params RIGHT_BRAC_SYMBOL compound_stmt
    ;

/* Parameters are either a comma-separated list or the single keyword VOID. */
params
    : param_list
    | VOID
    ;

param_list
    : param_list COMMA_SYMBOL param
    | param
    ;

/* A parameter is a scalar "type ID" or an array "type ID [ ]". */
param
    : type_specifier ID
    | type_specifier ID LEFT_SQUARE_BRAC_SYMBOL RIGHT_SQUARE_BRAC_SYMBOL
    ;
/* Block: "{" local variable declarations, then statements, "}". */
compound_stmt
    : LEFT_CURLY_BRAC_SYMBOL local_declarations statement_list RIGHT_CURLY_BRAC_SYMBOL
    ;

/* Zero or more variable declarations. */
local_declarations
    : local_declarations var_declaration
    | /* empty */
    ;

/* Zero or more statements. */
statement_list
    : statement_list statement
    | /* empty */
    ;

statement
    : expression_stmt
    | compound_stmt
    | selection_stmt
    | iteration_stmt
    | return_stmt
    ;

/* An expression followed by ";", or a lone ";". */
expression_stmt
    : expression SEMI_COLON_SYMBOL
    | SEMI_COLON_SYMBOL
    ;

/* if (...) statement, with an optional else branch. */
selection_stmt
    : IF LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL statement
    | IF LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL statement ELSE statement
    ;

/* while (...) statement. */
iteration_stmt
    : WHILE LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL statement
    ;

/* return, with or without a value. */
return_stmt
    : RETURN SEMI_COLON_SYMBOL
    | RETURN expression SEMI_COLON_SYMBOL
    ;

/* Assignment (right-recursive) or a plain simple expression. */
expression
    : var EQUAL_OPERATOR expression
    | simple_expression
    ;

/* A variable reference: a plain name or an indexed name. */
var
    : ID
    | ID LEFT_SQUARE_BRAC_SYMBOL expression RIGHT_SQUARE_BRAC_SYMBOL
    ;

/* At most one relational operator between two additive expressions. */
simple_expression
    : additive_expression relop additive_expression
    | additive_expression
    ;

relop
    : LESS_THAN_OR_EQUAL_OPER
    | LESS_THAN_OPERATOR
    | GREATER_THAN_OPERATOR
    | GREATER_THAN_OR_EQUAL_OPERATOR
    | OBJ_EQUAL_OPERATOR
    | NOT_EQUAL_OPERATOR
    ;

/* Left-recursive chain of terms joined by + or -. */
additive_expression
    : additive_expression addop term
    | term
    ;

/* Only the MINUS alternative carries the trace message. */
addop
    : PLUS_OPERATOR
    | MINUS_OPERATOR
        { printf("njuwandusanduansduasdsdsdsa"); }
    ;

/* Left-recursive chain of factors joined by * or /.
   NOTE(review): the action adds the two operand values whichever mulop was
   matched - confirm whether a real semantic value is intended here. */
term
    : term mulop factor
        { $$ = $1 + $3; }
    | factor
    ;

mulop
    : MUL_OPERATOR
    | DIV_OPERATOR
    ;

/* Parenthesised expression, variable, function call, or number. */
factor
    : LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL
    | var
    | call
    | NUM
    ;

/* Function call: name followed by a parenthesised argument list. */
call
    : ID LEFT_BRAC_SYMBOL args RIGHT_BRAC_SYMBOL
    ;

/* The argument list may be empty. */
args
    : arg_list
    | /* empty */
    ;

arg_list
    : arg_list COMMA_SYMBOL expression
    | expression
    ;



%%

/*
 * Parser entry point.
 *
 * Switches on the parser trace by setting yydebug and then runs yyparse().
 * The value returned by yyparse() becomes the program's exit status.
 * The return type is spelled out because implicit int is not valid since C99.
 */
int main(void)
{
    extern int yydebug;

    yydebug = 1;
    return yyparse();
}


/*
 * Error callback for the parser.
 * Writes the message s and a trailing newline to stderr; always returns 0.
 */
int yyerror(char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
    return 0;
}

3 个答案:

答案 0 :(得分:2)

“yylex 是词法分析器函数,它从输入流中识别记号并将它们返回给解析器。Bison 不会自动创建此函数,因此您必须自己编写它,以便 yyparse 可以调用它。”

资料来源:文件......

所以你必须编写yylex函数,以便调用flex函数。

Flex和Bison都提供了完整的示例和完美的文档:

答案 1 :(得分:0)

yylex是通过flex运行flex文件后定义的函数。来自GCC的“对yylex的未定义引用”告诉您链接器无法找到yylex的定义。您的问题是您正在尝试生成没有所有部分的可执行映像。

例如,假设您有3个文件,main.c,flex.l,parser.y。你可以这样做来编译它们。

# Generate the scanner source (lex.yy.c) from the flex specification.
flex flex.l
# Generate the parser source (parser.tab.c); -d also writes the token header.
bison -d parser.y
# Compile and link all three C sources into one executable named myEXE.
gcc main.c parser.tab.c lex.yy.c -o myEXE

这会生成一个可执行文件。但是,假设您有许多文件,并且一直这样做会非常慢。你想要一块一块地编译。

# Run bison first: -d writes parser.tab.h, and a scanner that includes that
# header cannot be compiled until the header exists.
bison -d parser.y
gcc -c parser.tab.c -o parser.o
# Generate and compile the scanner.
flex flex.l
gcc -c lex.yy.c -o lex.o
gcc -c main.c -o main.o
# Link the object files; no -o is given, so gcc names the result a.out.
gcc main.o parser.o lex.o

-c选项告诉gcc只编译并生成目标文件(源代码 -> 编译为汇编代码 -> 汇编为目标文件),不做任何链接。

现在是时候研究一下如何使用make了,您可以在Google上找到几个优秀的教程。

答案 2 :(得分:0)

如果你只有2个文件,比如flex.l和parser.y,那么可以尝试像这样编译:

# Expects parser.tab.c and lex.yy.c to exist already (generated by bison and flex);
# -lfl additionally links against the flex support library.
gcc -o myexe parser.tab.c lex.yy.c -lfl

-lfl 用于链接 flex 词法分析器的库文件(libfl)。如果漏掉了 -lfl,就会得到这个错误:对 yylex 的未定义引用。