使用lex和yacc打印令牌

时间:2014-03-20 19:24:30

标签: c++ token yacc lex flex-lexer

我有一个lex文件,一个yacc文件和一个main.cpp文件。

我的main.cpp看起来像

// Entry point: with no command-line arguments it parses standard input;
// otherwise it parses each file named on the command line in turn.
// (Excerpt only: the closing part of main is not shown in the post.)
int main(int argc, char **argv)
{
    if (argc == 1)
    {   int token;  // not used in this version of main
        curr_filename = "<stdin>";
        yyin = stdin;
        yyparse();
    }
    else
    {
        for (int i = 1; i < argc; ++i)
        {
            // Remember the file name (yyerror prints it) and point the
            // scanner's input at the file.
            curr_filename = argv[i];
            yyin = std::fopen(argv[i], "r");

            if (yyin)
            {    

                yyparse();  

                std::fclose(yyin);
            }
            else
            {
                // Report the unreadable file and move on to the next argument.
                utility::print_error(argv[i], "cannot be opened");
            }
        }
    }

    // Checked once, after all inputs have been processed: a non-zero error
    // count ends the program with exit status 1.
    if (yynerrs > 0)
    {
        std::cerr << "Compilation halted due to lexical or syntax errors.\n";
        exit(1);
    }

这样可以正常进行解析。但是现在我想打印由lex文件生成的令牌(token),所以我做了一些修改,改为直接调用yylex(),如下所示:

    // Modified entry point from the question: pulls tokens straight from
    // yylex() and prints a label for each one before calling yyparse().
    // (Excerpt only: the braces are unbalanced as posted.)
    int main(int argc, char **argv)
    {
        if (argc == 1)
        {   int token;
            curr_filename = "<stdin>";

       yyin = stdin;
// calling yylex to get token 
     // NOTE(review): every token returned to this loop has been consumed from
     // the input; yyparse() below will not see it again.
     while(token= yylex())
     {
        switch(token){
        // 258 is the code given to CLASS in the grammar's %token declarations.
        case 258 : 
        std::cout << "class" ;
        // NOTE(review): there is no break above, so a CLASS token falls
        // through and prints "token " as well.
        default : 
        std::cout << "token " ;

                 }


            // NOTE(review): per the braces, this call sits inside the while
            // loop body, so it runs after the first token is read.
            yyparse();
        }
//rest of the code same

但没有任何内容打印到输出。

如果有人能告诉我如何把令牌打印到标准输出或文件中,我将不胜感激。

flex文件

%option noyywrap
%option yylineno

%{

// Declarations copied verbatim into the generated scanner: headers, the
// location-tracking hook, and the state shared by the rule actions below.

#include "flexbison.hpp"
#include "tokentable.hpp"
#include "symboltable.hpp"
#include "y.tab.h"
#include <stdio.h>

// Executed before each matched rule's action: stamps the token location with
// the scanner's current line number.
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; 

// Size in bytes of string_buf.
static const int MAX_STR_CONST = 1025;

char string_buf[MAX_STR_CONST];  // buffer to store string constants encountered in source file
char *string_buf_ptr;            // write position inside string_buf; reset when a string literal opens



int num_comment = 0;      // count to keep track how many opening comment tokens have been encountered
std::size_t curr_lineno = 0;      // keep track of current line number of source file
bool str_too_long = false;   // used to handle string constant size error check


%}

%x COMMENT
%x LINECOMMENT
%x STRING

DARROW =>

%%

"(*" {
    // Opening delimiter outside any comment: enter the exclusive COMMENT
    // state and record one level of nesting.
    BEGIN(COMMENT);
    num_comment++;
}

"*)" {
    // Closing delimiter seen outside the COMMENT state: report it as
    // unmatched when no comment is open.
    if (num_comment <= 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }
}

<COMMENT>"*)" {
    // Closing delimiter inside a comment: drop one nesting level.
    num_comment--;
    if (num_comment < 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }

    // The outermost comment has been closed: resume normal scanning.
    if (num_comment == 0) {
        BEGIN(INITIAL);
    }
}

<COMMENT>"(*" {
    // Nested opening delimiter: one level deeper.
    num_comment++;
}

<COMMENT>[^\n] {
    // eat everything within comments
}

<COMMENT>\n {
    // Newlines inside comments still advance the line counter.
    ++curr_lineno;
}

"--"[^\n]* {
    // Single-line comment: the pattern already consumes everything up to (but
    // not including) the end of the line, so the text is simply discarded.
    // The terminating newline is left for the ordinary \n rule, which bumps
    // curr_lineno. No start-condition switch is made here: an exclusive state
    // entered on the last line of an input that lacks a trailing newline is
    // never left, and would still be active when the next input is scanned.
}

<COMMENT><<EOF>> {
    // Input ended while a (* ... *) comment was still open. Report it and
    // return the scanner to a clean state: the nesting depth is reset as well
    // as the start condition, so a following input does not inherit a stale
    // count of open comments.
    num_comment = 0;
    BEGIN(INITIAL);
    yylval.error_msg = "EOF in comment";
    return ERROR;
}

"=>" {
    // Two-character arrow operator.
    return DARROW; 
}

(?i:class) {
    // Keywords are written with (?i:...), so they match in any letter case.
    // Each keyword rule just returns its token code.
    return CLASS;
}

(?i:else) {
    return ELSE;
}

(?i:in) {
    return IN;
}

(?i:then) {
    return THEN;
}

(?i:fi) {
    return FI;
}

(?i:if) {
    return IF;
}

(?i:inherits) {
    return INHERITS;
}

(?i:let) {
    return LET;
}

(?i:loop) {
    return LOOP;
}

(?i:pool) {
    return POOL;
}

(?i:while) {
    return WHILE;
}

(?i:case) {
    return CASE;
}

(?i:esac) {
    return ESAC;
}

(?i:of) {
    return OF;
}

(?i:new) {
    return NEW;
}

(?i:isvoid) {
    return ISVOID;
}

(?i:not) {
    return NOT;
}

t(?i:rue) {
    // Boolean literal: the first letter must be lowercase, the rest may be
    // in any case. The value travels in yylval.boolean.
    yylval.boolean = true;
    return BOOL_CONST;
}

f(?i:alse) {
    // Same shape as the rule above, for the false literal.
    yylval.boolean = false;
    return BOOL_CONST;
}

[0-9]+ {
    // Integer literal: the digit string is stored in the integer table and
    // the resulting entry is passed to the parser in yylval.symbol.
    yylval.symbol = inttable().add(yytext);
    return INT_CONST;
}

"<=" {
    return LE;
}

"<-" {
    return ASSIGN;
}


[A-Z][a-zA-Z0-9_]* {
    // Identifier starting with an uppercase letter: a type name.
    yylval.symbol = idtable().add(yytext);
    return TYPEID;
}


[a-z][a-zA-Z0-9_]* {
    // Identifier starting with a lowercase letter: an object name.
    yylval.symbol = idtable().add(yytext);
    return OBJECTID;
}

";"|","|"{"|"}"|":"|"("|")"|"+"|"-"|"*"|"/"|"="|"~"|"<"|"."|"@" { 
    // Single-character tokens use their own character code as the token code.
    return *yytext;
}

\n {
    // Newline outside comments and strings: only the line counter changes.
    ++curr_lineno;
}

[ \f\r\t\v] {
    // eat whitespace
}

 /*
  *  String constants (C syntax)
  *  Escape sequence \c is accepted for all characters c. Except for 
  *  \n \t \b \f, the result is c.
  *
  */

\" {
    // Opening quote: switch to the STRING state and start with an empty,
    // zero-filled buffer.
    BEGIN(STRING);
    string_buf_ptr = string_buf;
    memset(string_buf, 0, MAX_STR_CONST);
}

<STRING>\" {
    // Closing quote: intern the collected text and hand it to the parser.
    BEGIN(INITIAL);
    yylval.symbol = stringtable().add(string_buf);
    return STR_CONST;
}

<STRING>\0[^\n]*\" {
    // A NUL byte followed by the rest of the literal up to a quote on the
    // same line. The NUL is either real input or the marker pushed back with
    // unput('\0') by the "too long" checks below; str_too_long tells the two
    // apart. In the too-long case the error was already reported, so the
    // remainder is skipped silently.
    BEGIN(INITIAL);
    if (str_too_long) {
        str_too_long = false;
    }
    else {
        yylval.error_msg = "String contains null character";
        return ERROR;
    }
}

<STRING>\0[^"]*\n {
    // Same situation as the rule above, but a newline is reached before any
    // closing quote.
    if (str_too_long) {
        yyinput(); /* eat quote */
        BEGIN(INITIAL);
        str_too_long = false;
    }
    else {
        // NOTE(review): the pattern ends in \n, so yytext[yyleng - 1] is that
        // newline and this test looks always true -- confirm whether
        // yyleng - 2 (the character before the newline) was intended.
        if (yytext[yyleng - 1] != '\\') {
            BEGIN(INITIAL);
            yylval.error_msg = "String contains null character";
            return ERROR;
        }
    }
}

<STRING><<EOF>> {
    // Input ended inside a string literal.
    BEGIN(INITIAL);
    yylval.error_msg = "EOF in string constant";
    return ERROR;
}

<STRING>\\ {
    // Backslash: start of an escape sequence. The length check comes first;
    // on overflow a NUL marker is pushed back so that the \0 rules above skip
    // the rest of the literal.
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }

    // Read the escaped character directly from the input and translate it.
    char ahead = yyinput();
    switch (ahead) {
        case 'b':
            *string_buf_ptr++ = '\b';
            break;
        case 't':
            *string_buf_ptr++ = '\t';
            break;
        case 'n':
            *string_buf_ptr++ = '\n';
            break;
        case 'f':
            *string_buf_ptr++ = '\f';
            break;
        case '\n':
            // Backslash followed by a real newline: the newline is kept in
            // the string and the line counter advances.
            ++curr_lineno;
            *string_buf_ptr++ = '\n';
            break;
        case '\0':
            // Put the NUL back so the \0 rules above deal with it.
            unput(ahead);
            break;
        default:
            // Any other escaped character stands for itself.
            *string_buf_ptr++ = ahead;
    }
}

<STRING>\n {
    // Unescaped newline inside a string literal.
    ++curr_lineno;
    BEGIN(INITIAL);
    yylval.error_msg = "Unterminated string constant";
    return ERROR;
}

<STRING>. {
    // Ordinary character: append it unless the buffer is already full, in
    // which case the same overflow handling as in the backslash rule applies.
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }

    *string_buf_ptr++ = *yytext;
}

. /* error for invalid tokens */ {
    // Catch-all for any single character no earlier rule matched: build a
    // message naming the character and report it as an ERROR token.
    yylval.error_msg = std::string(yytext) + " is not a valid character in the current context.";
    return ERROR;
}

%%

bison文件

%{

// Declarations copied verbatim into the generated parser: headers, the
// location helper used by the rule actions, and names defined in other files.

#include "flexbison.hpp"
#include "symboltable.hpp"
#include "tokentable.hpp"
#include "ast.hpp"

#include <iostream>

// convenience function for setting location of each ast node
// (lval must be a pointer-like AST node; node is a bison location such as @1)
#define SETLOC(lval,node) (lval)->setloc((node).first_line, curr_filename)

// both defined in main.cpp
extern ProgramPtr ast_root;
extern std::string curr_filename;

// both defined in lexer
extern int yylex();
extern int yylineno;

// Error callback; defined after the grammar at the end of this file.
void yyerror(char *);        
%}

/* Terminal symbols, each with an explicitly assigned numeric code.
   A <field> tag names the semantic-value member that carries the token's value. */
%token CLASS 258 ELSE 259 FI 260 IF 261 IN 262 
%token INHERITS 263 LET 264 LOOP 265 POOL 266 THEN 267 WHILE 268
%token CASE 269 ESAC 270 OF 271 DARROW 272 NEW 273 ISVOID 274
%token <symbol>  STR_CONST 275 INT_CONST 276 
%token <boolean> BOOL_CONST 277
%token <symbol>  TYPEID 278 OBJECTID 279 
%token ASSIGN 280 NOT 281 LE 282 ERROR 283

/* Semantic-value member used by each nonterminal. */
%type <program> program
%type <clazz> class
%type <classes> class_list
%type <attribute> attribute
%type <attributes> attribute_list
%type <method> method
%type <methods> method_list
%type <expression> expression
%type <expression> let_expr 
%type <expressions> expression_list
%type <expressions> method_expr_list
%type <formal> formal
%type <formals> formal_list
%type <branch> case
%type <cases> case_list

/* Operator precedence and associativity, listed from LOWEST to HIGHEST
   binding strength (later lines bind tighter). This follows the Cool
   precedence table, which from highest to lowest reads:
       .   @   ~   isvoid   * /   + -   <= < =   not   <-
   The three comparisons are non-associative and sit between `not` and the
   additive operators, so `a + b < c` groups as `(a + b) < c` and
   `not a = b` as `not (a = b)`. LET is the lowest level so that the body of a
   let extends as far to the right as possible. */
%left LET
%right ASSIGN
%left NOT
%nonassoc LE '<' '='
%left '+' '-'
%left '*' '/'
%left ISVOID
%left '~'
%left '@'
%left '.'

%%
/* Start symbol: a whole program is a list of classes. The action takes its
   location from the class list and stores the resulting Program node in
   ast_root. */
program : class_list    { @$ = @1; ast_root = std::make_shared<Program>($1); }
;

/* One or more classes, collected left-recursively.
   NOTE(review): the second alternative appends to $$ without assigning it
   first; presumably it relies on the parser seeding $$ from $1 before the
   action runs -- confirm. */
class_list : class { $$ = Classes(); $$.push_back($1); }
            | class_list class { $$.push_back($2); }
;

/* A class definition, with or without an explicit parent. When no INHERITS
   clause is given, "Object" is used as the parent name. The third alternative
   is error recovery: skip to the next ';' and resume parsing. */
class : CLASS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, idtable().add("Object"), $4, $5); SETLOC($$, @1); }
        | CLASS TYPEID INHERITS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, $4, $6, $7); SETLOC($$, @1); }
        | error ';' { yyerrok; } 
;

/* One or more ';'-terminated attributes; the last alternative recovers from a
   malformed attribute by skipping to the next ';'. */
attribute_list : attribute ';' { $$ = Attributes(); $$.push_back($1); }
               | attribute_list attribute ';' { $$.push_back($2); }
               | error ';' { yyerrok; }
;

/* An attribute declaration `name : Type`, optionally followed by `<- expr`.
   Without an initializer a NoExpr node is used in its place. */
attribute : OBJECTID ':' TYPEID { $$ = std::make_shared<Attribute>($1, $3, std::make_shared<NoExpr>()); SETLOC($$, @1); }
          | OBJECTID ':' TYPEID ASSIGN expression { $$ = std::make_shared<Attribute>($1, $3, $5); SETLOC($$, @5); }
;

/* One or more ';'-terminated methods; the last alternative recovers from a
   malformed method by skipping to the next ';'. */
method_list : method ';' { $$ = Methods(); $$.push_back($1); }
            | method_list method ';' { $$.push_back($2); }
            | error ';' { yyerrok; }
;

/* A method definition `name(formals) : ReturnType { body }`. The second
   alternative covers an empty parameter list and passes an empty Formals. */
method : OBJECTID '(' formal_list ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $6, $3, $8); SETLOC($$, @1); }
       | OBJECTID '(' ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $5, Formals(), $7); SETLOC($$, @1); }
;

/* One or more formal parameters separated by commas. */
formal_list : formal { $$ = Formals(); $$.push_back($1); }
            | formal_list ',' formal { $$.push_back($3); } 
;

/* A single formal parameter `name : Type`. */
formal : OBJECTID ':' TYPEID { $$ = std::make_shared<Formal>($1, $3); SETLOC($$, @1); }
;

/* One or more case branches. */
case_list : case { $$ = Cases(); $$.push_back($1); }
            | case_list case { $$.push_back($2); }
;

/* A single case branch `name : Type => expr ;`. The node's location is taken
   from the branch expression. */
case : OBJECTID ':' TYPEID DARROW expression ';' { $$ = std::make_shared<CaseBranch>($1, $3, $5); SETLOC($$, @5); }
;

/* Comma-separated argument expressions of a method call (at least one). */
method_expr_list : expression { $$ = Expressions(); $$.push_back($1); }
                    | method_expr_list ',' expression { $$.push_back($3); }
;

/* One or more ';'-terminated expressions, as used inside a { ... } block; the
   last alternative recovers from a malformed expression by skipping to ';'. */
expression_list : expression ';' { $$ = Expressions(); $$.push_back($1); }
                | expression_list expression ';' { $$.push_back($2); }
                | error ';' { yyerrok; }
;

/* The binding list of a let expression (everything after the LET keyword).
   Each binding is `name : Type`, optionally with `<- init`; a binding is
   followed either by IN and the body, or by ',' and further bindings, which
   become a nested Let node. A missing initializer is represented by NoExpr.

   The last alternative is error recovery for a malformed binding: it skips to
   the next ',' and continues with the remaining bindings. Its value must be
   set explicitly to the recovered let_expr -- otherwise $$ is left holding no
   expression node, and the enclosing `LET let_expr` action would call SETLOC
   through a null pointer. */
let_expr : OBJECTID ':' TYPEID IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ',' let_expr { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression ',' let_expr { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @4); }
            | error ',' let_expr { $$ = $3; yyerrok; }
;


/* All expression forms. Each action builds the matching AST node and records
   a source line for it with SETLOC. Ambiguities between the binary and unary
   operators are resolved by the precedence declarations above the grammar. */
expression : OBJECTID ASSIGN expression { $$ = std::make_shared<Assign>($1, $3); SETLOC($$, @3); }
            /* Method calls: dynamic dispatch, static dispatch via @Type, and
               calls without a receiver, which are dispatched on "self". */
            | expression '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, $5); SETLOC($$, @1); }
            | expression '.' OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, Expressions()); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, $7); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, Expressions()); SETLOC($$, @1);}
            | OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, $3); 
                                                  SETLOC($$, @1); } 
            | OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, Expressions()); 
                                 SETLOC($$, @1); } 
            /* Control flow, blocks, let and case. */
            | IF expression THEN expression ELSE expression FI { $$ = std::make_shared<If>($2, $4, $6); SETLOC($$, @2); }
            | WHILE expression LOOP expression POOL { $$ = std::make_shared<While>($2, $4); SETLOC($$, @2); }
            | '{' expression_list '}' { $$ = std::make_shared<Block>($2); SETLOC($$, @2); }
            | LET let_expr { $$ = $2; SETLOC($$, @2); }
            | CASE expression OF case_list ESAC { $$ = std::make_shared<Case>($2, $4); SETLOC($$, @2); }
            /* Object creation, unary and binary operators, parentheses. */
            | NEW TYPEID { $$ = std::make_shared<New>($2); SETLOC($$, @2); }
            | ISVOID expression { $$ = std::make_shared<IsVoid>($2); SETLOC($$, @2); }
            | expression '+' expression { $$ = std::make_shared<Plus>($1, $3); SETLOC($$, @1); }
            | expression '-' expression { $$ = std::make_shared<Sub>($1, $3); SETLOC($$, @1); }
            | expression '*' expression { $$ = std::make_shared<Mul>($1, $3); SETLOC($$, @1); }
            | expression '/' expression { $$ = std::make_shared<Div>($1, $3); SETLOC($$, @1); }
            | '~' expression { $$ = std::make_shared<Complement>($2); SETLOC($$, @2); }
            | expression '<' expression { $$ = std::make_shared<LessThan>($1, $3); SETLOC($$, @1); }
            | expression LE expression { $$ = std::make_shared<LessThanEqualTo>($1, $3); SETLOC($$, @1); }
            | expression '=' expression { $$ = std::make_shared<EqualTo>($1, $3); SETLOC($$, @1); }
            | NOT expression { $$ = std::make_shared<Not>($2); SETLOC($$, @2); }
            | '(' expression ')' { $$ = $2; SETLOC($$, @2); } 
            /* Leaves: identifiers and constants. */
            | OBJECTID { $$ = std::make_shared<Object>($1); SETLOC($$, @1); }
            | INT_CONST { $$ = std::make_shared<IntConst>($1); SETLOC($$, @1); }
            | STR_CONST { $$ = std::make_shared<StringConst>($1); SETLOC($$, @1); }
            | BOOL_CONST { $$ = std::make_shared<BoolConst>($1); SETLOC($$, @1); } 
;

%%

// Utility function for converting a bison token code to its string
// representation, for better error reporting.
//
// token: a token code declared with %token, the character code of a
//        single-character token, or 0 for end of input.
// Returns the keyword/operator spelling for fixed tokens; for constants and
// identifiers, the token kind followed by the semantic value currently held in
// yylval; for anything else, the character whose code is `token`.
std::string convert_token(int token)
{
    std::string rep;

    switch (token)
    {
        // End of input has code 0; name it instead of emitting a NUL character.
        case 0: rep = "end of file"; break;
        case CLASS: rep = "class"; break;
        case ELSE: rep = "else"; break;
        case FI: rep = "fi"; break;
        case IF: rep = "if"; break;
        case IN: rep = "in"; break;
        case INHERITS: rep = "inherits"; break;
        case LET: rep = "let"; break;
        case LOOP: rep = "loop"; break;
        case POOL: rep = "pool"; break;
        case THEN: rep = "then"; break;
        case WHILE: rep = "while"; break;
        case CASE: rep = "case"; break;
        case ESAC: rep = "esac"; break;
        case OF: rep = "of"; break;
        case DARROW: rep = "=>"; break;
        case NEW: rep = "new"; break;
        case ISVOID: rep = "isvoid"; break;
        case ASSIGN: rep = "<-"; break;
        case NOT: rep = "not"; break;
        case LE: rep = "<="; break;
        case STR_CONST: rep = "STR_CONST = " + yylval.symbol.get_val(); break;
        case INT_CONST: rep = "INT_CONST = " + yylval.symbol.get_val(); break;
        // Adding a bool to a string literal is pointer arithmetic (it skips
        // the first character when the value is true), so build a std::string
        // and spell the value out.
        case BOOL_CONST:
            rep = std::string("BOOL_CONST = ") + (yylval.boolean ? "true" : "false");
            break;
        case TYPEID: rep = "TYPEID = " + yylval.symbol.get_val(); break;
        case OBJECTID: rep = "OBJECTID = " + yylval.symbol.get_val(); break;
        // Single-character tokens use their character code as the token code.
        default: rep = static_cast<char>(token);
    }

    return rep;
}

// Error callback invoked by the generated parser. The parser's own message
// argument is ignored; the report is built from curr_filename and yylineno.
//
// If the scanner left a message in yylval.error_msg (it does so before
// returning an ERROR token), that message is printed; otherwise a generic
// syntax-error message naming the current lookahead token (yychar) is printed.
void yyerror(char *)
{
    if (yylval.error_msg.length() <= 0)
    {
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " <<  "syntax error near or at character or token '" << convert_token(yychar) << "'\n";
    }
    else
    {
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << yylval.error_msg << "\n";

        // The scanner only ever sets error_msg, it never clears it. Reset it
        // once reported, so that a later, unrelated syntax error is not
        // described with this stale lexical message.
        yylval.error_msg = "";
    }
}

1 个答案:

答案 0 :(得分:2)

我不确定为什么你看不到任何输出,不过我也没有看完所有代码。如果你在main中调用yylex,它会读取一个令牌并实际上将其丢弃。然后,当你调用yyparse时,yyparse会自行反复调用yylex,直到yylex返回0。想必(但不一定)下一次你在main的while循环中调用yylex时,它会再次返回0,循环随之结束。结果应该是:while循环打印出一个单词,随后是yyparse产生的输出(如果有的话);这些输出很可能提示语法错误,因为yyparse从未看到输入中的第一个令牌。

我怀疑这是你想做的,但并不完全清楚。

如果想在词法分析的过程中查看各个令牌,可以在每个lex动作中插入打印令牌的语句。或者让flex把扫描函数改用别的名字(例如yylex_internal),再自己编写一个名为yylex()的函数:它调用yylex_internal,并在返回结果之前将其打印出来。

如果你只是出于调试目的才需要这样做(看起来很可能如此),那么最好使用flex的-d命令行选项,它会自动生成调试输出。输出格式也许不完全是你想要的,但这种做法更容易启用,也更容易撤销 :)

要更改flex生成的yylex函数的名称,请在.l文件顶部的代码块中插入以下内容:

#define YY_DECL int yylex_internal()

flex生成的文件声明扫描功能如下:

YY_DECL {
  /* body of function */
}

因此,您可以通过定义YY_DECL宏来重命名该函数、为其添加参数,甚至更改其返回类型。请参阅flex手册的“The Generated Scanner”一节。

顺便说一句,手动为所有终结符令牌编号通常不被认为是好的风格,即使bison允许你这样做。你应该让bison自行为它们编号,并通过#include "y.tab.h"把这些定义包含到源文件中(或者使用你为bison头文件所取的其他名字;可以用--defines=FILE选项轻松更改该文件名)。