如何使用lex/yacc构建解析器来定义C变量

时间:2016-02-23 02:21:31

标签: yacc lex

您好,我是lex/yacc的新手,正在练习实现一个解析器来定义一组变量,比如 int x,y,z; double a,b; char c;。我需要为变量赋常量值、处理字符常量,并打印所有变量及其类型和值。我在网上找到了一些代码并做了修改,但总是遇到错误。希望有人能帮助我。

lex code(calc.l)

%{
#include "y.tab.h" /* supplies the token names and yylval used below; presumably the header yacc generates from calc.y */
%}
%%
"print"                {return print; /* keyword: print command */}
"exit"                 {return exit_command; /* keyword: exit command */}
[a-zA-Z]               {yylval.id = yytext[0]; return identifier; /* one letter = variable name */}
[0-9]+                 {yylval.num = atoi(yytext); return number; /* integer literal */}
[a-zA-Z]               {yylval.const = yytext[0]; return character; /* NOTE(review): unreachable, the identical pattern two rules up always wins; `const` is also a C keyword, so it cannot be a member name */}
[0-9]*\.[0-9]+         {yylval.num = atoi(yytext); return decimal; /* NOTE(review): atoi() drops the fraction and fills .num, while the grammar declares this token as <deci> */}
[ \t\n]                ; /* skip blanks, tabs and newlines */
[-+=;]                 {return yytext[0]; /* these characters are returned as their own token code */}
.                      {ECHO; yyerror ("unexpected character"); /* anything else: echo it and report an error */}

%%
int yywrap (void) {return 1;} /* returning 1 tells the scanner there is no further input at end of file */

yacc code(calc.y)

 %{
    void yyerror (char *s);   /* error reporter, defined at the bottom of this file */

#include <stdio.h>     /* C declarations used in actions */
#include <stdlib.h>
int symbols[52];                                   /* one int slot per letter: A-Z first, then a-z */
int symbolVal(char symbol);                        /* read a variable's stored value */
void updateSymbolVal(char symbol, int val);        /* store an int value */
/* NOTE(review): the two definitions further down that take a double and a
   char are both named updateSymbolVal, not updateSymbolValD / updateSymbolValC. */
void updateSymbolValD(char symbol, double dval);   /* store a double value */
void updateSymbolValC(char symbol, char cval);     /* store a char value */
%}

/* Semantic value of a token or rule: int (num), variable letter (id),
   double (deci) or char (const).
   NOTE(review): `const` is a C keyword, so it cannot be used as a union member name. */
%union {int num; char id;double deci;char const;}         /* Yacc definitions */
%start line                 /* the grammar's start symbol */
%token print                /* keyword tokens carry no value */
%token exit_command
%token <num> number         /* integer literal */
%token <id> identifier      /* variable name (one letter) */
%token <deci> decimal       /* literal with a fractional part */
%token <const> character    /* character constant */
%type <num> line exp term   /* NOTE(review): no action for `line` assigns $$ */
%type <id> assignment       /* NOTE(review): the assignment action does not assign $$ either */

%%

/* descriptions of expected inputs     corresponding actions (in C) */

/* A program is one or more ';'-terminated statements: assignment, print or exit. */
line    : assignment ';'        {;}
        | exit_command ';'      {exit(EXIT_SUCCESS);}
        | print exp ';'         {printf("Printing %d\n", $2);}
        | line assignment ';'   {;}
        | line print exp ';'    {printf("Printing %d\n", $3);}
        | line exit_command ';' {exit(EXIT_SUCCESS);}
        ;

/* identifier = exp : store the expression's int value under the variable's letter. */
assignment : identifier '=' exp  { updateSymbolVal($1,$3); }
            ;
/* An expression is currently just a single term. */
exp     : term                  {$$ = $1;}
        ;
/* A term is a literal or a variable; every alternative yields an int (<num>). */
term    : number                {$$ = $1;}
        | character             {$$ = $1;}   /* char value converted to int */
        | decimal               {$$ = $1;}   /* NOTE(review): double assigned to an int $$, so the fraction is lost */
        | identifier            {$$ = symbolVal($1);}   /* look the variable up in the symbol table */
        ;

%%                     /* C code */

/*
 * Maps a single-letter variable name to its slot in the symbol table.
 *
 * 'A'..'Z' map to 0..25 and 'a'..'z' map to 26..51; any other character
 * yields -1.
 *
 * Plain range comparisons are used instead of islower()/isupper(): this file
 * never includes <ctype.h>, and those functions have undefined behaviour when
 * handed a negative char value.  The comparisons assume the letters are
 * contiguous in the execution character set (true for ASCII).
 */
int computeSymbolIndex(char token)
{
    if (token >= 'a' && token <= 'z') {
        return token - 'a' + 26;
    }
    if (token >= 'A' && token <= 'Z') {
        return token - 'A';
    }
    return -1;
}

/*
 * Returns the value of a given symbol.
 *
 * symbol: single-letter variable name.
 *
 * Returns the stored value, or 0 when symbol is not a letter:
 * computeSymbolIndex() answers -1 for such input, and that must not be used
 * as an index into symbols[].
 */
int symbolVal(char symbol)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return 0;
    }
    return symbols[bucket];
}

/*
 * Updates the value of a given integer symbol.
 *
 * symbol: single-letter variable name.
 * val:    value to store.
 *
 * A symbol that is not a letter is ignored: computeSymbolIndex() answers -1
 * for it, and writing to symbols[-1] would land outside the array.
 */
void updateSymbolVal(char symbol, int val)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = val;
}

/*
 * Updates the value of a given double symbol.
 *
 * Named updateSymbolValD to match the prototype in the declarations section;
 * C has no overloading, so a second function called updateSymbolVal with a
 * different parameter type does not compile.
 *
 * symbol: single-letter variable name.
 * dval:   value to store.  The table holds ints, so the fraction is dropped.
 *
 * A symbol that is not a letter is ignored, because computeSymbolIndex()
 * answers -1 for it and that is not a valid index.
 */
void updateSymbolValD(char symbol, double dval)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = (int)dval;
}

/*
 * Updates the value of a given character symbol.
 *
 * Named updateSymbolValC to match the prototype in the declarations section;
 * C has no overloading, so a second function called updateSymbolVal with a
 * different parameter type does not compile.
 *
 * symbol: single-letter variable name.
 * cval:   character to store (kept as its int character code).
 *
 * A symbol that is not a letter is ignored, because computeSymbolIndex()
 * answers -1 for it and that is not a valid index.
 */
void updateSymbolValC(char symbol, char cval)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = cval;
}

/*
 * Program entry point: clears the symbol table, then runs the parser and
 * returns whatever yyparse() returns.
 */
int main (void) {
    int slot = 0;

    /* reset every variable slot to zero before parsing starts */
    while (slot < 52) {
        symbols[slot] = 0;
        slot++;
    }

    return yyparse ( );
}

/* Error reporter: writes the message s to stderr, followed by a newline. */
void yyerror (char *s)
{
    fprintf (stderr, "%s\n", s);
}

它总是给出这样的错误:"calc.l", 第9行: 警告,规则无法匹配。对应的代码行是:[a-zA-Z] {yylval.const = yytext[0]; return character;}

我已将我的lex和yacc代码更改为以下代码,但又遇到了另一个错误。我希望有人可以帮助我理解这些错误以及如何使代码运行。我在网上搜索过这些错误的含义,但仍然没有弄明白。

**Lex codes**

calc.l

    %{
    #include "y.tab.h" /* supplies the token names and yylval used below; presumably the header yacc generates from calc.y */
    %}
    %%
    "print"                {return print; /* keyword: print command */}
    "exit"                 {return exit_command; /* keyword: exit command */}
    [a-zA-Z]               {yylval.id = yytext[0]; return identifier; /* one letter = variable name */}
    [0-9]+                 {yylval.num = atoi(yytext); return number; /* integer literal */}
    [\'.\']                {yylval.const = yytext[0]; return character; /* NOTE(review): the brackets make this a character class matching one quote or one dot, not a quoted character; `const` is also a C keyword, so it cannot be a member name */}
    [0-9]*\.[0-9]+         {yylval.deci = atof(yytext); return decimal; /* literal with a fractional part, kept as a double */}
    [ \t\n]                ; /* skip blanks, tabs and newlines */
    [=;]               {return yytext[0]; /* these characters are returned as their own token code */}
    .                      {ECHO; yyerror ("unexpected character"); /* anything else: echo it and report an error */}

    %%
    int yywrap (void) {return 1;} /* returning 1 tells the scanner there is no further input at end of file */

**yacc codes**
  **calc.y**

%{
void yyerror (char *s); /* error reporter, defined at the bottom of this file */
#include <stdio.h>     /* C declarations used in actions */
#include <stdlib.h>
int symbols[52]; /* one int slot per letter: A-Z first, then a-z */
int symbolVal(char symbol); /* read a variable's stored value */
void updateSymbolVal(char symbol, int val); /* store an int value */
void updateSymbolValD(char symbol, double dval); /* store a double value; NOTE(review): the definition below is named updateSymbolVal */
void updateSymbolValC(char symbol, char cval); /* store a char value; NOTE(review): the definition below is named updateSymbolVal */
%}

%union {int num; char id;double deci;char const;}         /* Yacc definitions; NOTE(review): `const` is a C keyword and cannot be used as a member name */
%start line /* the grammar's start symbol */
%token print /* keyword tokens carry no value */
%token exit_command
%token <num>  number /* integer literal */
%token <id> identifier /* variable name (one letter) */
%token <deci> decimal /* literal with a fractional part */
%token <const> character /* character constant */
%type <id> assignment /* NOTE(review): the assignment action never assigns $$ */
%type <id> charact_assign /* NOTE(review): its action never assigns $$ */
%type <id> double_assign /* NOTE(review): its action never assigns $$ */
%type <num>  exp /* int-valued expression */
%type <const> char_con /* char-valued constant */
%type <deci> real_val /* double-valued constant */


%%

/* descriptions of expected inputs     corresponding actions (in C) */

line    : assignment  ';'       {;} /* a single statement: int assignment */
        | charact_assign ';'    {;} /* char assignment */
        | double_assign  ';'    {;} /* double assignment */
        | exit_command ';'      {exit(EXIT_SUCCESS);}
        | print assignment ';'  {printf("Printing %d\n", $2);} /* NOTE(review): $2 is the <id> value of `assignment`, whose action never sets $$ */
        | line assignment ';'   {;} /* the `line ...` alternatives append one more statement */
        | line charact_assign ';' {;}
        | line double_assign ';'  {;}
        | line print assignment ';' {printf("Printing %d\n", $3);} /* NOTE(review): same concern as the print alternative above */
        | line exit_command ';' {exit(EXIT_SUCCESS);}
        ;

assignment : identifier '=' exp  { updateSymbolVal($1,$3); } /* store an int under the variable's letter */
            ;

exp     : number                {$$ = $1;} /* an expression is a single integer literal */
        ;
charact_assign: identifier '=' char_con        { updateSymbolVal($1,$3); } /* NOTE(review): calls updateSymbolVal, not the updateSymbolValC declared in the prologue */
              ;

char_con      : character       {$$ = $1;} /* pass the character token's value through */
              ;
double_assign: identifier '=' real_val  { updateSymbolVal($1,$3); } /* NOTE(review): calls updateSymbolVal, not the updateSymbolValD declared in the prologue */
             ;

real_val     : decimal    {$$ = $1;} /* pass the decimal token's value through */
             ;

%%                     /* C code */

/*
 * Maps a single-letter variable name to its slot in the symbol table.
 *
 * 'A'..'Z' map to 0..25 and 'a'..'z' map to 26..51; any other character
 * yields -1.
 *
 * Plain range comparisons are used instead of islower()/isupper(): this file
 * never includes <ctype.h>, and those functions have undefined behaviour when
 * handed a negative char value.  The comparisons assume the letters are
 * contiguous in the execution character set (true for ASCII).
 */
int computeSymbolIndex(char token)
{
    if (token >= 'a' && token <= 'z') {
        return token - 'a' + 26;
    }
    if (token >= 'A' && token <= 'Z') {
        return token - 'A';
    }
    return -1;
}

/*
 * Returns the value of a given symbol.
 *
 * symbol: single-letter variable name.
 *
 * Returns the stored value, or 0 when symbol is not a letter:
 * computeSymbolIndex() answers -1 for such input, and that must not be used
 * as an index into symbols[].
 */
int symbolVal(char symbol)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return 0;
    }
    return symbols[bucket];
}

/*
 * Updates the value of a given integer symbol.
 *
 * symbol: single-letter variable name.
 * val:    value to store.
 *
 * A symbol that is not a letter is ignored: computeSymbolIndex() answers -1
 * for it, and writing to symbols[-1] would land outside the array.
 */
void updateSymbolVal(char symbol, int val)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = val;
}

/*
 * Updates the value of a given double symbol.
 *
 * Named updateSymbolValD to match the prototype in the declarations section;
 * C has no overloading, so a second function called updateSymbolVal with a
 * different parameter type does not compile.
 *
 * symbol: single-letter variable name.
 * dval:   value to store.  The table holds ints, so the fraction is dropped.
 *
 * A symbol that is not a letter is ignored, because computeSymbolIndex()
 * answers -1 for it and that is not a valid index.
 */
void updateSymbolValD(char symbol, double dval)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = (int)dval;
}

/*
 * Updates the value of a given character symbol.
 *
 * Named updateSymbolValC to match the prototype in the declarations section;
 * C has no overloading, so a second function called updateSymbolVal with a
 * different parameter type does not compile.
 *
 * symbol: single-letter variable name.
 * cval:   character to store (kept as its int character code).
 *
 * A symbol that is not a letter is ignored, because computeSymbolIndex()
 * answers -1 for it and that is not a valid index.
 */
void updateSymbolValC(char symbol, char cval)
{
    int bucket = computeSymbolIndex(symbol);
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = cval;
}

/*
 * Program entry point: clears the symbol table, then runs the parser and
 * returns whatever yyparse() returns.
 */
int main (void) {
    int slot = 0;

    /* reset every variable slot to zero before parsing starts */
    while (slot < 52) {
        symbols[slot] = 0;
        slot++;
    }

    return yyparse ( );
}

/* Error reporter: writes the message s to stderr, followed by a newline. */
void yyerror (char *s)
{
    fprintf (stderr, "%s\n", s);
}


**`Below are the errors I am running into:`**

    "calc.y", line 12: unrecognized '%' directive
    "calc.y", line 14: unrecognized '%' directive
    "calc.y", line 15: unrecognized '%' directive
    "calc.y", line 16: unrecognized '%' directive
    "calc.y", line 17: unrecognized '%' directive
    "calc.y", line 18: unrecognized '%' directive
    "calc.y", line 19: unrecognized '%' directive
    "calc.y", line 20: unrecognized '%' directive
    "calc.y", line 21: unrecognized '%' directive
    "calc.y", line 22: unrecognized '%' directive
    "calc.y", line 23: unrecognized '%' directive
    "calc.y", line 24: unrecognized '%' directive
    "calc.y", line 25: unrecognized '%' directive
    "calc.y", line 30: unrecognized rule
    "calc.y", line 30: unrecognized rule
    "calc.y", line 30: unrecognized rule
    "calc.y", line 34: unrecognized rule
    "calc.y", line 35: unrecognized rule
    "calc.y", line 36: unrecognized rule
    "calc.y", line 37: unrecognized rule
    "calc.y", line 38: unrecognized rule
    "calc.y", line 39: unrecognized rule
    "calc.y", line 40: unrecognized rule
    "calc.y", line 41: unrecognized rule
    "calc.y", line 42: unrecognized rule

1 个答案:

答案 0 :(得分:2)

你有两次相同的模式:

[a-zA-Z]               {yylval.id = yytext[0]; return identifier; /* first rule with this pattern: always the one chosen */}
[0-9]+                 {yylval.num = atoi(yytext); return number;}
[a-zA-Z]               {yylval.const = yytext[0]; return character; /* same pattern again: can never match */}

因此,任何匹配[a-zA-Z]的输入都会被第一条规则匹配,第二条规则永远不会被匹配到。

如果还不清楚:lex/flex词法分析器按顺序考虑规则,因此使用的是第一条与输入匹配的规则(如果前面的模式已经匹配,后面的模式即使也能匹配,也不会再被考虑)。这就是为什么(例如)你总是把`.`模式放在最后(假设你使用它,而通常都会用到)——因为它可以匹配任何字符,所以放在它后面的模式都无法再匹配任何内容。