c编译器的语义阶段

时间:2011-04-14 19:14:18

标签: semantics yacc lex

如果在示例C程序中写入 1 = a,它不会被检测为错误。我该如何解决这个问题?另外,我该如何实现变量的全局作用域和局部作用域?如果有人能帮忙解决,不胜感激。

clexer.lex源代码

D           [0-9]
L           [a-zA-Z_]
H           [a-fA-F0-9]
E           [Ee][+-]?{D}+
FS          (f|F|l|L)
IS          (u|U|l|L)*

%{
#include <stdio.h>
#include "y.tab.h"
int cnt=1;
int line=1;
char tempid[100];
%}

%%
"/*"            {comment();}

"auto"          { cnt+=yyleng;ECHO; return(AUTO); }
"break"         { cnt+=yyleng;ECHO; return(BREAK); }
"case"          { cnt+=yyleng;ECHO; return(CASE); }
"char"          { cnt+=yyleng;ECHO; return(CHAR); }
"const"         { cnt+=yyleng;ECHO; return(CONST); }
"continue"      { cnt+=yyleng;ECHO; return(CONTINUE); }
"default"       { cnt+=yyleng;ECHO; return(DEFAULT); }
"do"            { cnt+=yyleng;ECHO; return(DO); }
"double"        { cnt+=yyleng;ECHO; return(DOUBLE); }
"else"          { cnt+=yyleng;ECHO; return(ELSE); }
"enum"          { cnt+=yyleng;ECHO; return(ENUM); }
"extern"        { cnt+=yyleng;ECHO; return(EXTERN); }
"float"         { cnt+=yyleng;ECHO; return(FLOAT); }
"for"           { cnt+=yyleng;ECHO; return(FOR); }
"goto"          { cnt+=yyleng;ECHO; return(GOTO); }
"if"            { cnt+=yyleng;ECHO; return(IF); }
"int"           { cnt+=yyleng;ECHO; return(INT); }
"long"          { cnt+=yyleng;ECHO; return(LONG); }
"register"      { cnt+=yyleng;ECHO; return(REGISTER); }
"return"        { cnt+=yyleng;ECHO; return(RETURN); }
"short"         { cnt+=yyleng;ECHO; return(SHORT); }
"signed"        { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof"        { cnt+=yyleng;ECHO; return(SIZEOF); }
"static"        { cnt+=yyleng;ECHO; return(STATIC); }
"struct"        { cnt+=yyleng;ECHO; return(STRUCT); }
"switch"        { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef"       { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union"         { cnt+=yyleng;ECHO; return(UNION); }
"unsigned"      { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void"          { cnt+=yyleng;ECHO; return(VOID); }
"volatile"      { cnt+=yyleng;ECHO; return(VOLATILE); }
"while"         { cnt+=yyleng;ECHO; return(WHILE); }
(['])+({L}|{D})+(['])           { cnt+=yyleng;ECHO; return(SINGLE); }
{L}({L}|{D})*       {
                        /*
                         * Remember the identifier text for the parser's
                         * install()/context_check() helpers.  tempid is a
                         * fixed 100-byte buffer, so copy with a bound:
                         * snprintf always NUL-terminates and truncates
                         * over-long identifiers instead of overflowing.
                         */
                        cnt+=yyleng;ECHO;
                        snprintf(tempid,sizeof tempid,"%s",yytext);
                        return(IDENTIFIER);
                    }

0[xX]{H}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
0{D}+{IS}?      { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}?       { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+'   { cnt+=yyleng;ECHO; return(CONSTANT); }

{D}+{E}{FS}?        { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { cnt+=yyleng;ECHO; return(CONSTANT); }

L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }

"..."           { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>="           { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<="           { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+="            { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-="            { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*="            { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/="            { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%="            { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&="            { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^="            { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|="            { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>"            { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<"            { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++"            { cnt+=yyleng;ECHO; return(INC_OP); }
"--"            { cnt+=yyleng;ECHO; return(DEC_OP); }
"->"            { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&"            { cnt+=yyleng;ECHO; return(AND_OP); }
"||"            { cnt+=yyleng;ECHO; return(OR_OP); }
"<="            { cnt+=yyleng;ECHO; return(LE_OP); }
">="            { cnt+=yyleng;ECHO; return(GE_OP); }
"=="            { cnt+=yyleng;ECHO; return(EQ_OP); }
"!="            { cnt+=yyleng;ECHO; return(NE_OP); }
";"         { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%")      { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>")      { cnt+=yyleng;ECHO; return('}'); }
","         { cnt+=yyleng;ECHO; return(','); }
":"         { cnt+=yyleng;ECHO; return(':'); }
"="         { cnt+=yyleng;ECHO; return('='); }
"("         { cnt+=yyleng;ECHO; return('('); }
")"         { cnt+=yyleng;ECHO; return(')'); }
("["|"<:")      { cnt+=yyleng;ECHO; return('['); }
("]"|":>")      { cnt+=yyleng;ECHO; return(']'); }
"."         { cnt+=yyleng;ECHO; return('.'); }
"&"         { cnt+=yyleng;ECHO; return('&'); }
"!"         { cnt+=yyleng;ECHO; return('!'); }
"~"         { cnt+=yyleng;ECHO; return('~'); }
"-"         { cnt+=yyleng;ECHO; return('-'); }
"+"         { cnt+=yyleng;ECHO; return('+'); }
"*"         { cnt+=yyleng;ECHO; return('*'); }
"/"         { cnt+=yyleng;ECHO; return('/'); }
"%"         { cnt+=yyleng;ECHO; return('%'); }
"<"         { cnt+=yyleng;ECHO; return('<'); }
">"         { cnt+=yyleng;ECHO; return('>'); }
"^"         { cnt+=yyleng;ECHO; return('^'); }
"|"         { cnt+=yyleng;ECHO; return('|'); }
"?"         { cnt+=yyleng;ECHO; return('?'); }

[ ]         {cnt+=yyleng;ECHO;}
[\t\v\f]        { cnt+=yyleng; }
[\n]            {line++;cnt=1;}
.           { /* ignore bad characters */ }

%%
/*
 * yywrap - called by the scanner when it reaches end of input.
 *
 * Returns 1, which tells the scanner there is no further input to
 * switch to, so scanning stops.
 */
int yywrap(void)
{
    return 1;
}
/*
 * comment - skip the body of a block comment whose opening delimiter has
 * already been matched by the scanner.
 *
 * Characters are consumed with input() up to and including the closing
 * delimiter.  The global `line` is incremented (and `cnt` reset to 1) for
 * every newline; `cnt` is incremented for every other character consumed
 * while searching for a '*'.  The comment text is not echoed.
 *
 * End of input terminates the scan.  Depending on the lex implementation,
 * input() reports end of input as 0 or as EOF, so both are checked; the
 * result is kept in an int so that EOF cannot be confused with a character.
 *
 * Returns 0.  The int return type matches the implicit declaration created
 * by the call in the rules section.
 */
int comment(void)
{
    int c;

    for (;;) {
        /* Advance to the next '*' or to end of input. */
        while ((c = input()) != '*' && c != EOF && c != 0) {
            if (c == '\n') {
                line++;
                cnt = 1;
            } else {
                cnt++;
            }
        }
        if (c != '*')
            break;              /* unterminated comment: input exhausted */

        /* A '*' only closes the comment when followed directly by '/'. */
        c = input();
        if (c == '/' || c == EOF || c == 0)
            break;

        /* Not the terminator; push it back (it may itself be a '*'). */
        unput(c);
    }
    return 0;
}

cparser.yacc源代码

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symbol_table.h"
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
int temp,err,err1=0;

/*
 * install - add the identifier currently held in `tempid` to the symbol
 * table, using the type code currently held in `temp`.
 *
 * If the name is already in the table, the numeric type legend and an
 * "already defined" semantic error (with the current line/cnt position)
 * are printed and the process terminates with a failure status.
 *
 * On success `err1` is set to 1, as in the original code.
 * NOTE(review): nothing visible here reads err1 - confirm its purpose.
 *
 * Returns 0.  The int return type is kept for compatibility with the
 * original implicit-int definition.
 */
int install(void)
{
    symrec *s = getsym(tempid);

    if (s != 0) {
        printf(" VOID=1 "
               " CHAR=2 "
               " INT=3 "
               " FLOAT=4 "
               " DOUBLE=4 ");
        printf("\n\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\n\n",
               line, cnt, s->name, s->type);
        exit(EXIT_FAILURE);     /* a semantic error is a failure, not success */
    }

    putsym(tempid, temp);
    err1 = 1;
    return 0;
}
/*
 * context_check - look up the identifier currently held in `tempid`.
 *
 * Returns the type code recorded for it in the symbol table.  If the name
 * is not in the table, an "undeclared identifier" semantic error (with the
 * current line/cnt position) is printed and the process terminates with a
 * failure status, so the function only returns for known identifiers.
 */
int context_check(void)
{
    symrec *s = getsym(tempid);

    if (s == 0) {
        printf("\n\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\n\n",
               line, cnt, tempid);
        exit(EXIT_FAILURE);
    }
    return s->type;
}
/*
 * type_err - report a type mismatch between two type codes.
 *
 * t1, t2: type codes of the two operands.  The assignment rule calls this
 *         only when the codes differ.
 *
 * A mismatch is reported only when both codes are non-zero; a zero code is
 * treated as "no type information" and silently accepted.  When reported,
 * the numeric type legend and the error message (naming the identifier in
 * `tempid`) are printed, `err1` is set, and the process terminates with a
 * failure status.
 *
 * Returns 0 when nothing is reported.  The int return type is kept for
 * compatibility with the original implicit-int definition.
 */
int type_err(int t1, int t2)
{
    if (t1 && t2) {
        printf(" VOID=1 "
               " CHAR=2 "
               " INT=3 "
               " FLOAT=4 "
               " DOUBLE=4 ");
        printf("\n\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \n\n",
               line, cnt, tempid, t1, t2);
        err1 = 1;
        exit(EXIT_FAILURE);
    }
    return 0;
}

%}



/* Literals and multi-character operators returned by the scanner. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
/* NOTE(review): TYPE_NAME is never returned by the scanner rules shown, and
   SINGLE is returned by the scanner but used by no grammar rule shown. */
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE

/* Storage classes, type specifiers/qualifiers and aggregate keywords. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
/* Dangling-else resolution: the else-less IF rule is tagged with
   %prec LOWER_THAN_ELSE, and ELSE is declared afterwards so it has the
   higher precedence, which makes the parser shift ELSE. */
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

/* The grammar's start symbol. */
%start translation_unit
%%

primary_expression
    : IDENTIFIER    {$$=context_check();}
    | CONSTANT
    | STRING_LITERAL
    | '(' expression ')' {$$= $2;}
    ;

postfix_expression
    : primary_expression    {$$=$1;}
    | postfix_expression '[' expression ']'
    | postfix_expression '(' ')'
    | postfix_expression '(' argument_expression_list ')'
    | postfix_expression '.' IDENTIFIER 
    | postfix_expression PTR_OP IDENTIFIER
    | postfix_expression INC_OP
    | postfix_expression DEC_OP
    ;

argument_expression_list
    : assignment_expression
    | argument_expression_list ',' assignment_expression
    ;

unary_expression
    : postfix_expression    {$$=$1;}
    | INC_OP unary_expression
    | DEC_OP unary_expression
    | unary_operator cast_expression
    | SIZEOF unary_expression
    | SIZEOF '(' type_name ')'
    ;

unary_operator
    : '&'
    | '*'
    | '+'
    | '-'
    | '~'
    | '!'
    ;

cast_expression
    : unary_expression  {$$=$1;}
    | '(' type_name ')' cast_expression
    ;

multiplicative_expression
    : cast_expression   {$$=$1;}
    | multiplicative_expression '*' cast_expression
    | multiplicative_expression '/' cast_expression
    | multiplicative_expression '%' cast_expression
    ;

additive_expression
    : multiplicative_expression {$$=$1;}
    | additive_expression '+' multiplicative_expression
    | additive_expression '-' multiplicative_expression
    ;

shift_expression
    : additive_expression   {$$=$1;}
    | shift_expression LEFT_OP additive_expression
    | shift_expression RIGHT_OP additive_expression
    ;

relational_expression
    : shift_expression  {$$=$1;}
    | relational_expression '<' shift_expression
    | relational_expression '>' shift_expression
    | relational_expression LE_OP shift_expression
    | relational_expression GE_OP shift_expression
    ;

equality_expression
    : relational_expression {$$=$1;}
    | equality_expression EQ_OP relational_expression
    | equality_expression NE_OP relational_expression
    ;

and_expression
    : equality_expression   {$$=$1;}
    | and_expression '&' equality_expression
    ;

exclusive_or_expression
    : and_expression    {$$=$1;}
    | exclusive_or_expression '^' and_expression
    ;

inclusive_or_expression 
    : exclusive_or_expression   {$$=$1;}
    | inclusive_or_expression '|' exclusive_or_expression
    ;

logical_and_expression
    : inclusive_or_expression   {$$=$1;}
    | logical_and_expression AND_OP inclusive_or_expression
    ;

logical_or_expression
    : logical_and_expression    {$$=$1;}
    | logical_or_expression OR_OP logical_and_expression
    ;

conditional_expression
    : logical_or_expression {$$=$1;}
    | logical_or_expression '?' expression ':' conditional_expression
    ;

/*
 * assignment_expression
 *
 * The semantic value passed up through the expression rules is an integer
 * type code (the VOID=1 ... DOUBLE=4 legend printed by type_err()).
 *
 * For an assignment, the codes of the left and right operands are compared
 * and type_err() is called when they differ; type_err() reports only when
 * both codes are non-zero.
 *
 * NOTE(review): no lvalue check is made here, and only the IDENTIFIER
 * alternative of primary_expression assigns a type code.  A CONSTANT keeps
 * whatever yylval held, and the scanner rules shown never assign yylval, so
 * its code is presumably 0.  That is why an input such as `1 = a` is
 * accepted without any error.
 */
assignment_expression
    : conditional_expression    {$$=$1;}
    | unary_expression assignment_operator assignment_expression    {if($1!=$3){type_err($1,$3);}}
    ;

assignment_operator
    : '='
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    | LEFT_ASSIGN
    | RIGHT_ASSIGN
    | AND_ASSIGN
    | XOR_ASSIGN
    | OR_ASSIGN
    ;

expression
    : assignment_expression {$$=$1;}
    | expression ',' assignment_expression
    ;

constant_expression
    : conditional_expression
    ;

declaration
    : declaration_specifiers ';'
    | declaration_specifiers init_declarator_list ';'
    ;

declaration_specifiers
    : storage_class_specifier
    | storage_class_specifier declaration_specifiers
    | type_specifier
    | type_specifier declaration_specifiers
    | type_qualifier
    | type_qualifier declaration_specifiers
    ;

init_declarator_list
    : init_declarator
    | init_declarator_list ',' init_declarator
    ;

init_declarator
    : declarator
    | declarator '=' initializer
    ;

storage_class_specifier
    : TYPEDEF
    | EXTERN
    | STATIC
    | AUTO
    | REGISTER
    ;

type_specifier
    : VOID  {temp=1;}
    | CHAR  {temp=2;}
    | SHORT {temp=3;}
    | INT   {temp=3;}
    | LONG  {temp=3;}
    | FLOAT {temp=4;}
    | DOUBLE    {temp=4;}
    | SIGNED
    | UNSIGNED
    | struct_or_union_specifier
    | enum_specifier
    | TYPE_NAME
    ;

struct_or_union_specifier
    : struct_or_union IDENTIFIER '{' struct_declaration_list '}'    {install();}
    | struct_or_union '{' struct_declaration_list '}'
    | struct_or_union IDENTIFIER    {install();}
    ;

struct_or_union
    : STRUCT
    | UNION
    ;

struct_declaration_list
    : struct_declaration
    | struct_declaration_list struct_declaration
    ;

struct_declaration
    : specifier_qualifier_list struct_declarator_list ';'
    ;

specifier_qualifier_list
    : type_specifier specifier_qualifier_list
    | type_specifier
    | type_qualifier specifier_qualifier_list
    | type_qualifier
    ;

struct_declarator_list
    : struct_declarator
    | struct_declarator_list ',' struct_declarator
    ;

struct_declarator
    : declarator
    | ':' constant_expression
    | declarator ':' constant_expression
    ;

enum_specifier
    : ENUM '{' enumerator_list '}'
    | ENUM IDENTIFIER '{' enumerator_list '}'
    | ENUM IDENTIFIER
    ;

enumerator_list
    : enumerator
    | enumerator_list ',' enumerator
    ;

/*
 * enumerator
 *
 * An enumerator introduces a new name, so it must be entered into the
 * symbol table, not looked up: calling context_check() here rejected every
 * fresh `enum { A, B }` with "undeclared identifier".
 *
 * The name is installed as soon as the IDENTIFIER is reduced (before any
 * "= constant_expression" is parsed), while `tempid` still holds it.
 */
enumerator
    : enumerator_name
    | enumerator_name '=' constant_expression
    ;

/*
 * enumerator_name installs the identifier with type code 3 (INT).  `temp`
 * is saved and restored so that the type of an enclosing declaration is
 * not disturbed.
 */
enumerator_name
    : IDENTIFIER    { int saved_temp = temp; temp = 3; install(); temp = saved_temp; }
    ;

type_qualifier
    : CONST
    | VOLATILE
    ;

declarator
    : pointer direct_declarator
    | direct_declarator
    ;

direct_declarator
    : IDENTIFIER    {install();}
    | '(' declarator ')'
    | direct_declarator '[' constant_expression ']'
    | direct_declarator '[' ']'
    | direct_declarator '(' parameter_type_list ')'
    | direct_declarator '(' identifier_list ')'
    | direct_declarator '(' ')'
    ;

pointer
    : '*'
    | '*' type_qualifier_list
    | '*' pointer
    | '*' type_qualifier_list pointer
    ;

type_qualifier_list
    : type_qualifier
    | type_qualifier_list type_qualifier
    ;


parameter_type_list
    : parameter_list
    | parameter_list ',' ELLIPSIS
    ;

parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
    ;

parameter_declaration
    : declaration_specifiers declarator
    | declaration_specifiers abstract_declarator
    | declaration_specifiers
    ;

identifier_list
    : IDENTIFIER    {install();}
    | identifier_list ',' IDENTIFIER    {install();}
    ;

type_name
    : specifier_qualifier_list
    | specifier_qualifier_list abstract_declarator
    ;

abstract_declarator
    : pointer
    | direct_abstract_declarator
    | pointer direct_abstract_declarator
    ;

direct_abstract_declarator
    : '(' abstract_declarator ')'
    | '[' ']'
    | '[' constant_expression ']'
    | direct_abstract_declarator '[' ']'
    | direct_abstract_declarator '[' constant_expression ']'
    | '(' ')'
    | '(' parameter_type_list ')'
    | direct_abstract_declarator '(' ')'
    | direct_abstract_declarator '(' parameter_type_list ')'
    ;

initializer
    : assignment_expression {$$=$1;}
    | '{' initializer_list '}'
    | '{' initializer_list ',' '}'
    ;

initializer_list
    : initializer
    | initializer_list ',' initializer
    ;

statement
    : labeled_statement
    | compound_statement
    | expression_statement
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

labeled_statement
    : IDENTIFIER ':' statement  //{context_check();}
    | CASE constant_expression ':' statement
    | DEFAULT ':' statement
    ;

compound_statement
    : '{' '}'
    | '{' statement_list '}'
    | '{' declaration_list '}'
    | '{' declaration_list statement_list '}'
    ;

declaration_list
    : declaration
    | declaration_list declaration
    ;

statement_list
    : statement
    | statement_list statement
    ;

expression_statement
    : ';'
    | expression ';'
    ;

/*
 * selection_statement
 *
 * The else-less IF alternative is given the precedence of LOWER_THAN_ELSE
 * so that, when an ELSE follows, the parser shifts it and binds it to the
 * nearest IF (dangling-else resolution).
 *
 * The rule is terminated by the single ';' at the end.  The original had an
 * extra ';' after the %prec clause, which made the rule look finished even
 * though the '|' alternatives that followed still belonged to it.
 */
selection_statement
    : IF '(' expression ')' statement  %prec LOWER_THAN_ELSE
    | IF '(' expression ')' statement ELSE statement
    | SWITCH '(' expression ')' statement
    ;

iteration_statement
    : WHILE '(' expression ')' statement
    | DO statement WHILE '(' expression ')' ';'
    | FOR '(' expression_statement expression_statement ')' statement
    | FOR '(' expression_statement expression_statement expression ')' statement
    ;

jump_statement
    : GOTO IDENTIFIER ';'   //{context_check();}
    | CONTINUE ';'
    | BREAK ';'
    | RETURN ';'
    | RETURN expression ';'
    ;

translation_unit
    : external_declaration
    | translation_unit external_declaration
    ;

external_declaration
    : function_definition
    | declaration
    ;

function_definition
    : declaration_specifiers declarator declaration_list compound_statement
    | declaration_specifiers declarator compound_statement
    | declarator declaration_list compound_statement
    | declarator compound_statement
    ;
%%
yyerror(s)
char *s;
{
    fflush(stdout);err=1;
    printf("Syntax error at Pos : %d : %d\n",line,cnt);
    exit(0);
    //printf("\n%*s\n%*s\n", column, "^", column, s);
}
main(argc,argv)
int argc;
char **argv;
{

    char *fname;    
    ++argv,--argc;/*skip program name*/
    if(argc>0)
    {
        yyin=fopen(argv[0],"r");
        fname=argv[0];
        strcat(fname,"_output");
        yyout=fopen(fname,"w");
    }
    else
    {
        printf("Please give the c filename as an argument.\n");
    }
    yyparse();
    if(err==0)
    printf("No Syntax errors found!\n");
    fname=argv[0];strcat(fname,"_symbol-table");
    FILE *sym_tab=fopen(fname,"w");
    fprintf(sym_tab,"Type\tSymbol\n");
    symrec *ptr;    
    for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
    {
        fprintf(sym_tab,"%d\t%s\n",ptr->type,ptr->name);
    }
    fclose(sym_tab);    

}   

symbol_table.h源代码

/*
 * symbol_table.h - minimal symbol table kept as a singly linked list.
 *
 * This header DEFINES the list head and both functions, so it must be
 * included by exactly one translation unit of a program.
 */
#ifndef SYMBOL_TABLE_H
#define SYMBOL_TABLE_H

#include <stdlib.h>   /* malloc, free */
#include <string.h>   /* strlen, memcpy, strcmp */

/* Numeric type codes for symrec.type. */
#define t_void  1
#define t_char  2
#define t_int   3
#define t_float 4

/* One symbol table entry. */
struct symrec
{
    char *name;             /* heap-allocated copy of the symbol's name */
    int type;               /* type code supplied by the caller of putsym() */
    struct symrec *next;    /* next (older) entry, or NULL at the end */
};
typedef struct symrec symrec;

/* Head of the list; the most recently inserted symbol comes first. */
symrec *sym_table = NULL;

symrec *putsym(const char *sym_name, int sym_type);
symrec *getsym(const char *sym_name);

/*
 * putsym - insert a new entry at the head of the symbol table.
 *
 * sym_name: name to record; the string is copied, so the caller keeps
 *           ownership of its buffer.
 * sym_type: type code to store with the name.
 *
 * No check for duplicates is made.  Returns the new entry, or NULL (leaving
 * the table unchanged) when sym_name is NULL or memory cannot be allocated.
 */
symrec *putsym(const char *sym_name, int sym_type)
{
    symrec *ptr;
    size_t len;

    if (sym_name == NULL)
        return NULL;

    ptr = malloc(sizeof *ptr);
    if (ptr == NULL)
        return NULL;

    len = strlen(sym_name) + 1;
    ptr->name = malloc(len);
    if (ptr->name == NULL) {
        free(ptr);
        return NULL;
    }
    memcpy(ptr->name, sym_name, len);

    ptr->type = sym_type;
    ptr->next = sym_table;
    sym_table = ptr;
    return ptr;
}

/*
 * getsym - find a symbol by name.
 *
 * Returns the first (most recently inserted) entry whose name equals
 * sym_name, or NULL when there is none or sym_name is NULL.
 */
symrec *getsym(const char *sym_name)
{
    symrec *ptr;

    if (sym_name == NULL)
        return NULL;

    for (ptr = sym_table; ptr != NULL; ptr = ptr->next) {
        if (strcmp(ptr->name, sym_name) == 0)
            return ptr;
    }
    return NULL;
}

#endif /* SYMBOL_TABLE_H */

1 个答案:

答案 0 :(得分:2)

一般而言,当您进行赋值操作时,需要检查左操作数以确保其为左值,如果不是则发出错误。这通常是作为类型检查的一部分完成的 - 您保留有关值的属性(例如,是否为左值)以及类型,并检查这些属性对于每次使用值是否正确。

所以你可以做的是使用%union来定义一个可以保存这个信息的解析器值对象:

%union {
    struct {
        Type  *type;
        int   is_lvalue;
    } valinfo;
}
%type<valinfo> assignment_expression unary_expression

然后,您的赋值规则会在检查类型的同时检查它(是否为左值):

assignment_expression:
    unary_expression assignment_operator assignment_expression {
        if (!$1.is_lvalue)
            error("assigning to non-lvalue");
        if ($1.type != $3.type && !type_is_implicitly_convertable($3.type, $1.type))
            error("type mismatch in assignment");
        $$.type = $1. type;
        $$.is_lvalue = 0; }

请注意,对于其值可能被其他规则动作使用的每一个规则动作,您都需要确保在其中正确设置$$;您的代码没有这样做,因此它很可能并没有做任何有用的事情。