Question

我正在尝试用 lex 和 yacc 为一门名为 CSimple 的新语言构建编译器。（这是该语言的手册：http://www.cs.ucsb.edu/~chris/teaching/cs160/projects/language.html）

我需要对给定的输入以前序（pre-order）方式打印出解析语法树，输入例如：

x=x+4;

并且 -

procedure foo(i,j :integer) return integer { return 0;}

等等……

对于这些输入，它只打印出了词素（lexemes），而且编译器总是报如下错误：

50, x20, =50, x30, +56, 439, ; //lexemas for the first input 
4, procedure7650, foo35, (50, i42, ,50, j61,  :7, integer36, )7615, return767, integer7640, {7615, return7656, 039, ;41, } //lexemas for the second input 
line 1: syntax error, unexpected $end, expecting FUNCTION //The error

这是我的lex文件：

%{
#include <stdlib.h>
#include <stdio.h>

#define YYDEBUG 0
void yyerror(const char *);
extern char* yytext;
char* func (char* token, int index);
%}

%%
 /*
  * Token rules for the CSimple scanner.
  *
  * Every action below calls func(name, index), which prints the rule index
  * and the matched text. The string func() returns is discarded and no
  * action contains a `return`, so nothing is handed back to the caller of
  * yylex() when a rule matches.
  * The one exception in shape is the blank rule near the bottom, which
  * prints "76" directly without calling func().
  *
  * NOTE(review): bracket expressions such as [^IDENTIFIER] or
  * [PARAMETER_LIST] are character classes over those literal letters, not
  * references to other rules -- confirm that is what was intended.
  */
boolean {func("BOOLEAN", 1);}
true {func("TRUE", 2);}
false {func("FALSE", 3);}
procedure {func("FUNCTION", 4);}
float {func("FLOAT", 5);}
char {func("CHAR", 6);}
integer {func("INT", 7);}
string {func("STRING", 8);}
intptr {func("INTPTR", 9);}
charptr {func("CHARPTR", 10);}
if {func("COND", 11);}
else {func("BLOCK", 12);}
while {func("WHILE_COND", 13);}
var {func("VARIABLE", 14);}
return {func("RETURN", 15);}
null {func("NL", 16);}

\&\& {func("AND", 17);}
\/ {func("DIVISION_OP", 18);}
\/\%.*\%\/ {func("COMMENT", 19);} 
\= {func("ASSIGN", 20);}
\=\= {func("EQUAL", 21);}
\> {func("BIGGER_THEN", 22);}
\>\= {func("BIGGER_OR_EQUAL", 23);}
\< {func("SMALLER_THEN", 24);}
\<\= {func("SMALLER_OR_EQUAL", 25);}
\- {func("MINUS", 26);}
\! {func("LOGICAL_NOT", 27);}
\!\= {func("NOT_EQUAL", 28);}
\|\| {func("OR", 29);}
\+ {func("PLUS", 30);}
\* {func("MUL", 31);}
\& {func("ADDRESS_OF", 32);}
\^ {func("DEREFERANCE", 33);}
\^\^ {func("SYNTAX_ERROR", 34);}
\( {func("L_BRACKET", 35);}
\) {func("R_BRACKET", 36);}
\[ {func("L_STRING_INDEX", 37);}
\] {func("R_STRING_INDEX", 38);}
\; {func("EOS", 39);}
\{ {func("OB", 40);}
\} {func("CB", 41);}
\, {func("COMMA", 42);}
\: {func("VAR_DEC", 43);}
\_ {func("UNDERSCORE", 44);}
\|[\-]*[0-9]+\| {func("ABSULUTE_VALUE_OF_INT", 45);}
\|[a-zA-Z0-9]+\| {func("DECLARED_LENGTH_OF_STRING", 46);}
\&[0-9]* {func("LINKER_ERROR", 47);}
\&[a-zA-Z]+[\+|\-|\*|\/][a-zA-Z]+ {func("LINKER_ERROR", 48);}
\&[^STRING_TYPE\[0-9]+\]] {func("LINKER_ERROR", 49);}

[a-zA-Z]+[_]*[a-zA-Z0-9]* {func("IDENTIFIER", 50);}
[\"][a-zA-Z0-9]+[\"] {func("STRING_TYPE", 51);}
[\'].[\'] {func("CHAR_TYPE", 52);}
[\']..+[\'] {func("SYNTAX_ERROR", 53);}
[0-9]+[\.][0-9]+ {func("FLOAT_CONST", 54);}
[\-][0-9]+[\.][0-9]+ {func("FLOAT_CONST", 55);}
0|[1-9]+[0-9]* {func("INTEGER_CONST", 56);}
[\-][1-9]+[0-9]* {func("INTEGER_CONST", 57);}
0[x|X][0-9]+[a-fA-F0-9]*[a-fA-F0-9]* {func("HEX_NUMBER", 58);}
[0][^xX][1-7]+[0-7]* {func("OCTAL_NUMBER", 59);}
[0|1]+[b] {func("BINARY_NUMBER", 60);}
[^IDENTIFIER][\:] {func("SYNTAX_ERROR", 61);}
[IDENTIFIER\,]*[IDENTIFIER\:] {func("PARAMETER_LIST", 62);}

\([.*]\)\[[.*]\] {func("SYNTAX_ERROR", 63);}
[^[[IDENTIFIER|string\[integer\]|[\^][a-zA-Z]]+]][\=] {func("TYPE_MISMATCH_ERROR", 64);}
[a-zA-Z]+[=][a-zA-Z]+[=] {func("SYNTAX_ERROR", 65);}
\"\m\a\i\n\(\)\" {func("CASE_SENSETIVE_ERROR", 66);}
\([^[\)]] {func("SYNTAX_ERROR", 67);} 
\{[^[\}]] {func("SYNTAX_ERROR", 68);}
if|while[^\(] {func("SYNTAX_ERROR", 69);}
else[^\{] {func("SYNTAX_ERROR", 70);}

procedure[^[[IDENTIFIER][\(][PARAMETER_LIST]*[\)]return[boolean|char|integer|intptr|charptr][\{]]] {func("FUNC_DECL_ERROR", 71);}
[PARAMETER_LIST][boolean|char|integer|intptr|charptr][\;] {func("DECL_LIST", 72);}
[IDENTIFIER\:][boolean|char|integer|intptr|charptr|string[INTEGER_CONST]][\;] {func("DECL", 73);}
var[^[DECL_LIST|DECL]] {func("DECL_ERROR", 74);}
return[^[[true|false|CHAR_TYPE|INTEGER_CONST][\;]]] {func("RETURN_ERROR", 75);}
[ ]+ {printf("76");}
--[^ \n\;\:\[\]\{\}\(\)\,]+ {func("SYNTAX_ERROR", 77);}
%%
/*
 * Trace helper used by the scanner actions.
 *
 * Writes "<index>, <matched text>" to standard output (no trailing newline),
 * taking the matched text from yytext, and then gives the token name back
 * to the caller untouched.
 *
 * token: token name; returned as-is.
 * index: rule number to print in front of the lexeme.
 */
char* func (char* token, int index)
{
    fprintf(stdout, "%d, %s", index, yytext);

    return token;
}

/*
 * End-of-input hook for the scanner.
 * Always answers 1, i.e. there is no further input to continue with.
 */
int yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}

这是yacc文件：

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int yylex(void);
void yyerror(const char *);

typedef struct node{
        char *token;
    int line_number;
        struct node *left;
        struct node *right;
} node;

#define YYSTYPE struct node *

node *mknode(node *left, node *right, char *token);
void printtree(node *tree);
int yacc_line_number = 1;

%}

%error-verbose

%start program

%token BOOLEAN TRUE FALSE FUNCTION FLOAT CHAR INT STRING INTPTR CHARPTR DECL
%token L_BRACKET COND WHILE_COND BLOCK VARIABLE RETURN OB CB EOS FUNC_DECL_ERROR
%token AND DIVISION_OP COMMENT ASSIGN EQUAL BIGGER_THEN BIGGER_OR_EQUAL DECL_ERROR
%token SMALLER_THEN SMALLER_OR_EQUAL MINUS LOGICAL_NOT NOT_EQUAL OR PLUS DECL_LIST
%token MUL ADDRESS_OF SEMANTIC_ERROR DEREFERANCE SYNTAX_ERROR PARAMETER_LIST
%token R_BRACKET L_STRING_INDEX R_STRING_INDEX COMMA VAR_DEC NL TYPE_MISMATCH_ERROR
%token UNDERSCORE ABSULUTE_VALUE_OF_INT DECLARED_LENGTH_OF_STRING IDENTIFIER
%token STRING_TYPE CHAR_TYPE FLOAT_CONST INTEGER_CONST HEX_NUMBER OCTAL_NUMBER
%token BINARY_NUMBER CASE_SENSETIVE_ERROR LINKER_ERROR IFX RETURN_ERROR
%left EOS
//%right ASSIGN   

%nonassoc IFX
%nonassoc BLOCK

%%
/*
 * CSimple grammar.
 *
 * Semantic values are `struct node *` (see YYSTYPE). Most actions reuse the
 * node carried by an operator/keyword token as the parent and hang the
 * operands on ->left / ->right, so they dereference token values such as $1
 * or $2 and therefore rely on the scanner having stored a node in yylval for
 * every token it returns. Sequences are chained with unlabeled ("") joiner
 * nodes created by mknode().
 */

/* Start rule: a non-empty list of procedures; the finished tree is printed. */
program:method_declarations {printtree($1);};
method_declarations:method_declaration {$$=$1;}
                   |method_declarations method_declaration {$$ = mknode($1,$2,"");};

/* A procedure: name on the left, body on the right. Two forms: empty
 * parameter list, or PARAMETER_LIST followed by a type. */
method_declaration:FUNCTION IDENTIFIER L_BRACKET R_BRACKET RETURN type OB statement_block CB {$1->left=$2; $1->right=$8; $$=$1;}
                   |FUNCTION IDENTIFIER L_BRACKET PARAMETER_LIST type R_BRACKET RETURN type OB statement_block CB {$1->left=$2; $1->right=$10; $$=$1;};
type: BOOLEAN {$$=$1;} | CHAR {$$=$1;} | CHARPTR {$$=$1;} | INTPTR {$$=$1;} | INT {$$=$1;};

/* A block may be empty, in which case its tree is a null pointer. */
statement_block: /* none */ {$$ = 0;} | statement_block statement {$$ = mknode($1,$2,"");};
statement: simple_statement EOS {$$=$1;} | compound_statement {$$=$1;} | OB statement_block CB {$$=$2;};

simple_statement: declarative_statement {$$=$1;}| assignment_statement {$$=$1;};
declarative_statement: VARIABLE IDENTIFIER dec_statement {$1->left = $2; $1->right=$3; $$=$1;};
dec_statement: VAR_DEC type EOS {$$=$2;};
assignment_statement: IDENTIFIER {$$=$1;} | IDENTIFIER ASSIGN expression { $2->left=$1; $2->right=$3; $$=$2;};

/* Expressions: one left-recursive nonterminal per precedence level, from
 * OR (loosest) down to MUL / DIVISION_OP (tightest). */
expression: or_expression {$$=$1;};
or_expression: and_expression {$$=$1;} | or_expression OR and_expression {$2->left=$1; $2->right=$3; $$=$2;};
and_expression: relop_expression {$$=$1;} | and_expression AND relop_expression {$2->left=$1; $2->right=$3; $$=$2;};
relop_expression: ltgt_expression {$$=$1;} | relop_expression NOT_EQUAL ltgt_expression {$2->left=$1;$2->right=$3; $$=$2;} | relop_expression EQUAL ltgt_expression {$2->left=$1;$2->right=$3; $$=$2;};
ltgt_expression: addop_expression {$$=$1;} | ltgt_expression BIGGER_THEN addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression SMALLER_THEN addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression BIGGER_OR_EQUAL addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression SMALLER_OR_EQUAL addop_expression {$2->left=$1; $2->right=$3; $$=$2;};
addop_expression: mulop_expression {$$=$1;} | addop_expression PLUS mulop_expression {$2->left=$1; $2->right=$3; $$=$2;} | addop_expression MINUS mulop_expression {$2->left=$1; $2->right=$3; $$=$2;};

mulop_expression: term {$$=$1;} | mulop_expression MUL term {$2->left=$1; $2->right=$3; $$=$2;} | mulop_expression DIVISION_OP term {$2->left=$1; $2->right=$3; $$=$2;};

/* Unary operators keep their operand on ->left only. */
term: LOGICAL_NOT value {$1->left=$2; $$=$1;} | PLUS value {$1->left=$2; $$=$1;} | MINUS value {$1->left=$2; $$=$1;} | value {$$=$1;};
value: IDENTIFIER {$$=$1;} | STRING_TYPE {$$=$1;} | CHAR_TYPE {$$=$1;} | FLOAT_CONST {$$=$1;} | HEX_NUMBER {$$=$1;} | INTEGER_CONST {$$=$1;} | OCTAL_NUMBER {$$=$1;} | BINARY_NUMBER {$$=$1;} | TRUE {$$=$1;} | FALSE {$$=$1;} | L_BRACKET expression R_BRACKET {$$=$2;};

/* if / if-else: condition on the left. With an else, the right child is a
 * joiner holding the then-statement and the BLOCK node, whose left child is
 * the else-statement. %prec IFX on the else-less form, together with the
 * %nonassoc IFX / BLOCK declarations, is presumably there to settle the
 * dangling-else conflict in favour of shifting BLOCK -- TODO confirm. */
compound_statement: if_statement {$$=$1;} | l_statement {$$=$1;};
if_statement: COND L_BRACKET expression R_BRACKET statement %prec IFX { $1->left=$3; $1->right=$5; $$=$1;}
            | COND L_BRACKET expression R_BRACKET statement BLOCK statement {$1->left=$3; $1->right = mknode($5,$6,""); $6->left=$7; $$=$1;};
l_statement: while_statement {$$=$1;};
while_statement: WHILE_COND L_BRACKET expression R_BRACKET statement {$1->left=$3; $1->right=$5; $$=$1;};

%%
#include "lex.yy.c"

/*
 * Program entry point.
 *
 * Runs the parser once and turns its result into the process exit status:
 * EXIT_SUCCESS when yyparse() reports 0, EXIT_FAILURE otherwise. The
 * original discarded the result, so a failed parse still exited with 0.
 */
int main (void)
{
    return (yyparse() == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Allocate and initialise a parse-tree node.
 *
 * left, right: children to attach; either may be NULL.
 * token:       label for the node. The text is copied, so the caller's
 *              string is not retained. NULL is treated as "".
 *
 * Returns the new node with line_number set to 0. On allocation failure a
 * message is written to stderr and the process exits with EXIT_FAILURE.
 */
node *mknode(node *left, node *right, char *token)
{
    const char *label = (token != NULL) ? token : "";
    /* Size the copy from the string itself; sizeof(token) is only the size
     * of a pointer and under-allocates for longer labels. */
    size_t label_size = strlen(label) + 1;
    node *newnode = (node *)malloc(sizeof *newnode);
    char *newstr = (char *)malloc(label_size);

    if (newnode == NULL || newstr == NULL) {
        fprintf(stderr, "mknode: out of memory\n");
        exit(EXIT_FAILURE);
    }

    memcpy(newstr, label, label_size);

    newnode->token = newstr;
    /* printtree() reads line_number, so it must not be left indeterminate. */
    newnode->line_number = 0;
    newnode->left = left;
    newnode->right = right;
    return newnode;
}

/*
 * Print a parse tree to standard output in pre-order: the node's label
 * first, then its left subtree, then its right subtree.
 *
 * A "Line(n)" banner is emitted whenever a node's line_number exceeds the
 * global yacc_line_number (which is then advanced), and once more for the
 * first node with children whose line_number is 1. A node with children and
 * a positive line_number starts a new output line made of line_number
 * underscores followed by "(". Every node with children is followed by ")".
 *
 * NOTE(review): the "(" is conditional on line_number > 0 while the ")" is
 * not, so the parentheses can come out unbalanced -- confirm the intent.
 *
 * tree: root of the subtree to print; NULL prints nothing.
 */
void printtree(node *tree)
{
    static int first_line_announced = 0;
    int has_children;
    int remaining;

    if (tree == NULL) {
        return;
    }

    has_children = (tree->left != NULL) || (tree->right != NULL);

    if (yacc_line_number < tree->line_number) {
        printf("\nLine(%d)", tree->line_number);
        yacc_line_number = tree->line_number;
    }

    if (has_children) {
        if (!first_line_announced && tree->line_number == 1) {
            printf("\nLine(%d)", tree->line_number);
            first_line_announced = 1;
        }

        if (tree->line_number > 0) {
            printf("\n");
            for (remaining = tree->line_number; remaining > 0; remaining--) {
                printf("_");
            }
            printf("(");
        }
    }

    printf(" %s ", tree->token);

    /* The NULL guard at the top makes a separate check per child unnecessary. */
    printtree(tree->left);
    printtree(tree->right);

    if (has_children) {
        printf(")");
    }
}

extern int yylineno;

/*
 * Error reporting callback.
 *
 * Writes "line <yylineno>: <message>" followed by a newline to stderr.
 *
 * s: message text to report.
 */
void yyerror(const char *s)
{
    fprintf(stderr, "line %d: %s\n", yylineno, s);
}

需要帮助:)谢谢。

Answer 1

你的词法分析器能识别标记并把它们打印出来，但从不把它们返回给解析器；因此它会读完整个输入、打印所有标记，最后只向解析器返回 $end（EOF，值为 0）标记。解析器看到这个标记后报语法错误，因为按照文法，输入中至少要有一个 method_declaration。

你需要让词法分析器每识别出一个标记就立即返回它，而不是继续读取后面的标记。解析器会反复调用词法分析器，每次取得下一个标记。另外，你的解析器把语义值类型设为 node *，并期望词法分析器在 yylval 中填入该值。所以你需要类似下面这样的 lex 规则：

boolean { yylval = mknode(0, 0, "BOOLEAN"); return BOOLEAN; }
true    { yylval = mknode(0, 0, "TRUE");    return TRUE; }
false   { yylval = mknode(0, 0, "FALSE");   return FALSE; }
    :

等等。

Lex / Yacc："第1行：语法错误，意外的 $end，期待 FUNCTION" 编译错误

1 个答案: