我正在尝试用 lex 和 yacc 为一门名为 CSimple 的新语言构建编译器。(这是该语言的手册:http://www.cs.ucsb.edu/~chris/teaching/cs160/projects/language.html)
对于给定的输入,我需要以前序(pre-order)方式打印解析得到的语法树,例如:
x=x+4;
以及:
procedure foo(i,j :integer) return integer { return 0;}
等等……
对于这些输入,它只是打印出词素(lexeme),并且编译器总是报出如下错误:
50, x20, =50, x30, +56, 439, ; //lexemas for the first input
4, procedure7650, foo35, (50, i42, ,50, j61, :7, integer36, )7615, return767, integer7640, {7615, return7656, 039, ;41, } //lexemas for the second input
line 1: syntax error, unexpected $end, expecting FUNCTION //The error
这是我的lex文件:
%{
/* Scanner prologue: C declarations made visible to the rule actions and the user code below. */
#include <stdlib.h>
#include <stdio.h>
#define YYDEBUG 0
void yyerror(const char *);
extern char* yytext;
/* Prints the rule index and the matched text, then returns the token name it was given. */
char* func (char* token, int index);
%}
%%
 /* Scanner rules. Every action below only prints (through func() or printf());
    func()'s return value is discarded and no action executes `return`, so
    yylex() keeps scanning to end of input instead of handing a token to the
    parser.
    NOTE(review): inside a bracket expression such as [^IDENTIFIER] the name is
    read as a set of individual characters, not as a reference to another
    rule or token - confirm that this is what the later rules intend. */
boolean {func("BOOLEAN", 1);}
true {func("TRUE", 2);}
false {func("FALSE", 3);}
procedure {func("FUNCTION", 4);}
float {func("FLOAT", 5);}
char {func("CHAR", 6);}
integer {func("INT", 7);}
string {func("STRING", 8);}
intptr {func("INTPTR", 9);}
charptr {func("CHARPTR", 10);}
if {func("COND", 11);}
else {func("BLOCK", 12);}
while {func("WHILE_COND", 13);}
var {func("VARIABLE", 14);}
return {func("RETURN", 15);}
null {func("NL", 16);}
\&\& {func("AND", 17);}
\/ {func("DIVISION_OP", 18);}
\/\%.*\%\/ {func("COMMENT", 19);}
\= {func("ASSIGN", 20);}
\=\= {func("EQUAL", 21);}
\> {func("BIGGER_THEN", 22);}
\>\= {func("BIGGER_OR_EQUAL", 23);}
\< {func("SMALLER_THEN", 24);}
\<\= {func("SMALLER_OR_EQUAL", 25);}
\- {func("MINUS", 26);}
\! {func("LOGICAL_NOT", 27);}
\!\= {func("NOT_EQUAL", 28);}
\|\| {func("OR", 29);}
\+ {func("PLUS", 30);}
\* {func("MUL", 31);}
\& {func("ADDRESS_OF", 32);}
\^ {func("DEREFERANCE", 33);}
\^\^ {func("SYNTAX_ERROR", 34);}
\( {func("L_BRACKET", 35);}
\) {func("R_BRACKET", 36);}
\[ {func("L_STRING_INDEX", 37);}
\] {func("R_STRING_INDEX", 38);}
\; {func("EOS", 39);}
\{ {func("OB", 40);}
\} {func("CB", 41);}
\, {func("COMMA", 42);}
\: {func("VAR_DEC", 43);}
\_ {func("UNDERSCORE", 44);}
\|[\-]*[0-9]+\| {func("ABSULUTE_VALUE_OF_INT", 45);}
\|[a-zA-Z0-9]+\| {func("DECLARED_LENGTH_OF_STRING", 46);}
\&[0-9]* {func("LINKER_ERROR", 47);}
\&[a-zA-Z]+[\+|\-|\*|\/][a-zA-Z]+ {func("LINKER_ERROR", 48);}
\&[^STRING_TYPE\[0-9]+\]] {func("LINKER_ERROR", 49);}
[a-zA-Z]+[_]*[a-zA-Z0-9]* {func("IDENTIFIER", 50);}
[\"][a-zA-Z0-9]+[\"] {func("STRING_TYPE", 51);}
[\'].[\'] {func("CHAR_TYPE", 52);}
[\']..+[\'] {func("SYNTAX_ERROR", 53);}
[0-9]+[\.][0-9]+ {func("FLOAT_CONST", 54);}
[\-][0-9]+[\.][0-9]+ {func("FLOAT_CONST", 55);}
0|[1-9]+[0-9]* {func("INTEGER_CONST", 56);}
[\-][1-9]+[0-9]* {func("INTEGER_CONST", 57);}
0[x|X][0-9]+[a-fA-F0-9]*[a-fA-F0-9]* {func("HEX_NUMBER", 58);}
[0][^xX][1-7]+[0-7]* {func("OCTAL_NUMBER", 59);}
[0|1]+[b] {func("BINARY_NUMBER", 60);}
[^IDENTIFIER][\:] {func("SYNTAX_ERROR", 61);}
[IDENTIFIER\,]*[IDENTIFIER\:] {func("PARAMETER_LIST", 62);}
\([.*]\)\[[.*]\] {func("SYNTAX_ERROR", 63);}
[^[[IDENTIFIER|string\[integer\]|[\^][a-zA-Z]]+]][\=] {func("TYPE_MISMATCH_ERROR", 64);}
[a-zA-Z]+[=][a-zA-Z]+[=] {func("SYNTAX_ERROR", 65);}
\"\m\a\i\n\(\)\" {func("CASE_SENSETIVE_ERROR", 66);}
\([^[\)]] {func("SYNTAX_ERROR", 67);}
\{[^[\}]] {func("SYNTAX_ERROR", 68);}
if|while[^\(] {func("SYNTAX_ERROR", 69);}
else[^\{] {func("SYNTAX_ERROR", 70);}
procedure[^[[IDENTIFIER][\(][PARAMETER_LIST]*[\)]return[boolean|char|integer|intptr|charptr][\{]]] {func("FUNC_DECL_ERROR", 71);}
[PARAMETER_LIST][boolean|char|integer|intptr|charptr][\;] {func("DECL_LIST", 72);}
[IDENTIFIER\:][boolean|char|integer|intptr|charptr|string[INTEGER_CONST]][\;] {func("DECL", 73);}
var[^[DECL_LIST|DECL]] {func("DECL_ERROR", 74);}
return[^[[true|false|CHAR_TYPE|INTEGER_CONST][\;]]] {func("RETURN_ERROR", 75);}
[ ]+ {printf("76");}
--[^ \n\;\:\[\]\{\}\(\)\,]+ {func("SYNTAX_ERROR", 77);}
%%
/*
 * Debug helper invoked from the scanner actions.
 *
 * Writes "<index>, <matched text>" for the current match (yytext) to stdout,
 * with no separator or newline after it, and hands the token name back
 * unchanged.
 *
 * token: name of the token the rule recognised.
 * index: number identifying the rule that matched.
 * Returns `token`.
 */
char* func (char* token, int index)
{
    char *name = token;

    printf("%d, %s", index, yytext);
    return name;
}
/*
 * End-of-input hook for the scanner.
 * Always returns 1 (nonzero), i.e. there is no further input to switch to.
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
这是yacc文件:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int yylex(void);
void yyerror(const char *);
/* Binary parse-tree node; also the semantic value type of every grammar symbol (see YYSTYPE). */
typedef struct node{
char *token;            /* label printed by printtree() */
int line_number;        /* line number associated with the node; read by printtree() */
struct node *left;      /* first child, or NULL */
struct node *right;     /* second child, or NULL */
} node;
/* Semantic values ($$, $1, ...) are pointers to tree nodes. */
#define YYSTYPE struct node *
/* Allocates a node with the given children and a copy of the label. */
node *mknode(node *left, node *right, char *token);
/* Prints the tree in pre-order: node label, then left subtree, then right subtree. */
void printtree(node *tree);
/* Highest line number for which printtree() has already printed a "Line(n)" header; starts at 1. */
int yacc_line_number = 1;
%}
/* Request detailed syntax-error messages ("unexpected X, expecting Y"). */
%error-verbose
/* The grammar's start symbol. */
%start program
/* Token codes; the names match the strings the scanner rules pass to func(). */
%token BOOLEAN TRUE FALSE FUNCTION FLOAT CHAR INT STRING INTPTR CHARPTR DECL
%token L_BRACKET COND WHILE_COND BLOCK VARIABLE RETURN OB CB EOS FUNC_DECL_ERROR
%token AND DIVISION_OP COMMENT ASSIGN EQUAL BIGGER_THEN BIGGER_OR_EQUAL DECL_ERROR
%token SMALLER_THEN SMALLER_OR_EQUAL MINUS LOGICAL_NOT NOT_EQUAL OR PLUS DECL_LIST
%token MUL ADDRESS_OF SEMANTIC_ERROR DEREFERANCE SYNTAX_ERROR PARAMETER_LIST
%token R_BRACKET L_STRING_INDEX R_STRING_INDEX COMMA VAR_DEC NL TYPE_MISMATCH_ERROR
%token UNDERSCORE ABSULUTE_VALUE_OF_INT DECLARED_LENGTH_OF_STRING IDENTIFIER
%token STRING_TYPE CHAR_TYPE FLOAT_CONST INTEGER_CONST HEX_NUMBER OCTAL_NUMBER
%token BINARY_NUMBER CASE_SENSETIVE_ERROR LINKER_ERROR IFX RETURN_ERROR
/* Precedence, lowest first. IFX is used in the grammar only through %prec on
   the else-less if rule; BLOCK (the `else` token) is declared after it and so
   has higher precedence, which makes an `else` bind to the nearest `if`. */
%left EOS
//%right ASSIGN
%nonassoc IFX
%nonassoc BLOCK
%%
/* Start symbol: once the whole input has been reduced, print the finished tree. */
program:method_declarations {printtree($1);};
/* One or more procedures, chained left-recursively through nodes with an empty label. */
method_declarations:method_declaration {$$=$1;}
|method_declarations method_declaration {$$ = mknode($1,$2,"");};
/* A procedure definition: the FUNCTION node gets the procedure name as its left
   child and the statement block as its right child. The parameter list and the
   types are parsed but not attached to the tree. */
method_declaration:FUNCTION IDENTIFIER L_BRACKET R_BRACKET RETURN type OB statement_block CB {$1->left=$2; $1->right=$8; $$=$1;}
|FUNCTION IDENTIFIER L_BRACKET PARAMETER_LIST type R_BRACKET RETURN type OB statement_block CB {$1->left=$2; $1->right=$10; $$=$1;};
type: BOOLEAN {$$=$1;} | CHAR {$$=$1;} | CHARPTR {$$=$1;} | INTPTR {$$=$1;} | INT {$$=$1;};
/* A possibly empty statement list; an empty list is a NULL subtree. */
statement_block: /* none */ {$$ = 0;} | statement_block statement {$$ = mknode($1,$2,"");};
statement: simple_statement EOS {$$=$1;} | compound_statement {$$=$1;} | OB statement_block CB {$$=$2;};
simple_statement: declarative_statement {$$=$1;}| assignment_statement {$$=$1;};
/* NOTE(review): dec_statement already consumes an EOS and `statement` requires
   another one after a simple_statement, so a declaration appears to need two
   terminators - confirm whether that is intended. */
declarative_statement: VARIABLE IDENTIFIER dec_statement {$1->left = $2; $1->right=$3; $$=$1;};
dec_statement: VAR_DEC type EOS {$$=$2;};
assignment_statement: IDENTIFIER {$$=$1;} | IDENTIFIER ASSIGN expression { $2->left=$1; $2->right=$3; $$=$2;};
/* Expression levels, lowest precedence first. For every binary operator the
   operator's own token node becomes the parent of its two operands. */
expression: or_expression {$$=$1;};
or_expression: and_expression {$$=$1;} | or_expression OR and_expression {$2->left=$1; $2->right=$3; $$=$2;};
and_expression: relop_expression {$$=$1;} | and_expression AND relop_expression {$2->left=$1; $2->right=$3; $$=$2;};
relop_expression: ltgt_expression {$$=$1;} | relop_expression NOT_EQUAL ltgt_expression {$2->left=$1;$2->right=$3; $$=$2;} | relop_expression EQUAL ltgt_expression {$2->left=$1;$2->right=$3; $$=$2;};
ltgt_expression: addop_expression {$$=$1;} | ltgt_expression BIGGER_THEN addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression SMALLER_THEN addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression BIGGER_OR_EQUAL addop_expression {$2->left=$1; $2->right=$3; $$=$2;} | ltgt_expression SMALLER_OR_EQUAL addop_expression {$2->left=$1; $2->right=$3; $$=$2;};
addop_expression: mulop_expression {$$=$1;} | addop_expression PLUS mulop_expression {$2->left=$1; $2->right=$3; $$=$2;} | addop_expression MINUS mulop_expression {$2->left=$1; $2->right=$3; $$=$2;};
mulop_expression: term {$$=$1;} | mulop_expression MUL term {$2->left=$1; $2->right=$3; $$=$2;} | mulop_expression DIVISION_OP term {$2->left=$1; $2->right=$3; $$=$2;};
/* Unary operators: the operand becomes the left child of the operator node. */
term: LOGICAL_NOT value {$1->left=$2; $$=$1;} | PLUS value {$1->left=$2; $$=$1;} | MINUS value {$1->left=$2; $$=$1;} | value {$$=$1;};
value: IDENTIFIER {$$=$1;} | STRING_TYPE {$$=$1;} | CHAR_TYPE {$$=$1;} | FLOAT_CONST {$$=$1;} | HEX_NUMBER {$$=$1;} | INTEGER_CONST {$$=$1;} | OCTAL_NUMBER {$$=$1;} | BINARY_NUMBER {$$=$1;} | TRUE {$$=$1;} | FALSE {$$=$1;} | L_BRACKET expression R_BRACKET {$$=$2;};
compound_statement: if_statement {$$=$1;} | l_statement {$$=$1;};
/* if / if-else: the condition is the left child of the COND node. Without an
   else the right child is the then-statement; with an else it is an unlabeled
   node joining the then-statement and the BLOCK (else) node, whose left child
   is the else-statement. */
if_statement: COND L_BRACKET expression R_BRACKET statement %prec IFX { $1->left=$3; $1->right=$5; $$=$1;}
| COND L_BRACKET expression R_BRACKET statement BLOCK statement {$1->left=$3; $1->right = mknode($5,$6,""); $6->left=$7; $$=$1;};
l_statement: while_statement {$$=$1;};
/* while: condition on the left, loop body on the right. */
while_statement: WHILE_COND L_BRACKET expression R_BRACKET statement {$1->left=$3; $1->right=$5; $$=$1;};
%%
#include "lex.yy.c"
/*
 * Program entry point: run the parser and use its result as the process exit
 * status, so a failed parse is visible to the caller instead of always
 * exiting with 0. yyparse() returns 0 when parsing succeeds and a nonzero
 * value otherwise.
 */
int main (void)
{
    return yyparse();
}
/*
 * Allocate a tree node labelled with a private copy of `token`.
 *
 * left, right: children to attach (either may be NULL).
 * token:       NUL-terminated label; it is copied, so the caller keeps
 *              ownership of its own string.
 *
 * Returns the new node. Its line_number starts at 0. If memory cannot be
 * allocated, an error is printed to stderr and the program exits.
 */
node *mknode(node *left, node *right, char *token)
{
    node *newnode = malloc(sizeof *newnode);
    /* Size the buffer from the string length; sizeof(token) would only be the
       size of the pointer and overflows for longer labels. */
    char *newstr = malloc(strlen(token) + 1);

    if (newnode == NULL || newstr == NULL) {
        fprintf(stderr, "mknode: out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy(newstr, token);
    newnode->token = newstr;
    /* printtree() reads this field, so it must not be left uninitialized. */
    newnode->line_number = 0;
    newnode->left = left;
    newnode->right = right;
    return newnode;
}
/*
 * Print the tree rooted at `tree` to stdout in pre-order: the node's label,
 * then the left subtree, then the right subtree. A NULL tree prints nothing.
 *
 * A "Line(n)" header is printed when a node's line number exceeds the highest
 * one seen so far (yacc_line_number), and once for the first node with
 * children on line 1. A node with children and a positive line number starts
 * a new output line made of line_number underscores followed by "("; every
 * node with children is closed with ")".
 */
void printtree(node *tree)
{
    static int first_line_shown = 0;
    int has_children;
    int remaining;

    if (tree == NULL) {
        return;
    }
    has_children = (tree->left != NULL || tree->right != NULL);

    /* Announce a line number not reached before. */
    if (yacc_line_number < tree->line_number) {
        printf("\nLine(%d)", tree->line_number);
        yacc_line_number = tree->line_number;
    }

    /* yacc_line_number starts at 1, so line 1 never triggers the check above;
       announce it once here. */
    if (has_children && tree->line_number == 1 && !first_line_shown) {
        printf("\nLine(%d)", tree->line_number);
        first_line_shown = 1;
    }

    if (has_children && tree->line_number > 0) {
        printf("\n");
        for (remaining = tree->line_number; remaining > 0; remaining--) {
            printf("_");
        }
        printf("(");
    }

    printf(" %s ", tree->token);

    /* A NULL child returns immediately, so no guard is needed. */
    printtree(tree->left);
    printtree(tree->right);

    if (has_children) {
        printf(")");
    }
}
extern int yylineno;
/*
 * Error callback for the parser: writes "line <yylineno>: <message>" followed
 * by a newline to stderr.
 *
 * s: the error message text.
 */
void yyerror(const char *s)
{
    const int line = yylineno;

    fprintf(stderr, "line %d: %s\n", line, s);
}
需要帮助:)谢谢。
答案 0 :(得分:2)
你的词法分析器能够识别标记(token)并把它们打印出来,但从不把它们返回给解析器。因此它会读完整个输入、打印所有标记,然后向解析器返回 $end(EOF,值为 0)标记。解析器看到这个标记后就报告语法错误,因为它期望输入中至少有一个 method_declaration。
你需要让词法分析器在识别出一个标记时就将其返回,而不是继续读取后面的标记。解析器会反复调用词法分析器,每次调用都期望得到下一个标记。你的解析器还期望词法分析器在 yylval 中设置 node * 类型的值。所以你需要类似下面这样的 lex 规则:
 /* Each action stores a leaf node (no children) in yylval and returns the token code to the parser. */
boolean { yylval = mknode(0, 0, "BOOLEAN"); return BOOLEAN; }
true { yylval = mknode(0, 0, "TRUE"); return TRUE; }
false { yylval = mknode(0, 0, "FALSE"); return FALSE; }
:
等等。