我正在尝试用bison和lex创建一个c类型的解析器。是的,这是一个学校作业,但我太丢了,我在网上上学,所以我得不到多少帮助。我需要这个来将信息解析成一个表,但我很确定我的parse.y文件中有错误,因为它没有将信息加载到表中,它表示没有代码。我知道我的.y文件缺少一些语法,但我认为其中一些应该加载到符号表中。
运行make文件时的输出:
default:
clear
yacc -d -v --debug parse.y
lex -l scan.l
gcc -o cfc symtab.c lex.yy.c y.tab.c
clean:
$(RM) cfc *.o lex.yy.c parse.tab.c y.tab.h y.output dump.symtab
输出到屏幕:
> yacc -d -v --debug parse.y parse.y:39 parser name defined to default
> :"parse" conflicts: 2 reduce/reduce lex -l scan.l gcc -o cfc symtab.c
> lex.yy.c y.tab.c
通过解析器传递60.cf:
> semantic error cnt: 0 lines of code: -1
解析器的结果是符号表为空,输出结果如下:
> Starting parse
> Entering state 0
> Reading a token: Next token is 292 (INT)
> parse error: line 0: Shifting error token, Entering state 1
> Reducing via rule 3 (line 57), error -> block
加载60.cf的示例代码:
int foo()
{
while ( i >= 0 ) {
i = i * 1;
c = c + 1;
}
if ( c >= 0 )
{ ; }
else
{ i = 7;
c = i;
}
return 0;
}
这是scan.l文件:
%option yylineno
%{
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "symtab.h"
#include "y.tab.h"
int badtoken_cnt = 0;
int token_cnt = 0;
int col_cnt = 0;
int lineno = 0;
%}
comment \/\*([^*]|\n)*\*\/
digit [0-9]
ichar [A-Z_a-z]
integer {digit}+
newline \n
strchar ([ ~]|\\n)
identifier {ichar}([0-9]|{ichar})*
whitespace [ \t]+
float ([+-]?{digit}+)?\.{digit}*(e?[+-]?{digit}+)?
chrliteral '([!*]|\\n)'
nullstring \"\"
escquote [^"]*\\\"[^"]*
strliteral \"[^"]*{escquote}*\"
%%
"if" { return IF;}
"then" { return THEN;}
"else" { return ELSE;}
"while" { return WHILE;}
"return" { return RETURN;}
"break" { return GOTO;}
"goto" { return GOTO;}
"read" { return READ;}
"write" { return WRITE;}
"float" { return REAL;}
"int" { return INT;}
"void" { return VOID;}
"char" { return CHAR;}
"=" { return ASSIGN;}
"!=" { return NE;}
"==" { return EQ;}
"<" { return LT;}
"<=" { return LE;}
">" { return GT;}
">=" { return GE;}
"&&" { return AND;}
"||" { return OR;}
"+" { return PLUS;}
"-" { return MINUS;}
"*" { return TIMES;}
"/" { return OVER;}
"%" { return MOD;}
"{" { return LBRACE;}
"}" { return RBRACE;}
"[" { return LBRACK;}
"]" { return RBRACK;}
"(" { return LPAREN;}
")" { return RPAREN;}
";" { return SEMI;}
"," { return COMMA;}
{float} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, REAL_TYPE, lineno);
printf("yytext: %s\n",yytext);
return FLOAT;
}
{integer} {
yylval.tokname = malloc(sizeof(yytext));
printf("yylval: %s\n",yylval.tokname);
strncpy(yylval.tokname,yytext,yyleng);
insert(yytext, yyleng, INT_TYPE, lineno);
printf("yytext: %s\n",yytext);
return INTEGER;
}
{chrliteral} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, -1, lineno);
printf("yytext: %s\n",yytext);
return CHRLIT;
}
{nullstring} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, -1, lineno);
printf("yytext: %s\n",yytext);
return STRLIT;
}
{strliteral} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, STR_TYPE, lineno);
printf("yytext: %s\n",yytext);
return STRLIT;
}
{identifier} {
return IDENT;
}
{newline} { col_cnt = 1; }
{whitespace} { col_cnt+=yyleng; }
{comment} { col_cnt = 0; }
"//" { /* handle C++ style comments */
char c;
do { c = input();
} while (c != '\n');
lineno++;
}
. { return ERROR;}
%%
这是parse.y文件,我相信错误的地方是:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"
#define DEBUG 0
#define TRUE 1
#define FALSE 0
#define MAX_MSG_LEN 50
#define YYDEBUG 1
int errcnt = 0;
char errmsg[40];
extern char *yytext;
extern FILE *yyin;
extern FILE *yyout;
extern int yyparse();
extern int lineno;
int yydebug = 1;
int t;
%}
/* no warning for fewer than 1 shift/reduce conflicts and 0 reduce/reduce */
%expect 1
%union { int tokid;
char *tokname;
}
%token <tokname> IDENT NUMBER
%token <tokid> ASSIGN PLUS LBRACE RBRACE LPAREN RPAREN SEMI ERROR FLOAT INTEGER
/* ADDED */
%token <tokid> IF THEN ELSE WHILE RETURN GOTO READ WRITE VOID CHAR
/* ADDED */
%token <tokid> NE EQ LT LE GT GE AND OR MINUS TIMES OVER MOD INT REAL
/* ADDED */
%token <tokid> LBRACK RBRACK COMMA CHRLIT STRLIT
%type <tokid> block stmt_seq stmt decl expr term assignmnt decltype error
%start block
%%
block : LBRACE stmt_seq RBRACE
| LPAREN stmt_seq RPAREN
| error { yyerrok; return 0; }
;
stmt_seq : stmt_seq stmt SEMI
| stmt SEMI
| error { yyerrok; return 0;}
;
stmt : expr
| decl
| assignmnt { $$ = $1; }
| error { yyerrok; return 0;}
;
decl : decltype IDENT {
setType($2,$1);
fprintf(stdout,"set decltype to: %d for %s\n",$$,$2);
}
;
expr : expr PLUS term
{ /* add constraint here */ }
| term { $$ = $1; }
| error { yyerrok; return 0;}
;
assignmnt : IDENT ASSIGN expr
{ /* add constraint here */ }
;
term : NUMBER { $$ = lookupType($1); }
| IDENT { $$ = lookupType($1); }
;
decltype : INTEGER { $$ = INT_TYPE; }
| FLOAT { $$ = REAL_TYPE; }
;
%%
int main( int argc,char *argv[] )
{
strcpy(errmsg,"type error\n");
int i;
if(argc < 2) {
printf("Usage: ./cfc <source filename>\n");
exit(0);
}
FILE *fp = fopen(argv[1],"r");
if(!fp) {
printf("Unable to open file for reading\n");
exit(0);
}
yyin = fp;
fp = fopen("dump.symtab","w");
if(!fp) {
printf("Unable to open file for writing\n");
exit(0);
}
int flag = yyparse();
/* dump symtab for debugging if necessary */
symtab_dump(fp);
lineno--; /* don't count the last newline */
printf("\nsemantic error cnt: %d \tlines of code: %d\n",errcnt,lineno);
/* cleanup */
fclose(fp);
fclose(yyin);
return flag;
}
yywrap()
{
return(1);
}
int yyerror(char * msg)
{ fprintf(stderr,"%s: line %d: \n",msg,lineno);
return 0;
}
如果你需要它,这里是我正在使用的符号表:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"
/* maximum size of hash table */
#define SIZE 200
#define MAXTOKENLEN 40
/* power of two multiplier in hash function */
#define SHIFT 4
/* the hash function */
static int hash ( char * key )
{ int temp = 0;
int i = 0;
while (key[i] != '\0')
{ temp = ((temp << SHIFT) + key[i]) % SIZE;
++i;
}
return temp;
}
/* a linked list of references (line nos) for each variable */
typedef struct RefListRec {
int lineno;
struct RefListRec * next;
/* ADDED */
int type;
} * RefList;
/* hash entry holds variable name and its reference list */
typedef struct HashRec {
char st_name[MAXTOKENLEN];
int st_size;
RefList lines;
int st_value;
/* ADDED */
int st_type;
struct HashRec * next;
} * Node;
/* the hash table */
static Node hashTable[SIZE];
/* insert an entry with its line number - if entry
* already exists just add its reference line no.
*/
void insert( char * name, int len, int type, int lineno )
{
/* ADDED */
/*int len = strlen(name);*/
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) /* variable not yet in table */
{ l = (Node) malloc(sizeof(struct HashRec));
strncpy(l->st_name, name, len);
/* ADDED */
l->st_type = type;
l->lines = (RefList) malloc(sizeof(struct RefListRec));
l->lines->lineno = lineno;
l->lines->next = NULL;
l->next = hashTable[h];
hashTable[h] = l; }
else /* found in table, so just add line number */
{ RefList t = l->lines;
while (t->next != NULL) t = t->next;
t->next = (RefList) malloc(sizeof(struct RefListRec));
t->next->lineno = lineno;
t->next->next = NULL;
}
}
/* return value (address) of symbol if found or -1 if not found */
int lookup ( char * name )
{ int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->st_value;
}
/* return type value of symbol or -1 if symbol not found */
int lookupType( char * name )
{
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->st_type;
}
/* set datatype of symbol returns 0 if symbol not found */
int setType( char * name, int t )
{
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else {
l->st_type = t;
return 0;
}
}
/* print to stdout by default */
void symtab_dump(FILE * of) {
int i;
fprintf(of,"------------ ------ ------------\n");
fprintf(of,"Name Type Line Numbers\n");
fprintf(of,"------------ ------ -------------\n");
for (i=0; i < SIZE; ++i)
{ if (hashTable[i] != NULL)
{ Node l = hashTable[i];
while (l != NULL)
{ RefList t = l->lines;
fprintf(of,"%-12s ",l->st_name);
if (l->st_type == INT_TYPE)
fprintf(of,"%-7s","int ");
if (l->st_type == REAL_TYPE)
fprintf(of,"%-7s","real");
if (l->st_type == STR_TYPE)
fprintf(of,"%-7s","string");
while (t != NULL)
{ fprintf(of,"%4d ",t->lineno);
t = t->next;
}
fprintf(of,"\n");
l = l->next;
}
}
}
}
答案 0 :(得分:3)
由于竞争error
制作而发生冲突。您的stmt
派生error
。但stmt
也派生expr
派生error
。将一个放在expr
内。
查看y.output
生成的yacc
文件(因为您通过了-v
)。
您的语法不可能与输入程序匹配,因为您的起始符号派生括号或括号括起的语句块,而不是一系列外部函数定义。您需要添加将处理函数定义的语法片段:decltype IDENT arglist body
,等等。
立即脱颖而出的一件小事:
comment \/\*([^*]|\n)*\*\/
您说评论不能包含星号。
匹配真实C评论的正则表达式为:[/][*]([^*]|[*]*[^*/])*[*]+[/]
(但请注意,这并未考虑Lex对换行符的约定)。
您无需为单个字符标记指定令牌名称。这只会增加语法的含义,你必须写出像LBRACE这样的东西而不仅仅是&#39; {&#39;。