我正在编写格式化语言的编译器,我正在编写bison文件。我的语法是正确的,但是当我添加一个递归规则然后读取测试源文件时,它说它接受结束标记的规则,但是令牌是意外的...事情是在我添加之前递归规则(对于开始和结束标记之间的某些标记)它工作正常......以下是一些细节
这是源文件
\begin{document}
\title{test}
\author{test}
\date{21/02/1985}
\pagesetup{35, 80}
\end{document}
这是野牛档案
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern int yylex();
extern int yyparse();
extern FILE *yyin;
extern FILE *yyout;
extern int yylineno;
void yyerror(const char*);
int header_status(int,int,int,int,int);
// counters to check nubmer or document properties used, must all become 1
int title = 0;
int author = 0;
int date = 0;
int pgsetup = 0;
int tabsz = 0;
%}
%union{
int iVal;
char* sVal;
}
%error-verbose
%start source
%token <sVal> SLASH
%token <sVal> BLOCK_S BLOCK_E
%token <sVal> DOC LIST ENUM
%token <sVal> TITLE AUTHOR DATE PG_SETUP TAB_SZ SECTION PARAGRAPH ITEM LINE
%token <sVal> LBRACE RBRACE LPAREN RPAREN
%token <sVal> DOCUMENT DIMENSIONS DATE_VAL STRING
%token <iVal> NUMBER
%token <sVal> ERROR_UN ERROR_IL WORD
%%
source
: /* empty */
| entry_point doc_properties txt_properties exit_point
{
if ( header_status(title, author, date, pgsetup, tabsz) == 0 )
printf("\nfail\n"); //YYABORT;
}
;
entry_point
: SLASH BLOCK_S LBRACE DOC RBRACE
;
doc_properties
: /* empty */
| doc_properties header_properties
;
header_properties
: title_property { title++; }
| author_property { author++; }
| date_property { date++; }
| pg_setup_property { pgsetup++; }
| tab_sz_property { tabsz++; }
;
txt_properties
: /* empty */
;
title_property
: SLASH TITLE LBRACE STRING RBRACE
;
author_property
: SLASH AUTHOR LBRACE STRING RBRACE
;
date_property
: SLASH DATE LBRACE DATE_VAL RBRACE
;
pg_setup_property
: SLASH PG_SETUP LBRACE DIMENSIONS RBRACE
;
tab_sz_property
: SLASH TAB_SZ LPAREN NUMBER RPAREN
;
exit_point
: SLASH BLOCK_E LBRACE DOC RBRACE
;
%%
int main (int argc, char* argv[])
{
if ( argc < 2 || argc > 3)
{
fprintf(stdout, "%s: fatal error: needs one or two arguments\n\n\t%s inputFileName [outputFileName]\n\n", argv[0], argv[0]);
}
else if ( argc == 2 )
{
char* fn = (char *)calloc(strlen(argv[1])+12, sizeof(char));
strcpy(fn, argv[1]);
strcat(fn, ".output.txt");
fprintf(stderr, "%s: using default output naming: <%s>\n\n", argv[0], fn);
yyin = fopen(argv[1], "r");
yyout = fopen(fn, "w");
yyparse();
fclose(yyin);
fclose(yyout);
}
else if ( argc == 3 )
{
yyin = fopen(argv[1], "r");
yyout = fopen(argv[2], "w");
yyparse();
fclose(yyin);
fclose(yyout);
}
return 0;
}
void yyerror(const char* str)
{
fprintf(stderr,"syntax error[%d]: %s\n",yylineno, str);
}
int header_status(int title, int author, int date, int pgsetup, int tabsz)
{
if ( title == 1 && author == 1 && date == 1 && pgsetup == 1 && tabsz == 1 )
{
return 1;
}
else
{
if ( title > 1 ) fprintf(stderr,"syntax error: title property was declared more than once\n");
else if ( title < 1 ) fprintf(stderr,"syntax error: title property was not declared (all document properties must be present)\n");
if ( author > 1 ) fprintf(stderr,"syntax error: author property was declared more than once\n");
else if ( author < 1 ) fprintf(stderr,"syntax error: author property was not declared (all document properties must be present)\n");
if ( date > 1 ) fprintf(stderr,"syntax error: date property was declared more than once\n");
else if ( date < 1 ) fprintf(stderr,"syntax error: date property was not declared (all document properties must be present)\n");
if ( pgsetup > 1 ) fprintf(stderr,"syntax error: pagesetup property was declared more than once\n");
else if ( pgsetup < 1 ) fprintf(stderr,"syntax error: pagesetup property was not declared (all document properties must be present)\n");
if ( tabsz > 1 ) fprintf(stderr,"syntax error: title tabsize was declared more than once\n");
else if ( tabsz < 1 ) fprintf(stderr,"syntax error: title tabsize was not declared (all document properties must be present)\n");
return 0;
}
}
我认为我的问题在于
doc_properties
: /* empty */
| doc_properties header_properties
;
当我把它弄空时只是
\begin{document}
\end{document}
源文件很好。特别是令牌将是
SLASH BLOCK_S LBRACE DOC RBRACE
SLASH BLOCK_E LBRACE DOC RBRACE
当我使用递归添加规则时,虽然它到达'end'时跟踪会说它接受了规则(lexical)然后它生成了语法错误“unexpected BLOCK_E”。我唯一能想到的是它期待一些其他标签,但在递归中我有空的替代,所以为什么......
当我添加最终标签时
\begin{document}
\title{test}
\author{test}
\date{21/02/1985}
\pagesetup{35, 80}
\tabsize(4)
\end{document}
当它达到4时,它表示接受lex文件中的规则和该规则
return NUMBER;
但它表示意外$ undefined,期待NUMBER,它只是说它接受规则,坦率地说我不认为它可以读取任何其他内容......
我的问题是第一部分虽然......
如果有任何帮助,这是flex文件
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "UnicTextLang.y.tab.h"
#define SAVE_S yylval.sVal = strdup(yytext)
#define SAVE_I yylval.iVal = atoi(yytext)
%}
WS [ \t\n\r]
TAG [a-zA-Z_][a-zA-Z0-9\-_]+
WORD [a-zA-Z0-9`~!@#$%\^&*()\-_=+[\]{}\\|;:'",<.>/?]
NUMBER ([1-9])|([1-9][0-9])|([1-3][0-9][0-9])
DIMEN {NUMBER}{WS}*,{WS}*{NUMBER}
DAY (0[1-9])|([12][0-9])|(3[01])
MONTH (0[1-9])|(1[0-2])
YEAR (19|20)[0-9]{2}
DATE {DAY}\/{MONTH}\/{YEAR}
%option yylineno
%option noyywrap
%option noinput
%option nounput
%option debug
%x PROPERTY
%x VALUE
%x BLOCK
%x NUMBER
%%
^\\|{WS}\\ { BEGIN(PROPERTY); /* fprintf(stdout, "FLEX> BEGINING PROPERTY [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return SLASH; }
{WS}?\{ { BEGIN(VALUE); /* fprintf(stdout, "FLEX> READING PROPERTY VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LBRACE; }
{WS}?\( { BEGIN(NUMBER); /* fprintf(stdout, "FLEX> READING NUMBER VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LPAREN; }
{WS} { /* fprintf(stdout, "FLEX> EATING WHITESPACE(i)\n"); */ }
[^ \t\n\r\{(\\][^ \t\n\r]+ { fprintf(stderr, "lexical error[%d]: hingeless word: %s\n", yylineno, yytext); SAVE_S; return WORD; }
. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<PROPERTY>begin { BEGIN(BLOCK); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return BLOCK_S; }
<PROPERTY>end { BEGIN(BLOCK); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return BLOCK_E; }
<PROPERTY>title { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return TITLE; }
<PROPERTY>author { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return AUTHOR; }
<PROPERTY>date { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return DATE; }
<PROPERTY>pagesetup { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return PG_SETUP; }
<PROPERTY>tabsize { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return TAB_SZ; }
<PROPERTY>section { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return SECTION; }
<PROPERTY>paragraph { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return PARAGRAPH; }
<PROPERTY>item { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return ITEM; }
<PROPERTY>newline { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return LINE; }
<PROPERTY>{TAG} { BEGIN(INITIAL); fprintf(stderr, "lexical error[%d]: |%s| undefined property: expecting property\n", yylineno, yytext); SAVE_S; return ERROR_UN; }
<PROPERTY>{WS} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> EATING WHITESPACE(p)\n"); */ }
<PROPERTY>[^ \t\n\r\{(]+ { BEGIN(INITIAL); fprintf(stderr, "lexical error[%d]: |%s| undefined property: illegal character detected\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<PROPERTY>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<VALUE>{WS}*{DIMEN}{WS}* { /* fprintf(stdout, "FLEX> \n\tdims: %s\n\n", yytext); */ SAVE_S; return DIMENSIONS; }
<VALUE>{WS}*{DATE}{WS}* { /* fprintf(stdout, "FLEX> \n\tdate: %s\n\n", yytext); */ SAVE_S; return DATE_VAL; }
<VALUE>[^}]* { /* fprintf(stdout, "FLEX> \n\tstrg: %s\n\n", yytext); */ SAVE_S; return STRING; }
<VALUE>\} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING PROPERTY VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RBRACE; }
<VALUE>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<NUMBER>{WS}*{NUMBER}{WS}* { /* fprintf(stdout, "FLEX> \n\tnumb: %s\n\n", yytext); */ SAVE_I; return NUMBER; }
<NUMBER>[^)]* { fprintf(stderr, "lexical error[%d]: |%s| illegal value: expecting number(1-399)\n", yylineno, yytext); SAVE_S; return STRING; }
<NUMBER>\) { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING NUMBER VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RPAREN; }
<NUMBER>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<BLOCK>{WS}?\{ { /* fprintf(stdout, "FLEX> READING BLOCK TYPE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LBRACE; }
<BLOCK>{WS}*document{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return DOC; }
<BLOCK>{WS}*itemize{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return LIST; }
<BLOCK>{WS}*enumerate{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return ENUM; }
<BLOCK>[^{}]* { fprintf(stderr, "lexical error[%d]: |%s| undefined block type: expecting block type\n", yylineno, yytext); SAVE_S; return ERROR_UN; }
<BLOCK>\} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING BLOCK TYPE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RBRACE;}
<BLOCK>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
%%
答案 0 :(得分:3)
您遇到的基本问题是解析器需要双令牌前瞻来确定doc_properties
结束的位置。这是因为您将'\'
识别为属性字符串中的单独标记,因此在看到输入SLASH BLOCK_S
并且下一个输入标记为SLASH
之后,它不知道它是否应该减少一个空的txt_properties
(在BLOCK_E
之后预期SLASH
),或者转移到header_properties
规则以预期匹配标题属性。
有很多可以解决这个问题的方法。也许最简单的方法是完全删除SLASH
令牌,因为它只是告诉词法分析器何时查找属性字符串。摆脱第一个lex操作中的return SLASH;
语句(因此它不会返回一个令牌,而是在\
之后继续寻找该属性以返回该令牌),并删除{{1它出现在你的语法中。
另一种可能性是解除语法以摆脱ε规则(因为它们需要早期减少导致转移/减少冲突)。如果没有epsilon规则,解析器可以转换为复合状态,同时在RHS上同时识别具有相同前缀的多个规则(这种能力是LR解析LL的优势)。为此,您将拥有以下规则:
SLASH
并会更改source: /* empty */
| entry_point exit_point
| entry_point doc_properties exit_point
| entry_point txt_properties exit_point
| entry_point doc_properties txt_properties exit_point
;
和doc_properties
以识别1或更多而不是0或更多:
txt_properties