Question

我开始为一种用于管理图（graph）的简单语言编写一个轻量级解释器。我正在使用flex和bison，但在定义语法时遇到了一些问题。

目前，我只想解析三条简单的命令：

load "file-name"
save "file-name"
exit

这是yacc中的语法：

%{

# include <iostream>  

  using namespace std;

 /* Scanner entry point and error reporter; both are only declared here. */
 int yylex(void);
 void yyerror(char const *);

%}

/* Named terminal symbols. Of these, only LOAD, SAVE, EXIT and STRCONST
   appear in the rules below. */
%token LOAD SAVE RIF COD EXIT STRCONST VARNAME 

%%

/* The whole input consists of exactly one line. */
input: line
;

/* A line is a single command, optionally followed by the character
   token '\n'. Each alternative prints a trace message. */
line: cmd_unit '\n'
{
  cout << "PARSED LINE with EOL" << endl; 
}
| cmd_unit
{
  cout << "PARSED LINE without EOL" << endl; 
}
;  

/* The three supported commands. LOAD and SAVE take a string constant;
   the EXIT action is empty. */
cmd_unit: LOAD STRCONST
{
  cout << "PARSED LOAD" << endl;
}
| SAVE STRCONST 
{
  cout << "PARSED SAVE" << endl;
}
| EXIT { }
;

%%

下面是用lex编写的词法分析器，以及一个非常简单的REPL：

%{

# include <net-parser.H>
# include "test.tab.h"


  /* Semantic-value object filled in by the scanner actions; `yylval` is
     #defined to this name just below. */
  YYSTYPE netyylval;
  /* Incremented by the rules each time a newline is consumed. */
  size_t curr_lineno = 0;

# define yylval netyylval

/* Size of the string-constant buffer. put_char_in_buf keeps the last slot
   free, and the end-of-string rule writes the terminating NUL there. */
# define MAX_STR_CONST 4097
/* Not referenced anywhere in the code shown here. */
# define MAX_CWD_SIZE 4097
# define YY_NO_UNPUT   /* keep g++ happy */



/* define YY_INPUT so we read through readline (currently disabled) */
/* # undef YY_INPUT */
/* # define YY_INPUT(buf, result, max_size) result = get_input(buf, max_size); */


char string_buf[MAX_STR_CONST]; /* to assemble string constants */
char *string_buf_ptr = string_buf; /* next free position in string_buf */


/*
 *  Add Your own definitions here
 */

 /* Set by put_char_in_buf and by the escaped-NUL rules; cleared when a new
    string constant starts. */
 bool string_error = false;

 /*
  * Appends c to the string-constant buffer.
  *
  * Returns true when the character was stored. When the write position has
  * reached the last slot of string_buf (kept free for the terminating NUL),
  * nothing is stored: an error message is recorded in yylval, string_error
  * is raised and false is returned.
  */
 inline bool put_char_in_buf(char c)
 {
   char * const last_slot = string_buf + (MAX_STR_CONST - 1);
   const bool has_room = (string_buf_ptr != last_slot);

   if (has_room)
     {
       *string_buf_ptr = c;
       ++string_buf_ptr;
     }
   else
     {
       yylval.error_msg = "String constant too long";
       string_error = true;
     }

   return has_room;
 }

%}

%x STRING

/*
 * Define names for regular expressions here.
 */
/* Keywords */
LOAD         [lL][oO][aA][dD]
SAVE         [sS][aA][vV][eE]
RIF          [rR][iI][fF]
COD          [cC][oO][dD]
EXIT         [eE][xX][iI][tT]

DIGIT           [0-9]
UPPER_LETTER    [A-Z]
LOWER_LETTER    [a-z]
ANY_LETTER      ({UPPER_LETTER}|{LOWER_LETTER})
SPACE           [ \f\r\t\v]
NEWLINE         \n

INTEGER         {DIGIT}+
ID              {INTEGER}
VARNAME         {ANY_LETTER}([_\.-]|{ANY_LETTER}|{DIGIT})*

%%

{SPACE}  /* Ignore spaces */ 

 /* Count the line break and return the NEWLINE value to the caller. */
{NEWLINE} { ++curr_lineno; return NEWLINE; }

 /*
  * Keywords are matched case-insensitively: each keyword pattern lists
  * both cases of every letter.
  * NOTE(review): this comment used to mention lower-case-initial true/false
  * values; no such rules are visible in this scanner.
  */
{LOAD}       return LOAD; 
{SAVE}       return SAVE;
{RIF}        return RIF;
{COD}        return COD;
{EXIT}       return EXIT;

 /*
  * The single-character tokens: the matched character itself is returned
  * as the token value.
  */
[=;]          return *yytext;


 /*
  *  String constants (C syntax)
  *  Escape sequence \c is accepted for all characters c. Except for 
  *  \n \t \b \f, the result is c.
  *
  *  Characters are accumulated in string_buf while in the exclusive
  *  STRING start condition.
  */

\" { /* start of string: reset the buffer and the error flag */
  string_buf_ptr = &string_buf[0];
  string_error = false;
  BEGIN(STRING); 
} 
<STRING>[^\\\"\n\0] {
  /* ordinary character: copy it into the buffer */
  if (not put_char_in_buf(*yytext))
    return ERROR;
 }
<STRING>\\\n {  // escaped newline: backslash followed by a real line break
  if (not put_char_in_buf('\n'))
    return ERROR;
  ++curr_lineno;
 } 
<STRING>\\n {
  /* the two characters \ and n become a newline */
  if (not put_char_in_buf('\n'))
    return ERROR;
 }
<STRING>\\t {
  /* \t becomes a tab */
  if (not put_char_in_buf('\t'))
    return ERROR;
 }
<STRING>\\b {
  /* \b becomes a backspace */
  if (not put_char_in_buf('\b'))
    return ERROR;
 }
<STRING>\\f {
  /* \f becomes a form feed */
  if (not put_char_in_buf('\f'))
    return ERROR; 
}
<STRING>\\\0 {
  /* backslash followed by a NUL byte; the scanner stays in STRING */
  yylval.error_msg = "String contains escaped null character.";
  string_error = true;
  return ERROR;
 }
<STRING>{NEWLINE} {
  /* unescaped line break inside a string: leave STRING and report it */
  BEGIN(INITIAL);
  ++curr_lineno;
  yylval.error_msg = "Unterminated string constant";
  return ERROR;
 }
<STRING>\" { /* end of string */
  *string_buf_ptr = '\0';
  BEGIN(INITIAL);  
  /* When string_error is set nothing is returned here and scanning
     simply continues in INITIAL. */
  if (not string_error)
    {
      yylval.symbol = strdup(string_buf); // TODO: watch out for this memory leak
      return STRCONST;
    }
 }
<STRING>\\[^\n\0ntbf] {
  /* any other escaped character: keep the character after the backslash */
  if (not put_char_in_buf(yytext[1]))
    return ERROR; 
 }
<STRING>'\0' {
  /* NOTE(review): single quotes are ordinary characters in a flex pattern,
     so this appears to match the three characters quote, NUL, quote rather
     than a lone NUL byte; the message is also identical to the one used for
     an escaped NUL. Confirm the intent. */
  yylval.error_msg = "String contains escaped null character.";
  string_error = true;
  return ERROR;
 }
<STRING><<EOF>> {
  /* input ended before the closing quote */
  yylval.error_msg = "EOF in string constant";
  BEGIN(INITIAL);
  return ERROR;
 }

{ID} { // matches integer constant 
  /* stores a pointer to yytext itself; no copy of the text is made */
  yylval.symbol = yytext;
  return ID;  
}

{VARNAME} {
  /* stores a pointer to yytext itself; no copy of the text is made */
  yylval.symbol = yytext;
  return VARNAME;
}

. {
  /* catch-all for any character not matched by an earlier rule */
  cout << "LEX ERROR" << endl;
  yylval.error_msg = yytext;
  return ERROR; 
 }
%%

// End-of-input hook called by the generated scanner.
// Always returns 1; per the flex convention a non-zero result means there
// is no further input to continue with.
int yywrap()
{
  const int no_more_input = 1;
  return no_more_input;
}

extern int yyparse();

// Builds the REPL prompt for input number i: the number in decimal
// followed by " > ".
string get_prompt(size_t i)
{
  ostringstream prompt;
  prompt << i;
  prompt << " > ";
  return prompt.str();
}

int main()
{

  for (size_t i = 0; true; ++i)
     {
       string prompt = get_prompt(i);
       char * line = readline(prompt.c_str());
       if (line == nullptr)
     break;

       YY_BUFFER_STATE bp = yy_scan_string(line);
       yy_switch_to_buffer(bp);
       free(line);

       int status = yyparse();

       cout << "PARSING STATUS = " << status << endl;

       yy_delete_buffer(bp);
     }
}

正如您可能看到的那样，词法分析器的很大一部分用于识别字符串常量。我不知道这个词法分析器是否完美、优雅，但我可以说我已经对它进行了充分的测试，确信它可以正常工作。

现在，当程序被调用时，这是一个跟踪：

0 > load "name"
ERROR syntax error 
PARSING STATUS = 1
1 >

也就是说，这个（肯定是被我写错了的）语法无法识别下面这条规则：

cmd_unit: LOAD STRCONST

好吧，虽然我确信自己并不精通语法方面的知识，但我已经花了相当多的时间来理解这个如此简单短小的规范，却仍然不明白为什么它连一条非常简单的规则都解析不了。我几乎可以肯定这是一个愚蠢的错误，但我实在看不出它是什么。

所以，我真的很感激任何帮助。

Answer 1

这里有一个问题：

{NEWLINE} { ++curr_lineno; return NEWLINE; }

我不确定这是怎么通过编译的，因为NEWLINE并没有被定义为记号（token）。我在任何地方都没有看到它的定义（模式宏不算，因为它们在生成扫描器之前就已经被展开处理掉了）。

由于你的语法期望'\n'作为换行符的标记值，这就是你需要返回的内容：

{NEWLINE} { ++curr_lineno; return '\n'; }

如果没有调试辅助工具，解决这类问题可能会非常棘手。幸运的是，flex和bison都带有调试选项，可以很容易地看到实际发生了什么（也就不必在bison动作中加入您自己的跟踪消息）。

对于flex，在生成扫描器时使用-d标志。这会打印出大量有关扫描器运行过程的信息。（就这个问题而言，这大概也是最应该首先着手检查的地方。）

对于bison，在生成解析器时使用-t标志，并将全局变量yydebug设置为非零值。由于bison的跟踪输出取决于全局变量yydebug（其默认值为0）的设置，您可以在调用bison时始终加上-t标志，这样以后要关闭跟踪时就不必重新生成文件。

注意：在ID和VARNAME规则中，您将yytext插入语义值：

yylval.symbol = yytext;

那样不行。yytext只在下一次调用yylex之前有效，因此当使用该语义值的bison动作执行时，yytext所指向的字符串很可能已经改变。（即使bison动作只引用右部的最后一个记号，情况也可能如此，因为bison通常会在决定执行归约之前先读取一个前瞻记号。）您必须复制该记号（例如使用strdup），并且记得在不再需要这个值时释放它。

关于风格的说明。只是个人意见，随意忽略：

就个人而言，我发现过度使用模式宏会分散注意力。您可以将该规则写为：

\n        { ++curr_lineno; return '\n'; }

同样，您可以使用Posix标准字符类，而不是定义DIGIT，UPPER_LETTER等等。

INTEGER   [[:digit:]]+
VAR_NAME  [[:alpha:]][[:alnum:]_.-]*

（在字符类中不需要对 . 进行反斜杠转义。）

使用yacc和readline解析行

1 个答案: