flex / bison解析器编译分段错误

时间:2012-09-30 23:22:44

标签: bison flex-lexer

我正在编写一个带有flex / bison的解析器(我可以用Python编写解析器,但我总是喜欢经典。)

当我用这个编译代码时:

gcc -lfl -ly chance.tab.c lex.yy.c -o chance

当我用文件运行程序时,我得到这样的结果:

Segmentation fault (core dumped)

供任何人参考,以下是文件:

chance.y

%{
    #include <stdio.h>
%}

%union {
    char* str;
}

%token ASSERT BREAK CATCH CLASS CONTINUE DEL EACH ELSE ELSEIF FINALLY FROM
%token FUNC IF LOAD PASS PRINT REPEAT RETURN RUN THROW TRY WHILE UNTIL
%token YIELD AND OR NOT KTRUE KFALSE NONE

%token MINUS EXCLAM PERCENT LAND LPAREN RPAREN STAR COMMA DOT SLASH COLON
%token SEMICOLON QUESTION AT LBRACKET BACKSLASH RBRACKET CIRCUMFLEX LBRACE
%token BAR RBRACE TILDE PLUS LTHAN EQUAL GTHAN INTDIV

%token ADDASS SUBASS MULASS DIVASS INTDASS MODASS ANDASS ORASS LTEQ EQUALS
%token GTEQ INCREMENT DECREMENT DBLSTAR

%token<str> NAME STRING INTEGER FLOAT
%token INDENT DEDENT NEWLINE

%type<str> exprs names args kwdspec dfltarg arg arglist exprlist name namelist
%type<str> funcargs parenexpr lstexpr eachspec optargs inheritance addop
%type<str> expr ifs elifs elif elses trys catchs catchx finally suite stmts
%type<str> stmt program

%start program

%%

exprs: expr                         { $$ = $1; }
|   exprs COMMA expr                { sprintf($$, "%s %s", $1, $3); }
;

names: name                         { $$ = $1; }
|   names COMMA name                { sprintf($$, "%s %s", $1, $3); }
;

args: arg                           { $$ = $1; }
|   args COMMA arg                  { sprintf($$, "%s %s", $1, $3); }
;

kwdspec:                            { $$ = "regular"; }
|   STAR                            { $$ = "list"; }
|   DBLSTAR                         { $$ = "keyword"; }
;

dfltarg:                            { $$ = "null"; }
|   EQUAL expr                      { $$ = $2; }
;

arg: kwdspec name dfltarg
        { sprintf($$, "(argument %s %s %s)", $1, $2, $3); } ;

arglist: args                       { sprintf($$, "[%s]", $1); } ;
exprlist: exprs                     { sprintf($$, "[%s]", $1); } ;
name: NAME                          { sprintf($$, "(name %s)", $1); } ;
namelist: names                     { sprintf($$, "[%s]", $1); } ;
funcargs: LPAREN arglist RPAREN     { $$ = $2 } ;
parenexpr: LPAREN exprlist RPAREN   { sprintf($$, "(tuple %s)", $2); } ;
lstexpr: LBRACKET exprlist RBRACKET { sprintf($$, "(list %s)", $2); } ;

eachspec: BAR namelist BAR          { sprintf($$, "(each-spec %s)", $2); } ;

optargs:                            { $$ = ""; }
|   funcargs                        { $$ = $1; }
;

inheritance:                        { $$ = ""; }
|   parenexpr                       { $$ = $1; }
;

addop:
    ADDASS                          { $$ = "add"; }
|   SUBASS                          { $$ = "sub"; }
|   MULASS                          { $$ = "mul"; }
|   DIVASS                          { $$ = "div"; }
|   INTDASS                         { $$ = "int-div"; }
|   MODASS                          { $$ = "mod"; }
|   ANDASS                          { $$ = "and"; }
|   ORASS                           { $$ = "or"; }
;

expr:       /* NotYetImplemented! */
    NUMBER                          { sprintf($$, "(number %s)", $1); }
|   TRUE                            { $$ = "(true)"; }
|   FALSE                           { $$ = "(false)"; }
|   NONE                            { $$ = "(none)"; }
|   STRING                          { sprintf($$, "(string %s)", $1); }
|   lstexpr                         { $$ = $1; }
;

ifs: IF expr suite                  { sprintf($$, "(if %s %s)", $2, $3); } ;

elifs:                              { $$ = ""; }
|   elifs elif                      { sprintf($$, "%s %s", $1, $2); }
;

elif: ELSEIF expr suite             { sprintf($$, "(else-if %s %s)", $2, $3); } ;

elses:                              { $$ = ""; }
|   ELSE suite                      { sprintf($$, "(else %s)", $2); }
;

trys: TRY suite                     { sprintf($$, "(try %s)", $2); } ;

catchs:                             { $$ = ""; }
| catchs catchx                     { sprintf($$, "%s %s", $1, $2); }
;

catchx: CATCH expr suite            { sprintf($$, "(catch %s %s)", $2, $3); } ;

finally: FINALLY suite              { sprintf($$, "(finally %s)", $2); } ;

suite: COLON stmts SEMICOLON        { sprintf($$, "(block [%s])", $2); } ;

stmts:                              { $$ = ""; }
|   stmts NEWLINE stmt              { sprintf($$, "%s %s", $1, $3); }
;

stmt:
    ASSERT expr                     { printf("(assert %s)", $2); }
|   BREAK                           { printf("(break)"); }
|   CATCH expr suite                { printf("(catch %s %s)", $2, $3); }
|   CLASS name inheritance suite    { printf("(class %s %s %s)", $2, $3, $4); }
|   CONTINUE                        { printf("(continue)"); }
|   DEL expr                        { printf("(del %s)", $2); }
|   expr DOT EACH eachspec suite    { printf("(each %s %s %s)", $1, $4, $5); }
|   FROM name LOAD namelist         { printf("(from %s %s)", $2, $4); }
|   FUNC name optargs suite         { printf("(func %s %s %s)", $2, $3, $4); }
|   ifs elifs elses                 { printf("(if-block %s %s %s)", $1, $2, $3); }
|   LOAD namelist                   { printf("(load %s)", $2); }
|   PASS                            { printf("(pass)"); }
|   PRINT expr                      { printf("(print %s)", $2); }
|   REPEAT expr suite               { printf("(repeat %s %s)", $2, $3); }
|   RUN expr                        { printf("(run %s)", $2); }
|   THROW expr                      { printf("(throw %s)", $2); }
|   trys catchs elses finally       { printf("(try-block %s %s %s %s)", $1, $2, $3, $4); }
|   WHILE expr suite                { printf("(while %s %s)", $2, $3); }
|   UNTIL expr suite                { printf("(until %s %s)", $2, $3); }
|   YIELD expr                      { printf("(yield %s)", $2); }
|   RETURN expr                     { printf("(return %s)", $2); }
|   expr addop expr                 { printf("(%s-assign %s %s)", $2, $1, $3); }
|   expr INCREMENT                  { printf("(increment %s)", $1); }
|   expr DECREMENT                  { printf("(decrement %s)", $1); }
|   expr                            { printf("(expr-stmt %s)", $1); }
;

program: stmts                      { printf("(program [%s])", $1); } ;

chance.l

%{
    #include <assert.h>
    #include <stdio.h>

    #include "parser.tab.h"
%}

%option yylineno
%option noyywrap

%%

"assert"    { return ASSERT; }
"break"     { return BREAK; }
"catch"     { return CATCH; }
"class"     { return CLASS; }
"continue"  { return CONTINUE; }
"del"       { return DEL; }
"each"      { return EACH; }
"else"      { return ELSE; }
"elseif"    { return ELSEIF; }
"finally"   { return FINALLY; }
"from"      { return FROM; }
"func"      { return FUNC; }
"if"        { return IF; }
"load"      { return LOAD; }
"pass"      { return PASS; }
"print"     { return PRINT; }
"repeat"    { return REPEAT; }
"return"    { return RETURN; }
"run"       { return RUN; }
"throw"     { return THROW; }
"try"       { return TRY; }
"while"     { return WHILE; }
"until"     { return UNTIL; }
"yield"     { return YIELD; }
"and"       { return AND; }
"or"        { return OR; }
"not"       { return NOT; }
"true"      { return KTRUE; }
"false"     { return KFALSE; }
"none"      { return NONE; }

-           { return MINUS; }
!           { return EXCLAM; }
%           { return PERCENT; }
&           { return LAND; }
\(          { return LPAREN; }
\)          { return RPAREN; }
\*          { return STAR; }
,           { return COMMA; }
\.          { return DOT; }
\/          { return SLASH; }
:           { return COLON; }
;           { return SEMICOLON; }
\?          { return QUESTION; }
 @          { return AT; }
\[          { return LBRACKET; }
\]          { return RBRACKET; }
\^          { return CIRCUMFLEX; }
\{          { return LBRACE; }
\}          { return RBRACE; }
\|          { return BAR; }
~           { return TILDE; }
\+          { return PLUS; }
\<          { return LTHAN; }
=           { return EQUAL; }
\>          { return GTHAN; }
\/\/        { return INTDIV; }
\+=         { return ADDASS; }
-=          { return SUBASS; }
\*=         { return MULASS; }
\/=         { return DIVASS; }
\/\/=       { return INTDASS; }
%=          { return MODASS; }
&=          { return ANDASS; }
\|=         { return ORASS; }
\<=         { return LTEQ; }
==          { return EQUALS; }
\>=         { return GTEQ; }
\+\+        { return INCREMENT; }
--          { return DECREMENT; }
\*\*        { return DBLSTAR; }

[[:digit:]]+([eE][+-]?[[:digit:]]+)?                { yylval.str = strdup(yytext); return INTEGER; }
[[:digit:]]+\.[[:digit:]]+([eE][+-]?[[:digit:]]+)?  { yylval.str = strdup(yytext); return FLOAT; }
[a-zA-Z_][a-zA-Z0-9_]*                              { yylval.str = strdup(yytext); return NAME; }

\"([^\"])*\"    { yylval.str = strdup(yytext); return STRING; }
\'([^\'])*\'    { yylval.str = strdup(yytext); return STRING; }
`([^`])*`       { yylval.str = strdup(yytext); return STRING; }

"<INDENT>"  { return INDENT; }
"<DEDENT>"  { return DEDENT; }
"<NEWLINE>" { return NEWLINE; }

#.*         { }

[ \\\t]     {}
\n          { (yylineno) += 0.5; }
.           { yyerror(); }

%%

int yyerror(void)
{
    printf("Invalid syntax on line %d: '%s'\n", yylineno, yytext);
}

int main()
{
    yyparse();
    printf("\n");
    return 0;
}

如果上述程序适用于任何人,请使用我的小编程语言中的示例代码:

test.ch

from timer load x

func x(f=0, **k):
    5.each|x|:
        continue;;

class OhHey:
    func __init__:
        print 5;;

while true:
    print x;

[1, 2, 3]
(1, 2, 3)

提前致谢。可能有~~

编辑:输入了新的和改进的代码(不幸的是,它仍会产生段错误。)

1 个答案:

答案 0 :(得分:3)

你的词法分析器从不设置yylval,因此当你的解析器读取一个令牌的值时,它会得到随机的垃圾。例如,在您的规则中:

expr: NUMBER { sprintf($$, "(number %s)", $1); }

$1引用来自NUMBER的令牌值,因此将是随机垃圾。此外,$$是规则的输出,因此您在此处传递给sprintf的值也将是随机垃圾(因为您没有先将其设置为某些内容)。

修改

一个“简单”的解决方案是大量使用strdup/asprintf为字符串分配内存。例如,在你的.l文件中,你会有类似的东西:

[+-]?[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)? { yylval = strdup(yytext); return NUMBER; }

然后你的expr规则是:

expr: NUMBER { asprintf(&$$, "(number %s)", $1); free($1); }

当然,问题在于找出所有释放应该避免泄漏记忆的地方可能很难。