Bison / Flext Concat Char *

时间:2015-08-07 12:27:11

标签: c++ string concatenation bison interpreter

我为我的新语言解析和计算结果制作了.l和.y文件:它运行正常!像这样的字符串:

  

SET(IFEL(MAJEQ(IFEL(EQ(VAL(16),MUL(VAL(2),VAL(8))),VAL(11),VAL(10)),VAL(10)),MUL (VAL(3),VAL(4)),SUB(VAL(6),VAL(2))))

由我的两个文件正确解析和计算:

%{
#include <stdio.h>
#include <string>
#include <cstring>
using namespace std;
extern int yylex();
extern void yyerror(char*);
%}

//Symbols
%union
{
    char *str_val;
    int int_val;
};

%token OPEN;
%token CLOSE;
%token SET;
%token STORE;
%token MUL;
%token ADD;
%token DIV;
%token SUB;
%token ABS;
%token IFEL;
%token AND;
%token OR;
%token NOT;
%token MAJEQ;
%token MINEQ;
%token MAJ;
%token MIN;
%token EQ;
%token GET;
%token S; /* separator */
%token VAR;
%token VAL;
%token <int_val>    NUMBER
%token <str_val>    IDENTIFIER

%type <int_val> Exp
%type <int_val> Cond
%type <int_val> Check
%type <int_val> Var

%start Expression

%%

Expression:
    /* empty */
    | SET OPEN Exp CLOSE
    {
        printf("value set %d\n",$3);
    }
    | STORE OPEN VAR OPEN IDENTIFIER CLOSE S Exp CLOSE
    {
        printf("var %s set on %d\n",$5,$8);
    }
    ;

Exp:
    Var
    | IFEL OPEN Cond S Exp S Exp CLOSE
    {
        if($3==1){
            $$ = $5;
        }else{
            $$ = $7;
        }
    }
    | ADD OPEN Exp S Exp CLOSE
    {
        $$ = $3+$5;
    }
    | SUB OPEN Exp S Exp CLOSE
    {
        $$ = $3-$5;
    }
    | MUL OPEN Exp S Exp CLOSE
    {
        $$ = $3*$5;
    }
    | DIV OPEN Exp S Exp CLOSE
    {
        $$ = $3/$5; //TBD check div 0
    }
    | ABS OPEN Exp CLOSE
    {
        $$ = $3; //TBD
    }
    ;

Cond:
    NOT OPEN Cond CLOSE
    {
        int result = $3;
        if(result==1) $$ = 0;
        else $$ = 1;
    }
    | AND OPEN Cond S Cond CLOSE
    {
        int result1 = $3;
        int result2 = $5;
        if(result1==1 && result2==1) $$ = 1;
        else $$ = 0;
    }
    | OR OPEN Cond S Cond CLOSE
    {
        int result1 = $3;
        int result2 = $5;
        if(result1==1 || result2==1) $$ = 1;
        else $$ = 0;
    }
    | Check
    ;

Check:
    MAJ OPEN Exp S Exp CLOSE
    {
        int val1 = $3;
        int val2 = $5;
        if(val1>val2) $$ = 1;
        else $$ = 0;    
    }
    | MIN OPEN Exp S Exp CLOSE
    {
        int val1 = $3;
        int val2 = $5;
        if(val1<val2) $$ = 1;
        else $$ = 0;    
    }
    | EQ OPEN Exp S Exp CLOSE
    {
        int val1 = $3;
        int val2 = $5;
        if(val1==val2) $$ = 1;
        else $$ = 0;    
    }
    | MAJEQ OPEN Exp S Exp CLOSE
    {
        int val1 = $3;
        int val2 = $5;
        if(val1>=val2) $$ = 1;
        else $$ = 0;    
    }
    | MINEQ OPEN Exp S Exp CLOSE
    {
        int val1 = $3;
        int val2 = $5;
        if(val1<=val2) $$ = 1;
        else $$ = 0;    
    }
    ;

Var:
    VAR OPEN IDENTIFIER CLOSE
    {
        $$ = atoi($3); //TBD 
    }
    | VAL OPEN NUMBER CLOSE
    {
        $$ = $3;
    }
    | GET OPEN CLOSE
    {
        $$ = 11; //TBD 
    }
    ;
%%

%{
#include <string>
#include "expression.tab.h"
void yyerror(char*);
extern void printVars();
int yyparse(void);
%}

%%

[ \t\n]+    { /* ignore */ };

"("         return(OPEN);
")"         return(CLOSE);
"SET"       return(SET);
"STORE"     return(STORE);
"MUL"       return(MUL);
"ADD"       return(ADD);
"DIV"       return(DIV);
"SUB"       return(SUB);
"ABS"       return(ABS);
"IFEL"      return(IFEL);
"NOT"       return(NOT);
"AND"       return(AND);
"OR"        return(OR);
"MAJEQ"     return(MAJEQ);
"MINEQ"     return(MINEQ);
"MAJ"       return(MAJ);
"MIN"       return(MIN);
"EQ"        return(EQ);
"VAR"       return(VAR);
"VAL"       return(VAL);
"GET"       return(GET);
","         return(S);

[[:digit:]]+  { yylval.int_val = atoi(yytext);   return NUMBER;}
[[:alnum:]]+  { yylval.str_val = strdup(yytext); return IDENTIFIER;}

.           return yytext[0];

%%

void yyerror(char *s){ 
    printf("<ERR> %s at %s in this line:\n", s, yytext);
    }

int yywrap (void){
    }

int main(int num_args, char** args){
    if(num_args != 2) {printf("usage: ./parser filename\n"); exit(0);}
    FILE* file = fopen(args[1],"r");
    if(file == NULL) {printf("couldn't open %s\n",args[1]); exit(0);}
    yyin = file;
    yyparse();
    fclose(file);
}

但实际上你可以看到输入中Var内的值不是静态的,而应该是动态的。所以我的下一步是修改项目:而不是计算应该写一个C ++代码,以使计算动态。

我的问题: 1)你有一个更好的解决方案,而不是连接每个步骤char *用于制作代码? 2)如果没有,你能帮我找到一种巧妙的方法来连接所有字符串并解决我在编译时遇到的以下错误:

  

expression.y:75:43:错误:类型'const char [2]'的操作数无效   和'char *'到二元'运算符+'$$ =“(”+ $ 3 +“ - ”+ $ 5 +“)”;

......我不想每次都使用“malloc”......

char* str;
str = malloc(1+strlen(text1)+strlen(text2));
strcpy(str, text1);
strcat(str, text2);

有更聪明的方法吗?遵循flex和bison修改过的文件:

expression.l

%{
#include <string>
#include "expression.tab.h"
void yyerror(char*);
extern void printVars();
int yyparse(void);
%}

%%

[ \t\n]+    { /* ignore */ };

"("         return(OPEN);
")"         return(CLOSE);
"SET"       return(SET);
"STORE"     return(STORE);
"MUL"       return(MUL);
"ADD"       return(ADD);
"DIV"       return(DIV);
"SUB"       return(SUB);
"ABS"       return(ABS);
"IFEL"      return(IFEL);
"NOT"       return(NOT);
"AND"       return(AND);
"OR"        return(OR);
"MAJEQ"     return(MAJEQ);
"MINEQ"     return(MINEQ);
"MAJ"       return(MAJ);
"MIN"       return(MIN);
"EQ"        return(EQ);
"VAR"       return(VAR);
"VAL"       return(VAL);
"GET"       return(GET);
","         return(S);

([a-z0-9]+)|([0-9]+.[0-9]+)     { yylval.str_val = strdup(yytext); return IDENTIFIER;}

.           return yytext[0];

%%

void yyerror(char *s){ 
    printf("<ERR> %s at %s in this line:\n", s, yytext);
    }

int yywrap (void){
    }

int main(int num_args, char** args){
    if(num_args != 2) {printf("usage: ./parser filename\n"); exit(0);}
    FILE* file = fopen(args[1],"r");
    if(file == NULL) {printf("couldn't open %s\n",args[1]); exit(0);}
    yyin = file;
    yyparse();
    fclose(file);
}

expression.y

%{
#include <stdio.h>
#include <string>
#include <cstring>
using namespace std;
extern int yylex();
extern void yyerror(char*);
%}

//Symbols
%union
{
    char *str_val;
    int int_val;
};

%token OPEN;
%token CLOSE;
%token SET;
%token STORE;
%token MUL;
%token ADD;
%token DIV;
%token SUB;
%token ABS;
%token IFEL;
%token AND;
%token OR;
%token NOT;
%token MAJEQ;
%token MINEQ;
%token MAJ;
%token MIN;
%token EQ;
%token GET;
%token S; /* separator */
%token VAR;
%token VAL;

%token <str_val> IDENTIFIER

%type <str_val> Exp
%type <str_val> Cond
%type <str_val> Check
%type <str_val> Var

%start Expression

%%

Expression:
    /* empty */
    | SET OPEN Exp CLOSE
    {
        printf("%s\n",$3);
    }
    | STORE OPEN VAR OPEN IDENTIFIER CLOSE S Exp CLOSE
    {
        printf("var %s with following code:\n%s\n",$5,$8);
    }
    ;

Exp:
    Var
    | IFEL OPEN Cond S Exp S Exp CLOSE
    {
        $$ = "("+$3+" == 'true') ? ("+$5+") : ("+$7+")";
    }
    | ADD OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"+"+$5+")"; 
    }
    | SUB OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"-"+$5+")"; 
    }
    | MUL OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"*"+$5+")"; 
    }
    | DIV OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"/"+$5+")"; //TBD check div 0
    }
    | ABS OPEN Exp CLOSE
    {
        $$ = "("+$3+">0) ? "+$3+" : "(+$3+"*(-1))";
    }
    ;

Cond:
    NOT OPEN Cond CLOSE
    {
        $$ = "("+$3+"=='true') ? 'false' : 'true'";
    }
    | AND OPEN Cond S Cond CLOSE
    {
        $$ = "("+$3+"=='true' && "+$5+"=='true') ? 'true' : 'false'";
    }
    | OR OPEN Cond S Cond CLOSE
    {
        $$ = "("+$3+"=='true' || "+$5+"=='true') ? 'true' : 'false'";
    }
    | Check
    ;

Check:
    MAJ OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+">"+$5+") ? 'true' : 'false'";
    }
    | MIN OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"<"+$5+") ? 'true' : 'false'";  
    }
    | EQ OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"=="+$5+") ? 'true' : 'false'"; 
    }
    | MAJEQ OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+">="+$5+") ? 'true' : 'false'";
    }
    | MINEQ OPEN Exp S Exp CLOSE
    {
        $$ = "("+$3+"<="+$5+") ? 'true' : 'false'";
    }
    ;

Var:
    VAR OPEN IDENTIFIER CLOSE
    {
        //TBD check if variable exists in the engine
        $$ = $3;
    }
    | VAL OPEN IDENTIFIER CLOSE
    {
        //TBD check correct value
        $$ = $3;
    }
    | GET OPEN CLOSE
    {
        $$ = "getField()"; //TBD to implement in the engine
    }
    ;
%%

2 个答案:

答案 0 :(得分:1)

如果没有某种形式的内存分配,很难进行字符串连接。当然,可以避免避免malloc - 您可以使用new代替,或隐藏std::stringstd::stringstream内的内存分配 - 但最终,你将不得不处理动态内存分配,并在不再需要时释放内存。

值得注意的是,在strdup的扫描程序操作中(正确)使用IDENTIFIER是内存泄漏,因为您永远不会释放已分配的内存。所以你已经需要处理这个问题了。

正如您所注意到的,在C中进行字符串连接可能非常笨重。在这样的情况下,减少笨拙是值得的。我首选的解决方案是我的包装函数concatf,其原型就像printf,除了它返回malloc字符串而不是打印。 (有关各种平台的实现,请参阅this answer。)

借助此功能,可以编写:

Exp:
    Var
    | IFEL OPEN Cond S Exp S Exp CLOSE
    {
        $$ = concatf("(%s == 'true') ? (%s) : (%s)", $3, $5, $7);
    }

请注意x == 'true'无效C ++。你可能意味着== true,但这是一个危险的习语;更好的是对bool的显式转换(虽然这在三元运算符的上下文中实际上是多余的),所以我认为你真的想要

        $$ = concatf("bool(%s) ? (%s) : (%s)", $3, $5, $7);

或只是

        $$ = concatf("(%s) ? (%s) : (%s)", $3, $5, $7);

但是,如上所述,这会导致内存泄漏,因为malloc的字符串永远不会被释放。所以让我们解决这个问题。首先,在每个操作中,必须明确释放所有从未再次使用过的malloc值。在像你这样的简单情况下,这将是所有malloc'd值,除了单位产品,其中malloc'd值只被分配给不同的非终端。由于所有IDENTIFIER都具有由strdup创建的语义值,因此可以合理地假设所有str_val值都已被malloc化(并且这需要是约束;如果您创建了{ {1}}来自文字字符串的值,你最终会遇到问题)。现在,我们可以编写规则:

str_val

另一个例子。请注意最后一条规则中添加的Exp: Var { /* No free needed; this is a unit production */ } | IFEL OPEN Cond S Exp S Exp CLOSE { $$ = concatf("(%s) ? (%s) : (%s)", $3, $5, $7); free($3); free($5); free($7); }

strdup

(还有替代方法可以在文字上调用Var: VAR OPEN IDENTIFIER CLOSE { $$ = $3; /* No free needed; value is moved on the stack */ } | VAL OPEN IDENTIFIER CLOSE { $$ = $3; /* As above */ } | GET OPEN CLOSE { $$ = strdup("getField()"); /* str_val's must be malloc'd */ } ; ,但通常用例并不常见,而且开销很小。)

该样式将处理执行规则操作的所有情况,但也有一些情况下,bison将在不调用规则的情况下从堆栈中丢弃值。这将在错误恢复期间发生,并且在解析器堆栈非空时在解析失败时结束。为了帮助解决这种情况,bison允许你声明一个destructor action,它将在它丢弃的每个堆栈值上调用。在这种情况下,声明几乎是微不足道的:

strdup

答案 1 :(得分:0)

嗯......我用这种方式解决了这个问题:

...
Exp:
    Var
    | IFEL OPEN Cond S Exp S Exp CLOSE
    {
        string t1 = $3;
        string t2 = $5;
        string t3 = $7;
        string result = "("+t1+" == 'true') ? ("+t2+") : ("+t3+")";
        $$ = (char*)result.c_str();
    }
...

工作正常......