Jison Google-Like Parser

时间:2014-06-24 22:08:11

标签: javascript jison

我编写了一个使用Jison的解析器,它能够使用运算符和布尔运算支持处理类似谷歌的搜索查询。目前,我很难弄清楚如何接受AND OR和NOT运算符之间的空格。任何帮助将不胜感激,我在下面附上了一些所需输入/输出的例子。

输入:

  1. true&&假||真
  2. (true)&& (假|| true)
  3. 真安培;&安培;假||真
  4. 结果:

    1-3。 ([TRUE]&安培;及([FALSE] || [TRUE]))

    代码:

    %lex
    %%
    
    /* Lexical Grammar */
    
    "AND"|"&&"          { return "AND" }
    "OR"|"||"           { return "OR" }
    "NOT"|"!"           { return "NOT" }
    "("                 { return "OPEN" }
    ")"                 { return "CLOSE" }
    ":"                 { return "QUAL" }
    "-"                 { return "DASH" }
    "\""|"'"            { return "QUOTE" }
    \s+                 { return "SPACE" }
    \w+                 { return "WORD" }
    "."                 { return "DOT" }
    <<EOF>>             { return "EOF" }
    .                   { return "INVALID" }
    /lex
    
    /* Operators */
    
    %right AND OR
    %right NOT
    %right QUAL DASH DOT
    
    %start START
    
    %%
    
    /* Language Grammar */
    
    START
        : EXP EOF
            { return $1; }
        ;
    
    EXP
        : EXP AND EXP
            { $$ = "(" + $1 + "&&" + $3 + ")"; }
        | EXP OR EXP
            { $$ = "(" + $1 + "||" + $3 + ")"; }
        | NOT EXP
            { $$ = "(!" + $2 + ")"; }
        | OPEN EXP CLOSE
            { $$ = $2; }
        | ARGS
            { $$ = "[" + $1 + "]"; }
        ;
    
    ARGS
        : ARG SPACE ARGS
            { $$ = [ $1 ].concat($3); }
        | OP SPACE ARGS
            { $$ = [ $1 ].concat($3); }
        | ARG
            { $$ = [ $1 ]; }
        | OP
            { $$ = [ $1 ]; }
        ;
    
    OP
        : DASH OP
            { $$ = "-" + $2; }
        | ARG QUAL ARG
            { $$ = $1 + ":" + $3; }
        ;
    
    ARG
        : DASH ARG
            { $$ = "-" + $2; }
        | QUOTE TERMS QUOTE
            { $$ = $2.join(" "); }
        | TERM
            { $$ = $1; }
        ;
    
    TERMS
        : TERM SPACE TERMS
            { $$ = [ $1 ].concat($3); }
        | TERM
            { $$ = [ $1 ]; }
        ;
    
    TERM
        : TERM DASH TERM
            { $$ = $1 + $2 + $3; }
        | TERM DOT TERM
            { $$ = $1 + $2 + $3; }
        | WORD
            { $$ = $1; }
        ;
    

1 个答案:

答案 0 :(得分:2)

想出来。我开始忽略空白,改变了一些规则,并解决了冲突。解析器返回一个函数,用于确定某个对象是否与查询匹配。这是最终结果:

/* Google-Like Parser */

/* Lexical Grammar */

%lex
%%

\s+                   { /* ignore whitespace */ }
"AND"|"&&"            { return "AND" }
"OR"|"||"             { return "OR" }
"NOT"|"!"             { return "NOT" }
"("                   { return "OPEN" }
")"                   { return "CLOSE" }
":"                   { return "QUAL" }
"-"                   { return "NEG" }
"\""|"'"              { return "QUOTE" }
\w+                   { return "WORD" }
"."                   { return "DOT" }
<<EOF>>               { return "EOF" }
.                     { return "INVALID" }

/lex

/* Operators */

%right AND OR
%right NOT
%right QUAL NEG DOT

%start START

%%

/* Language Grammar */

START
    : EXP EOF
        { return $1; }
    ;

EXP
    : EXP AND EXP
        { $$ = function(obj) { return ($1(obj) && $3(obj)); }; }
    | EXP OR EXP
        { $$ = function(obj) { return ($1(obj) || $3(obj)); }; }
    | NOT EXP
        { $$ = function(obj) { return !($2(obj)); }; }
    | OPEN EXP CLOSE
        { $$ = $2; }
    | ARGS
        { $$ = function(obj) { return parser.processArgs(obj, $1)(obj); }; }
    ;

ARGS
    : ARG ARGS
        { $$ = [ $1, $2]; }
    | OP ARGS
        { $$ = [ $1, $2]; }
    | ARG
        { $$ = [ $1 ]; }
    | OP
        { $$ = [ $1 ]; }
    ;

OP
    : NEG ARG
        {{
            $2.not = true;
            $$ = $2;
        }}
    | NEG ARG QUAL ARG
        {{
            $$ = {
                "not": true,
                "operator": $2.operand,
                "operand": $4.operand
            };
        }}
    | ARG QUAL ARG
        {{
            $$ = {
                "not": false,
                "operator": $1.operand,
                "operand": $3.operand
            };
        }}
    ;

ARG
    : QUOTE TERMS QUOTE
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $2.join(" ")
            };
        }}
    | TERM
        {{
            $$ = {
                "not": false,
                "operator": null,
                "operand": $1
            };
        }}
    ;

TERMS
    : TERM TERMS
        { $$ = [ $1 ].concat($2); }
    | TERM
        { $$ = [ $1 ]; }
    ;

TERM
    : WORD DOT TERM
        { $$ = $1 + $2 + $3; }
    | WORD
        { $$ = $1; }
    ;

%%

parser.processArgs = function(obj, args) {
    if (args.length > 1)
    {
        if (args[0].operator)
            return function(obj) { return (parser.matchArg(obj, args[0]) && parser.processArgs(args[1])(obj)); };
        else
            return function(obj) { return (parser.matchArg(obj, args[0]) || parser.processArgs(args[1])(obj)); };
    }
    else
    {
        return function(obj) { return parser.matchArg(obj, args[0]); };
    }
}

/* Override Later */
parser.matchArg = function(obj, arg) {
    return true;
}