ANTLR 4 运算符优先级与预期不符

时间:2014-05-11 12:49:04

标签: antlr4

我为公司内部使用的一种 SQL 方言定义了语法,如下所示:

/** Grammars always start with a grammar header. This grammar is called
 *  GigyaSQL and must match the filename: GigyaSQL.g4
 */
grammar GigyaSQL;

// Entry rule: one complete GigyaSQL query.
// SELECT and FROM are mandatory; WHERE, FILTER, GROUP BY and LIMIT are each
// optional and, when present, must appear in exactly this order.
// The rule ends with EOF so the parser must consume the whole input. Without
// it, parsing stops at the first token that cannot continue the rule and any
// trailing text is left unread instead of being reported as a syntax error.
parse
 : selectClause
   fromClause
   ( whereClause )?
   ( filterClause )?
   ( groupByClause )?
   ( limitClause )?
   EOF
 ;

// SELECT list: the K_SELECT keyword followed by one or more
// comma-separated result columns.
selectClause
    : K_SELECT result_column (',' result_column)*
    ;

// A single entry of the SELECT list. Every alternative is labelled, so each
// form gets its own parse-tree context.
result_column
    : '*'                                 # selectAll
    | table_name '.' '*'                  # selectAllFromTable
    | select_expr (K_AS? column_alias)?   # selectExpr
    | with_table                          # selectWithTable
    ;

// FROM clause: exactly one table reference.
fromClause
    : K_FROM table_name
    ;

// Table reference: a plain name, or a name followed by K_WITH and a
// with_table (currently only COUNTERS).
table_name
    : any_name                      # simpleTable
    | any_name K_WITH with_table    # tableWithTable
    ;

// Generic name: an identifier, a single-quoted string, or either of those
// wrapped in any number of parentheses.
any_name
    : IDENTIFIER
    | STRING_LITERAL
    | '(' any_name ')'
    ;

// Auxiliary table usable after K_WITH, in the SELECT list, in FILTER and in
// IFELEMENT. COUNTERS is the only one defined.
with_table
    : COUNTERS
    ;

// Expression allowed in the SELECT list and as a function argument:
// a literal, a RANGE/INTERVAL call in its one-argument form, an optionally
// table-qualified column, or a generic function call.
select_expr
    : literal_value
    | range_function_in_select
    | interval_function_in_select
    | (table_name '.')? column_name
    | function_name '(' argument_list ')'
    ;

// WHERE clause: the K_WHERE keyword followed by a condition expression.
whereClause
    : K_WHERE condition_expr
    ;

// Condition expression used by WHERE, by FILTER ... BY and inside IFELEMENT.
// The rule is directly left-recursive. ANTLR 4 derives operator precedence
// from the order of the alternatives: an alternative listed earlier binds
// tighter than one listed later.
// unary_expr is listed before binary_expr, so a prefix operator (K_NOT
// included) takes only the operand immediately to its right:
//     NOT data.zzz > 124    is grouped as    (NOT data.zzz) > 124
// Parentheses (brackets_expr) are needed to negate the whole comparison.
condition_expr
 : literal_value # literal
 | ( table_name '.' )? column_name # column_name_expr
 | unary_operator condition_expr # unary_expr
 | condition_expr binary_operator condition_expr # binary_expr
 | K_IFELEMENT '(' with_table ',' condition_expr ')' # if_element
 | function_name '(' argument_list ')' # function_expr
 | '(' condition_expr ')' # brackets_expr
 | condition_expr K_NOT? K_LIKE condition_expr # like_expr
 | condition_expr K_NOT? K_CONTAINS condition_expr # contains_expr
 | condition_expr K_IS K_NOT? condition_expr # is_expr
 //| condition_expr K_NOT? K_BETWEEN condition_expr K_AND condition_expr
 | condition_expr K_NOT? K_IN '(' ( literal_value ( ',' literal_value )*) ')' # in_expr
 ;

// FILTER clause: K_FILTER <with_table> K_BY <condition>.
filterClause
    : K_FILTER with_table K_BY condition_expr
    ;

// GROUP BY clause: one or more comma-separated grouping expressions.
groupByClause
    : K_GROUP K_BY group_expr (',' group_expr)*
    ;

// Expression allowed in GROUP BY. It mirrors select_expr, but RANGE and
// INTERVAL use their GROUP BY forms, which take extra arguments.
group_expr
    : literal_value
    | (table_name '.')? column_name
    | function_name '(' argument_list ')'
    | range_function_in_group
    | interval_function_in_group
    ;

// LIMIT clause: K_LIMIT followed by a single numeric literal.
limitClause
    : K_LIMIT NUMERIC_LITERAL
    ;

// Arguments of a generic function call: either a non-empty comma-separated
// list of select expressions, or a lone '*'.
argument_list
    : select_expr (',' select_expr)*
    | '*'
    ;

// Prefix operators accepted by condition_expr. All four share whatever
// precedence the unary_expr alternative has there.
unary_operator
    : MINUS
    | PLUS
    | '~'
    | K_NOT
    ;

// Infix operators accepted by the binary_expr alternative of condition_expr.
// NOTE(review): the parenthesised groups below only organise the list. Since
// every operator reaches condition_expr through the same alternative, they
// presumably all end up with one shared precedence level. Confirm against
// the ANTLR left-recursion documentation if per-group precedence is needed.
binary_operator
    : ( '*' | DIVIDE | MODULAR )
    | ( PLUS | MINUS )
    //| ( '<<' | '>>' | '&' | '|' )
    | ( LTH | LEQ | GTH | GEQ )
    | ( EQUAL | NOT_EQUAL | K_IN | K_LIKE )
    //| ( '=' | '==' | '!=' | '<>' | K_IS | K_IS K_NOT | K_IN | K_LIKE | K_GLOB | K_MATCH | K_REGEXP )
    | K_AND
    | K_OR
    ;

// RANGE(...) as it appears in the SELECT list: a single expression argument.
range_function_in_select
    : K_RANGE '(' select_expr ')'
    ;

// RANGE(...) as it appears in GROUP BY: the expression followed by one or
// more comma-separated range pairs.
range_function_in_group
    : K_RANGE '(' select_expr ',' range_pair (',' range_pair)* ')'
    ;

// One bucket of a GROUP BY RANGE: a double-quoted "low,high" pair in which
// either bound (but not both) may be omitted.
// Tried to use INT instead (for decimal numbers) but that didn't work fine
// (didn't parse a = 1 correctly).
// NOTE(review): IDENTIFIER also accepts double-quoted text, so the lexer
// probably emits a whole "1,2" as a single IDENTIFIER token before this rule
// can see the individual pieces. Confirm with a GROUP BY RANGE test input.
range_pair
    : '"' NUMERIC_LITERAL ',' NUMERIC_LITERAL '"'
    | '"' ',' NUMERIC_LITERAL '"'
    | '"' NUMERIC_LITERAL ',' '"'
    ;

// INTERVAL(...) as it appears in the SELECT list: a single expression argument.
interval_function_in_select
    : K_INTERVAL '(' select_expr ')'
    ;

// INTERVAL(...) as it appears in GROUP BY: the expression plus one numeric
// literal.
interval_function_in_group
    : K_INTERVAL '(' select_expr ',' NUMERIC_LITERAL ')'
    ;


// Name of a generic function call; syntactically just an any_name.
function_name
    : any_name
    ;

// Literal constant: a number, a single-quoted string, or NULL.
// The commented-out alternatives are literal kinds that are not enabled.
literal_value
    : NUMERIC_LITERAL
    | STRING_LITERAL
    // | BLOB_LITERAL
    | K_NULL
    // | K_CURRENT_TIME
    // | K_CURRENT_DATE
    // | K_CURRENT_TIMESTAMP
    ;

// Column reference; syntactically just an any_name.
column_name
    : any_name
    ;

// Alias given to a selected expression: an identifier or a single-quoted
// string. Unlike any_name, it cannot be parenthesised.
column_alias
    : IDENTIFIER
    | STRING_LITERAL
    ;

// Whitespace (space, tab, CR, LF, vertical tab) is dropped by the lexer and
// never reaches the parser.
SPACES
    : [ \t\r\n\u000B] -> skip
    ;

// The COUNTERS pseudo-table. Only the all-lowercase and all-uppercase
// spellings are recognised; unlike the K_* keywords it is not built from the
// per-letter case-insensitive fragments.
COUNTERS
    : 'counters'
    | 'COUNTERS'
    ;

//INT : '0' | DIGIT+ ; 

// Comparison and arithmetic operator tokens.
EQUAL     : '=' ;
NOT_EQUAL : '<>' | '!=' ;
LTH       : '<' ;
LEQ       : '<=' ;
GTH       : '>' ;
GEQ       : '>=' ;
// MULTIPLY is not a named token; the parser rules use the literal '*' directly.
//MULTIPLY: '*';
DIVIDE    : '/' ;
MODULAR   : '%' ;
PLUS      : '+' ;
MINUS     : '-' ;

// Keywords. Each one is spelled with the single-letter fragments defined at
// the end of the grammar, which makes it case-insensitive.
K_AND       : A N D ;
K_AS        : A S ;
K_BY        : B Y ;
K_CONTAINS  : C O N T A I N S ;
K_DISTINCT  : D I S T I N C T ;
K_FILTER    : F I L T E R ;
K_FROM      : F R O M ;
K_GROUP     : G R O U P ;
K_IFELEMENT : I F E L E M E N T ;
K_IN        : I N ;
K_INTERVAL  : I N T E R V A L ;
K_IS        : I S ;
K_LIKE      : L I K E ;
K_LIMIT     : L I M I T ;
K_NOT       : N O T ;
K_NULL      : N U L L ;
K_OR        : O R ;
K_RANGE     : R A N G E ;
K_REGEXP    : R E G E X P ;
K_SELECT    : S E L E C T ;
K_WHERE     : W H E R E ;
K_WITH      : W I T H ;

// Identifier token, in one of four spellings:
//   "double quoted"  - a doubled "" stands for one quote character
//   `back quoted`    - a doubled `` stands for one back-quote
//   [bracketed]      - anything up to the first ']'
//   bare name        - a letter or '_' followed by letters, digits, '_' or '.'
// A bare name may contain '.', so input such as data.zzz is one IDENTIFIER
// token, not IDENTIFIER '.' IDENTIFIER.
// The former fifth alternative, [a-zA-Z_] [a-zA-Z_0-9]*, was removed: every
// string it matched is already matched by the bare-name alternative, so it
// could never be selected.
IDENTIFIER
 : '"' (~'"' | '""')* '"'
 | '`' (~'`' | '``')* '`'
 | '[' ~']'* ']'
 | [a-zA-Z_] [.a-zA-Z_0-9]* // TODO - need to check if the period is correctly handled; TODO check: needs more chars in set
 ;

// Single-quoted string. A doubled '' inside the string stands for one quote
// character.
STRING_LITERAL
    : '\'' (~'\'' | '\'\'')* '\''
    ;

// Unsigned number: digits with an optional fraction, or a fraction with a
// leading '.', each optionally followed by an exponent. A sign is never part
// of the token. (A separate INT token was tried and is commented out.)
NUMERIC_LITERAL
    : DIGIT+ ('.' DIGIT*)? (E [-+]? DIGIT+)?
    | '.' DIGIT+ (E [-+]? DIGIT+)?
    ;

// Lexer fragments: building blocks for the token rules that never become
// tokens themselves.
fragment DIGIT : [0-9] ;

// One fragment per letter, each matching its lower- or upper-case form. The
// K_* keyword rules are spelled with these.
fragment A : [aA] ;
fragment B : [bB] ;
fragment C : [cC] ;
fragment D : [dD] ;
fragment E : [eE] ;
fragment F : [fF] ;
fragment G : [gG] ;
fragment H : [hH] ;
fragment I : [iI] ;
fragment J : [jJ] ;
fragment K : [kK] ;
fragment L : [lL] ;
fragment M : [mM] ;
fragment N : [nN] ;
fragment O : [oO] ;
fragment P : [pP] ;
fragment Q : [qQ] ;
fragment R : [rR] ;
fragment S : [sS] ;
fragment T : [tT] ;
fragment U : [uU] ;
fragment V : [vV] ;
fragment W : [wW] ;
fragment X : [xX] ;
fragment Y : [yY] ;
fragment Z : [zZ] ;

我尝试解析以下查询: SELECT * FROM accounts WHERE NOT data.zzz > 124

我得到以下树: enter image description here

但是我希望得到的树与使用括号时的结果类似: SELECT * FROM accounts WHERE NOT (data.zzz > 124)

enter image description here

一元运算符规则排在其他规则之前,我不明白为什么会按这种方式解析。

有什么建议吗?

1 个答案:

答案 0 :(得分:2)

对于给定的语法,这是正确的结果。正如您已经提到的,unary_operator 排在 binary_operator 之前,这意味着 NOT 关键字的操作数会先于其他运算符绑定到它。由于 NOT 是一元运算符,它把 data.zzz 作为操作数,之后整个 NOT 表达式再成为 binary_operator 的操作数。

要得到你想要的结果,只需按照优先级把 unary_operator 所在的备选分支向下移动(我记得在 SQL 中,NOT 的优先级低于二元运算符;另外,NOT 运算符也不应该像你的语法那样与 MINUS、PLUS 具有相同的优先级),例如:

// Same rule as in the question, with unary_expr moved below binary_expr.
// The prefix operator now has lower precedence than the binary operators, so
//     NOT data.zzz > 124    is grouped as    NOT (data.zzz > 124)
// This affects every unary_operator (MINUS, PLUS and '~' as well as K_NOT),
// because they all share the single unary_expr alternative.
condition_expr
: literal_value # literal
| ( table_name '.' )? column_name # column_name_expr
| condition_expr binary_operator condition_expr # binary_expr
| unary_operator condition_expr # unary_expr
| K_IFELEMENT '(' with_table ',' condition_expr ')' # if_element
| function_name '(' argument_list ')' # function_expr
| '(' condition_expr ')' # brackets_expr
| condition_expr K_NOT? K_LIKE condition_expr # like_expr
| condition_expr K_NOT? K_CONTAINS condition_expr # contains_expr
| condition_expr K_IS K_NOT? condition_expr # is_expr
//| condition_expr K_NOT? K_BETWEEN condition_expr K_AND condition_expr
| condition_expr K_NOT? K_IN '(' ( literal_value ( ',' literal_value )*) ')' # in_expr
;

这样就能得到你想要的解析树: enter image description here