我正在尝试使用 ANTLR 为 DOT(Graphviz)语言编写解析器。ANTLR 生成解析树时出现了一个问题:当我用某些输入调试语法并逐步跟踪树的生成过程时,可以看到有些子树在树中被完整地重复了一遍。子树第一次出现时显示为绿色,之后同一棵子树再次出现时则显示为黑色。粗略看来,这种情况似乎发生在我在语法中使用“?”运算符的地方;如你所知,该运算符表示某些标记是可选的。
有没有人知道 ANTLR 究竟为什么要改变树的颜色?
这是我的语法:
// Combined (parser + lexer) grammar for the Graphviz DOT language.
grammar Dot;

options {
    // Enable automatic backtracking and cache the results of speculative
    // rule invocations.
    backtrack    = true;
    memoize      = true;
    // Build an AST whose nodes are CommonTree instances.
    output       = AST;
    ASTLabelType = CommonTree;
}
tokens {
    // Operators and special characters.
    O_BRACKET     = '{';
    C_BRACKET     = '}';
    O_SQR_BRACKET = '[';
    C_SQR_BRACKET = ']';
    SEMI_COLON    = ';';
    EQUAL         = '=';
    COMMA         = ',';
    COLON         = ':';
    LPAREN        = '(';
    RPAREN        = ')';

    // Case-insensitive keywords (matched by the lexer rules of the same name).
    GRAPH;
    DIGRAPH;
    STRICT;
    NODE;
    EDGE;
    SUBGRAPH;

    // Imaginary tokens used only as roots of AST subtrees.
    GRAPH_ROOT;
    SUBGRAPH_ROOT;
    STMT_LIST;
    EDGE_STMT;
    NODE_STMT;
    ATTR_LIST;
    ATTR;
}
// Package declaration and imports for the generated parser class.
// Logger is needed by the fields and methods declared in @members.
@header {
package org.cesta.parsers.dot;
import java.util.logging.Logger;
}
@members {
    /** Becomes true once any error message has been emitted. */
    private boolean hasErrors = false;

    /** Receives error messages as warnings; may be replaced through setLogger. */
    private Logger logger = Logger.getLogger(this.getClass().getName());

    /**
     * Replaces the logger used for error reporting.
     *
     * @param newLogger the logger to use from now on; when null, messages
     *                  are no longer logged here but are still passed on
     *                  to the superclass
     */
    public void setLogger(Logger newLogger) {
        this.logger = newLogger;
    }

    /**
     * Records that an error occurred, logs the message as a warning when a
     * logger is set, and then hands the message to the default implementation.
     *
     * @param message the error text produced by the recognizer
     */
    @Override
    public void emitErrorMessage(String message) {
        this.hasErrors = true;
        if (this.logger != null) {
            this.logger.warning(message);
        }
        super.emitErrorMessage(message);
    }

    /**
     * @return true if at least one error message has been emitted
     */
    public boolean hasErrors() {
        return this.hasErrors;
    }
}
// Package declaration for the generated lexer class (same package as the parser).
@lexer::header {
package org.cesta.parsers.dot;
}
// No additional fields or methods are added to the generated lexer.
@lexer::members {
}
// Entry rule: an optionally named graph or digraph with a braced body.
// The body is optional so that an empty graph such as `digraph G {}` is
// accepted, mirroring how the subgraph rule treats its own body.
// AST: ^(GRAPH_ROOT graphModifier ID? stmt_list?)
graph
    : graphModifier ID? O_BRACKET stmt_list? C_BRACKET
        -> ^(GRAPH_ROOT graphModifier ID? stmt_list?)
    ;
// Optional STRICT keyword followed by the graph kind (GRAPH or DIGRAPH).
graphModifier
    : STRICT? ( GRAPH | DIGRAPH )
    ;
// One or more statements, each followed by any number of semicolons.
// The semicolons are dropped; the statements become children of STMT_LIST.
stmt_list
    : ( stmt SEMI_COLON* )+
        -> ^(STMT_LIST stmt+)
    ;
// A single statement inside a graph or subgraph body.
// The order of the alternatives is kept exactly as written.
stmt
    : attr_stmt
    | edge_stmt
    | subgraph
    | ID EQUAL ID
        -> ^(ATTR ID EQUAL ID)      // graph-level "name = value" assignment
    | node_stmt
    ;
// Default-attribute statement; the GRAPH / NODE / EDGE keyword becomes the
// root of the resulting subtree and the attribute list its child.
attr_stmt
    : ( GRAPH^ | NODE^ | EDGE^ ) attr_list
    ;
// Zero or more bracketed groups of attributes, flattened under one ATTR_LIST
// node; the brackets themselves are not kept in the tree.
// NOTE(review): this rule can match the empty input, and some callers also
// mark it optional (attr_list?) -- confirm whether `+` was intended here.
attr_list
    : ( O_SQR_BRACKET a_list? C_SQR_BRACKET )*
        -> ^(ATTR_LIST a_list*)
    ;
// One or more attributes, each optionally followed by a separator.
// DOT permits either ',' or ';' between attributes inside the brackets, so
// both are accepted; the separators are excluded from the AST.
a_list
    : ( attr ( COMMA! | SEMI_COLON! )? )+
    ;
// A single attribute: a name, optionally followed by "= value".
// AST: ^(ATTR ID (EQUAL ID)?)
attr
    : ID ( EQUAL ID )?
        -> ^(ATTR ID (EQUAL ID)?)
    ;
// Edge statement: a source endpoint, one or more edge operators with their
// targets (edgeRHS), and an optional attribute list.
// AST: ^(EDGE_STMT node_subgraph edgeRHS attr_list?)
edge_stmt
    : node_subgraph edgeRHS attr_list?
        -> ^(EDGE_STMT node_subgraph edgeRHS attr_list?)
    ;
// An edge endpoint: either a node reference or a subgraph.
node_subgraph
    : node_id
    | subgraph
    ;
// Right-hand side of an edge: the operator becomes the subtree root, followed
// by the target endpoint and, recursively, any further chained edges.
edgeRHS
    : EDGEOP^ ( node_id | subgraph ) edgeRHS?
    ;
// Node statement: a node reference with an optional attribute list.
// AST: ^(NODE_STMT node_id attr_list?)
node_stmt
    : node_id attr_list?
        -> ^(NODE_STMT node_id attr_list?)
    ;
// Node reference: the ID is the subtree root; an optional port hangs below it.
node_id
    : ID^ port?
    ;
// Port suffix of a node reference: ":name", ":name:compass" or ":compass".
// VALIDSTR is a lexer fragment and is therefore never emitted as a token of
// its own; every such word reaches the parser as ID, so ID is matched here.
// That also makes a separate ":compass" alternative unnecessary, because it
// is the same token sequence as ":name". The colons are excluded from the AST.
port
    : COLON! ID ( COLON! ID )?
    ;
// Subgraph in one of three forms:
//   { ... }                 anonymous body without the keyword
//   subgraph [ID] { ... }   keyword, optional name, body
//   subgraph ID             keyword and name only, no body
// The keyword-with-body form is written once; a second, predicate-guarded
// copy of the same alternative matched nothing the plain one does not and
// produced the same tree, so it has been folded into this one.
subgraph
    : O_BRACKET stmt_list? C_BRACKET
        -> ^(SUBGRAPH_ROOT stmt_list?)
    | SUBGRAPH ID? O_BRACKET stmt_list? C_BRACKET
        -> ^(SUBGRAPH_ROOT ID? stmt_list?)
    | SUBGRAPH ID
        -> ^(SUBGRAPH_ROOT ID)
    ;
// ---------------------------------------------------------------------------
// LEXER
// ---------------------------------------------------------------------------

// Keywords, spelled with the per-letter fragments so that any mix of upper
// and lower case is accepted.
GRAPH    : G R A P H ;
DIGRAPH  : D I G R A P H ;
STRICT   : S T R I C T ;
NODE     : N O D E ;
EDGE     : E D G E ;
SUBGRAPH : S U B G R A P H ;
// Edge operator: "->" or "--".
EDGEOP
    : '->'
    | '--'
    ;
// Identifier token: a bare word, a number, a double-quoted string or an
// angle-bracketed string. Only ID is emitted; the four forms are fragments.
ID
    : VALIDSTR
    | NUMBER
    | QUOTEDSTR
    | HTMLSTR
    ;
// An ASCII letter or underscore.
fragment ALPHACHAR
    : 'a'..'z'
    | 'A'..'Z'
    | '_'
    ;
// Bare word: a letter or underscore followed by letters, underscores, digits.
fragment VALIDSTR
    : ALPHACHAR ( ALPHACHAR | '0'..'9' )*
    ;
// Numeral with an optional leading minus sign. Besides "12" and "12.5" this
// also accepts the forms ".5" (no integer part) and "12." (no fractional
// digits), which DOT allows as numerals.
fragment NUMBER
    : '-'?
      ( '.' ('0'..'9')+
      | ('0'..'9')+ ( '.' ('0'..'9')* )?
      )
    ;
// Double-quoted string; the quotes are part of the matched text.
fragment QUOTEDSTR
    : '"' STR '"'
    ;
// Body of a quoted string: escape sequences, or any character other than a
// backslash or a double quote.
fragment STR
    : ( ESCAPE_SEQUENCE
      | ~( '\\' | '"' )
      )*
    ;
// A backslash followed by any single character.
// DOT itself only gives special meaning to \" inside quoted strings and
// passes every other backslash sequence through (for example \l, \N, \G in
// labels), so restricting the second character to a fixed list would reject
// valid input. Every sequence accepted by the former list is still accepted.
fragment ESCAPE_SEQUENCE
    : '\\' .
    ;
// Angle-bracketed (HTML-like) string. Angle brackets inside the string must
// be balanced, so the rule recurses on each nested '<' ... '>' pair instead
// of stopping at the first '>' (which would cut "<<b>x</b>>" short).
fragment HTMLSTR
    : '<' ( HTMLSTR | ~( '<' | '>' ) )* '>'
    ;
// Runs of spaces and tabs are sent to the hidden channel.
WS
    : ( ' ' | '\t' )+
      { $channel = HIDDEN; }
    ;
// A single line terminator (CR LF, CR, LF) or a form feed, sent to the
// hidden channel.
NEWLINE
    : ( '\r' '\n'?
      | '\n'
      | '\u000C'
      )
      { $channel = HIDDEN; }
    ;
// Block comment; the non-greedy loop stops at the first closing "*/".
// The comment is sent to the hidden channel.
COMMENT
    : '/*' ( options { greedy = false; } : . )* '*/'
      { $channel = HIDDEN; }
    ;
// Line comment: "//" up to the end of the line, sent to the hidden channel.
// The terminating newline is optional so that a comment on the very last
// line of the input, with no newline after it, still lexes. When a newline
// is present it is consumed as part of the comment, as before.
LINE_COMMENT
    : '//' ~( '\n' | '\r' )* ( '\r'? '\n' )?
      { $channel = HIDDEN; }
    ;
// One fragment per letter, each matching the lower- or upper-case form.
// They are the building blocks of the case-insensitive keyword rules.
fragment A : 'a' | 'A' ;
fragment B : 'b' | 'B' ;
fragment C : 'c' | 'C' ;
fragment D : 'd' | 'D' ;
fragment E : 'e' | 'E' ;
fragment F : 'f' | 'F' ;
fragment G : 'g' | 'G' ;
fragment H : 'h' | 'H' ;
fragment I : 'i' | 'I' ;
fragment J : 'j' | 'J' ;
fragment K : 'k' | 'K' ;
fragment L : 'l' | 'L' ;
fragment M : 'm' | 'M' ;
fragment N : 'n' | 'N' ;
fragment O : 'o' | 'O' ;
fragment P : 'p' | 'P' ;
fragment Q : 'q' | 'Q' ;
fragment R : 'r' | 'R' ;
fragment S : 's' | 'S' ;
fragment T : 't' | 'T' ;
fragment U : 'u' | 'U' ;
fragment V : 'v' | 'V' ;
fragment W : 'w' | 'W' ;
fragment X : 'x' | 'X' ;
fragment Y : 'y' | 'Y' ;
fragment Z : 'z' | 'Z' ;