ANTLR 中的 AST 树

时间:2014-05-14 11:49:22

标签: java antlr antlr3 dot

我正在尝试使用 ANTLR 为 DOT(Graphviz)语言编写解析器。ANTLR 生成解析树时出现了一个问题:当我用某些输入调试语法并逐步跟踪树的生成过程时,可以看到某些子树在树中被完整地重复了一遍。子树第一次出现时显示为绿色,之后相同的子树又以黑色出现。初步看来,这种情况发生在语法中使用了“?”运算符的地方;如你所知,该运算符表示某些标记是可选的。

有没有人知道 ANTLR 为什么会改变树的颜色?

这是我的语法:

// Combined lexer/parser grammar for the Graphviz DOT language (ANTLR 3).
grammar Dot;

options {
    // Build an AST whose nodes are CommonTree instances.
    output       = AST;
    ASTLabelType = CommonTree;

    // Let the parser backtrack where needed, and memoize rule results
    // during backtracking.
    backtrack    = true;
    memoize      = true;
}

tokens {
    // Literal tokens: operators and punctuation
    O_BRACKET = '{';
    C_BRACKET = '}';
    O_SQR_BRACKET = '[';
    C_SQR_BRACKET = ']';
    SEMI_COLON = ';';
    EQUAL = '=';
    COMMA = ',';
    COLON = ':';
    LPAREN = '(';
    RPAREN = ')';

    // Keywords; the lexer rules of the same name match them
    // case-insensitively
    GRAPH;
    DIGRAPH;
    STRICT;
    NODE;
    EDGE;
    SUBGRAPH;

    // Imaginary tokens: never produced by the lexer, used as AST node
    // roots in the parser's rewrite rules
    GRAPH_ROOT;
    SUBGRAPH_ROOT;
    STMT_LIST;
    EDGE_STMT;
    NODE_STMT;
    ATTR_LIST;
    ATTR;
}

@header {
package org.cesta.parsers.dot;

import java.util.logging.Logger;
}

@members {
    // Destination for reported error messages; may be replaced or set to null.
    private Logger logger = Logger.getLogger(this.getClass().getName());

    // Becomes true as soon as at least one error message has been emitted.
    private boolean hasErrors = false;

    /** Replaces the logger that receives error messages (null disables logging). */
    public void setLogger(Logger newLogger){
        this.logger = newLogger;
    }

    /**
     * Records that an error occurred, forwards the message to the logger
     * when one is set, then delegates to the superclass implementation.
     */
    @Override
    public void emitErrorMessage(String message) {
        this.hasErrors = true;
        if (null != this.logger) {
            this.logger.warning(message);
        }
        super.emitErrorMessage(message);
    }

    /** Returns true if any error message has been emitted so far. */
    public boolean hasErrors(){
        return this.hasErrors;
    }
}

@lexer::header {
package org.cesta.parsers.dot;
}

@lexer::members {
}

// Entry rule: an optionally named graph or digraph with a brace-delimited body.
// The statement list is optional so that an empty body such as
// `digraph G {}` is accepted; stmt_list itself requires at least one stmt.
graph
    :
        graphModifier ID? O_BRACKET stmt_list? C_BRACKET
        -> ^(GRAPH_ROOT graphModifier ID? stmt_list?)
    ;

// Optional STRICT keyword followed by the graph kind.
graphModifier
    :   STRICT? ( GRAPH | DIGRAPH )
    ;

// One or more statements, each optionally followed by any number of
// semicolons; the semicolons are dropped from the AST.
stmt_list
    :   ( stmt SEMI_COLON* )+
        -> ^(STMT_LIST stmt+)
    ;

// A single statement. The order of the alternatives is significant because
// the grammar relies on backtracking to choose between them.
stmt
    :   attr_stmt
    |   edge_stmt
    |   subgraph
    |   ID EQUAL ID -> ^(ATTR ID EQUAL ID)
    |   node_stmt
    ;

// Default attributes for graphs, nodes or edges; the keyword becomes the
// root of the resulting subtree.
attr_stmt
    :   ( GRAPH^ | NODE^ | EDGE^ ) attr_list
    ;

// Zero or more bracketed attribute groups collected under one ATTR_LIST node.
// NOTE(review): this rule can match nothing at all, yet callers also write
// `attr_list?` — confirm the nullable form is intended.
attr_list
    :   ( O_SQR_BRACKET a_list? C_SQR_BRACKET )*
        -> ^(ATTR_LIST a_list*)
    ;

// One or more attributes inside a bracket group. Each attribute may be
// followed by a separator; DOT allows either ',' or ';' here, and both are
// dropped from the AST.
a_list
    :  (attr (COMMA! | SEMI_COLON!)?)+
    ;

// A single attribute: a name with an optional `= value` part.
attr
    :   ID ( EQUAL ID )? -> ^(ATTR ID (EQUAL ID)?)
    ;

// An edge statement: a source endpoint, one or more edge segments and an
// optional attribute list.
edge_stmt
    :   node_subgraph edgeRHS attr_list?
        -> ^(EDGE_STMT node_subgraph edgeRHS attr_list?)
    ;

// An edge endpoint: either a node reference or a subgraph.
node_subgraph
    :   node_id
    |   subgraph
    ;

// Right-hand side of an edge: the edge operator (made the subtree root),
// its target, and optionally further chained segments.
edgeRHS
    :   EDGEOP^ ( node_id | subgraph ) edgeRHS?
    ;

// A node statement: a node reference with an optional attribute list.
node_stmt
    :   node_id attr_list?
        -> ^(NODE_STMT node_id attr_list?)
    ;

// A node reference; the ID becomes the root and any port parts its children.
node_id
    :   ID^ port?
    ;

// Port suffix of a node reference: `:name`, `:name:compass` or `:compass`.
// VALIDSTR is a fragment rule, so the lexer never emits a VALIDSTR token;
// every identifier (compass points included) reaches the parser as ID.
// The colons are dropped from the AST.
port
    :
        COLON! ID (COLON! ID)?
    ;

// A subgraph in one of three forms:
//   { ... }                   anonymous body
//   subgraph [ID] { ... }     optionally named body
//   subgraph ID               name only, without a body
// The former predicated alternative `(SUBGRAPH O_BRACKET) => ...` was an
// exact duplicate of the second form and has been folded into it.
subgraph
    :
        O_BRACKET stmt_list? C_BRACKET
            -> ^(SUBGRAPH_ROOT stmt_list?)

        | SUBGRAPH ID? O_BRACKET stmt_list? C_BRACKET
            -> ^(SUBGRAPH_ROOT ID? stmt_list?)

        | SUBGRAPH ID
            -> ^(SUBGRAPH_ROOT ID)
    ;

// LEXER

// case-insensitive keywords
// Keywords are spelled with the per-letter fragments so that any mix of
// upper and lower case matches. They are listed before ID.
GRAPH    : G R A P H ;
DIGRAPH  : D I G R A P H ;
STRICT   : S T R I C T ;
NODE     : N O D E ;
EDGE     : E D G E ;
SUBGRAPH : S U B G R A P H ;

// Edge operator: `--` or `->`.
EDGEOP : '--' | '->' ;

// Any identifier form: bare word, number, quoted string or HTML string.
ID
    :   VALIDSTR
    |   NUMBER
    |   QUOTEDSTR
    |   HTMLSTR
    ;



// An ASCII letter or underscore.
fragment ALPHACHAR
    :   'a'..'z' | 'A'..'Z' | '_'
    ;


// A bare word: starts with a letter or underscore, continues with letters,
// underscores or digits.
fragment VALIDSTR
    :   ALPHACHAR ( ALPHACHAR | '0'..'9' )*
    ;

// A DOT numeral: optional minus sign, then either `.digits` or
// `digits[.[digits]]`. Besides `1` and `1.5` this also accepts the
// `.5` and `5.` forms allowed by DOT.
fragment NUMBER
    :  ('-')?
       (   '.' ('0'..'9')+
       |   ('0'..'9')+ ('.' ('0'..'9')*)?
       )
    ;

// A double-quoted string, quotes included.
fragment QUOTEDSTR
    :   '"' STR '"'
    ;

// Body of a quoted string: escape sequences, or any character other than a
// backslash or a double quote.
fragment STR
    :   ( ESCAPE_SEQUENCE | ~( '\\' | '"' ) )*
    ;

// A backslash followed by any single character. DOT treats only `\"` as a
// real escape inside quoted strings and passes other backslash pairs
// through unchanged, so sequences such as `\l`, `\N` or `\G` must be
// accepted rather than limited to a fixed whitelist.
fragment ESCAPE_SEQUENCE
    :   '\\' .
    ;

// An HTML-like string delimited by angle brackets. Nested `<...>` pairs are
// matched recursively so that markup such as `<<b>x</b>>` is consumed as a
// single token instead of ending at the first '>'.
fragment HTMLSTR
    :  '<' ( HTMLSTR | ~('<'|'>') )* '>'
    ;

// Runs of spaces and tabs go to the hidden channel.
WS
    :   ( ' ' | '\t' )+ { $channel = HIDDEN; }
    ;

// Line terminators (CR LF, CR, LF) and form feed go to the hidden channel.
NEWLINE
    :   ( '\r' '\n'? | '\n' | '\u000C' ) { $channel = HIDDEN; }
    ;

// Block comment: everything up to the first closing `*/` (non-greedy),
// sent to the hidden channel.
COMMENT
    :   '/*' ( options { greedy = false; } : . )* '*/' { $channel = HIDDEN; }
    ;

// Line comment: `//` up to, but not including, the end of the line.
// The terminator is not consumed here, so a comment on the last line of a
// file without a trailing newline still lexes; the line break itself is
// picked up by NEWLINE, which is hidden as well.
LINE_COMMENT
    : '//' ~('\n'|'\r')*
    {
        $channel = HIDDEN;
    }
    ;

// One helper per letter, each matching the letter in either case; the
// keyword rules are spelled with these.
fragment A : 'a' | 'A' ;
fragment B : 'b' | 'B' ;
fragment C : 'c' | 'C' ;
fragment D : 'd' | 'D' ;
fragment E : 'e' | 'E' ;
fragment F : 'f' | 'F' ;
fragment G : 'g' | 'G' ;
fragment H : 'h' | 'H' ;
fragment I : 'i' | 'I' ;
fragment J : 'j' | 'J' ;
fragment K : 'k' | 'K' ;
fragment L : 'l' | 'L' ;
fragment M : 'm' | 'M' ;
fragment N : 'n' | 'N' ;
fragment O : 'o' | 'O' ;
fragment P : 'p' | 'P' ;
fragment Q : 'q' | 'Q' ;
fragment R : 'r' | 'R' ;
fragment S : 's' | 'S' ;
fragment T : 't' | 'T' ;
fragment U : 'u' | 'U' ;
fragment V : 'v' | 'V' ;
fragment W : 'w' | 'W' ;
fragment X : 'x' | 'X' ;
fragment Y : 'y' | 'Y' ;
fragment Z : 'z' | 'Z' ;

0 个答案:

没有答案