antlr4 / java:将解析树美化打印(pretty print)到stdout

时间:2018-04-27 14:04:13

标签: java antlr antlr4 pretty-print

初学者提问:如何将可读版本的解析树打印到stdout?

// Wire up the pipeline: input file -> lexer -> token stream -> parser.
MyLexer lexer = new MyLexer(CharStreams.fromFileName("testdata/test.txt"));
MyParser parser = new MyParser(new CommonTokenStream(lexer));
parser.setBuildParseTree(true);

// Parse starting at the `record` rule, then print the tree in the
// parenthesised form produced by toStringTree.
RuleContext tree = parser.record();
String rendered = tree.toStringTree(parser);
System.out.println(rendered);

这将整个树打印在由括号'()'分隔的单行上。

(record (husband <4601>   (name KOHAI   Nikolaus) \n (birth *   um.1872   (place Ploschitz)) \n\n) (wife      (marriage oo) \n      (name SCHLOTTHAUER   Maria) \n      (birth *   um.1877  
...

我想有类似的东西

record 
  husband
    <id>
    name
       <name>
...
  wife

4 个答案:

答案 0 :(得分:4)

从SnippetsTest中提取为独立的实用程序类:

import java.util.List;

import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;

public class TreeUtils {

    /** Platform dependent end-of-line marker */
    public static final String Eol = System.lineSeparator();
    /** The literal indent char(s) used for pretty-printing */
    public static final String Indents = "  ";

    private TreeUtils() {}

    /**
     * Pretty prints a whole tree, one nesting level per indent step.
     * {@link Trees#getNodeText(Tree, List)} is used on the node payloads to get the text
     * for the nodes. (Derived from Trees.toStringTree(....))
     *
     * <p>The current depth is passed down the recursion as an argument rather than kept in a
     * static field, so this class holds no mutable state between or during calls.
     *
     * @param t         root of the (sub)tree to print
     * @param ruleNames rule names handed to {@code Trees.getNodeText} to label the nodes
     * @return the indented, multi-line rendering of the tree
     */
    public static String toPrettyTree(final Tree t, final List<String> ruleNames) {
        return process(t, ruleNames, 0)
                // Blank out runs that consist only of whitespace ...
                .replaceAll("(?m)^\\s+$", "")
                // ... then collapse each pair of consecutive line breaks into a single one.
                .replaceAll("\\r?\\n\\r?\\n", Eol);
    }

    /**
     * Renders {@code t} and all of its descendants.
     *
     * @param t         node to render
     * @param ruleNames rule names handed to {@code Trees.getNodeText}
     * @param depth     nesting depth of {@code t}; the root is at depth 0
     * @return the text for this subtree
     */
    private static String process(final Tree t, final List<String> ruleNames, final int depth) {
        final String text = Utils.escapeWhitespace(Trees.getNodeText(t, ruleNames), false);
        if (t.getChildCount() == 0) {
            // Childless nodes are emitted inline, without a line break or indent of their own.
            return text;
        }

        StringBuilder sb = new StringBuilder();
        sb.append(lead(depth));
        sb.append(text).append(' ');
        for (int i = 0; i < t.getChildCount(); i++) {
            sb.append(process(t.getChild(i), ruleNames, depth + 1));
        }
        // Return to this node's own indent so that following siblings line up with it.
        sb.append(lead(depth));
        return sb.toString();
    }

    /**
     * Builds the line lead for the given depth: a line break followed by {@code depth}
     * copies of {@link #Indents}, or the empty string for depth 0.
     */
    private static String lead(final int depth) {
        StringBuilder sb = new StringBuilder();
        if (depth > 0) {
            sb.append(Eol);
            for (int cnt = 0; cnt < depth; cnt++) {
                sb.append(Indents);
            }
        }
        return sb.toString();
    }
}

答案 1 :(得分:2)

除了图形解析树之外,我的 ANTLR4 extension for Visual Studio Code 还会生成格式化的文本解析树:

enter image description here

答案 2 :(得分:1)

如果您希望正则表达式只用于它真正适合的用途,那么始终可以自己动手打印树:

import java.util.Arrays;
import java.util.List;

import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;

/**
 * Renders the parse tree rooted at {@code root} as indented text, one node per line.
 *
 * @param parser parser whose rule names are used to label the nodes
 * @param root   root of the tree to render
 * @return the multi-line text form of the tree
 */
public static String printSyntaxTree(Parser parser, ParseTree root) {
    StringBuilder out = new StringBuilder();
    List<String> names = Arrays.asList(parser.getRuleNames());
    recursive(root, out, 0, names);
    return out.toString();
}

/**
 * Appends one line for {@code aRoot} to {@code buf}, indented by two spaces per level of
 * {@code offset}, then does the same for each child one level deeper.
 *
 * @param aRoot     node to print
 * @param buf       buffer that receives the output
 * @param offset    nesting level of {@code aRoot}
 * @param ruleNames rule names handed to {@code Trees.getNodeText} to label the node
 */
private static void recursive(ParseTree aRoot, StringBuilder buf, int offset, List<String> ruleNames) {
    for (int remaining = offset; remaining > 0; remaining--) {
        buf.append("  ");
    }
    buf.append(Trees.getNodeText(aRoot, ruleNames));
    buf.append("\n");

    // Only ParserRuleContext nodes are descended into; anything else ends the recursion here.
    if (!(aRoot instanceof ParserRuleContext)) {
        return;
    }
    List<ParseTree> kids = ((ParserRuleContext) aRoot).children;
    if (kids == null) {
        return;
    }
    final int childOffset = offset + 1;
    for (ParseTree kid : kids) {
        recursive(kid, buf, childOffset, ruleNames);
    }
}

用法:

// Parse from your grammar's entry rule, then print the indented tree.
ParseTree parseTree = parser.yourOwnRule();
String rendered = printSyntaxTree(parser, parseTree);
System.out.println(rendered);

答案 3 :(得分:1)

我想用自己的方式来实现,并利用这样一个事实:我已经在项目中使用了StringTemplate。这意味着我不必像其他答案那样手动处理缩进级别。它还使输出格式更易于自定义。

最重要的是,我发布此答案的主要原因是:我决定跳过打印那些仅起“传递(pass-through)”作用的规则,即在使用如下链式规则时

// Chain of rules: a, b and c each either hand off to the next rule or match an alternative.
a : b | something_else ;
b : c | another ;
c : d | yet_more ;
// d is the rule at the end of the chain that carries the content of interest.
d : rule that matters ;

因为在检查由小输入生成的树时,它们不提供任何有用的信息,却使我的输出变得混乱。这一行为也很容易在 //pass-through rules 注释所在的位置更改。

我还复制了Trees.getNodeText的定义,并将其修改为使用纯数组来消除不必要的包装,甚至让我自定义它。

最后,我让它接收解析器和树,并直接输出到System.out,因为这是我唯一需要的用法。

import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.antlr.v4.runtime.tree.Tree;
import org.stringtemplate.v4.ST;

/**
 * Pretty-dumps trees in short form: nodes with exactly one child are skipped and
 * replaced by that child, and the layout is produced by StringTemplate.
 */
public class TreeUtils {
    /** Creates a fresh template for a node that is printed together with its children. */
    private static ST template() {
        ST nodeTemplate = new ST("<rule_text>\n\t<child; separator=\"\n\">");
        return nodeTemplate;
    }

    /** Wraps plain text in a template so that it can be added as a child of another template. */
    private static ST literal(String text) {
        ST leaf = new ST("<text>");
        leaf.add("text", text);
        return leaf;
    }

    /**
     * Renders {@code tree} and prints the result to {@code System.out}.
     *
     * @param parser parser that supplies the rule names used as node labels
     * @param tree   root of the tree to dump
     */
    public static void dump(Parser parser, Tree tree) {
        ST rendered = process(parser.getRuleNames(), tree);
        System.out.println(rendered.render());
    }

    /**
     * Returns the label for a node: the rule name for rule contexts, {@code toString()} for
     * error nodes, the token text for terminals, and otherwise the text of the payload.
     */
    private static String getNodeText(Tree t, String[] ruleNames) {
        if (t instanceof RuleContext) {
            RuleContext ctx = ((RuleContext) t).getRuleContext();
            return ruleNames[ctx.getRuleIndex()];
        }
        // Checked before TerminalNode so that error nodes take this branch.
        if (t instanceof ErrorNode) {
            return t.toString();
        }
        if (t instanceof TerminalNode) {
            Token symbol = ((TerminalNode) t).getSymbol();
            if (symbol != null) {
                return symbol.getText();
            }
        }

        Object payload = t.getPayload();
        return (payload instanceof Token)
                ? ((Token) payload).getText()
                : payload.toString();
    }

    /** Converts {@code t} and its descendants into a (possibly nested) template. */
    private static ST process(String[] ruleNames, Tree t) {
        final int childCount = t.getChildCount();
        switch (childCount) {
            case 0:
                return literal(getNodeText(t, ruleNames));
            case 1:
                //pass-through rules
                return process(ruleNames, t.getChild(0));
            default:
                break;
        }

        ST node = template();
        node.add("rule_text", getNodeText(t, ruleNames));
        for (int idx = 0; idx < childCount; idx++) {
            node.add("child", process(ruleNames, t.getChild(idx)));
        }
        return node;
    }
}