是否有一个使用antlr4从java源代码创建AST并提取方法,变量和注释的简单示例?

时间:2014-02-03 18:01:15

标签: java antlr

有人可以提供一个详细的例子,告诉我如何使用antlr4做到这一点?安装antlr4及其依赖项的说明将受到高度赞赏。

3 个答案:

答案 0 :(得分:26)

在这里。

首先,你要购买ANTLR4书籍; - )

其次,你需要下载antlr4 jar和java语法(http://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference)。

然后,您可以稍微修改语法,在语法文件的头部添加以下内容:

    (...)
grammar Java;

options 
{
    language = Java;
}

// starting point for parsing a java file
compilationUnit
    (...)

我会在语法中改变一点,只是为了说明一些东西。

// Original rule, kept commented out for comparison: the method name is a bare
// Identifier token here, so it has no rule (and no parse-tree context) of its own.
/*
methodDeclaration
    :   (type|'void') Identifier formalParameters ('[' ']')*
        ('throws' qualifiedNameList)?
        (   methodBody
        |   ';'
        )
    ;
*/
// Replacement rule: same shape as the original, except that the method name is
// matched through the dedicated myMethodName rule defined below.
methodDeclaration
    :   (type|'void') myMethodName formalParameters ('[' ']')*
        ('throws' qualifiedNameList)?
        (   methodBody
        |   ';'
        )
    ;

// Wrapper rule around Identifier, used only for method names. The MyVisitor
// example overrides visitMyMethodName(MyMethodNameContext) to print each one.
myMethodName
    :   Identifier
    ;

你看,原始语法无法让你把方法标识符与其他标识符区分开,所以我注释掉了原始块并添加了一个新块,只是为了向你展示如何获得你想要的东西。

您必须对要检索的其他元素执行相同的操作,例如目前被直接跳过的注释。这部分就留给你自己完成了 :-)

现在,创建一个这样的类来生成所有存根

package mypackage;

/**
 * Runs the ANTLR 4 tool on the Java grammar to generate the lexer, parser and
 * visitor classes into the {@code mypackage} package.
 */
public class Gen {

    /** Grammar file used when no path is passed on the command line. */
    private static final String DEFAULT_GRAMMAR =
            "/home/leoks/EclipseIndigo/workspace2/SO/src/mypackage/Java.g4";

    /**
     * Invokes the ANTLR tool with {@code -visitor} and {@code -package mypackage}.
     *
     * @param args optional; {@code args[0]} is the path of the {@code .g4} grammar
     *             file (defaults to {@link #DEFAULT_GRAMMAR})
     */
    public static void main(String[] args) {
        // Allow the grammar location to be overridden so the class is usable
        // outside the original author's workspace.
        String grammarPath = args.length > 0 ? args[0] : DEFAULT_GRAMMAR;
        String[] toolArgs = { "-visitor", grammarPath, "-package", "mypackage" };
        org.antlr.v4.Tool.main(toolArgs);
    }

}

运行Gen,它会在mypackage中为你生成一些java代码。

现在创建一个访问者。在此示例中,这个访问者实际上会解析它自己的源文件

package mypackage;

import java.io.FileInputStream;
import java.io.IOException;

import mypackage.JavaParser.MyMethodNameContext;

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;

/**
 * @author Leonardo Kenji Feb 4, 2014
 */
/**
 * Prints the name of every method found in a Java source file.
 *
 * <p>Relies on the {@code myMethodName} rule of the grammar: each time the visitor
 * reaches that rule, {@link #visitMyMethodName} prints the matched text.
 */
public class MyVisitor extends JavaBaseVisitor<Void> {

    /** File parsed when no path is passed on the command line: this class's own source. */
    private static final String DEFAULT_SOURCE =
            "/home/leoks/EclipseIndigo/workspace2/SO/src/mypackage/MyVisitor.java";

    /**
     * Parses a Java source file and prints the names of its methods.
     *
     * @param args optional; {@code args[0]} is the path of the file to parse
     *             (defaults to {@link #DEFAULT_SOURCE})
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String sourcePath = args.length > 0 ? args[0] : DEFAULT_SOURCE;

        ParseTree tree;
        // The stream is opened here, so it is closed here as well instead of
        // leaving that to whoever consumes it.
        try (FileInputStream source = new FileInputStream(sourcePath)) {
            ANTLRInputStream input = new ANTLRInputStream(source);
            JavaLexer lexer = new JavaLexer(input);
            CommonTokenStream tokens = new CommonTokenStream(lexer);
            JavaParser parser = new JavaParser(tokens);
            // compilationUnit is the grammar's starting point for a whole Java file.
            tree = parser.compilationUnit();
        }

        // JavaBaseVisitor<Void> walks the tree; only the overridden methods do any work.
        MyVisitor visitor = new MyVisitor();
        visitor.visit(tree);
    }

    /**
     * some attribute comment
     */
    private String  someAttribute;

    /**
     * Prints the text matched by the {@code myMethodName} rule, then continues
     * with the default traversal.
     *
     * @param ctx parse-tree node for the method name
     * @return the result of the default visit
     */
    @Override
    public Void visitMyMethodName(MyMethodNameContext ctx) {
        System.out.println("Method name:" + ctx.getText());
        return super.visitMyMethodName(ctx);
    }

}

就是这样。

你会得到类似下面这样的输出:
Method name:main
Method name:visitMyMethodName

PS。还有一件事。当我在eclipse中编写这段代码时,遇到了一个奇怪的异常。这是由Java 7引起的,只需将下面这些参数添加到编译器即可修复(感谢此链接 http://java.dzone.com/articles/javalangverifyerror-expecting )

enter image description here

答案 1 :(得分:2)

// Grammar for boolean "criteria" expressions. The rules below carry embedded
// Java actions that compute each rule's value while the input is being parsed.
grammar Criteria;

// Added to the generated parser's imports; Pattern is used by the action in
// rlike_expression.
@parser::header {
  import java.util.regex.Pattern;
}

// The generated parser extends ReferenceResolvingParser, which is not shown here.
// Presumably it supplies setObject(...) and resolveReferenceValue(...), both of
// which are called from this grammar - TODO confirm against that class.
options
{
  superClass = ReferenceResolvingParser;
}

// Extra constructor injected into the generated CriteriaParser: builds the parser
// from the token stream, then passes the given object to setObject(...).
@parser::members {

  public CriteriaParser(TokenStream input, Object object) {
    this(input);
    setObject(object);
  }

}

/* Grammar rules */

// A reference is the literal "$." followed by an IDENTIFIER. Its value is whatever
// resolveReferenceValue(...) returns for the identifier text.
reference returns [String value]
          : '$.' IDENTIFIER { $value = resolveReferenceValue($IDENTIFIER.text); }
          ;

// An operand always yields a String: the raw token text for TRUE, FALSE and
// DECIMAL, the text between the first and last character of a QUOTED_LITERAL
// (the surrounding quotes are dropped; escape pairs are left as written), or the
// value of a reference.
operand returns [String value]
        : TRUE { $value = $TRUE.text; }
        | FALSE { $value = $FALSE.text; }
        | DECIMAL { $value = $DECIMAL.text; }
        | QUOTED_LITERAL  { $value = $QUOTED_LITERAL.text.substring(1, $QUOTED_LITERAL.text.length() - 1); }
        | reference { $value = $reference.value; }
        ;

// A bracketed, comma-separated list of one or more operands. The operand values
// are appended to a fresh ArrayList in the order they appear.
operand_list returns [List value]
             @init{ $value = new ArrayList(); }
             : LBPAREN o=operand { $value.add($o.value); } (',' o=operand { $value.add($o.value); })* RBPAREN
             ;

// Binary comparison of two operands. Both sides are Strings (see operand), so
// <> and = use String.equals and the ordering operators use String.compareTo.
// NOTE(review): compareTo on Strings is lexicographic, so numeric operands are
// not compared by magnitude (e.g. "10" sorts before "9") - confirm this is intended.
comparison_expression returns [boolean value]
                      : lhs=operand NEQ rhs=operand { $value = !$lhs.value.equals($rhs.value); }
                      | lhs=operand EQ rhs=operand { $value = $lhs.value.equals($rhs.value); }
                      | lhs=operand GT rhs=operand { $value = $lhs.value.compareTo($rhs.value) > 0; }
                      | lhs=operand GE rhs=operand { $value = $lhs.value.compareTo($rhs.value) >= 0; }
                      | lhs=operand LT rhs=operand { $value = $lhs.value.compareTo($rhs.value) < 0; }
                      | lhs=operand LE rhs=operand { $value = $lhs.value.compareTo($rhs.value) <= 0; }
                      ;

// Membership test: true when the operand list contains the left operand's value.
in_expression returns [boolean value]
              : lhs=operand IN rhs=operand_list { $value = $rhs.value.contains($lhs.value); };

// Regular-expression test. The pattern is the QUOTED_LITERAL without its
// surrounding quotes; Matcher.matches() requires the whole left value to match.
rlike_expression returns [boolean value]
                 : lhs=operand RLIKE rhs=QUOTED_LITERAL { $value = Pattern.compile($rhs.text.substring(1, $rhs.text.length() - 1)).matcher($lhs.value).matches(); }
                 ;

// A single test: a comparison, an "in" test or an "rlike" test.
logical_expression returns [boolean value]
                   : c=comparison_expression { $value = $c.value; }
                   | i=in_expression { $value = $i.value; }
                   | l=rlike_expression { $value = $l.value; }
                   ;

// One test optionally followed by OR/AND and another chained_expression.
// The rule is right-recursive and gives AND and OR no relative precedence:
// "a and b or c" is evaluated as a & (b | c).
// NOTE(review): both alternatives start with logical_expression and their tails
// are optional, so a lone logical_expression matches either one - confirm the
// resulting ambiguity is acceptable.
chained_expression returns [boolean value]
                   : e=logical_expression { $value = $e.value; } (OR  c=chained_expression { $value |= $c.value; })?
                   | e=logical_expression { $value = $e.value; } (AND c=chained_expression { $value &= $c.value; })?
                   ;

// A chained_expression wrapped in parentheses.
// NOTE(review): the body is a chained_expression, not an expression, so a
// parenthesised group cannot itself contain another group - confirm intended.
grouped_expression returns [boolean value]
                   : LCPAREN c=chained_expression { $value = $c.value; } RCPAREN ;

// A chained or grouped expression, optionally combined with a following
// expression through OR / AND (right-recursive, same shape as chained_expression).
expression returns [boolean value]
           : c=chained_expression { $value = $c.value; } (OR  e=expression { $value |= $e.value; })?
           | c=chained_expression { $value = $c.value; } (AND e=expression { $value &= $e.value; })?
           | g=grouped_expression { $value = $g.value; } (OR  e=expression { $value |= $e.value; })?
           | g=grouped_expression { $value = $g.value; } (AND e=expression { $value &= $e.value; })?
           ;

// Entry rule; its value is the result of the whole expression.
// NOTE(review): the rule does not end with EOF, so input left over after a valid
// expression may go unreported - TODO confirm and consider appending EOF.
criteria returns [boolean value]
         : e=expression { $value = $e.value; }
         ;


/* Lexical rules */

// Keyword tokens. They are declared before IDENTIFIER; when two lexer rules match
// the same text, ANTLR uses the one declared first, so these win over IDENTIFIER.
AND : 'and' ;
OR  : 'or' ;

TRUE  : 'true' ;
FALSE : 'false' ;

// Comparison and membership operators.
EQ    : '=' ;
NEQ   : '<>' ;
GT    : '>' ;
GE    : '>=' ;
LT    : '<' ;
LE    : '<=' ;
IN    : 'in' ;
RLIKE : 'rlike' ;

// "C" parens are round brackets (grouping); "B" parens are square brackets (lists).
LCPAREN : '(' ;
RCPAREN : ')' ;
LBPAREN : '[' ;
RBPAREN : ']' ;

// Optional minus sign, one or more digits, optional fractional part.
DECIMAL : '-'?[0-9]+('.'[0-9]+)? ;

// Starts with a letter or underscore; later characters may also be digits or dots.
IDENTIFIER : [a-zA-Z_][a-zA-Z_.0-9]* ;

// Single-quoted text. Inside the quotes the pairs \\ , '' and \' are accepted as
// units; any other character except a single quote is accepted as is.
QUOTED_LITERAL :
                 (  '\''
                    ( ('\\' '\\') | ('\'' '\'') | ('\\' '\'') | ~('\'') )*
                 '\''  )
                ;

// Whitespace (space, CR, tab, form feed, LF) is discarded.
WS : [ \r\t\u000C\n]+ -> skip ;



/**
 * Evaluates a criteria expression with the generated {@code CriteriaLexer} and
 * {@code CriteriaParser}, turning syntax errors into exceptions.
 */
public class CriteriaEvaluator extends CriteriaBaseListener
{

    /**
     * Error listener that remembers the first syntax error reported by the lexer
     * or the parser.
     */
    static class CriteriaEvaluatorErrorListener extends BaseErrorListener
    {

        /** Description of the first syntax error seen, or empty if there was none. */
        Optional<String> error = Optional.empty();

        @Override
        public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
            // Keep only the first error: anything reported afterwards is usually a
            // consequence of error recovery and would hide the original cause.
            if (!error.isPresent())
            {
                // charPositionInLine is zero-based; report a one-based column.
                error = Optional.of(String.format("Failed to parse at line %d:%d due to %s", line, charPositionInLine + 1, msg));
            }
        }

    }

    /**
     * Parses and evaluates a criteria expression.
     *
     * @param input    the criteria expression text
     * @param argument object handed to the parser's two-argument constructor
     * @return the boolean value computed by the {@code criteria} rule
     * @throws IllegalArgumentException if the lexer or parser reports a syntax error
     */
    public static boolean evaluate(String input, Object argument)
    {
        CriteriaEvaluatorErrorListener errorListener = new CriteriaEvaluatorErrorListener();

        // Replace the default listeners so errors are captured by errorListener
        // instead of going through the default ones.
        CriteriaLexer lexer = new CriteriaLexer(new ANTLRInputStream(input));
        lexer.removeErrorListeners();
        lexer.addErrorListener(errorListener);

        CriteriaParser parser = new CriteriaParser(new CommonTokenStream(lexer), argument);
        parser.removeErrorListeners();
        parser.addErrorListener(errorListener);

        CriteriaParser.CriteriaContext criteriaCtx = parser.criteria();
        if (errorListener.error.isPresent())
        {
            throw new IllegalArgumentException(errorListener.error.get());
        }
        return criteriaCtx.value;
    }

}

答案 2 :(得分:-2)

这是一个详细的示例(借自 https://github.com/satnam-sandhu/ASTGenerator ),我做了一些更改以获取行号。

helloworld.java

public class HelloWorld { // sample input for JavaAstGeneratorDOT; comments are kept on existing lines so the line numbers stay the same
public static void main(String[] args) { // entry point
    System.out.println("Hello, World"); // writes the greeting to standard output
    }
}

JavaAstGeneratorDOT.java

import antlr.Java8Lexer;
import antlr.Java8Parser;

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.misc.Interval;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;


/**
 * Parses a Java source file with the ANTLR Java8 grammar and writes the resulting
 * tree as a Graphviz DOT file named {@code ast_<basename>.dot}.
 *
 * <p>{@link #generateAST} collects the nodes into four parallel lists (the same
 * index in each list describes the same node); {@link #printDOT} then emits them.
 */
public class JavaAstGeneratorDOT {

    /** Depth of each recorded node in the collapsed tree (despite the name, not a line number). */
    static ArrayList<String> LineNum = new ArrayList<String>();
    /** Grammar rule name of each recorded node. */
    static ArrayList<String> Type = new ArrayList<String>();
    /** Source text covered by each recorded node. */
    static ArrayList<String> Content = new ArrayList<String>();
    /** Source line of the first token of each recorded node. */
    static ArrayList<String> RawLineNum = new ArrayList<String>();

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param pathname path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be read
     */
    private static String readFile(String pathname) throws IOException {
        File file = new File(pathname);
        byte[] encoded = Files.readAllBytes(file.toPath());
        return new String(encoded, Charset.forName("UTF-8"));
    }

    /**
     * Parses {@code helloworld.java} and writes {@code ast_helloworld.dot}.
     *
     * @param args unused
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String path = "helloworld.java";
        String inputString = readFile(path);
        ANTLRInputStream input = new ANTLRInputStream(inputString);
        Java8Lexer lexer = new Java8Lexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        Java8Parser parser = new Java8Parser(tokens);
        // Other entry rules can be used to parse fragments instead of a whole file,
        // e.g. parser.statementExpressionList() or parser.methodDeclaration().
        ParserRuleContext ctx = parser.compilationUnit();

        generateAST(ctx, false, 0, tokens);

        String filename = baseName(path);
        String save_dot_filename = String.format("ast_%s.dot", filename);
        // try-with-resources closes (and flushes) the file even if writing fails;
        // the charset is fixed so the output does not depend on the platform default.
        try (PrintWriter writer = new PrintWriter(save_dot_filename, "UTF-8")) {
            writer.println(String.format("digraph %s {", filename));
            printDOT(writer);
            writer.println("}");
        }
    }

    /**
     * Returns the file name of {@code path} without directories and without its
     * extension. Both {@code /} and {@code \} are treated as separators, and a
     * name with no extension is returned unchanged.
     */
    private static String baseName(String path) {
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        String name = path.substring(lastSeparator + 1);
        int dot = name.lastIndexOf('.');
        // dot == 0 is a name such as ".hidden"; keep it whole rather than return "".
        return dot > 0 ? name.substring(0, dot) : name;
    }

    /**
     * Walks the parse tree in pre-order and records one entry per kept node.
     *
     * <p>Unless {@code verbose} is set, a node whose only child is itself a
     * {@code ParserRuleContext} is not recorded, and its child takes its depth.
     *
     * @param ctx         node to visit
     * @param verbose     when {@code true}, record every rule node
     * @param indentation depth to record for this node
     * @param tokens      token stream of the parse, used to look up line numbers
     */
    private static void generateAST(RuleContext ctx, boolean verbose, int indentation, CommonTokenStream tokens) {
        boolean toBeIgnored = !verbose && ctx.getChildCount() == 1 && ctx.getChild(0) instanceof ParserRuleContext;
        if (!toBeIgnored) {
            String ruleName = Java8Parser.ruleNames[ctx.getRuleIndex()];
            LineNum.add(Integer.toString(indentation));
            Type.add(ruleName);
            Content.add(ctx.getText());

            // Line number of the node's first token (added by tsmc.sumihui, 20190425).
            Interval sourceInterval = ctx.getSourceInterval();
            Token firstToken = tokens.get(sourceInterval.a);
            RawLineNum.add(Integer.toString(firstToken.getLine()));
        }
        int childDepth = indentation + (toBeIgnored ? 0 : 1);
        for (int i = 0; i < ctx.getChildCount(); i++) {
            ParseTree element = ctx.getChild(i);
            // Only rule nodes are followed; other children are not recorded.
            if (element instanceof RuleContext) {
                generateAST((RuleContext) element, verbose, childDepth, tokens);
            }
        }
    }

    /**
     * Writes the node declarations followed by one {@code parent->child} edge for
     * every recorded node except the first one.
     *
     * <p>A node id is the node's depth immediately followed by its index in the lists.
     * NOTE(review): because the two numbers are concatenated without a separator,
     * different (depth, index) pairs could in principle produce the same id in a
     * large tree - confirm whether that matters before relying on the ids.
     */
    private static void printDOT(PrintWriter writer) {
        printLabel(writer);
        for (int i = 1; i < LineNum.size(); i++) {
            int parentDepth = Integer.parseInt(LineNum.get(i)) - 1;
            int parentIndex = getPos(parentDepth, i);
            // int + String concatenates, giving the parent's "<depth><index>" id.
            writer.println(parentDepth + Integer.toString(parentIndex) + "->" + LineNum.get(i) + i);
        }
    }

    /**
     * Writes one declaration per recorded node, labelled with its rule name and
     * carrying its source line in a {@code linenum} attribute. The node's source
     * text ({@code Content}) is collected but not written.
     */
    private static void printLabel(PrintWriter writer) {
        for (int i = 0; i < LineNum.size(); i++) {
            writer.println(LineNum.get(i) + i + "[label=\"" + Type.get(i) + "\", linenum=\"" + RawLineNum.get(i) + "\"]");
        }
    }

    /**
     * Finds the parent of the node at index {@code limit}: the last node recorded
     * before it whose depth equals {@code n}.
     *
     * @param n     depth to look for
     * @param limit index (exclusive) up to which nodes are examined
     * @return index of the last matching node, or 0 if none matches
     */
    private static int getPos(int n, int limit) {
        int pos = 0;
        for (int i = 0; i < limit; i++) {
            if (Integer.parseInt(LineNum.get(i)) == n) {
                pos = i;
            }
        }
        return pos;
    }
}

结果是这样的( ast_helloworld.dot ):

digraph helloworld {
00[label="compilationUnit", linenum="1"]
11[label="normalClassDeclaration", linenum="1"]
22[label="classModifier", linenum="1"]
23[label="classBody", linenum="1"]
34[label="methodDeclaration", linenum="2"]
45[label="methodModifier", linenum="2"]
46[label="methodModifier", linenum="2"]
47[label="methodHeader", linenum="2"]
58[label="result", linenum="2"]
59[label="methodDeclarator", linenum="2"]
610[label="formalParameter", linenum="2"]
711[label="unannArrayType", linenum="2"]
812[label="unannClassType_lfno_unannClassOrInterfaceType", linenum="2"]
813[label="dims", linenum="2"]
714[label="variableDeclaratorId", linenum="2"]
415[label="block", linenum="2"]
516[label="expressionStatement", linenum="3"]
617[label="methodInvocation", linenum="3"]
718[label="typeName", linenum="3"]
819[label="packageOrTypeName", linenum="3"]
720[label="literal", linenum="3"]
00->11
11->22
11->23
23->34
34->45
34->46
34->47
47->58
47->59
59->610
610->711
711->812
711->813
610->714
34->415
415->516
516->617
617->718
718->819
617->720
}