ANTLR4中的动态运算符标记

时间:2015-04-27 11:27:51

标签: java parsing token antlr4

我试图在ANTLR4中制作一个可以使用几乎所有可能的符号作为数学运算符的计算器。
具体要求:
- 用户定义由运算符和优先级组成的运算。除了一些系统符号(括号、逗号……)之外,运算符可以是符号的任意组合。优先级是正整数。运算存储在 Java 的 HashMap 中 - 有三种不同的运算:左侧运算(前缀,如一元减号)、右侧运算(后缀,如阶乘)和二元运算(如加法)
- 应在运行时请求操作,以便在解析期间(de)激活操作。如果无法做到这一点,则应在创建解析器时请求运算符 - 对于优先级:优选完全动态优先级(在运行时请求遇到的操作的优先级),但如果不可能则应该有不同的优先级预设。 (乘法,加法......)

我得到了什么:
- 操作员识别的工作代码
- 优先级爬升代码,它能生成正确的解析树,但会报错:rule expr failed predicate: {getPrecedence($op) >= $_p}?

更新:修复了操作员识别码,找到了优先爬升机制的代码

tokens { PREOP, POSTOP, BINOP, ERROR }
@lexer::members {

    // Operator tables consulted by the static operator-matching helpers.
    // The contents are hard-coded sample data.
    private static List<String> binaryOperators;
    private static List<String> prefixOperators;
    private static List<String> postfixOperators;
    // Static initializer: the tables are static, so they are filled once when the
    // class is loaded. An instance initializer would leave them null until the first
    // lexer is constructed and would rebuild them for every further instance.
    static {
        binaryOperators = new ArrayList<String>();
        binaryOperators.add("+");
        binaryOperators.add("*");
        binaryOperators.add("-");
        binaryOperators.add("/");

        prefixOperators = new ArrayList<String>();
        prefixOperators.add("-");

        postfixOperators = new ArrayList<String>();
        postfixOperators.add("!");
    }

    private Deque<Token> deque = new LinkedList<Token>();
    private Token previousToken;
    private Token nextToken;

    /**
     * Returns the next token, turning runs of SYMBOL tokens into operator tokens.
     *
     * <p>Tokens already waiting in {@code deque} are served first. Otherwise a token is
     * pulled from the underlying lexer; a non-SYMBOL token is returned unchanged. The
     * text of a run of consecutive SYMBOL tokens is concatenated, the token that ends
     * the run is queued, and the text is split by {@code findOperatorCombination} into
     * operator tokens that are queued in front of it.
     *
     * <p>When the text cannot be split into registered operators, a single ERROR token
     * carrying the whole text is returned instead of failing with a
     * {@code NullPointerException}.
     *
     * @return the next token to hand to the parser
     */
    @Override
    public Token nextToken() {
        if (!deque.isEmpty()) {
            return previousToken = deque.pollFirst();
        }

        Token next = super.nextToken();
        if (next.getType() != SYMBOL) {
            return previousToken = next;
        }

        StringBuilder builder = new StringBuilder();
        while (next.getType() == SYMBOL) {
            builder.append(next.getText());
            next = super.nextToken();
        }
        // The token ending the run is queued for later and also serves as the
        // look-ahead used by getOperatorType().
        deque.addLast(nextToken = next);

        String symbols = builder.toString();
        List<Token> tokens = findOperatorCombination(symbols, getOperatorType());
        if (tokens == null) {
            // No valid operator sequence exists for this text: surface it to the
            // parser as one ERROR token so it is reported as a syntax error.
            return new CommonToken(MathParser.ERROR, symbols);
        }
        // Insert in reverse so the operators end up in source order at the head.
        for (int i = tokens.size() - 1; i >= 0; i--) {
            deque.addFirst(tokens.get(i));
        }
        return deque.pollFirst();
    }


    /**
     * Dispatches to the splitting routine that matches the expected operator kind.
     *
     * @param sequence concatenated symbol text
     * @param type     kind of operator sequence expected at this position
     * @return the operator tokens for {@code sequence}, or {@code null} if the
     *         selected routine finds no valid split
     */
    private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
        List<Token> combination;
        switch (type) {
        case BINARY:
            combination = getBinaryCombination(sequence);
            break;
        case PREFIX:
            combination = getPrefixCombination(sequence);
            break;
        case POSTFIX:
            combination = getPostfixCombination(sequence);
            break;
        default:
            combination = null;
            break;
        }
        return combination;
    }

    /**
     * Splits {@code sequence} into consecutive prefix operators.
     *
     * @param sequence concatenated symbol text
     * @return one PREOP token per operator in source order, or {@code null} when the
     *         text cannot be split into registered prefix operators
     */
    private static List<Token> getPrefixCombination(String sequence) {
        if (isPrefixOperator(sequence)) {
            List<Token> seq = new ArrayList<Token>(1);
            seq.add(new CommonToken(MathParser.PREOP, sequence));
            return seq;
        }
        if (sequence.length() <= 1) {
            return null;
        }

        // Try every split point; both halves must be splittable themselves.
        for (int i = 1; i < sequence.length(); i++) {
            List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
            if (seq1 == null) {
                continue; // head failed, so the tail does not need to be examined
            }
            List<Token> seq2 = getPrefixCombination(sequence.substring(i));
            if (seq2 != null) {
                seq1.addAll(seq2);
                return seq1;
            }
        }
        return null;
    }

    /**
     * Splits {@code sequence} into consecutive postfix operators.
     *
     * @param sequence concatenated symbol text
     * @return one POSTOP token per operator in source order, or {@code null} when no
     *         such split exists
     */
    private static List<Token> getPostfixCombination(String sequence) {
        if (!isPostfixOperator(sequence)) {
            final int length = sequence.length();
            if (length <= 1) {
                return null;
            }
            // Recursively look for a split point whose two halves are both valid.
            for (int split = 1; split < length; split++) {
                List<Token> head = getPostfixCombination(sequence.substring(0, split));
                List<Token> tail = getPostfixCombination(sequence.substring(split, length));
                if (head == null || tail == null) {
                    continue;
                }
                head.addAll(tail);
                return head;
            }
            return null;
        }

        // The whole text is a single registered postfix operator.
        List<Token> single = new ArrayList<Token>(1);
        single.add(0, new CommonToken(MathParser.POSTOP, sequence));
        return single;
    }


    /**
     * Splits {@code sequence} into optional postfix operators, exactly one binary
     * operator, and optional prefix operators, in that order.
     *
     * <p>Candidates are tried with the shortest postfix part first and, for each of
     * those, the shortest prefix part first; the first valid split is returned.
     *
     * @param sequence concatenated symbol text
     * @return the POSTOP, BINOP and PREOP tokens in source order, or {@code null} when
     *         no valid split exists
     */
    private static List<Token> getBinaryCombination(String sequence) {
        final int length = sequence.length();
        for (int i = 0; i < length; i++) { // i = leading characters used by postfix operators
            // The postfix part depends on i only, so it is computed once per outer step.
            String seqPost = sequence.substring(0, i);
            List<Token> post = getPostfixCombination(seqPost);
            if (post == null && !seqPost.isEmpty()) {
                continue;
            }

            for (int j = 0; j < length - i; j++) { // j = trailing characters used by prefix operators
                String seqPre = sequence.substring(length - j);
                List<Token> pre = getPrefixCombination(seqPre);
                if (pre == null && !seqPre.isEmpty()) {
                    continue;
                }

                String seqBin = sequence.substring(i, length - j);
                if (!isBinaryOperator(seqBin)) {
                    continue;
                }

                List<Token> res = new ArrayList<Token>();
                if (post != null) {
                    res.addAll(post);
                }
                res.add(new CommonToken(MathParser.BINOP, seqBin));
                if (pre != null) {
                    res.addAll(pre);
                }
                return res;
            }
        }
        return null;
    }


    /**
     * Returns the expected operator type based on the previous and next token.
     *
     * <p>A symbol run that does not follow a value is PREFIX. A run that follows a
     * value is BINARY when a value also comes next, and POSTFIX otherwise. When no
     * token has been returned yet (the input starts with symbols) the run is PREFIX.
     *
     * @return the kind of operator sequence expected at the current position
     */
    private OperatorType getOperatorType() {
        // previousToken is still null for a symbol run at the very start of the input.
        if (previousToken == null || !isValueEnd(previousToken.getType())) {
            return OperatorType.PREFIX;
        }
        return isValueStart(nextToken.getType()) ? OperatorType.BINARY : OperatorType.POSTFIX;
    }
    private enum OperatorType { BINARY, PREFIX, POSTFIX };


    /**
     * Tells whether a token of the given type can begin a value.
     * Only INT tokens count as value starts.
     *
     * @param tokenType token type to classify
     * @return {@code true} if {@code tokenType} is INT
     */
    private static boolean isValueStart(int tokenType) {
        return MathParser.INT == tokenType;
    }
    /**
     * Tells whether a token of the given type can end a value.
     * Only INT tokens count as value ends.
     *
     * @param tokenType token type to classify
     * @return {@code true} if {@code tokenType} is INT
     */
    private static boolean isValueEnd(int tokenType) {
        return MathParser.INT == tokenType;
    }

    /** Returns whether {@code operator} is listed in the binary operator table. */
    private static boolean isBinaryOperator(String operator) {
        return binaryOperators.indexOf(operator) >= 0;
    }

    /** Returns whether {@code operator} is listed in the prefix operator table. */
    private static boolean isPrefixOperator(String operator) {
        return prefixOperators.indexOf(operator) >= 0;
    }

    /** Returns whether {@code operator} is listed in the postfix operator table. */
    private static boolean isPostfixOperator(String operator) {
        return postfixOperators.indexOf(operator) >= 0;
    }
}

优先攀登代码:

@parser::members {
    /**
     * Precedence of each operator, keyed by operator text. Hard-coded sample data.
     * NOTE(review): "/" is a registered binary operator in the lexer but has no
     * entry here, and "^" has an entry but is not a registered operator - confirm.
     */
    static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
    static {
        precedenceMap.put("*", 2);
        precedenceMap.put("+", 1);
        precedenceMap.put("^", 4);
        precedenceMap.put("-", 3);
        precedenceMap.put("!", 5);
    }

    /**
     * Looks up the precedence registered for the text of {@code op}.
     *
     * @param op operator token
     * @return the registered precedence, never {@code null}
     * @throws IllegalArgumentException if no precedence is registered for the text;
     *         returning {@code null} here would only surface later as an anonymous
     *         NullPointerException when the value is unboxed
     */
    public static Integer getPrecedence(Token op) {
        Integer p = precedenceMap.get(op.getText());
        if (p == null) {
            throw new IllegalArgumentException("No precedence defined for operator: " + op.getText());
        }
        return p;
    }

    /**
     * Computes the minimum precedence passed to the expression parsed after {@code op}.
     * Prefix operators and "^" pass their own precedence on unchanged; other binary
     * operators and postfix operators pass their precedence plus one.
     *
     * @param op operator token of type PREOP, BINOP or POSTOP
     * @return the minimum precedence for the following expression
     * @throws IllegalArgumentException if the token is none of the handled kinds or
     *         has no registered precedence
     */
    public static Integer getNextPrecedence(Token op) {
        Integer p = getPrecedence(op);
        if (op.getType() == PREOP)          return p;
        else if (op.getText().equals("^"))  return p;
        else if (op.getType() == BINOP)     return p+1;
        else if (op.getType() == POSTOP)    return p+1;
        throw new IllegalArgumentException(op.getText());
    }
}

// Entry rule: one expression parsed with minimum precedence 0.
prog
    :   expr[0]
    ;


// Precedence climbing: after an operand, keep consuming operators whose precedence
// is at least the minimum _p handed in by the caller. The operand rule is "atom";
// the previous reference to "aexpr" named a rule that is not defined.
expr [int _p]
    :   atom
        (   {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)]
        |   {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
        )*
    ;

// Operand: an integer, a parenthesised expression, or a prefix operator applied to
// an expression.
atom
    :   INT
    |   '(' expr[0] ')'
    |   op=PREOP expr[getNextPrecedence($op)]
    ;

所以现在问题是如何解决这个谓词失败错误

4 个答案:

答案 0 :(得分:0)

您无法在运行时为Antlr定义优先级/关联性规则。但是,您可以将所有运算符(内置在语言中或用户定义的内容)解析为解析中的单个链表(如ArrayList<>),然后应用您自己的算法来确定优先级和关联性在访客中(或在语法操作中,如果你真的想)。

如果您多次迭代列表,算法本身就不那么难了。例如,您可以首先获取列表中每个运算符的优先级,然后检查具有最高优先级的运算符,查看其右侧或左侧关联,并从那里构建您的第一个(最底部)树节点。继续应用,直到列表为空,你已经构建了自己的“解析树”,但没有解析(你不再使用抽象输入字符串了。)

或者,在运行时通过外部调用让 ANTLR 编译 .g4 文件,再调用 javac 编译 ANTLR 生成的代码,然后使用反射来调用它。然而,这可能要慢得多,而且可能难以实现。

答案 1 :(得分:0)

可以根据Symbol优先级的某些运行时定义“正确”工作的解析器规则。虽然最初看起来不是一个惯用的选择,但是从解析器中推迟语义分析的标准替代方法会产生一个分辨率很低的解析树 - 这使得这是标准设计规则的合理例外。

在(过度简化的)表单中,解析器规则为:

// Single left-recursive expression rule. Each Symbol alternative is guarded by a
// semantic predicate (binary / postfix / prefix) evaluated on the matched token.
// NOTE(review): the predicate methods are not shown here; presumably they are
// supplied by the parser and look the symbol up at run time - confirm.
expr : LParen expr RParen                    # group
     | expr s=Symbol { binary($s) }? expr    # binary
     | expr s=Symbol { postfix($s) }?        # postfix
     | s=Symbol { prefix($s) }? expr         # prefix
     | Int+                                  # value
     ;

为了解决歧义,需要添加内联谓词(即上面规则中的 `{ binary($s) }?`、`{ postfix($s) }?` 和 `{ prefix($s) }?`)。

对于任何给定的Symbol,单个谓词方法应评估为true。

扩展到多个Symbol字符串会增加一些复杂性(例如,将二进制文件与后缀区分开来后跟一个前缀),但机制基本保持不变。

答案 2 :(得分:0)

我认为你的方法是正确的。我建议遵循语法:

grammar Op;

// The generated parser extends PrecedenceParser, so getPrecedence() used in the
// predicate below comes from that superclass.
options {
  superClass=PrecedenceParser;
}

// Entry rule: one expression parsed with minimum precedence 0.
prog :  expr[0] ;

// _p is the minimum precedence an operator must have to be consumed here. The
// action stores the look-ahead token in $op, the predicate compares its precedence
// with _p, and the right operand is parsed with that operator's precedence as the
// new minimum.
expr[int _p] locals[Token op]:  INT ({$op = _input.LT(1);} {getPrecedence($op) >= $_p}? OP expr[getPrecedence($op)])*;

INT :   ( '0'..'9' )+ ;

OP : '+' | '*'; // all allowed symbols, should be extended

WS  : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines

op的规则应包含所有允许的运算符符号。我对+*的限制只是为了简单起见。解析器超类将是:

/**
 * Parser base class holding operator precedences that are registered at run time.
 */
public abstract class PrecedenceParser extends Parser {

    /** Operator text mapped to its precedence. */
    private final Map<String, Integer> precedences = new HashMap<>();

    /**
     * Creates the parser on the given token stream with an empty precedence table.
     *
     * @param input token stream handed to the {@code Parser} constructor
     */
    public PrecedenceParser(TokenStream input) {
        super(input);
    }

    /**
     * Registers (or replaces) the precedence of an operator.
     *
     * @param op operator text
     * @param p  precedence to associate with it
     * @return this parser, so calls can be chained
     */
    public PrecedenceParser putOperator(String op, int p) {
        precedences.put(op, p);
        return this;
    }

    /**
     * Returns the precedence registered for the text of the given token.
     *
     * @param operator token whose text is looked up
     * @return the registered precedence, or {@code Integer.MAX_VALUE} when the text
     *         has no entry
     */
    public int getPrecedence(Token operator) {
        Integer known = precedences.get(operator.getText());
        return known != null ? known : Integer.MAX_VALUE;
    }

}

结果:

优先级为{+ : 4, * : 3 }

(prog (expr 1 + (expr 2) * (expr 3 + (expr 4))))

优先级为{+ : 3, * : 4 }

(prog (expr 1 + (expr 2 * (expr 3) + (expr 4))))

从左到右评估这些序列相当于优先评估它们。

此方法适用于较大的运营商集合。 ANTLR4在内部使用此方法进行优先攀爬,但ANTLR使用常量而不是优先级映射(因为它假定优先级在解析器构建时确定)。

答案 3 :(得分:0)

感谢其他贡献者,我找到了一个完整的(实际上相当干净)解决方案来解决我的问题。

操作员匹配:
通过查看遇到的一系列符号之前和之后的标记,可以检测操作符的固定性。之后,应用一种算法,该算法检测符号系列中的有效运算符序列。然后在令牌流中注入这些令牌(在nextToken()中)。 只需确保在SYMBOL定义之前定义所有硬编码令牌。

优先攀登:
实际上这并不是那么难,它与ANTLR4的内部策略完全相同。

grammar Math;


tokens { PREOP, POSTOP, BINOP, ERROR }

@header {
    import java.util.*;
}

@lexer::members {

    // Operator tables consulted by the static operator-matching helpers.
    // The contents are hard-coded sample data.
    private static List<String> binaryOperators;
    private static List<String> prefixOperators;
    private static List<String> postfixOperators;
    // Static initializer: the tables are static, so they are filled (and the debug
    // output below is printed) once when the class is loaded. An instance
    // initializer would leave them null until the first lexer is constructed and
    // would rebuild them for every further instance.
    static {
        binaryOperators = new ArrayList<String>();
        binaryOperators.add("+");
        binaryOperators.add("*");
        binaryOperators.add("-");
        binaryOperators.add("/");
        System.out.println(binaryOperators);

        prefixOperators = new ArrayList<String>();
        prefixOperators.add("-");
        System.out.println(prefixOperators);

        postfixOperators = new ArrayList<String>();
        postfixOperators.add("!");
        System.out.println(postfixOperators);
    }

    private Deque<Token> deque = new LinkedList<Token>();

    private Token previousToken;
    private Token nextToken;

    /**
     * Returns the next token, turning runs of SYMBOL tokens into operator tokens.
     *
     * <p>Tokens already waiting in {@code deque} are served first. Otherwise a token is
     * pulled from the underlying lexer; a non-SYMBOL token is returned unchanged. The
     * text of a run of consecutive SYMBOL tokens is concatenated, the token that ends
     * the run is queued, and the text is split by {@code findOperatorCombination} into
     * operator tokens that are queued in front of it.
     *
     * <p>When the text cannot be split into registered operators, a single ERROR token
     * carrying the whole text is returned instead of failing with a
     * {@code NullPointerException}.
     *
     * @return the next token to hand to the parser
     */
    @Override
    public Token nextToken() {
        if (!deque.isEmpty()) {
            return previousToken = deque.pollFirst();
        }

        Token next = super.nextToken();
        if (next.getType() != SYMBOL) {
            return previousToken = next;
        }

        StringBuilder builder = new StringBuilder();
        while (next.getType() == SYMBOL) {
            builder.append(next.getText());
            next = super.nextToken();
        }
        // The token ending the run is queued for later and also serves as the
        // look-ahead used by getOperatorType().
        deque.addLast(nextToken = next);

        String symbols = builder.toString();
        List<Token> tokens = findOperatorCombination(symbols, getOperatorType());
        if (tokens == null) {
            // No valid operator sequence exists for this text: surface it to the
            // parser as one ERROR token so it is reported as a syntax error.
            return new CommonToken(MathParser.ERROR, symbols);
        }
        // Insert in reverse so the operators end up in source order at the head.
        for (int i = tokens.size() - 1; i >= 0; i--) {
            deque.addFirst(tokens.get(i));
        }
        return deque.pollFirst();
    }


    /**
     * Dispatches to the splitting routine that matches the expected operator kind.
     *
     * @param sequence concatenated symbol text
     * @param type     kind of operator sequence expected at this position
     * @return the operator tokens for {@code sequence}, or {@code null} if the
     *         selected routine finds no valid split
     */
    private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
        List<Token> combination;
        switch (type) {
        case BINARY:
            combination = getBinaryCombination(sequence);
            break;
        case PREFIX:
            combination = getPrefixCombination(sequence);
            break;
        case POSTFIX:
            combination = getPostfixCombination(sequence);
            break;
        default:
            combination = null;
            break;
        }
        return combination;
    }

    /**
     * Splits {@code sequence} into consecutive prefix operators.
     *
     * @param sequence concatenated symbol text
     * @return one PREOP token per operator in source order, or {@code null} when the
     *         text cannot be split into registered prefix operators
     */
    private static List<Token> getPrefixCombination(String sequence) {
        if (isPrefixOperator(sequence)) {
            List<Token> seq = new ArrayList<Token>(1);
            seq.add(new CommonToken(MathParser.PREOP, sequence));
            return seq;
        }
        if (sequence.length() <= 1) {
            return null;
        }

        // Try every split point; both halves must be splittable themselves.
        for (int i = 1; i < sequence.length(); i++) {
            List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
            if (seq1 == null) {
                continue; // head failed, so the tail does not need to be examined
            }
            List<Token> seq2 = getPrefixCombination(sequence.substring(i));
            if (seq2 != null) {
                seq1.addAll(seq2);
                return seq1;
            }
        }
        return null;
    }

    /**
     * Splits {@code sequence} into consecutive postfix operators.
     *
     * @param sequence concatenated symbol text
     * @return one POSTOP token per operator in source order, or {@code null} when no
     *         such split exists
     */
    private static List<Token> getPostfixCombination(String sequence) {
        if (!isPostfixOperator(sequence)) {
            final int length = sequence.length();
            if (length <= 1) {
                return null;
            }
            // Recursively look for a split point whose two halves are both valid.
            for (int split = 1; split < length; split++) {
                List<Token> head = getPostfixCombination(sequence.substring(0, split));
                List<Token> tail = getPostfixCombination(sequence.substring(split, length));
                if (head == null || tail == null) {
                    continue;
                }
                head.addAll(tail);
                return head;
            }
            return null;
        }

        // The whole text is a single registered postfix operator.
        List<Token> single = new ArrayList<Token>(1);
        single.add(0, new CommonToken(MathParser.POSTOP, sequence));
        return single;
    }


    /**
     * Splits {@code sequence} into optional postfix operators, exactly one binary
     * operator, and optional prefix operators, in that order.
     *
     * <p>Candidates are tried with the shortest postfix part first and, for each of
     * those, the shortest prefix part first; the first valid split is returned.
     *
     * @param sequence concatenated symbol text
     * @return the POSTOP, BINOP and PREOP tokens in source order, or {@code null} when
     *         no valid split exists
     */
    private static List<Token> getBinaryCombination(String sequence) {
        final int length = sequence.length();
        for (int i = 0; i < length; i++) { // i = leading characters used by postfix operators
            // The postfix part depends on i only, so it is computed once per outer step.
            String seqPost = sequence.substring(0, i);
            List<Token> post = getPostfixCombination(seqPost);
            if (post == null && !seqPost.isEmpty()) {
                continue;
            }

            for (int j = 0; j < length - i; j++) { // j = trailing characters used by prefix operators
                String seqPre = sequence.substring(length - j);
                List<Token> pre = getPrefixCombination(seqPre);
                if (pre == null && !seqPre.isEmpty()) {
                    continue;
                }

                String seqBin = sequence.substring(i, length - j);
                if (!isBinaryOperator(seqBin)) {
                    continue;
                }

                List<Token> res = new ArrayList<Token>();
                if (post != null) {
                    res.addAll(post);
                }
                res.add(new CommonToken(MathParser.BINOP, seqBin));
                if (pre != null) {
                    res.addAll(pre);
                }
                return res;
            }
        }
        return null;
    }


    /**
     * Classifies the pending symbol run from its neighbouring tokens: PREFIX when it
     * does not follow an atom, BINARY when it sits between two atoms, and POSTFIX
     * when it follows an atom but is not followed by one.
     *
     * @return the kind of operator sequence expected at the current position
     */
    private OperatorType getOperatorType() {
        if (!isAfterAtom()) {
            return OperatorType.PREFIX;
        }
        return isBeforeAtom() ? OperatorType.BINARY : OperatorType.POSTFIX;
    }
    private enum OperatorType { BINARY, PREFIX, POSTFIX };


    /**
     * Tells whether the token stored in {@code nextToken} can begin an atom, i.e.
     * whether it is an INT or an opening parenthesis (PLEFT).
     *
     * @return {@code true} if {@code nextToken} is of type INT or PLEFT
     */
    private boolean isBeforeAtom() {
        switch (nextToken.getType()) {
        case MathParser.INT:
        case MathParser.PLEFT:
            return true;
        default:
            return false;
        }
    }
    /**
     * Tells whether the token stored in {@code previousToken} can end an atom, i.e.
     * whether it is an INT or a closing parenthesis (PRIGHT).
     *
     * @return {@code true} if {@code previousToken} is of type INT or PRIGHT;
     *         {@code false} otherwise, including when no token has been returned yet
     */
    private boolean isAfterAtom() {
        // previousToken is still null when the input starts with a symbol run;
        // nothing precedes the run, so it cannot follow an atom.
        if (previousToken == null) {
            return false;
        }
        int tokenType = previousToken.getType();
        return tokenType == MathParser.INT ||
                tokenType == MathParser.PRIGHT;
    }

    /** Returns whether {@code operator} is listed in the binary operator table. */
    private static boolean isBinaryOperator(String operator) {
        return binaryOperators.indexOf(operator) >= 0;
    }

    /** Returns whether {@code operator} is listed in the prefix operator table. */
    private static boolean isPrefixOperator(String operator) {
        return prefixOperators.indexOf(operator) >= 0;
    }

    /** Returns whether {@code operator} is listed in the postfix operator table. */
    private static boolean isPostfixOperator(String operator) {
        return postfixOperators.indexOf(operator) >= 0;
    }

}

@parser::members {
    /**
     * Precedence of each operator, keyed by operator text. Hard-coded sample data.
     * NOTE(review): "/" is a registered binary operator in the lexer but has no
     * entry here, and "^" has an entry but is not a registered operator - confirm.
     */
    static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
    static {
        precedenceMap.put("*", 2);
        precedenceMap.put("+", 1);
        precedenceMap.put("^", 4);
        precedenceMap.put("-", 3);
        precedenceMap.put("!", 5);
    }

    /**
     * Looks up the precedence registered for the text of {@code op}.
     *
     * @param op operator token
     * @return the registered precedence, never {@code null}
     * @throws IllegalArgumentException if no precedence is registered for the text;
     *         returning {@code null} here would only surface later as an anonymous
     *         NullPointerException when the value is unboxed
     */
    public static Integer getPrecedence(Token op) {
        Integer p = precedenceMap.get(op.getText());
        if (p == null) {
            throw new IllegalArgumentException("No precedence defined for operator: " + op.getText());
        }
        return p;
    }

    /**
     * Computes the minimum precedence passed to the expression parsed after {@code op}.
     * Prefix operators and "^" pass their own precedence on unchanged; other binary
     * operators pass their precedence plus one.
     *
     * @param op operator token of type PREOP or BINOP
     * @return the minimum precedence for the following expression
     * @throws IllegalArgumentException if the token is none of the handled kinds or
     *         has no registered precedence
     */
    public static Integer getNextPrecedence(Token op) {
        Integer p = getPrecedence(op);
        if (op.getType() == PREOP)          return p;
        else if (op.getText().equals("^"))  return p;
        else if (op.getType() == BINOP)     return p+1;
        throw new IllegalArgumentException(op.getText());
    }
}

// Entry rule: one expression parsed with minimum precedence 0.
prog
    :   expr[0]
    ;


// Precedence climbing: after an operand, keep consuming operators whose precedence
// (looked up from the look-ahead token) is at least the minimum _p handed in by the
// caller. The right operand of a binary operator is parsed with the minimum
// returned by getNextPrecedence.
expr [int _p]
    :   atom
        (   {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)] 
        |   {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
        )* 
    ;

// Operand: an integer, a parenthesised expression, or a prefix operator applied to
// an expression.
atom
    :   INT 
    |   PLEFT expr[0] PRIGHT
    |   op=PREOP expr[getNextPrecedence($op)]
    ;

// Unsigned integer literal.
INT
    :   ( '0'..'9' )+
    ;

// Parentheses get their own token types so the lexer code can recognise atom
// boundaries (see isBeforeAtom / isAfterAtom).
PLEFT   :   '(' ;
PRIGHT  :   ')' ;

WS
    : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines

// Catch-all: any single character not matched by a rule above. It must stay the
// last lexer rule so the fixed tokens win; nextToken() regroups runs of SYMBOL
// tokens into PREOP / POSTOP / BINOP tokens.
SYMBOL
    :   .
    ;

注意:代码仅作为示例,而不是我的真实代码(将在外部请求运算符和优先级)