ANTLR 3错误,输入不匹配,但有什么不对?

时间:2014-07-06 19:41:58

标签: parsing input antlr3 lexer

我遇到了以下问题:我的ANTLR 3语法可以通过编译,但我的简单测试程序无法正常工作。语法如下:     grammar Rietse;

options {
    k=1;
    language=Java;
    output=AST;
}

tokens {
    COLON       =   ':'     ;
    SEMICOLON   =   ';'     ;
    OPAREN      =   '('     ;
    CPAREN      =   ')'     ;
    COMMA       =   ','     ;
    OCURLY      =   '{'     ;
    CCURLY      =   '}'     ;
    SINGLEQUOTE =   '\''    ;

    // operators
    BECOMES     =   '='     ;
    PLUS        =   '+'     ;
    MINUS       =   '-'     ;
    TIMES       =   '*'     ;
    DIVIDE      =   '/'     ;
    MODULO      =   '%'     ;

    EQUALS      =   '=='    ;
    LT          =   '<'     ;
    LTE         =   '<='    ;
    GT          =   '>'     ;
    GTE         =   '>='    ;
    UNEQUALS    =   '!='    ;
    AND         =   '&&'    ;
    OR          =   '||'    ;
    NOT         =   '!'     ;

    // keywords
    PROGRAM     =   'program'               ;
    COMPOUND    =   'compound'              ;
    UNARY       =   'unary'                 ;
    DECL        =   'decl'                  ;
    SDECL       =   'sdecl'                 ;
    STATIC      =   'static'                ;
    PRINT       =   'print'                 ;
    READ        =   'read'                  ;
    IF          =   'if'                    ;
    THEN        =   'then'                  ;
    ELSE        =   'else'                  ;
    DO          =   'do'                    ;
    WHILE       =   'while'                 ;

    // types
    INTEGER     =   'int'                   ;
    CHAR        =   'char'                  ;
    BOOLEAN     =   'boolean'               ;
    TRUE        =   'true'                  ;
    FALSE       =   'false'                 ;
}

@lexer::header {
package Eindopdracht;
}

@header {
package Eindopdracht;
}

// Parser rules

// Entry rule: matches program2 followed by EOF and rewrites the result to a
// tree rooted at PROGRAM; EOF itself is not part of the rewritten tree.
program
    :   program2 EOF
            ->  ^(PROGRAM program2)
    ;

program2
    :   (declaration* statement)+
    ;

// Variable declaration: `type IDENTIFIER ;`, optionally preceded by STATIC.
// The static form is rewritten to ^(SDECL type IDENTIFIER) and the plain form
// to ^(DECL type IDENTIFIER); STATIC and SEMICOLON do not appear in the tree.
declaration
    :   STATIC type IDENTIFIER SEMICOLON -> ^(SDECL type IDENTIFIER)
    |   type IDENTIFIER SEMICOLON -> ^(DECL type IDENTIFIER)
    ;

type
    :   INTEGER
    |   CHAR
    |   BOOLEAN
    ;

// Statement: one of the five statement kinds below. Every alternative is
// terminated by SEMICOLON, which the `!` suffix excludes from the AST.
statement
    :   assignment_expr SEMICOLON!
    |   while_stat SEMICOLON!
    |   print_stat SEMICOLON!
    |   if_stat SEMICOLON!
    |   read_stat SEMICOLON!
    ;

while_stat
    :   WHILE^ OPAREN! or_expr CPAREN! OCURLY! statement+ CCURLY!  // while (expression) {statement+}
    ;

print_stat
    :   PRINT^ OPAREN! or_expr (COMMA! or_expr)* CPAREN!          // print(expression)
    ;

// Read statement: READ becomes the subtree root (`^`); the parentheses and
// commas are excluded from the AST (`!`), leaving the identifiers as children.
// NOTE(review): `(COMMA! IDENTIFIER)+` requires at least two identifiers,
// whereas print_stat uses `*` for its argument list -- confirm whether a
// single-argument read(x) is meant to be rejected.
read_stat
    :   READ^ OPAREN! IDENTIFIER (COMMA! IDENTIFIER)+ CPAREN!   // read(expression)
    ;

if_stat
    :   IF^ OPAREN! or_expr CPAREN! comp_expr (ELSE! comp_expr)?     // if (expression) compound else compound
    ;

assignment_expr
    :   or_expr (BECOMES^ or_expr)*
    ;

or_expr
    :   and_expr (OR^ and_expr)*
    ;

and_expr
    :   compare_expr (AND^ compare_expr)*
    ;

compare_expr
    :   plusminus_expr ((LT|LTE|GT|GTE|EQUALS|UNEQUALS)^ plusminus_expr)?
    ;

plusminus_expr
    :   timesdivide_expr ((PLUS | MINUS)^ timesdivide_expr)*
    ;

timesdivide_expr
    :   unary_expr ((TIMES | DIVIDE | MODULO)^ unary_expr)*
    ;

// Unary expression: a bare operand is passed through as-is; an operand
// prefixed by PLUS, MINUS or NOT is rewritten to ^(UNARY <operator> operand).
// The prefixed alternatives take `operand`, not `unary_expr`, so at most one
// prefix operator is accepted.
unary_expr
    :   operand
    |   PLUS operand -> ^(UNARY PLUS operand)
    |   MINUS operand -> ^(UNARY MINUS operand)
    |   NOT operand -> ^(UNARY NOT operand)
    ;

operand
    :   TRUE
    |   FALSE
    |   charliteral
    |   IDENTIFIER
    |   NUMBER
    |   OPAREN! or_expr CPAREN!
    ;

// Compound block: program2 enclosed in curly braces, rewritten to
// ^(COMPOUND program2) so the braces themselves are not kept in the tree.
comp_expr
    :   OCURLY program2 CCURLY -> ^(COMPOUND program2)
    ;

// Lexer rules

// Character literal: a single LETTER between single quotes; the `!` suffix
// excludes both quote tokens from the AST.
// NOTE(review): the lowercase name makes this a parser rule, but LETTER is
// declared as a `fragment` further down, so the lexer never emits a LETTER
// token on its own -- the input 'a' is tokenized as
// SINGLEQUOTE IDENTIFIER SINGLEQUOTE. That is the source of the reported
// "mismatched input 'a' expecting LETTER" error.
charliteral
    :   SINGLEQUOTE! LETTER SINGLEQUOTE!
    ;

IDENTIFIER
    :   LETTER (LETTER | DIGIT)*
    ;

NUMBER
    :   DIGIT+
    ;

// Line comment: `//` followed by any characters up to and including a '\n'.
// The matched token is placed on the HIDDEN channel instead of the default one.
// NOTE(review): a comment on the final line with no trailing '\n' would not
// match this rule -- confirm whether that case needs handling.
COMMENT
    :   '//' .* '\n' 
            { $channel=HIDDEN; }
    ;

WS
    :   (' ' | '\t' | '\f' | '\r' | '\n')+
            { $channel=HIDDEN; }
    ;

fragment DIGIT  :   ('0'..'9') ;
fragment LOWER  :   ('a'..'z') ;
fragment UPPER  :   ('A'..'Z') ;
fragment LETTER :   LOWER | UPPER ;

// EOF

然后我使用以下Java文件来测试程序:     package Eindopdracht;

import java.io.FileInputStream;
import java.io.InputStream;

import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.BufferedTreeNodeStream;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.DOTTreeGenerator;
import org.antlr.runtime.tree.TreeNodeStream;
import org.antlr.stringtemplate.StringTemplate;

/**
 * Command-line driver that runs the generated Rietse lexer and parser over a source program.
 *
 * <p>The program is read from the file named by the first command-line argument, or from
 * standard input when no argument is supplied.
 */
public class Rietse {
    /**
     * Lexes and parses the input, reporting any thrown exception on {@code System.err}.
     *
     * @param args optional; when present, {@code args[0]} is the path of the file to parse
     */
    public static void main (String[] args)
    {
        // Index the array only when an argument is present, so that running without
        // arguments reaches the System.in fallback instead of failing on args[0].
        String inputFile = args.length > 0 ? args[0] : null;
        try {
            InputStream in = inputFile == null ? System.in : new FileInputStream(inputFile);
            try {
                RietseLexer lexer = new RietseLexer(new ANTLRInputStream(in));
                CommonTokenStream tokens = new CommonTokenStream(lexer);
                RietseParser parser = new RietseParser(tokens);

                // Invoke the grammar's start rule; the returned tree is not used here.
                parser.program();
            } finally {
                // Release the file handle; standard input is left open for the JVM.
                if (in != System.in) {
                    in.close();
                }
            }

        } catch (RietseException e) {
            System.err.print("ERROR: RietseException thrown by compiler: ");
            System.err.println(e.getMessage());
        } catch (RecognitionException e) {
            System.err.print("ERROR: recognition exception thrown by compiler: ");
            System.err.println(e.getMessage());
            e.printStackTrace();
        } catch (Exception e) {
            System.err.print("ERROR: uncaught exception thrown by compiler: ");
            System.err.println(e.getMessage());
            e.printStackTrace();
        }
    }
}

最后,测试程序本身:

print('a');

现在,当我运行此操作时,我收到以下错误:

line 1:7 mismatched input 'a' expecting LETTER
line 1:9 mismatched input ')' expecting LETTER

我不知道是什么导致了这个错误。我尝试过几处改变,但没有修复它。这里有没有人知道我的代码有什么问题以及如何解决它? 非常感谢每一点帮助,提前谢谢。

问候, 璃园

2 个答案:

答案 0 :(得分:1)

使用规则:

CHARLITERAL
:   SINGLEQUOTE (LETTER | DIGIT) SINGLEQUOTE
;

并将操作数更改为:

operand
:   TRUE
|   FALSE
|   CHARLITERAL
|   IDENTIFIER
|   NUMBER
|   OPAREN! or_expr CPAREN!
;

这样就能解决问题。不过这会带来另一个问题:AST中该节点的文本会包含单引号,但可以通过调用

setText(String);

方法修改节点的文本来解决。

答案 1 :(得分:0)

把charliteral改成词法分析器规则(并将其重命名为CHARLITERAL)。目前,字符串'a'被切分为以下记号:SINGLEQUOTE IDENTIFIER SINGLEQUOTE,因此解析器得到的是IDENTIFIER而不是LETTER。

我很奇怪:既然在解析器规则中使用了片段规则(LETTER),这段语法为什么还能通过编译。