Question

我是 ANTLR 的新手。使用 ANTLR 解析下面两个 Java 文件时遇到了同样的错误。解析树看起来没有问题，也没有出错的叶子节点。我找不到任何关于此错误根本原因的线索。我使用的 ANTLR 版本是 ANTLR 4。有谁知道如何解决吗？提前谢谢！

1）ANTLR文件是test.g4，如下所示，

// Combined ANTLR 4 grammar (parser and lexer rules) for a small Java-like language.
grammar test;

// Syntax Specification ==> Context-free Grammar 
// Start rule: exactly one main class followed by zero or more ordinary classes.
pa1: 
     mainClass aClass*;

// Main class: class header, the main-method header, then the method body wrapped
// in either one or two pairs of braces, then the closing brace of the class.
mainClass: 
    classDeclaration '{' mainDeclaration ('{'body'}'|'{''{'body'}''}') '}';

// Fixed header of the form: public static void main(String[] <ID>)
mainDeclaration:
    'public' 'static' 'void' 'main' '(' 'String''['']' ID ')';

// Ordinary class: header plus a braced body.
aClass:
    classDeclaration '{' body '}';

// Class header: the keyword 'class' and the class name; no modifiers, no 'extends'.
classDeclaration:
    'class' ID;

// Method: header plus a braced body.
aMethod:
    methodDeclaration'{'body'}';

// Method header: return type, method name and a parenthesised parameter list.
methodDeclaration:
    type ID'('parameterList')';

// Body of a class or method: any mix of declarations, assignments, expressions and methods.
body:
    (varDeclaration|statement|expression|aMethod)*;

// Variable declaration without an initialiser.
varDeclaration:
    type ID ';' ;

// Assignment to a variable or array element. The right-hand side is one of a fixed
// set of forms: number, name, string, char, array element (optionally preceded by
// 'new' type), object creation, call, or a single binary arithmetic operation.
statement: 
    (ID|arrayElement) '=' (NUM|ID|string|aChar|('new' type)? arrayElement|'new' (type|ID) '('')'|aCall|mathExpression)';';  //(ID|arrayElement) '=' (NUM|ID|string|aChar|arrayElement|aCall|mathExpression|booleanExpression)';';

// String literal.
// NOTE(review): this is a parser rule, so the text between the quotes is still
// tokenised by the lexer rules below (words become ID, whitespace is skipped);
// a lexer rule is probably what is intended.
string:
    '"' .*? '"';

// Character literal.
// NOTE(review): as a parser rule the two quotes are separate implicit tokens, and a
// space between them is discarded by WHITESPACE before the parser sees it.
aChar:
    '\''(.?|'+'|'-')'\'';

// Statement-like constructs: a bare identifier followed by ';', a while loop,
// an if, a System.out.println call, or a return.
expression: 
    ID';'|whileExpression|ifExpression|sysPrintExpression|returnExpression;

// if with at most one 'else if' and at most one 'else'. Every branch is either an
// unbraced run of declarations/statements/expressions or the same run in braces.
ifExpression:
    'if''('booleanExpression')' ((varDeclaration|statement|expression)*|'{'(varDeclaration|statement|expression)*'}')
    ('else''if''('booleanExpression')' ((varDeclaration|statement|expression)*|'{'(varDeclaration|statement|expression)*'}'))?  
    ('else'((varDeclaration|statement|expression)*|'{'(varDeclaration|statement|expression)*'}'))?;

// while loop; the body must be enclosed in braces.
whileExpression: 
    'while''('booleanExpression')' '{'(varDeclaration|statement|expression)*'}';

// System.out.println(...) with a number, an array element or a call as its only argument.
sysPrintExpression: 
    'System''.''out''.''println''('(NUM|arrayElement|aCall)')'';';

// return of a number or an identifier.
returnExpression: 
    'return'(NUM|ID)';';

// Comparison using COMPOPERATOR between simple operands or a math expression.
compExpression: 
    (ID|NUM|mathExpression) COMPOPERATOR (ID|NUM|'('mathExpression')'|'('ID')');

// A single binary arithmetic operation; the right operand may be a parenthesised
// method call of the form (ID.ID(parameterList)).
mathExpression: 
    (ID|NUM) (PLUS|MINUS|MULT|DIV)(ID|NUM|('('ID'.'ID'('parameterList')'')'));  

// One optionally negated condition: a comparison (with or without parentheses), a call or a name.
singleBooleanExpression: 
    '!'?('('compExpression')'|compExpression|aCall|ID);//(LOGICALOPERATOR('!'?(compExpression|aCall|ID|string|aChar)))?;    

// Two parenthesised, optionally negated conditions joined by a LOGICALOPERATOR.
doubleBooleanExpression: 
    '(''!'?('('compExpression')'|compExpression|aCall|ID)')'LOGICALOPERATOR('(''!'?(compExpression|aCall|ID|string|aChar)')');

// Condition used by if and while.
booleanExpression:
    singleBooleanExpression|doubleBooleanExpression;    

// Member access or method call on a name or on an object creation (optionally with
// 'new'); the second alternative is a parenthesised receiver followed by a further access.
aCall: 
    (ID|'new'? ID '('')')calling|'('(ID|ID'('')')calling')'calling;

// '.' followed by a member name and an optional parenthesised argument list.
calling:
    '.'(ID('('parameterList')')?);  

// Comma-separated list, possibly empty, used both for formal parameters (type ID)
// and for call arguments. The parentheses are written by the referencing rules.
parameterList: 
    (NUM|type? ID|aChar|string|mathExpression|aCall)?(','(NUM|type? ID|aChar|mathExpression))*;

// Bracketed index with an optional array name in front.
arrayElement: 
    ID?'['(ID|NUM)']';

// Type name: int[], boolean, int, char, or any identifier.
type: 
    'int''['']'|'boolean'|'int'|'char'|ID;  

// Lexer Specification ==> Regular Expressions  
// Decimal integer: a single 0, or a non-zero digit followed by further digits.
NUM: ('0' | [1-9][0-9]*);
// Identifier: a letter or underscore followed by letters, digits or underscores.
ID: [a-zA-Z_][0-9a-zA-Z_]*;
PLUS : '+' ;
MINUS : '-' ;
MULT : '*' ;
DIV : '/';
COMPOPERATOR: '<'|'>'; 
// NOTE(review): '==' is lexed as a LOGICALOPERATOR here, so compExpression cannot match 'a == b'.
LOGICALOPERATOR: '=='|'||'|'&&';
// Spaces, tabs and line breaks are dropped and never reach the parser.
WHITESPACE: [ \t\r\n]+  -> skip; 
// Block comments and line comments are dropped.
// NOTE(review): [\r\n|\r|\n] is a character set, not an alternation, so it also excludes the '|' character.
COMMENT: ('/*'.*?'*/'|'//'~[\r\n|\r|\n]*) -> skip;

2）JAVA file1是MyChar.java，如下所示，

/*
 * Sample program 1: the main class of the first parser test input.
 * Its main method only prints the results of calls on freshly created
 * CharEditor objects.
 */
class MyChar{
    public static void main(String[] a){
        // The body is deliberately wrapped in an extra pair of braces.
        {
            // Four calls to whichIsSmaller with pairs of character literals.
            System.out.println(new CharEditor().whichIsSmaller('a', 'c'));
            System.out.println(new CharEditor().whichIsSmaller('a', 'A'));
            System.out.println(new CharEditor().whichIsSmaller('1', 'd'));
            System.out.println(new CharEditor().whichIsSmaller('-', '+'));
            // System.out.println("There are total " + new MyChar().countFromCharToChar('a', 'z', true) + " characters in between a and z");
            // Prints how often 'c' occurs in a long string literal.
            System.out.println(new CharEditor().countChars("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", 'c'));
        }
    }
}

/**
 * Character helpers used by the MyChar sample program.
 *
 * The code sticks to the small language subset of the sample: one
 * declaration per statement, simple assignments, and returns of a
 * plain variable.
 */
class CharEditor {

    /**
     * Returns the smaller of two characters.
     *
     * @param firstChar  first candidate
     * @param secondChar second candidate
     * @return the character with the lower code; secondChar when both are equal
     */
    char whichIsSmaller(char firstChar, char secondChar){
        char returnChar;

        // The comparison is based on the characters' ASCII codes.
        if (firstChar < secondChar) {
            returnChar = firstChar;
        } else {
            returnChar = secondChar;
        }
        return returnChar;
    }

    /**
     * Counts how many times a character occurs in a string.
     *
     * @param str the string to scan
     * @param c   the character to look for
     * @return the number of positions in str that hold c
     */
    int countChars(String str, char c){
        int n;
        int sz;
        char c1;
        int counter;

        counter = 0;

        sz = str.length();
        n = 0;
        // Walk over every index of str and count the matching positions.
        while ( n < sz ) {
            c1 = str.charAt(n);
            if ( c1 == c) {
                counter = counter + 1;
            }
            n = n + 1;
        }
        return counter;
    }
}

3）JAVA file2是MyString.java，如下所示，

class MyString{
    public static void main(String[] a){
        {
            System.out.println(new StringEditor().removeSpace("Hello World And Happy Coding"));
            System.out.println(new StringEditor().containsChar("Hello World And Happy Coding", 'd'));
            System.out.println(new StringEditor().containsChar("Hello World And Happy Coding", 'b'));
        }
    }
}

class StringEditor {
    String removeSpace(String str) {
        String toReturn;
        int n;
        int sz;
        char c;

        toReturn = "";
        sz = str.length();
        n = 0;
        while ( n < sz ) {
            c = str.charAt(n);
            if ( c == ' ') {

            } else {
                // Every character other than ' ' is appended; spaces hit the empty branch above.
                toReturn = toReturn + c;
            }
            n = n+1;
        }

        // toReturn now holds the characters of str with every ' ' left out.
        return toReturn;
    }

    /**
     * Reports whether a character occurs in a string.
     *
     * @param str the string to scan
     * @param c   the character to look for
     * @return true if some position of str holds c, false otherwise
     */
    boolean containsChar(String str, char c) {
        int n;
        int sz;
        char c1;

        boolean toReturn;

        toReturn = false;

        sz = str.length();
        n = 0;
        while ( n < sz ) {
            c1 = str.charAt(n);
            if ( c1 == c) {
                toReturn = true;
                // The first match settles the answer, so stop scanning.
                break;
            }
            n = n+1;
        }

        return toReturn;
    }
}

Answer 1

尽管您已经解决了文件2的问题，但我仍然发布了我正在处理的答案。

如果出现问题，首先要做的是显示标记（token），查看词法分析器是如何解释输入的——结果往往与我们以为的不一样。以文件 2 为例：

$ grun Question question -tokens -diagnostics input2.text 
...
[@131,652:653='if',<'if'>,23:12]
[@132,655:655='(',<'('>,23:15]
[@133,657:657='c',<ID>,23:17]
[@134,659:660='==',<COMP_OPERATOR>,23:19]
[@135,662:662=''',<'''>,23:22]
[@136,664:664=''',<'''>,23:24]
[@137,665:665=')',<')'>,23:25]
[@138,667:667='{',<'{'>,23:27]
[@139,682:682='}',<'}'>,25:12]

可以看到，在第 23 行的语句

    if ( c == ' ') {

中，带引号的单个空格字符 ' ' 被解释为两个单独的 ' 标记，而不是按下面的规则识别为一个字符字面量：

aChar:
    '\'' ( .? | '+' | '-' ) '\'';

这是因为撇号在解析器规则 aChar 中被隐式定义为一个独立的标记，而两个撇号之间的空格被下面的规则丢弃了：

WHITESPACE: [ \t\r\n]+  -> skip;

并且没有任何词法分析器规则强制词法分析器将 ' ' 视为一个整体。这就是我将解析器规则 aChar 改为词法分析器规则的原因：

A_CHAR : '\'' ( '+' | '-' | .? ) '\'' ;

现在标记（token）是正确的：

[@127,652:653='if',<'if'>,23:12]
[@128,655:655='(',<'('>,23:15]
[@129,657:657='c',<ID>,23:17]
[@130,659:660='==',<COMP_OPERATOR>,23:19]
[@131,662:664='' '',<A_CHAR>,23:22]
[@132,665:665=')',<')'>,23:25]

另请注意，使用解析器规则 string 时，"Lorem ipsum dolor sit amet ..." 中的每个单词都会被解释为一个 ID；而使用词法分析器规则 STRING，则会把整个句子作为一个标记捕获。

另一项改进是应用DRY原则（不要重复自己）。例如，在ifExpression中，表达式(varDeclaration|statement|expression)重复6次。使用子规则可以减少这种情况。

因此语法可能是：

// Combined ANTLR 4 grammar for the small Java-like language of the question.
// Character and string literals are lexer tokens (A_CHAR, STRING), so their
// content is neither split up by other lexer rules nor dropped by WHITESPACE.
grammar Question;

// Syntax Specification ==> Context-free Grammar

// Start rule: one main class followed by zero or more ordinary classes.
// The @init action prints a version banner when the rule is entered.
question
@init {System.out.println("Question last update 1803");}
    :   mainClass aClass*
    ;

// Main class: class header, main-method header and the method's block,
// followed by the closing brace of the class.
mainClass
    :   classDeclaration '{' mainDeclaration declaration_block '}'
    ;

// Fixed header of the form: public static void main(String[] <ID>)
mainDeclaration
    :   'public' 'static' 'void' 'main' '(' 'String' '[' ']' ID ')'
    ;

// Ordinary class: header plus a block.
aClass
    :   classDeclaration declaration_block
    ;

// Class header: the keyword 'class' and the class name; no modifiers, no 'extends'.
classDeclaration
    :   'class' ID
    ;

// Method: header plus a block.
aMethod
    :   methodDeclaration declaration_block
    ;

// Method header: return type, name and parenthesised parameter list.
methodDeclaration
    :   type ID parameterList
    ;

// Braced block of statements and methods; the second alternative allows the
// block to be wrapped in additional pairs of braces.
declaration_block
    :   '{' declaration_body* '}'
    |   '{' declaration_block '}'
    ;

// One member of a declaration block.
declaration_body
    :   statement
    |   aMethod
    ;

// Braced run of statements (no method declarations).
statement_block
    :   '{' statement* '}'
    ;

// Shared sub-rule that replaces the repeated (varDeclaration|statement|expression) groups.
statement
    :   varDeclaration
    |   assignment
    |   statement_expression
    ;

// Variable declaration without an initialiser.
varDeclaration
    :   type ID ';'
    ;

// Assignment to a variable or array element; the right-hand side is one of a
// fixed set of forms.
// Earlier variant kept for reference:
// (ID|arrayElement) '=' (NUM|ID|STRING|A_CHAR|arrayElement|aCall|mathExpression|booleanExpression)';';
assignment
    :   ( ID | arrayElement ) '='
        ( NUM
        | ID
        | STRING
        | A_CHAR
        | ( 'new' type )? arrayElement
        | 'new' ( type | ID ) '(' ')'
        | aCall
        | mathExpression
        )
        ';'
    ;

// Statement-like constructs: a bare identifier followed by ';', a while loop,
// an if, a System.out.println call, or a return.
statement_expression
    :   ID ';'
    |   whileExpression
    |   ifExpression
    |   sysPrintExpression
    |   returnExpression
    ;

// if with at most one 'else if' and at most one 'else'.
ifExpression
    :   'if' '(' booleanExpression ')' statement_if
        ( 'else' 'if' '(' booleanExpression ')' statement_if )?
        ( 'else' statement_if )?
    ;

// Branch of an if: an unbraced run of statements (possibly empty) or a braced block.
statement_if
    :   statement*
    |   statement_block
    ;

// while loop; the body must be a braced block.
whileExpression
    :   'while' '(' booleanExpression ')' statement_block
    ;

// System.out.println(...) with a number, an array element or a call as its only argument.
sysPrintExpression
    :   'System' '.' 'out' '.' 'println' '(' ( NUM | arrayElement | aCall ) ')' ';'
    ;

// return of a number or an identifier.
returnExpression
    :   'return' ( NUM | ID ) ';'
    ;

// Comparison; the right operand may also be a character literal.
compExpression
    :   ( ID | NUM | mathExpression ) COMP_OPERATOR ( ID | NUM | A_CHAR | '(' mathExpression ')' | '(' ID ')' )
    ;

// A single binary arithmetic operation; the right operand may be a
// parenthesised method call of the form (ID.ID(...)).
mathExpression
    :   ( ID | NUM ) ( PLUS | MINUS | MULT | DIV ) ( ID | NUM | ( '(' ID '.' ID parameterList ')' ) )
    ;

// One optionally negated condition.
// Earlier variant kept for reference:
// (LOGICAL_OPERATOR('!'?(compExpression|aCall|ID|STRING|A_CHAR)))?;
singleBooleanExpression
    :   '!'? ( '(' compExpression ')' | compExpression | aCall | ID )
    ;

// Two parenthesised, optionally negated conditions joined by a LOGICAL_OPERATOR.
doubleBooleanExpression
    :   '(' '!'? ( '(' compExpression ')' | compExpression | aCall | ID ) ')'
        LOGICAL_OPERATOR
        ( '(' '!'? ( compExpression | aCall | ID | STRING | A_CHAR ) ')' )
    ;

// Condition used by if and while.
booleanExpression
    :   singleBooleanExpression
    |   doubleBooleanExpression
    ;

// Member access or method call on a name or on an object creation (optionally
// with 'new'); the second alternative is a parenthesised receiver followed by
// a further access.
aCall
    :   ( ID | 'new'? ID '(' ')' ) calling
    |   '(' ( ID | ID '(' ')' ) calling ')' calling
    ;

// '.' followed by a member name and an optional argument list.
calling
    :   '.' ( ID ( parameterList )? )
    ;

// Parenthesised, comma-separated list, possibly empty, used both for formal
// parameters (type ID) and for call arguments.
parameterList
    :   '('
        ( NUM | type? ID | A_CHAR | STRING | mathExpression | aCall )?
        ( ',' ( NUM | type? ID | A_CHAR | mathExpression ) )*
        ')'
    ;

// Bracketed index with an optional array name in front.
arrayElement
    :   ID? '[' ( ID | NUM ) ']'
    ;

// Type name.
type
    :   'int' '[' ']'
    |   'boolean'
    |   'int'
    |   'char'
    |   'String' // <----- added
    |   ID
    ;

// Lexer Specification ==> Regular Expressions
// Decimal integer: a single 0, or a non-zero digit followed by further digits.
NUM   : '0' | [1-9][0-9]* ;
// Identifier: a letter or underscore followed by letters, digits or underscores.
ID    : [a-zA-Z_][0-9a-zA-Z_]* ;
PLUS  : '+' ;
MINUS : '-' ;
MULT  : '*' ;
DIV   : '/' ;
COMP_OPERATOR    : '<' | '>' | '==' ;  // <----- changed
LOGICAL_OPERATOR : '||' | '&&' ; // <----- changed
// Whole string literal as one token (non-greedy up to the next double quote).
STRING : '"' .*? '"' ;
// Whole character literal as one token, so that ' ' is not reduced to two quotes.
A_CHAR : '\'' ( '+' | '-' | .? ) '\'' ;
// Spaces, tabs and line breaks are dropped and never reach the parser.
WHITESPACE: [ \t\r\n]+  -> skip; 
// The original single COMMENT rule is kept below for reference. It was split in
// two because of the warnings ANTLR reported for its character set.
//COMMENT: ('/*'.*?'*/'|'//'~[\r\n|\r|\n]*) -> skip; 
/*
warning(180): Question.g4:103:27: chars "" used multiple times in set [\r\n|\r|\n]
warning(180): Question.g4:103:27: chars "|" used multiple times in set [\r\n|\r|\n]
warning(180): Question.g4:103:27: chars "
" used multiple times in set [\r\n|\r|\n]
*/
COMMENT      : '/*' .*? '*/' -> skip ;
LINE_COMMENT : '//' ~[\r\n]* -> skip;

但它仅适用于问题中发布的那两个文件。对于下面这个输入：

class MyString{ // minimal main class: main holds nothing but an empty nested block
    public static void main(String[] a){
        { // empty inner block directly inside the method braces
        }
    }
}

public class test { // 'public' in front of 'class' is not covered by classDeclaration ('class' ID)
    public static class UnderlineListener extends BaseErrorListener { // nested class with modifiers and an 'extends' clause
    }
}

它不起作用：

$ grun Question question input3.text 
Question last update 1803
line 8:0 extraneous input 'public' expecting {<EOF>, 'class'}
line 9:1 no viable alternative at input '{public'
line 9:39 mismatched input 'extends' expecting '{'

为现有语言编写语法既令人兴奋又困难。现成的 Java 语法可以从 ANTLR 网站获取：

-> 下载

-> 源存储库（一切都在 GitHub 上）

-> 附加语法

This repository

ANTLR错误：输入'<eof>'没有可行的替代方案。怎么解决？

1 个答案: