不能只用一个字母创建变量

时间:2019-04-19 14:08:24

标签: antlr antlr4

我希望变量名可以只包含一个字母。当我写 Integer aa; 时一切正常,但当我写 Integer a; 时,grun 报错:输入 'a' 不匹配,期望的是 ID(mismatched input 'a' expecting ID)。我见过与此相反的问题,但对我没有帮助。我认为我的代码是正确的,但看不出哪里错了。这是我的词法分析器:

// Lexer grammar that defines the tokens; the parser grammar brings it in with `import Symbols;`.
lexer grammar Symbols;

// Each whitespace character (space, tab, newline, carriage return) is skipped.
WS: [ \t\n\r] -> skip ;

// Operator and punctuation tokens.
INCR: '++' ;
DECR: '--' ;
ASSIGNMENT: '=' ;
ADD: '+' ;
ADD_ASSIGNMENT: '+=' ;
SUB: '-' ;
SUB_ASSIGNMENT: '-=' ;
MUL: '*' ;
MUL_ASSIGNMENT: '*=' ;
DIV: '/' ;
DIV_ASSIGNMENT: '/=' ;
MOD: '%' ;
MOD_ASSIGNMENT: '%=' ;
EQEQ: '==' ;
EXCL_EQ: '!=' ;
// Semicolons are skipped as well, so the parser never sees them.
SEMI: ';' -> skip;
COLON: ':' ;
COLONCOLON: '::' ;
ARROW: '->' ;
COMMA: ',' ;
DOT: '.' ;
AND: '&&' ;
OR: '||' ;
NOT: '!' ;
RANGLE: '>' ;
GE: '>=' ;
LANGLE: '<' ;
LE: '<=' ;
LPAREN: '(' ;
RPAREN: ')' ;
LSQUARE: '[' ;
RSQUARE: ']' ;
LCURL: '{' ;
RCURL: '}' ;
UNDERSCORE: '_' ;
QUEST: '?' ;

// Keywords. They are declared before ID, which could otherwise match the same lowercase words.
IF: 'if' ;
ELIF: 'elif' ;
ELSE: 'else' ;
DO: 'do' ;
WHILE: 'while' ;
FOR: 'for' ;
CLASS: 'class' ;
SWITCH: 'switch' ;
CASE: 'case' ;
DEFAULT: 'default' ;
FINALLY: 'finally' ;
TRY: 'try' ;
CATCH: 'catch' ;
THROW: 'throw' ;
RETURN: 'return' ;
IS: 'is' ;
OF: 'of' ;
NEW: 'new' ;
OVERRIDE: 'override' ;
ENUM: 'enum' ;
EXTENDS: 'extends' ;
NULL: 'null' ;
THIS: 'this' ;
SUPER: 'super' ;
TRUE: 'true' ;
FALSE: 'false' ;
VOID: 'void' ;
CONSTRUCTOR: 'constructor' ;
OPERATOR: 'operator' ;
IMPORT: 'import' ;
LAMBDA: 'lambda' ;

// Modifiers: class, attribute/method and visibility modifiers used by the parser rules.
ABSTRACT: 'abstract' ;
FINAL: 'final' ;
STATIC: 'static' ;
PUBLIC: 'public' ;
PRIVATE: 'private' ;
PROTECTED: 'protected' ;

// Umbrella token covering the arithmetic, compound-assignment, assignment and
// increment/decrement operators.
// NOTE(review): every alternative is also matched, at the same length, by an individual
// token rule declared earlier in this grammar, and the parser grammar shown with it never
// references Operators. Under ANTLR's first-declared-wins tie-break this rule presumably
// never produces a token; confirm whether it is needed.
Operators
    : ADD
    | SUB
    | MUL
    | DIV
    | MOD
    | ADD_ASSIGNMENT
    | SUB_ASSIGNMENT
    | MUL_ASSIGNMENT
    | DIV_ASSIGNMENT
    | MOD_ASSIGNMENT
    | ASSIGNMENT
    | INCR
    | DECR
    ;

// Line and block comments are sent to the HIDDEN channel rather than discarded.
LineComment: '//' ~[\u000A\u000D]* -> channel(HIDDEN) ;
DelimetedComment: '/*' .*? '*/' -> channel(HIDDEN) ;

// String literal: everything up to the next double quote (non-greedy; no escape handling here).
String: '"' .*? '"' ;
// Character literal: one escape sequence or any single character between single quotes.
Character: '\'' (EscapeSeq | .) '\'' ;
// NOTE(review): apart from '0', this requires a non-zero digit followed by at least one more
// digit (`DecDigit+`), so a lone 1-9 is not an IntegerLiteral; the float rules below use
// `DecDigit*`. Confirm which was intended.
IntegerLiteral: '0' | (ADD?| SUB) DecDigitNoZero DecDigit+ ;
// Float, double and long literals are distinguished by an uppercase F, D or L suffix.
FloatLiteral: ((ADD? | SUB) (DecDigitNoZero DecDigit*)? DOT DecDigit+ | IntegerLiteral) [F] ;
DoubleLiteral: ((ADD? | SUB) (DecDigitNoZero DecDigit*)? DOT DecDigit+ | IntegerLiteral) [D]  ;
LongLiteral: IntegerLiteral [L] ;
// Prefixed literals; underscores are allowed after the first digit.
HexLiteral: '0' [xX] HexDigit (HexDigit | UNDERSCORE)* ;
BinLiteral: '0' [bB] BinDigit (BinDigit | UNDERSCORE)* ;
// NOTE(review): the octal prefix is 0c / 0C here; confirm that this is intentional.
OctLiteral: '0' [cC] OctDigit (OctDigit | UNDERSCORE)* ;
Booleans: TRUE | FALSE ;
// NOTE(review): every alternative of Number is a token rule declared above it, so on an
// equal-length match the earlier rule presumably wins and Number is never emitted.
Number: IntegerLiteral | FloatLiteral | DoubleLiteral | BinLiteral | HexLiteral | OctLiteral | LongLiteral ;
// Escape sequences accepted inside a Character literal.
EscapeSeq: UniCharacterLiteral | EscapedIdentifier;
UniCharacterLiteral: '\\' 'u' HexDigit HexDigit HexDigit HexDigit ;
EscapedIdentifier: '\\' ('t' | 'b' | 'r' | 'n' | '\'' | '"' | '\\' | '$') ;
// Single-digit helper rules used by the literal rules above.
// NOTE(review): these are ordinary lexer rules, not `fragment`s, so each can be emitted as
// a token of its own. HexDigit also matches the single letters a-f / A-F and is declared
// before ID and TYPE.
HexDigit: [0-9a-fA-F] ;
BinDigit: [01] ;
OctDigit: [0-7];
DecDigit: [0-9];
DecDigitNoZero: [1-9];

// Identifier: a lowercase letter followed by any number of letters, underscores or digits.
ID: [a-z] ([a-zA-Z_] | [0-9])*;
// Type name: an uppercase letter followed by any number of letters, underscores or digits.
TYPE: [A-Z] ([a-zA-Z] | UNDERSCORE | [0-9])* ;

// Any literal value; this is the token that the parser's expr rule refers to.
// NOTE(review): each alternative is itself a token rule declared earlier, so on an
// equal-length match the earlier rule presumably wins and DATATYPE is never emitted.
DATATYPE: Number | String | Character | Booleans ;

这是我的解析器:

// Combined grammar for a source file; the token definitions come from the imported Symbols grammar.
grammar File;

import Symbols;

// Whole-file rule (presumably the start rule): import headers followed by top-level declarations.
file: importHeader* topLevelDeclaration* ;

// `import` and a type name, optionally followed by `.*`.
importHeader
    : IMPORT TYPE (DOT MUL)?
    ;

// Declarations allowed directly at file level.
topLevelDeclaration
    : classDeclaration
    | functionDeclaration
    | enumDeclaration
    ;

// A brace-delimited sequence of statements.
block
    : LCURL statement* RCURL
    ;

// A single statement. Several alternatives begin with an ID token, and declarations
// (class, function, enum) are also allowed in statement position.
statement
    : ifStatement
    | forStatement
    | whileStatement
    | dowhileStatement
    | switchStatement
    | thisStatement
    | throwStatement
    | tryStatement
    | returnStatement
    | anObjectCalls
    | functionCall
    | indexOfArrayAssignmnet
    | lambdaFunction
    | varDeclaration
    | varAssignment
    | classDeclaration
    | functionDeclaration
    | enumDeclaration
    | arrayAssignment
    ;

// Expressions: identifiers, literals, null, prefix/postfix increment and decrement,
// logical, arithmetic and comparison operators, calls, if-expressions, `this`,
// object instantiation, array indexing and lambda expressions.
// NOTE(review): in an ANTLR4 left-recursive rule, alternatives listed earlier bind more
// tightly. As written, AND / OR come before MUL / DIV / MOD / ADD / SUB and the
// comparisons, and there is no parenthesised-expression alternative; confirm that this
// ordering is intended.
expr
    : ID
    | DATATYPE
    | NULL
    | INCR expr
    | DECR expr
    | expr INCR
    | expr DECR
    | expr AND expr
    | expr OR expr
    | NOT expr
    | expr MUL expr
    | expr DIV expr
    | expr MOD expr
    | expr ADD expr
    | expr SUB expr
    | expr RANGLE expr
    | expr GE expr
    | expr LANGLE expr
    | expr LE expr
    | expr EQEQ expr
    | expr EXCL_EQ expr
    | functionCall
    | ifExpr
    | thisStatement
    | anObjectCalls
    | objectInstantation
    | indexOfArray
    | lambdaExpression
    ;

// Lambda: parenthesised formal parameters, an optional `: TYPE` or `: void` return
// annotation, `->`, then a block or a single statement.
lambdaExpression
    : LPAREN formalParameters RPAREN (COLON TYPE | (COLON VOID)?) ARROW (block | statement)
    ;

// Named lambda: `lambda name = <lambda expression>`.
lambdaFunction
    : LAMBDA ID ASSIGNMENT lambdaExpression
    ;

// Array element access: `name[expr]`.
indexOfArray
    : ID LSQUARE expr RSQUARE
    ;

// Plain or compound assignment to an array element.
indexOfArrayAssignmnet
    : indexOfArray (ASSIGNMENT
    | ADD_ASSIGNMENT
    | SUB_ASSIGNMENT
    | MUL_ASSIGNMENT
    | DIV_ASSIGNMENT
    | MOD_ASSIGNMENT) expr
    ;

// Conditional expression; the else branch is mandatory.
ifExpr
    : IF LPAREN expr RPAREN expr ELSE expr
    ;

// Variable declaration without an initializer: a type name followed by an identifier.
varDeclaration
    : TYPE ID
    ;

// Plain or compound assignment to a variable.
varAssignment
    : ID (ASSIGNMENT
    | ADD_ASSIGNMENT
    | SUB_ASSIGNMENT
    | MUL_ASSIGNMENT
    | DIV_ASSIGNMENT
    | MOD_ASSIGNMENT) expr
    ;

// Array creation: `name = [expr] of Type`.
arrayAssignment
    : ID ASSIGNMENT LSQUARE expr RSQUARE OF TYPE
    ;

// Visibility of a class member.
visibilityModifier
    : PUBLIC
    | PRIVATE
    | PROTECTED
    ;

// Extra modifiers accepted on methods (see methodDeclaration).
attributeMethodModifier
    : FINAL
    | STATIC
    ;

// Modifiers accepted in front of `class`.
classModifier
    : ABSTRACT
    | FINAL
    | PUBLIC
    | PRIVATE
    ;

// Function: optional type parameters, name, parameter list (formal parameters, `void`
// or empty), a mandatory `: Type` or `: void` return annotation, an optional throws
// clause, and a body block.
functionDeclaration
    : parametricTypes? ID LPAREN (formalParameters | VOID?) RPAREN COLON (TYPE | VOID) throwsStatement? block
    ;

// Throws clause: `?` followed by one or more comma-separated type names.
throwsStatement
    : QUEST TYPE (COMMA TYPE)*
    ;

// Angle-bracketed, comma-separated list of type parameters.
parametricTypes
    : LANGLE parametricType (COMMA parametricType)*  RANGLE
    ;

// A single type parameter: either a concrete type name, or a `?` wildcard bounded by
// `extends` or `super` followed by one or more comma-separated type names.
// The `super` bound takes a repeatable `(COMMA TYPE)*` tail, like the `extends` bound,
// rather than requiring exactly two types.
parametricType
    : TYPE
    | QUEST (EXTENDS TYPE (COMMA TYPE)* | SUPER TYPE (COMMA TYPE)*)
    ;

// One or more comma-separated formal parameters.
formalParameters
    : formalParameter (COMMA formalParameter)*
    ;

// A parameter is `Type name` with an optional default value, or an array parameter `Type name[]`.
formalParameter
    : TYPE ID (ASSIGNMENT expr)?
    | TYPE ID LSQUARE RSQUARE
    ;

// Call of a free function; the argument list may be arguments, `void`, or empty.
functionCall
    : ID LPAREN (currentParameters | VOID?) RPAREN
    ;

// Call of a method; same shape as functionCall, used after `.` and in thisStatement.
methodCall
    : ID LPAREN (currentParameters | VOID?) RPAREN
    ;

// One or more comma-separated call arguments.
currentParameters
    : currentParameter (COMMA currentParameter)*
    ;

// A call argument is any expression.
currentParameter
    : expr
    ;

// Class: modifiers, `class`, name, optional type parameters, optional `extends` list, body.
classDeclaration
    : classModifier* CLASS TYPE parametricTypes? (EXTENDS TYPE (COMMA TYPE)*)? classBody
    ;

// Brace-delimited list of class members.
classBody
    : LCURL classMemberDeclaration* RCURL
    ;

// Members allowed inside a class body.
classMemberDeclaration
    : classDeclaration
    | methodDeclaration
    | constructorDeclaration
    | attributeDeclaration
    ;

// Attribute: visibility, name, optional initializer.
// NOTE(review): this rule refers to itself (`attributeDeclaration*`) and names no TYPE;
// something like `attributeMethodModifier* TYPE ID` may have been intended. Confirm.
attributeDeclaration
    : visibilityModifier attributeDeclaration* ID (ASSIGNMENT expr)?
    ;

// Method: visibility, optional final/static modifiers, then an ordinary function declaration.
methodDeclaration
    : visibilityModifier attributeMethodModifier* functionDeclaration
    ;

// Constructor: visibility, `constructor`, a non-empty formal parameter list, and a body.
constructorDeclaration
    : visibilityModifier CONSTRUCTOR LPAREN formalParameters RPAREN constructorBody
    ;

// Constructor body: `{`, an optional `super(...)` call, an optional `this(...)` call,
// any number of statements, and the closing `}`.
// The closing RCURL is consumed here so that the brace opened by LCURL is matched
// inside this rule instead of being left for the enclosing class body.
constructorBody
    : LCURL (SUPER LPAREN currentParameters RPAREN)? (THIS LPAREN currentParameters RPAREN)? statement* RCURL
    ;

// Enum: `enum`, a type name, and a body.
enumDeclaration
    : ENUM TYPE enumBody
    ;

// Brace-delimited, possibly empty, comma-separated list of constants (ID or TYPE tokens).
enumBody
    : LCURL ((ID | TYPE) (COMMA (ID | TYPE))*)? RCURL
    ;

// If statement with either a chain of `elif` branches or a single optional `else`.
// NOTE(review): because the two tails are alternatives (`|`), an elif chain cannot be
// followed by an else branch; confirm whether that is intended.
ifStatement
    : IF LPAREN expr RPAREN (block | statement)
        ((ELIF LPAREN expr RPAREN (block | statement))* | (ELSE (block | statement))? )
    ;

// While loop over a block or a single statement.
whileStatement
    : WHILE LPAREN expr RPAREN (block | statement)
    ;

// Two loop forms: `for (init : condition : update)` and `for (Type name : expr)`.
// The header fields are separated by COLON.
forStatement
    :  FOR LPAREN initializationfield COLON expr COLON updatefield RPAREN (block | statement)
    | FOR LPAREN TYPE ID COLON (expr | ID) RPAREN (block | statement)
    ;

// Comma-separated loop initializers: a declaration, a declaration with assignment, or an assignment.
initializationfield
    : ((TYPE ID | TYPE varAssignment | varAssignment) (COMMA (TYPE ID | TYPE varAssignment | varAssignment))*)
    ;

// Comma-separated assignments for the update part of a for loop.
updatefield
    : varAssignment (COMMA varAssignment)*
    ;

// Do-while loop: the body comes first, then the condition.
dowhileStatement
    : DO (block | statement) WHILE LPAREN expr RPAREN
    ;

// Switch: an optional default branch first, then the case branches, then an optional finally branch.
switchStatement
    : SWITCH LPAREN expr RPAREN LCURL defaultstatement? casestatement* finallystatement? RCURL
    ;

// `default -> { ... }`
defaultstatement
    : DEFAULT ARROW block
    ;

// `finally -> { ... }`
finallystatement
    : FINALLY ARROW block
    ;

// `case a, b, ... -> { ... }`: one or more comma-separated expressions per case.
casestatement
    : CASE expr (COMMA expr)* ARROW block
    ;

// `throw new Type(...)`: only a freshly instantiated object can be thrown.
throwStatement
    : THROW objectInstantation
    ;

// `try { ... }` followed by one or more `catch (params) { ... }` clauses.
tryStatement
    : TRY block (CATCH LPAREN formalParameters RPAREN block)+
    ;

// `this.member`, `this.method(...)`, `this.member = expr` (or a compound assignment), or bare `this`.
thisStatement
    : THIS DOT ((ID | methodCall) | varAssignment)
    | THIS
    ;

// `new Type(...)`; the argument list may be arguments, `void`, or empty.
objectInstantation
    : NEW TYPE LPAREN (currentParameters | VOID?) RPAREN
    ;

// `return` followed by a mandatory expression.
returnStatement
    : RETURN expr
    ;

// A chain of one or more method calls on a named object or on a newly created object.
anObjectCalls
    : ID DOT methodCall (DOT methodCall)*
    | objectInstantation DOT methodCall (DOT methodCall)*
    ;

1 个答案:

答案 0 :(得分:1)

当您收到诸如“意外输入 'foo',期望 BAR”之类的错误,而您心想“可是 'foo' 明明就是 BAR 啊”时,首先应该做的是打印词法分析器生成的令牌流(可以通过运行 grun Symbols tokens -tokens inputfile 来实现)。这样做之后,您会看到输入中的 a 被识别为 HexDigit,而不是 ID。

为什么会这样?因为 HexDigit 和 ID 都能匹配输入 a,而 ANTLR(与大多数词法分析器生成器一样)按照最长匹配(maximal munch)规则来消除歧义:当多个规则都能匹配当前输入时,它选择匹配最长的那个(这就是多个字母的变量名可以正常工作的原因);如果匹配长度相同,则选择最先定义的规则(此处为 HexDigit)来打破平局。

请注意,词法分析器并不关心解析器使用了哪些词法规则,也不关心何时使用。词法分析器只根据词法语法的内容来决定生成哪些令牌,因此它既不知道也不在乎解析器此刻需要的是 ID。它只是查看所有能匹配的规则,然后按照最长匹配规则选出一个。

在您的情况下,您实际上从未在解析器语法中使用 HexDigit,因此没有理由生成 HexDigit 令牌。因此,HexDigit 不应是词法分析器规则,而应是 fragment:

// Declared as a fragment: usable inside other lexer rules, but never emitted as a token of its own.
fragment HexDigit : [0-9a-fA-F];

这也适用于解析器中未使用的其他规则,包括所有...Digit规则。

PS:由于这些相同的规则,您的Number规则将永远不会匹配。相反,它可能应该是解析器规则(或者,其他数字规则应该是片段,如果您不在乎您拥有哪种数字文字)。