ANTLR无限递归而没有警告,尝试实现宏解析语法

时间:2018-09-19 11:12:20

标签: java antlr4

编译此语法时,Antlr将显示零警告,但是当使用输入文件调用该语法时,它将在org.antlr.v4.runtime.atn.LexerATNSimulator.closure中永久递归。知道如何创建必须在词法分析器上使用除特殊字符以外的任何字符的宏解析语法吗?

词法分析器:

lexer grammar e8pp_lexer;

NEWLINE: '\r' '\n' | '\n' | '\r';
CONTENT: NEWLINE CONTENTF+;
MACRODEF : NEWLINE 'MACRO' -> pushMode(MACRO_DEF);
MACROUSE : NEWLINE '`' -> mode (MACRO_USE); 
fragment CONTENTF: .* ;

mode MACRO_DEF;

MACRONAME: LETTERS+ ;
MACROPARAMSTART: '[' ;
MACROPARAM: LETTERS+ ;
MACROSEP: ',';
MACROPARAMEND : ']' ;
MACRODEF_DEF : ':' -> pushMode(MACRO_MODE);
WHITESPACE: WS+ -> skip;

fragment WS: [ \t\r\n\u000C];
fragment LETTERS : [a-zA-Z];

mode MACRO_MODE;

NEWLINE_CONTENT: '\r' '\n' | '\n' | '\r';
ENDMACRO : NEWLINE_CONTENT 'ENDMACRO' -> popMode;
MACRO_CONTENT : NEWLINE_CONTENT MACRO_CONTENTF+;
fragment MACRO_CONTENTF: .* ;

mode MACRO_USE;

MU_ID: LETTERS+ ;
MU_PARAMSTART: '(';
MU_PARAM: LETTERS+;
MU_PARAMSEP : ',';
MU_PARAM_END: ')' -> popMode;

fragment MU_WS: [ \t\r\n\u000C];
fragment MU_LETTERS : [a-zA-Z];

解析器

grammar e8pp_parser;

options { tokenVocab=e8pp_lexer; }

content:
    contentElement+ EOF
    ;

contentElement:
    data | macro
    ;

data:
    contentData | macroUse
    ;

contentData:
    CONTENT
    ;

macroUse:
    MACROUSE macroId MU_PARAMSTART macroUseParams? MU_PARAM_END
    ;

macroId:
    MU_ID
    ;

macroUseParams:
    macroUseParam (MU_PARAMSEP macroUseParam)*
    ;

macroUseParam:
    MU_PARAM
    ;


macro:
    MACRODEF macroName 
        MACROPARAMSTART macroParams? MACROPARAMEND MACRODEF_DEF
    macroBody?
    NEWLINE_CONTENT ENDMACRO
    ;

macroName:
    MACRONAME
    ;

macroParams:
    macroParam (MACROSEP macroParam)*
    ;

macroParam:
    MACROPARAM
    ;

macroBody:
    MACRO_CONTENT+
    ; 

这个想法是,这将解析带有宏的特殊块,同时将其他所有内容作为数据返回。 Java用法:

private ByteArrayInputStream parseMacros() throws Exception {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();

    e8pp_lexer lexer = new e8pp_lexer(new ANTLRFileStream(srcFile.getAbsolutePath(), "utf-8"));
    CommonTokenStream tokens = new CommonTokenStream(lexer);
    e8pp_parserParser parser = new e8pp_parserParser(tokens);

    ContentContext cc = parser.content();

    Map<String, MacroDef> macros = new HashMap<String, MacroDef>();

    for (ContentElementContext ctx : cc.contentElement()) {
        parseMacroElement(ctx, bos, macros);
    }

    return new ByteArrayInputStream(bos.toByteArray());
}

private void parseMacroElement(ContentElementContext ctx, ByteArrayOutputStream bos, Map<String, MacroDef> macros) throws Exception {
    if (ctx.data() != null) {
        if (ctx.data().contentData() != null) {
            bos.write(ctx.data().contentData().getText().getBytes(Charset.forName("UTF-8")));
        } else {
            resolveMacro(ctx.data().macroUse(), bos, macros);
        }
    } else {
        defineMacro(ctx.macro(), macros);
    }
}

private void defineMacro(MacroContext macro, Map<String, MacroDef> macros) {
    String name = macro.macroName().getText();
    if (macros.containsKey(name)) {
        throw new AssemblyException("Duplicate macro name " + name);
    }

    MacroDef md = new MacroDef();
    md.content = macro.macroBody().getText();
    md.argNames = parseMacroArgs(macro.macroParams());

}

private List<String> parseMacroArgs(MacroParamsContext ctx) {
    List<String> params = new ArrayList<String>();

    for (MacroParamContext p : ctx.macroParam()) {
        params.add(p.getText());
    }

    return params;
}

private void resolveMacro(MacroUseContext ctx, ByteArrayOutputStream bos, Map<String, MacroDef> macros) throws Exception {
    String macroName = ctx.macroId().getText();
    if (!macros.containsKey(macroName)) {
        throw new AssemblyException("Unknown macro " + macroName);
    }

    MacroDef md = macros.get(macroName);
    List<String> values = parseMacroParams(ctx.macroUseParams());

    if (values.size() != md.argNames.size()) {
        throw new AssemblyException("Bad macro param count for macro " + macroName);
    }

    String replaced = md.content;
    for (int i=0; i<md.argNames.size(); i++) {
        String paramName = md.argNames.get(i);
        String replaceValue = values.get(i);
        replaced = replaced.replaceAll(Pattern.quote(paramName), replaceValue);
    }

    bos.write(replaced.getBytes(Charset.forName("UTF-8")));
}

private List<String> parseMacroParams(MacroUseParamsContext ctx) {
    List<String> params = new ArrayList<String>();

    for (MacroUseParamContext p : ctx.macroUseParam()) {
        params.add(p.getText());
    }

    return params;
}

0 个答案:

没有答案