Antlr语法不会匹配任何内容,lexer将整个输入作为标记返回

时间:2015-08-05 16:11:37

标签: java python antlr antlr4

我正在尝试在antlr中实现python格式:

 grammar formatter;

 source_stream
 : segments EOF
 ;

 segments
 : segment*
 ;

 segment
 : (text | replacement_field)
 ;

 text
 : TEXT
 ;

 replacement_field
 : '{' field_name? ('!' conversion)? (':' format_spec)? '}'
 ;

 field_name 
 : arg_name (('.'attribute_name) | '[' element_index ']')*
 ;

 arg_name
 : (identifier | integer)?
 ;

 attribute_name
 : identifier
 ;

element_index
: integer | index_string
;

index_string
: string
;

identifier
: NAME
;

conversion
: 's'
;

format_spec
: (fill? align)? sign? hash? zero? width? dash? precision? type?
;

fill
: FILL_ANY_CHAR
;

align
: '<' | '>' | '=' | '^'
;

sign
: '+' | '-' | ' '
;

hash
: '#'
;

zero
: '0'
;

width
: integer
;

precision
: integer
;

type
: 'b' | 'c' | 'd' | 'e' | 'E' | 'f' | 'g' | 'G' | 'n' | 'o' | 's' | 'x' | 'X' | '%'
;

dash
: ','
;

string
: STRING
;

STRING
: STRING_NO_BRACKET+
;

STRING_NO_BRACKET
 : [\u0000-\u005C]
 | [\u005E-\u007F]
 ;

TEXT
: ANY
;

ANY
: .
;

integer
 : DECIMAL_INTEGER
 | OCT_INTEGER
 | HEX_INTEGER
 | BIN_INTEGER
 ;

 DECIMAL_INTEGER
 : NON_ZERO_DIGIT DIGIT*
 | '0'+
 ;

OCT_INTEGER
 : '0' [oO] OCT_DIGIT+
 ;

HEX_INTEGER
 : '0' [xX] HEX_DIGIT+
 ;

BIN_INTEGER
 : '0' [bB]? BIN_DIGIT+
 ;

FILL_ANY_CHAR
: FILL
;

fragment FILL
: [\u0000-\u007F]
;

NAME
 : ID_START ID_CONTINUE*
 ;

 fragment NON_ZERO_DIGIT
 : [1-9]
 ;

fragment DIGIT
 : [0-9]
 ;

fragment OCT_DIGIT
 : [0-7]
 ;

fragment HEX_DIGIT
 : [0-9a-fA-F]
 ;

fragment BIN_DIGIT
 : [01]
 ;

fragment ID_START
 : '_'
 | [A-Z]
 | [a-z]
 | '\u00AA'
 | '\u00B5'
 | '\u00BA'
 | [\u00C0-\u00D6]
 | [\u00D8-\u00F6]
 | [\u00F8-\u01BA]
 | '\u01BB'
 | [\u01BC-\u01BF]
 | [\u01C0-\u01C3]
 | [\u01C4-\u0241]
 | [\u0250-\u02AF]
 | [\u02B0-\u02C1]
 | [\u02C6-\u02D1]
 | [\u02E0-\u02E4]
 | '\u02EE'
 | '\u037A'
 | '\u0386'
 | [\u0388-\u038A]
 | '\u038C'
 | [\u038E-\u03A1]
 | [\u03A3-\u03CE]
 | [\u03D0-\u03F5]
 | [\u03F7-\u0481]
 | [\u048A-\u04CE]
 | [\u04D0-\u04F9]
 | [\u0500-\u050F]
 | [\u0531-\u0556]
 | '\u0559'
 | [\u0561-\u0587]
 | [\u05D0-\u05EA]
 | [\u05F0-\u05F2]
 | [\u0621-\u063A]
 | '\u0640'
 | [\u0641-\u064A]
 | [\u066E-\u066F]
 | [\u0671-\u06D3]
 | '\u06D5'
 | [\u06E5-\u06E6]
 | [\u06EE-\u06EF]
 | [\u06FA-\u06FC]
 | '\u06FF'
 | '\u0710'
 | [\u0712-\u072F]
 | [\u074D-\u076D]
 | [\u0780-\u07A5]
 | '\u07B1'
 | [\u0904-\u0939]
 | '\u093D'
 | '\u0950'
 | [\u0958-\u0961]
 | '\u097D'
 | [\u0985-\u098C]
 | [\u098F-\u0990]
 | [\u0993-\u09A8]
 | [\u09AA-\u09B0]
 | '\u09B2'
 | [\u09B6-\u09B9]
 | '\u09BD'
 | '\u09CE'
 | [\u09DC-\u09DD]
 | [\u09DF-\u09E1]
 | [\u09F0-\u09F1]
 | [\u0A05-\u0A0A]
 | [\u0A0F-\u0A10]
 | [\u0A13-\u0A28]
 | [\u0A2A-\u0A30]
 | [\u0A32-\u0A33]
 | [\u0A35-\u0A36]
 | [\u0A38-\u0A39]
 | [\u0A59-\u0A5C]
 | '\u0A5E'
 | [\u0A72-\u0A74]
 | [\u0A85-\u0A8D]
 | [\u0A8F-\u0A91]
 | [\u0A93-\u0AA8]
 | [\u0AAA-\u0AB0]
 | [\u0AB2-\u0AB3]
 | [\u0AB5-\u0AB9]
 | '\u0ABD'
 | '\u0AD0'
 | [\u0AE0-\u0AE1]
 | [\u0B05-\u0B0C]
 | [\u0B0F-\u0B10]
 | [\u0B13-\u0B28]
 | [\u0B2A-\u0B30]
 | [\u0B32-\u0B33]
 | [\u0B35-\u0B39]
 | '\u0B3D'
 | [\u0B5C-\u0B5D]
 | [\u0B5F-\u0B61]
 | '\u0B71'
 | '\u0B83'
 | [\u0B85-\u0B8A]
 | [\u0B8E-\u0B90]
 | [\u0B92-\u0B95]
 | [\u0B99-\u0B9A]
 | '\u0B9C'
 | [\u0B9E-\u0B9F]
 | [\u0BA3-\u0BA4]
 | [\u0BA8-\u0BAA]
 | [\u0BAE-\u0BB9]
 | [\u0C05-\u0C0C]
 | [\u0C0E-\u0C10]
 | [\u0C12-\u0C28]
 | [\u0C2A-\u0C33]
 | [\u0C35-\u0C39]
 | [\u0C60-\u0C61]
 | [\u0C85-\u0C8C]
 | [\u0C8E-\u0C90]
 | [\u0C92-\u0CA8]
 | [\u0CAA-\u0CB3]
 | [\u0CB5-\u0CB9]
 | '\u0CBD'
 | '\u0CDE'
 | [\u0CE0-\u0CE1]
 | [\u0D05-\u0D0C]
 | [\u0D0E-\u0D10]
 | [\u0D12-\u0D28]
 | [\u0D2A-\u0D39]
 | [\u0D60-\u0D61]
 | [\u0D85-\u0D96]
 | [\u0D9A-\u0DB1]
 | [\u0DB3-\u0DBB]
 | '\u0DBD'
 | [\u0DC0-\u0DC6]
 | [\u0E01-\u0E30]
 | [\u0E32-\u0E33]
 | [\u0E40-\u0E45]
 | '\u0E46'
 | [\u0E81-\u0E82]
 | '\u0E84'
 | [\u0E87-\u0E88]
 | '\u0E8A'
 | '\u0E8D'
 | [\u0E94-\u0E97]
 | [\u0E99-\u0E9F]
 | [\u0EA1-\u0EA3]
 | '\u0EA5'
 | '\u0EA7'
 | [\u0EAA-\u0EAB]
 | [\u0EAD-\u0EB0]
 | [\u0EB2-\u0EB3]
 | '\u0EBD'
 | [\u0EC0-\u0EC4]
 | '\u0EC6'
 | [\u0EDC-\u0EDD]
 | '\u0F00'
 | [\u0F40-\u0F47]
 | [\u0F49-\u0F6A]
 | [\u0F88-\u0F8B]
 | [\u1000-\u1021]
 | [\u1023-\u1027]
 | [\u1029-\u102A]
 | [\u1050-\u1055]
 | [\u10A0-\u10C5]
 | [\u10D0-\u10FA]
 | '\u10FC'
 | [\u1100-\u1159]
 | [\u115F-\u11A2]
 | [\u11A8-\u11F9]
 | [\u1200-\u1248]
 | [\u124A-\u124D]
 | [\u1250-\u1256]
 | '\u1258'
 | [\u125A-\u125D]
 | [\u1260-\u1288]
 | [\u128A-\u128D]
 | [\u1290-\u12B0]
 | [\u12B2-\u12B5]
 | [\u12B8-\u12BE]
 | '\u12C0'
 | [\u12C2-\u12C5]
 | [\u12C8-\u12D6]
 | [\u12D8-\u1310]
 | [\u1312-\u1315]
 | [\u1318-\u135A]
 | [\u1380-\u138F]
 | [\u13A0-\u13F4]
 | [\u1401-\u166C]
 | [\u166F-\u1676]
 | [\u1681-\u169A]
 | [\u16A0-\u16EA]
 | [\u16EE-\u16F0]
 | [\u1700-\u170C]
 | [\u170E-\u1711]
 | [\u1720-\u1731]
 | [\u1740-\u1751]
 | [\u1760-\u176C]
 | [\u176E-\u1770]
 | [\u1780-\u17B3]
 | '\u17D7'
 | '\u17DC'
 | [\u1820-\u1842]
 | '\u1843'
 | [\u1844-\u1877]
 | [\u1880-\u18A8]
 | [\u1900-\u191C]
 | [\u1950-\u196D]
 | [\u1970-\u1974]
 | [\u1980-\u19A9]
 | [\u19C1-\u19C7]
 | [\u1A00-\u1A16]
 | [\u1D00-\u1D2B]
 | [\u1D2C-\u1D61]
 | [\u1D62-\u1D77]
 | '\u1D78'
 | [\u1D79-\u1D9A]
 | [\u1D9B-\u1DBF]
 | [\u1E00-\u1E9B]
 | [\u1EA0-\u1EF9]
 | [\u1F00-\u1F15]
 | [\u1F18-\u1F1D]
 | [\u1F20-\u1F45]
 | [\u1F48-\u1F4D]
 | [\u1F50-\u1F57]
 | '\u1F59'
 | '\u1F5B'
 | '\u1F5D'
 | [\u1F5F-\u1F7D]
 | [\u1F80-\u1FB4]
 | [\u1FB6-\u1FBC]
 | '\u1FBE'
 | [\u1FC2-\u1FC4]
 | [\u1FC6-\u1FCC]
 | [\u1FD0-\u1FD3]
 | [\u1FD6-\u1FDB]
 | [\u1FE0-\u1FEC]
 | [\u1FF2-\u1FF4]
 | [\u1FF6-\u1FFC]
 | '\u2071'
 | '\u207F'
 | [\u2090-\u2094]
 | '\u2102'
 | '\u2107'
 | [\u210A-\u2113]
 | '\u2115'
 | '\u2118'
 | [\u2119-\u211D]
 | '\u2124'
 | '\u2126'
 | '\u2128'
 | [\u212A-\u212D]
 | '\u212E'
 | [\u212F-\u2131]
 | [\u2133-\u2134]
 | [\u2135-\u2138]
 | '\u2139'
 | [\u213C-\u213F]
 | [\u2145-\u2149]
 | [\u2160-\u2183]
 | [\u2C00-\u2C2E]
 | [\u2C30-\u2C5E]
 | [\u2C80-\u2CE4]
 | [\u2D00-\u2D25]
 | [\u2D30-\u2D65]
 | '\u2D6F'
 | [\u2D80-\u2D96]
 | [\u2DA0-\u2DA6]
 | [\u2DA8-\u2DAE]
 | [\u2DB0-\u2DB6]
 | [\u2DB8-\u2DBE]
 | [\u2DC0-\u2DC6]
 | [\u2DC8-\u2DCE]
 | [\u2DD0-\u2DD6]
 | [\u2DD8-\u2DDE]
 | '\u3005'
 | '\u3006'
 | '\u3007'
 | [\u3021-\u3029]
 | [\u3031-\u3035]
 | [\u3038-\u303A]
 | '\u303B'
 | '\u303C'
 | [\u3041-\u3096]
 | [\u309B-\u309C]
 | [\u309D-\u309E]
 | '\u309F'
 | [\u30A1-\u30FA]
 | [\u30FC-\u30FE]
 | '\u30FF'
 | [\u3105-\u312C]
 | [\u3131-\u318E]
 | [\u31A0-\u31B7]
 | [\u31F0-\u31FF]
 | [\u3400-\u4DB5]
 | [\u4E00-\u9FBB]
 | [\uA000-\uA014]
 | '\uA015'
 | [\uA016-\uA48C]
 | [\uA800-\uA801]
 | [\uA803-\uA805]
 | [\uA807-\uA80A]
 | [\uA80C-\uA822]
 | [\uAC00-\uD7A3]
 | [\uF900-\uFA2D]
 | [\uFA30-\uFA6A]
 | [\uFA70-\uFAD9]
 | [\uFB00-\uFB06]
 | [\uFB13-\uFB17]
 | '\uFB1D'
 | [\uFB1F-\uFB28]
 | [\uFB2A-\uFB36]
 | [\uFB38-\uFB3C]
 | '\uFB3E'
 | [\uFB40-\uFB41]
 | [\uFB43-\uFB44]
 | [\uFB46-\uFBB1]
 | [\uFBD3-\uFD3D]
 | [\uFD50-\uFD8F]
 | [\uFD92-\uFDC7]
 | [\uFDF0-\uFDFB]
 | [\uFE70-\uFE74]
 | [\uFE76-\uFEFC]
 | [\uFF21-\uFF3A]
 | [\uFF41-\uFF5A]
 | [\uFF66-\uFF6F]
 | '\uFF70'
 | [\uFF71-\uFF9D]
 | [\uFF9E-\uFF9F]
 | [\uFFA0-\uFFBE]
 | [\uFFC2-\uFFC7]
 | [\uFFCA-\uFFCF]
 | [\uFFD2-\uFFD7]
 | [\uFFDA-\uFFDC]
 ;

fragment ID_CONTINUE
 : ID_START
 | [0-9]
 | [\u0300-\u036F]
 | [\u0483-\u0486]
 | [\u0591-\u05B9]
 | [\u05BB-\u05BD]
 | '\u05BF'
 | [\u05C1-\u05C2]
 | [\u05C4-\u05C5]
 | '\u05C7'
 | [\u0610-\u0615]
 | [\u064B-\u065E]
 | [\u0660-\u0669]
 | '\u0670'
 | [\u06D6-\u06DC]
 | [\u06DF-\u06E4]
 | [\u06E7-\u06E8]
 | [\u06EA-\u06ED]
 | [\u06F0-\u06F9]
 | '\u0711'
 | [\u0730-\u074A]
 | [\u07A6-\u07B0]
 | [\u0901-\u0902]
 | '\u0903'
 | '\u093C'
 | [\u093E-\u0940]
 | [\u0941-\u0948]
 | [\u0949-\u094C]
 | '\u094D'
 | [\u0951-\u0954]
 | [\u0962-\u0963]
 | [\u0966-\u096F]
 | '\u0981'
 | [\u0982-\u0983]
 | '\u09BC'
 | [\u09BE-\u09C0]
 | [\u09C1-\u09C4]
 | [\u09C7-\u09C8]
 | [\u09CB-\u09CC]
 | '\u09CD'
 | '\u09D7'
 | [\u09E2-\u09E3]
 | [\u09E6-\u09EF]
 | [\u0A01-\u0A02]
 | '\u0A03'
 | '\u0A3C'
 | [\u0A3E-\u0A40]
 | [\u0A41-\u0A42]
 | [\u0A47-\u0A48]
 | [\u0A4B-\u0A4D]
 | [\u0A66-\u0A6F]
 | [\u0A70-\u0A71]
 | [\u0A81-\u0A82]
 | '\u0A83'
 | '\u0ABC'
 | [\u0ABE-\u0AC0]
 | [\u0AC1-\u0AC5]
 | [\u0AC7-\u0AC8]
 | '\u0AC9'
 | [\u0ACB-\u0ACC]
 | '\u0ACD'
 | [\u0AE2-\u0AE3]
 | [\u0AE6-\u0AEF]
 | '\u0B01'
 | [\u0B02-\u0B03]
 | '\u0B3C'
 | '\u0B3E'
 | '\u0B3F'
 | '\u0B40'
 | [\u0B41-\u0B43]
 | [\u0B47-\u0B48]
 | [\u0B4B-\u0B4C]
 | '\u0B4D'
 | '\u0B56'
 | '\u0B57'
 | [\u0B66-\u0B6F]
 | '\u0B82'
 | [\u0BBE-\u0BBF]
 | '\u0BC0'
 | [\u0BC1-\u0BC2]
 | [\u0BC6-\u0BC8]
 | [\u0BCA-\u0BCC]
 | '\u0BCD'
 | '\u0BD7'
 | [\u0BE6-\u0BEF]
 | [\u0C01-\u0C03]
 | [\u0C3E-\u0C40]
 | [\u0C41-\u0C44]
 | [\u0C46-\u0C48]
 | [\u0C4A-\u0C4D]
 | [\u0C55-\u0C56]
 | [\u0C66-\u0C6F]
 | [\u0C82-\u0C83]
 | '\u0CBC'
 | '\u0CBE'
 | '\u0CBF'
 | [\u0CC0-\u0CC4]
 | '\u0CC6'
 | [\u0CC7-\u0CC8]
 | [\u0CCA-\u0CCB]
 | [\u0CCC-\u0CCD]
 | [\u0CD5-\u0CD6]
 | [\u0CE6-\u0CEF]
 | [\u0D02-\u0D03]
 | [\u0D3E-\u0D40]
 | [\u0D41-\u0D43]
 | [\u0D46-\u0D48]
 | [\u0D4A-\u0D4C]
 | '\u0D4D'
 | '\u0D57'
 | [\u0D66-\u0D6F]
 | [\u0D82-\u0D83]
 | '\u0DCA'
 | [\u0DCF-\u0DD1]
 | [\u0DD2-\u0DD4]
 | '\u0DD6'
 | [\u0DD8-\u0DDF]
 | [\u0DF2-\u0DF3]
 | '\u0E31'
 | [\u0E34-\u0E3A]
 | [\u0E47-\u0E4E]
 | [\u0E50-\u0E59]
 | '\u0EB1'
 | [\u0EB4-\u0EB9]
 | [\u0EBB-\u0EBC]
 | [\u0EC8-\u0ECD]
 | [\u0ED0-\u0ED9]
 | [\u0F18-\u0F19]
 | [\u0F20-\u0F29]
 | '\u0F35'
 | '\u0F37'
 | '\u0F39'
 | [\u0F3E-\u0F3F]
 | [\u0F71-\u0F7E]
 | '\u0F7F'
 | [\u0F80-\u0F84]
 | [\u0F86-\u0F87]
 | [\u0F90-\u0F97]
 | [\u0F99-\u0FBC]
 | '\u0FC6'
 | '\u102C'
 | [\u102D-\u1030]
 | '\u1031'
 | '\u1032'
 | [\u1036-\u1037]
 | '\u1038'
 | '\u1039'
 | [\u1040-\u1049]
 | [\u1056-\u1057]
 | [\u1058-\u1059]
 | '\u135F'
 | [\u1369-\u1371]
 | [\u1712-\u1714]
 | [\u1732-\u1734]
 | [\u1752-\u1753]
 | [\u1772-\u1773]
 | '\u17B6'
 | [\u17B7-\u17BD]
 | [\u17BE-\u17C5]
 | '\u17C6'
 | [\u17C7-\u17C8]
 | [\u17C9-\u17D3]
 | '\u17DD'
 | [\u17E0-\u17E9]
 | [\u180B-\u180D]
 | [\u1810-\u1819]
 | '\u18A9'
 | [\u1920-\u1922]
 | [\u1923-\u1926]
 | [\u1927-\u1928]
 | [\u1929-\u192B]
 | [\u1930-\u1931]
 | '\u1932'
 | [\u1933-\u1938]
 | [\u1939-\u193B]
 | [\u1946-\u194F]
 | [\u19B0-\u19C0]
 | [\u19C8-\u19C9]
 | [\u19D0-\u19D9]
 | [\u1A17-\u1A18]
 | [\u1A19-\u1A1B]
 | [\u1DC0-\u1DC3]
 | [\u203F-\u2040]
 | '\u2054'
 | [\u20D0-\u20DC]
 | '\u20E1'
 | [\u20E5-\u20EB]
 | [\u302A-\u302F]
 | [\u3099-\u309A]
 | '\uA802'
 | '\uA806'
 | '\uA80B'
 | [\uA823-\uA824]
 | [\uA825-\uA826]
 | '\uA827'
 | '\uFB1E'
 | [\uFE00-\uFE0F]
 | [\uFE20-\uFE23]
 | [\uFE33-\uFE34]
 | [\uFE4D-\uFE4F]
 | [\uFF10-\uFF19]
 | '\uFF3F'
 ;

然而,无论我给它什么输入,它都永远不会匹配任何东西。

输入&#34;测试&#34;:

 line 1:0 extraneous input 'test' expecting {<EOF>, '{', TEXT}

输入&#34; {}&#34;:

 line 1:0 extraneous input '{}' expecting {<EOF>, '{', TEXT}

输入&#34; {test}&#34;:

 line 1:0 extraneous input '{test}' expecting {<EOF>, '{', TEXT}

我是否会错过一些基本的东西?由于某些原因,无论输入是什么,词法分析器只是将其作为整个令牌返回而不是将其复原。在示例2和3中,&#39; {&#39;本应该符合规则,但没有。

详细错误:

enter   source_stream, LT(1)={test}
enter   segments, LT(1)={test}
line 1:0 extraneous input '{test}' expecting {<EOF>, '{', TEXT}
exit    segments, LT(1)=<EOF>
consume [@1,6:5='<EOF>',<-1>,1:6] rule source_stream
exit    source_stream, LT(1)=<EOF>

1 个答案:

答案 0 :(得分:0)

好吧,经过仔细的思考和审议,我发现,我想要的是antlr的lexer + parser语法是不可能的,因为所有内容都会在一个大范围的词法规则中匹配。解决方案:将词法分析器和解析器拆分为两个语法并使用词法分析器模式进行正确的解析:

Lexer语法:

lexer grammar formatterLexer;

OPEN_RF: '{' -> pushMode(RuleMode);
CHAR_NOCURLY
: ~('{' | '}')
;

mode StringMode;
CHAR_NORIGHTB
: ~(']')
;
RIX: ']' -> popMode;

mode RuleMode;

EXLM: '!' ;
COLON: ':' ;
DOT: '.' ;
CLOSE_RF: '}' -> popMode;
LIX: '[' -> pushMode(StringMode);
LS: 's';
LT: '<';
MT: '>';
EQ: '=';
EXP: '^';
PLUS: '+';
MINUS: '-';
SPACE: ' ';
HASH: '#';
ZERO: '0';
TYPE: 'b' | 'c' | 'd' | 'e' | 'E' | 'f' | 'g' | 'G' | 'n' | 'o' | 's' | 'x' | 'X' | '%';
COMMA: ',';
CHAR: ~('{' | '}') ;

INTEGER
 : DECIMAL_INTEGER
 | OCT_INTEGER
 | HEX_INTEGER
 | BIN_INTEGER
 ;

 DECIMAL_INTEGER
 : NON_ZERO_DIGIT DIGIT*
 | '0'+
 ;

OCT_INTEGER
 : '0' [oO] OCT_DIGIT+
 ;

HEX_INTEGER
 : '0' [xX] HEX_DIGIT+
 ;

BIN_INTEGER
 : '0' [bB]? BIN_DIGIT+
 ;

NAME
 : ID_START ID_CONTINUE*
 ;

 fragment NON_ZERO_DIGIT
 : [1-9]
 ;

fragment DIGIT
 : [0-9]
 ;

fragment OCT_DIGIT
 : [0-7]
 ;

fragment HEX_DIGIT
 : [0-9a-fA-F]
 ;

fragment BIN_DIGIT
 : [01]
 ;

fragment ID_START
 : '_'
 | [A-Z]
 | [a-z]
 | '\u00AA'
 | '\u00B5'
 | '\u00BA'
 | [\u00C0-\u00D6]
 | [\u00D8-\u00F6]
 | [\u00F8-\u01BA]
 | '\u01BB'
 | [\u01BC-\u01BF]
 | [\u01C0-\u01C3]
 | [\u01C4-\u0241]
 | [\u0250-\u02AF]
 | [\u02B0-\u02C1]
 | [\u02C6-\u02D1]
 | [\u02E0-\u02E4]
 | '\u02EE'
 | '\u037A'
 | '\u0386'
 | [\u0388-\u038A]
 | '\u038C'
 | [\u038E-\u03A1]
 | [\u03A3-\u03CE]
 | [\u03D0-\u03F5]
 | [\u03F7-\u0481]
 | [\u048A-\u04CE]
 | [\u04D0-\u04F9]
 | [\u0500-\u050F]
 | [\u0531-\u0556]
 | '\u0559'
 | [\u0561-\u0587]
 | [\u05D0-\u05EA]
 | [\u05F0-\u05F2]
 | [\u0621-\u063A]
 | '\u0640'
 | [\u0641-\u064A]
 | [\u066E-\u066F]
 | [\u0671-\u06D3]
 | '\u06D5'
 | [\u06E5-\u06E6]
 | [\u06EE-\u06EF]
 | [\u06FA-\u06FC]
 | '\u06FF'
 | '\u0710'
 | [\u0712-\u072F]
 | [\u074D-\u076D]
 | [\u0780-\u07A5]
 | '\u07B1'
 | [\u0904-\u0939]
 | '\u093D'
 | '\u0950'
 | [\u0958-\u0961]
 | '\u097D'
 | [\u0985-\u098C]
 | [\u098F-\u0990]
 | [\u0993-\u09A8]
 | [\u09AA-\u09B0]
 | '\u09B2'
 | [\u09B6-\u09B9]
 | '\u09BD'
 | '\u09CE'
 | [\u09DC-\u09DD]
 | [\u09DF-\u09E1]
 | [\u09F0-\u09F1]
 | [\u0A05-\u0A0A]
 | [\u0A0F-\u0A10]
 | [\u0A13-\u0A28]
 | [\u0A2A-\u0A30]
 | [\u0A32-\u0A33]
 | [\u0A35-\u0A36]
 | [\u0A38-\u0A39]
 | [\u0A59-\u0A5C]
 | '\u0A5E'
 | [\u0A72-\u0A74]
 | [\u0A85-\u0A8D]
 | [\u0A8F-\u0A91]
 | [\u0A93-\u0AA8]
 | [\u0AAA-\u0AB0]
 | [\u0AB2-\u0AB3]
 | [\u0AB5-\u0AB9]
 | '\u0ABD'
 | '\u0AD0'
 | [\u0AE0-\u0AE1]
 | [\u0B05-\u0B0C]
 | [\u0B0F-\u0B10]
 | [\u0B13-\u0B28]
 | [\u0B2A-\u0B30]
 | [\u0B32-\u0B33]
 | [\u0B35-\u0B39]
 | '\u0B3D'
 | [\u0B5C-\u0B5D]
 | [\u0B5F-\u0B61]
 | '\u0B71'
 | '\u0B83'
 | [\u0B85-\u0B8A]
 | [\u0B8E-\u0B90]
 | [\u0B92-\u0B95]
 | [\u0B99-\u0B9A]
 | '\u0B9C'
 | [\u0B9E-\u0B9F]
 | [\u0BA3-\u0BA4]
 | [\u0BA8-\u0BAA]
 | [\u0BAE-\u0BB9]
 | [\u0C05-\u0C0C]
 | [\u0C0E-\u0C10]
 | [\u0C12-\u0C28]
 | [\u0C2A-\u0C33]
 | [\u0C35-\u0C39]
 | [\u0C60-\u0C61]
 | [\u0C85-\u0C8C]
 | [\u0C8E-\u0C90]
 | [\u0C92-\u0CA8]
 | [\u0CAA-\u0CB3]
 | [\u0CB5-\u0CB9]
 | '\u0CBD'
 | '\u0CDE'
 | [\u0CE0-\u0CE1]
 | [\u0D05-\u0D0C]
 | [\u0D0E-\u0D10]
 | [\u0D12-\u0D28]
 | [\u0D2A-\u0D39]
 | [\u0D60-\u0D61]
 | [\u0D85-\u0D96]
 | [\u0D9A-\u0DB1]
 | [\u0DB3-\u0DBB]
 | '\u0DBD'
 | [\u0DC0-\u0DC6]
 | [\u0E01-\u0E30]
 | [\u0E32-\u0E33]
 | [\u0E40-\u0E45]
 | '\u0E46'
 | [\u0E81-\u0E82]
 | '\u0E84'
 | [\u0E87-\u0E88]
 | '\u0E8A'
 | '\u0E8D'
 | [\u0E94-\u0E97]
 | [\u0E99-\u0E9F]
 | [\u0EA1-\u0EA3]
 | '\u0EA5'
 | '\u0EA7'
 | [\u0EAA-\u0EAB]
 | [\u0EAD-\u0EB0]
 | [\u0EB2-\u0EB3]
 | '\u0EBD'
 | [\u0EC0-\u0EC4]
 | '\u0EC6'
 | [\u0EDC-\u0EDD]
 | '\u0F00'
 | [\u0F40-\u0F47]
 | [\u0F49-\u0F6A]
 | [\u0F88-\u0F8B]
 | [\u1000-\u1021]
 | [\u1023-\u1027]
 | [\u1029-\u102A]
 | [\u1050-\u1055]
 | [\u10A0-\u10C5]
 | [\u10D0-\u10FA]
 | '\u10FC'
 | [\u1100-\u1159]
 | [\u115F-\u11A2]
 | [\u11A8-\u11F9]
 | [\u1200-\u1248]
 | [\u124A-\u124D]
 | [\u1250-\u1256]
 | '\u1258'
 | [\u125A-\u125D]
 | [\u1260-\u1288]
 | [\u128A-\u128D]
 | [\u1290-\u12B0]
 | [\u12B2-\u12B5]
 | [\u12B8-\u12BE]
 | '\u12C0'
 | [\u12C2-\u12C5]
 | [\u12C8-\u12D6]
 | [\u12D8-\u1310]
 | [\u1312-\u1315]
 | [\u1318-\u135A]
 | [\u1380-\u138F]
 | [\u13A0-\u13F4]
 | [\u1401-\u166C]
 | [\u166F-\u1676]
 | [\u1681-\u169A]
 | [\u16A0-\u16EA]
 | [\u16EE-\u16F0]
 | [\u1700-\u170C]
 | [\u170E-\u1711]
 | [\u1720-\u1731]
 | [\u1740-\u1751]
 | [\u1760-\u176C]
 | [\u176E-\u1770]
 | [\u1780-\u17B3]
 | '\u17D7'
 | '\u17DC'
 | [\u1820-\u1842]
 | '\u1843'
 | [\u1844-\u1877]
 | [\u1880-\u18A8]
 | [\u1900-\u191C]
 | [\u1950-\u196D]
 | [\u1970-\u1974]
 | [\u1980-\u19A9]
 | [\u19C1-\u19C7]
 | [\u1A00-\u1A16]
 | [\u1D00-\u1D2B]
 | [\u1D2C-\u1D61]
 | [\u1D62-\u1D77]
 | '\u1D78'
 | [\u1D79-\u1D9A]
 | [\u1D9B-\u1DBF]
 | [\u1E00-\u1E9B]
 | [\u1EA0-\u1EF9]
 | [\u1F00-\u1F15]
 | [\u1F18-\u1F1D]
 | [\u1F20-\u1F45]
 | [\u1F48-\u1F4D]
 | [\u1F50-\u1F57]
 | '\u1F59'
 | '\u1F5B'
 | '\u1F5D'
 | [\u1F5F-\u1F7D]
 | [\u1F80-\u1FB4]
 | [\u1FB6-\u1FBC]
 | '\u1FBE'
 | [\u1FC2-\u1FC4]
 | [\u1FC6-\u1FCC]
 | [\u1FD0-\u1FD3]
 | [\u1FD6-\u1FDB]
 | [\u1FE0-\u1FEC]
 | [\u1FF2-\u1FF4]
 | [\u1FF6-\u1FFC]
 | '\u2071'
 | '\u207F'
 | [\u2090-\u2094]
 | '\u2102'
 | '\u2107'
 | [\u210A-\u2113]
 | '\u2115'
 | '\u2118'
 | [\u2119-\u211D]
 | '\u2124'
 | '\u2126'
 | '\u2128'
 | [\u212A-\u212D]
 | '\u212E'
 | [\u212F-\u2131]
 | [\u2133-\u2134]
 | [\u2135-\u2138]
 | '\u2139'
 | [\u213C-\u213F]
 | [\u2145-\u2149]
 | [\u2160-\u2183]
 | [\u2C00-\u2C2E]
 | [\u2C30-\u2C5E]
 | [\u2C80-\u2CE4]
 | [\u2D00-\u2D25]
 | [\u2D30-\u2D65]
 | '\u2D6F'
 | [\u2D80-\u2D96]
 | [\u2DA0-\u2DA6]
 | [\u2DA8-\u2DAE]
 | [\u2DB0-\u2DB6]
 | [\u2DB8-\u2DBE]
 | [\u2DC0-\u2DC6]
 | [\u2DC8-\u2DCE]
 | [\u2DD0-\u2DD6]
 | [\u2DD8-\u2DDE]
 | '\u3005'
 | '\u3006'
 | '\u3007'
 | [\u3021-\u3029]
 | [\u3031-\u3035]
 | [\u3038-\u303A]
 | '\u303B'
 | '\u303C'
 | [\u3041-\u3096]
 | [\u309B-\u309C]
 | [\u309D-\u309E]
 | '\u309F'
 | [\u30A1-\u30FA]
 | [\u30FC-\u30FE]
 | '\u30FF'
 | [\u3105-\u312C]
 | [\u3131-\u318E]
 | [\u31A0-\u31B7]
 | [\u31F0-\u31FF]
 | [\u3400-\u4DB5]
 | [\u4E00-\u9FBB]
 | [\uA000-\uA014]
 | '\uA015'
 | [\uA016-\uA48C]
 | [\uA800-\uA801]
 | [\uA803-\uA805]
 | [\uA807-\uA80A]
 | [\uA80C-\uA822]
 | [\uAC00-\uD7A3]
 | [\uF900-\uFA2D]
 | [\uFA30-\uFA6A]
 | [\uFA70-\uFAD9]
 | [\uFB00-\uFB06]
 | [\uFB13-\uFB17]
 | '\uFB1D'
 | [\uFB1F-\uFB28]
 | [\uFB2A-\uFB36]
 | [\uFB38-\uFB3C]
 | '\uFB3E'
 | [\uFB40-\uFB41]
 | [\uFB43-\uFB44]
 | [\uFB46-\uFBB1]
 | [\uFBD3-\uFD3D]
 | [\uFD50-\uFD8F]
 | [\uFD92-\uFDC7]
 | [\uFDF0-\uFDFB]
 | [\uFE70-\uFE74]
 | [\uFE76-\uFEFC]
 | [\uFF21-\uFF3A]
 | [\uFF41-\uFF5A]
 | [\uFF66-\uFF6F]
 | '\uFF70'
 | [\uFF71-\uFF9D]
 | [\uFF9E-\uFF9F]
 | [\uFFA0-\uFFBE]
 | [\uFFC2-\uFFC7]
 | [\uFFCA-\uFFCF]
 | [\uFFD2-\uFFD7]
 | [\uFFDA-\uFFDC]
 ;

fragment ID_CONTINUE
 : ID_START
 | [0-9]
 | [\u0300-\u036F]
 | [\u0483-\u0486]
 | [\u0591-\u05B9]
 | [\u05BB-\u05BD]
 | '\u05BF'
 | [\u05C1-\u05C2]
 | [\u05C4-\u05C5]
 | '\u05C7'
 | [\u0610-\u0615]
 | [\u064B-\u065E]
 | [\u0660-\u0669]
 | '\u0670'
 | [\u06D6-\u06DC]
 | [\u06DF-\u06E4]
 | [\u06E7-\u06E8]
 | [\u06EA-\u06ED]
 | [\u06F0-\u06F9]
 | '\u0711'
 | [\u0730-\u074A]
 | [\u07A6-\u07B0]
 | [\u0901-\u0902]
 | '\u0903'
 | '\u093C'
 | [\u093E-\u0940]
 | [\u0941-\u0948]
 | [\u0949-\u094C]
 | '\u094D'
 | [\u0951-\u0954]
 | [\u0962-\u0963]
 | [\u0966-\u096F]
 | '\u0981'
 | [\u0982-\u0983]
 | '\u09BC'
 | [\u09BE-\u09C0]
 | [\u09C1-\u09C4]
 | [\u09C7-\u09C8]
 | [\u09CB-\u09CC]
 | '\u09CD'
 | '\u09D7'
 | [\u09E2-\u09E3]
 | [\u09E6-\u09EF]
 | [\u0A01-\u0A02]
 | '\u0A03'
 | '\u0A3C'
 | [\u0A3E-\u0A40]
 | [\u0A41-\u0A42]
 | [\u0A47-\u0A48]
 | [\u0A4B-\u0A4D]
 | [\u0A66-\u0A6F]
 | [\u0A70-\u0A71]
 | [\u0A81-\u0A82]
 | '\u0A83'
 | '\u0ABC'
 | [\u0ABE-\u0AC0]
 | [\u0AC1-\u0AC5]
 | [\u0AC7-\u0AC8]
 | '\u0AC9'
 | [\u0ACB-\u0ACC]
 | '\u0ACD'
 | [\u0AE2-\u0AE3]
 | [\u0AE6-\u0AEF]
 | '\u0B01'
 | [\u0B02-\u0B03]
 | '\u0B3C'
 | '\u0B3E'
 | '\u0B3F'
 | '\u0B40'
 | [\u0B41-\u0B43]
 | [\u0B47-\u0B48]
 | [\u0B4B-\u0B4C]
 | '\u0B4D'
 | '\u0B56'
 | '\u0B57'
 | [\u0B66-\u0B6F]
 | '\u0B82'
 | [\u0BBE-\u0BBF]
 | '\u0BC0'
 | [\u0BC1-\u0BC2]
 | [\u0BC6-\u0BC8]
 | [\u0BCA-\u0BCC]
 | '\u0BCD'
 | '\u0BD7'
 | [\u0BE6-\u0BEF]
 | [\u0C01-\u0C03]
 | [\u0C3E-\u0C40]
 | [\u0C41-\u0C44]
 | [\u0C46-\u0C48]
 | [\u0C4A-\u0C4D]
 | [\u0C55-\u0C56]
 | [\u0C66-\u0C6F]
 | [\u0C82-\u0C83]
 | '\u0CBC'
 | '\u0CBE'
 | '\u0CBF'
 | [\u0CC0-\u0CC4]
 | '\u0CC6'
 | [\u0CC7-\u0CC8]
 | [\u0CCA-\u0CCB]
 | [\u0CCC-\u0CCD]
 | [\u0CD5-\u0CD6]
 | [\u0CE6-\u0CEF]
 | [\u0D02-\u0D03]
 | [\u0D3E-\u0D40]
 | [\u0D41-\u0D43]
 | [\u0D46-\u0D48]
 | [\u0D4A-\u0D4C]
 | '\u0D4D'
 | '\u0D57'
 | [\u0D66-\u0D6F]
 | [\u0D82-\u0D83]
 | '\u0DCA'
 | [\u0DCF-\u0DD1]
 | [\u0DD2-\u0DD4]
 | '\u0DD6'
 | [\u0DD8-\u0DDF]
 | [\u0DF2-\u0DF3]
 | '\u0E31'
 | [\u0E34-\u0E3A]
 | [\u0E47-\u0E4E]
 | [\u0E50-\u0E59]
 | '\u0EB1'
 | [\u0EB4-\u0EB9]
 | [\u0EBB-\u0EBC]
 | [\u0EC8-\u0ECD]
 | [\u0ED0-\u0ED9]
 | [\u0F18-\u0F19]
 | [\u0F20-\u0F29]
 | '\u0F35'
 | '\u0F37'
 | '\u0F39'
 | [\u0F3E-\u0F3F]
 | [\u0F71-\u0F7E]
 | '\u0F7F'
 | [\u0F80-\u0F84]
 | [\u0F86-\u0F87]
 | [\u0F90-\u0F97]
 | [\u0F99-\u0FBC]
 | '\u0FC6'
 | '\u102C'
 | [\u102D-\u1030]
 | '\u1031'
 | '\u1032'
 | [\u1036-\u1037]
 | '\u1038'
 | '\u1039'
 | [\u1040-\u1049]
 | [\u1056-\u1057]
 | [\u1058-\u1059]
 | '\u135F'
 | [\u1369-\u1371]
 | [\u1712-\u1714]
 | [\u1732-\u1734]
 | [\u1752-\u1753]
 | [\u1772-\u1773]
 | '\u17B6'
 | [\u17B7-\u17BD]
 | [\u17BE-\u17C5]
 | '\u17C6'
 | [\u17C7-\u17C8]
 | [\u17C9-\u17D3]
 | '\u17DD'
 | [\u17E0-\u17E9]
 | [\u180B-\u180D]
 | [\u1810-\u1819]
 | '\u18A9'
 | [\u1920-\u1922]
 | [\u1923-\u1926]
 | [\u1927-\u1928]
 | [\u1929-\u192B]
 | [\u1930-\u1931]
 | '\u1932'
 | [\u1933-\u1938]
 | [\u1939-\u193B]
 | [\u1946-\u194F]
 | [\u19B0-\u19C0]
 | [\u19C8-\u19C9]
 | [\u19D0-\u19D9]
 | [\u1A17-\u1A18]
 | [\u1A19-\u1A1B]
 | [\u1DC0-\u1DC3]
 | [\u203F-\u2040]
 | '\u2054'
 | [\u20D0-\u20DC]
 | '\u20E1'
 | [\u20E5-\u20EB]
 | [\u302A-\u302F]
 | [\u3099-\u309A]
 | '\uA802'
 | '\uA806'
 | '\uA80B'
 | [\uA823-\uA824]
 | [\uA825-\uA826]
 | '\uA827'
 | '\uFB1E'
 | [\uFE00-\uFE0F]
 | [\uFE20-\uFE23]
 | [\uFE33-\uFE34]
 | [\uFE4D-\uFE4F]
 | [\uFF10-\uFF19]
 | '\uFF3F'
 ;

使用两种模式我可以模拟python格式化程序想要的内容。

语法:

parser grammar formatterParser;

options {   tokenVocab = formatterLexer; }

source_stream
 : segments EOF
 ;

 segments
 : segment*
 ;

 segment
 : (text | replacement_field)
 ;

 text
 : CHAR_NOCURLY+
 ;

 replacement_field
 : OPEN_RF field_name? (EXLM conversion)? (COLON format_spec)? CLOSE_RF
 ;

 field_name 
 : arg_name ((DOT attribute_name) | LIX element_index RIX)*
 ;

 arg_name
 : (identifier | INTEGER)? 
 ;

 attribute_name
 : identifier
 ;

element_index
: INTEGER | index_string
;

index_string
: CHAR_NORIGHTB+
;

identifier
: NAME
;

conversion
: LS
;

format_spec
: (fill? align)? sign? hash? zero? width? dash? precision? type?
;

fill
: CHAR
;

align
: LT | MT | EQ | EXP
;

sign
: PLUS | MINUS | SPACE
;

hash
: HASH
;

zero
: ZERO
;

width
: INTEGER
;

precision
: INTEGER
;

type
: TYPE
;

dash
: COMMA
;