我目前正在尝试实现一个能够解析db2 create table语句的DB2解析器。因此,我使用了官方的IBM语法DB2-Syntax (IBM)
到目前为止,这是我的语法:
grammar db2;
list: sql_expression;
sql_expression:
(create_statement SEMICOLON)+;
/* ------------------------------------------------------------------------------------------------
TABLES MAIN
-------------------------------------------------------------------------------------------------*/
create_statement:
'CREATE' 'TABLE' tableName=table_name tableMainBlock=table_main_block (initialLoggin=initial_loggin)? (volatilityLabel=volatility)? (rcdfmtLabel=rcdfmt)? (clauseLabel=clause)? (inTS=in_ts)? #createTable
;
in_ts:
'IN' '"' tsName=NAME '"'
;
table_main_block:
LEFT_PAREN tableParens=table_parens RIGHT_PAREN #tableMainParens
| tableLike=table_like #createTableLike
| asSubqueryClause=as_subquery_clause #tableAsSubqueryClause
;
table_parens:
((column_defs=column_definitions)| (tableLike=table_like) | (tableUnique=table_unique) | (tableRef=table_ref) | (tableCheck=table_check))+ #tableParens
;
as_subquery_clause:
'TABLESUBQUERYCLAUSE'
;
table_name : ('"' dbName=NAME '"' '.')? '"'tableName=NAME'"' #tableNameBunnys
| ('"'dbName=NAME'"''.') ?tableName=NAME #tableName
;
like_table_name : '"'tableName=NAME'"' #likeTableNameBunnys
| tableName=NAME #likeTableName
;
table_like:
'LIKE' likeTableName=like_table_name #tableLikeTable
| 'LIKE' likeViewName=like_view_name copyOptions=copy_options #tableLikeView
;
like_view_name:
'"'viewName=NAME'"' #viewNameBunnys
| viewName=NAME #viewName
;
initial_loggin:
'NOT LOGGED INITIALLY' #initialLogging
;
volatility:
'NOT VOLATILE' #nVolatile
|'NOT VOLATILE' cardinality_volatility #NVolatileCard
|'VOLATILE' #Volatile
|'VOLATILE' cardinality_volatility #VolatileCard
;
cardinality_volatility:
'CARDVOL'
;
rcdfmt:
'RCDFMT' rcdfmt_format_name #rcdfmtFormatName
;
rcdfmt_format_name:
'RCDFMTNAME'
;
clause:
distribution_clause
| partitioning_clause
;
distribution_clause:
'DISTRIBUTIONCLAUSE' #distributionClause
;
partitioning_clause:
'PARTITIONINGCLAUSE' #partitioningClause
;
copy_options:
'EXCLUDING IDENTITY...' #copyOptionsExclude
;
table_unique:
'UNIQUECONSTRAINT'
;
table_ref:
'FOREIGN KEY' #foreignKeyRef
| 'CONSTRAINT' constraint_name #constraintRef
;
constraint_name:
NAME
;
table_check:
'CHECKCONSTRAINT'
;
/* -------------------------------------------------------------------------------------------------------
COLUMNS
--------------------------------------------------------------------------------------------------------- */
column_definitions:
column (COMMA column)* #colDefs
;
column:
colName=col_name (forColumn=col_for_column)? colType=col_type (colOptions=col_options)* #col
;
col_name: '"' colName=NAME '"' #colNameBunnys
| colName=NAME #colName;
col_for_column:
'FOR' ('COLUMN')? systemColumnName=col_system_column_name #colForSysColName
;
col_system_column_name:
'COLSYSTEMCOLUMNNAME'
;
col_options:
'NOT NULL' #colUnique
| col_default_clause #colDefaultClause
| ('GENERATED ALWAYS'|'GENERATED BY DEFAULT') (IdentityOptions=col_options_identity_options)? #colGenAlwaysDefault
| col_datalink_options #colDataLinkOptions
| col_constraint #colConstraint
;
col_default_clause:
('WITH')? 'DEFAULT'
| ('WITH')? 'DEFAULT' (NUMBER|NAME|NUMBER DOT NUMBER)
| ('WITH')? 'DEFAULT' 'USER'
| ('WITH')? 'DEFAULT' 'NULL'
| ('WITH')? 'DEFAULT' 'CURRENT_DATE'
| ('WITH')? 'DEFAULT' 'CURRENT_TIME'
| ('WITH')? 'DEFAULT' 'CURRENT_TIMESTAMP'
| ('WITH')? 'DEFAULT' cast_function_name LEFT_PAREN ((NUMBER|NAME)|'USER'|'CURRENT_DATE'|'CURRENT_TIME'|'CURRENT_TIMESTAMP') RIGHT_PAREN
;
col_options_identity_options:
'COLOPTIONSIDENTITYOPTIONS'
;
col_datalink_options:
'COLDATALINKOPTIONS'
;
col_constraint:
('CONSTRAINT' constraintName=NAME)? (constrType1='PRIMARY KEY'|constrType2='UNIQUE'|constrType3=references_clause|constrType4='CHECK' LEFT_PAREN conCondition=check_condition RIGHT_PAREN) #colConstraintDef
;
references_clause:
'REFCLAU'
;
check_condition:
'CHECKCONDI'
;
cast_function_name:
'CFN'
;
/* -----------------------------------------------------------------------------------------------------
DATATYPES
------------------------------------------------------------------------------------------------------*/
col_type:
col_type_simple #colTypeSimple
|col_type_dec #colTypeDec
|col_type_float #colTypeFloat
|col_type_chars #colTypeChars
|col_type_graphic #colTypeGraphic
|col_type_binary #colTypeBinary
|col_type_date #colTypeDate
|'DATALINK' (NUMBER)? (allocate_clause)? (ccsid_clause)? #colTypeDatalink
|'ROWID' #colTypeRowId
;
allocate_clause:
'ALLOCATE' NUMBER #allocateClause
;
ccsid_clause:
'CCSID' NUMBER ('NORMALIZED'|'NOT NORMALIZED')? #ccsidClause
;
col_type_simple:
'SMALLINT' #colTypeSmallInt
|'INT' #colTypeInt
|'BIGINT' #colTypeBigInt
| 'INTEGER' #colTypeInteger
;
col_type_dec:
(dataType='NUMERIC'|(dataType='DECIMAL'|dataType='DEC')) (LEFT_PAREN dataTypePrecision=NUMBER (COMMA dataTypePrecision2=NUMBER)? RIGHT_PAREN)? #colTypeDecimal
;
col_type_float:
'REAL' #colTypeReal
| 'FLOAT' (LEFT_PAREN precision=NUMBER RIGHT_PAREN)? #colTypeFloatDef
| 'DOUBLE' ('PRECISION')? #colTypeDoubleDef
;
col_type_chars:
('CHAR'|'CHARACTER') (LEFT_PAREN precision=NUMBER RIGHT_PAREN)? (col_type_chars_mixed)? #colTypeChar
| ('VARCHAR'|'CHAR' 'VARYING'|'CHARACTER' 'VARYING') LEFT_PAREN precision=NUMBER RIGHT_PAREN (allocate_clause)? col_type_chars_mixed #colTypeVarChar
| ('CHARACTER' 'LARGE OBJECT'|'CHAR' 'LARGE OBJECT'|'CLOB') ('(1M)'|NUMBER ('K'|'M'|'G'))? (allocate_clause)? col_type_chars_mixed #colTypeCLOB
;
col_type_chars_mixed:
'FOR' ('BIT'|'SBCS'|'MIXED') 'DATA' #colTypeCharBitType
| ccsid_clause #colTypeCharCCSID
;
col_type_graphic:
'GRAPHIC' ('(1)'|LEFT_PAREN NUMBER RIGHT_PAREN)? (ccsid_clause)? #colTypeGraphicDef
| ('VARGRAPHIC'|'GRAPHIC' 'VARYING') LEFT_PAREN NUMBER RIGHT_PAREN (allocate_clause)? (ccsid_clause)? #colTypeVarGraphic
| 'DBCLOB' ('(1M)'| LEFT_PAREN NUMBER ('K'|'M'|'G') RIGHT_PAREN)? (allocate_clause)? (ccsid_clause)? #colTypeDBCLOB
;
col_type_binary:
'BINARY' ('(1)'|LEFT_PAREN NUMBER RIGHT_PAREN)? #colTypeBinaryDef
| ('VARBINARY'|'BINARY' 'VARYING') LEFT_PAREN NUMBER RIGHT_PAREN (allocate_clause)? #colTypeVarBinary
| ('BLOB'|'BINARY LARGE OBJECT') ('(1M)'| LEFT_PAREN NUMBER ('K'|'M'|'G') RIGHT_PAREN)? (allocate_clause)? #colTypeBLOB
;
col_type_date:
'DATE' #colTypeDateDef
| 'TIME' (LEFT_PAREN '0' RIGHT_PAREN)? #colTypeTime
| 'TIMESTAMP' (LEFT_PAREN '6' RIGHT_PAREN)? #colTypeTimestamp
;
/* LITERALS */
NUMBER : (DIGIT)+;
NAME: [a-zA-Z0-9_]+ ;
FNUMBER: (DIGIT)+ DOT (DIGIT)+;
fragment DIGIT : '0'..'9' ;
LEFT_PAREN : '(';
RIGHT_PAREN : ')';
COMMA : ',';
SEMICOLON : ';';
DOT : '.';
WS : ( '\t' | ' ' | '\r' | '\n')+ -> skip;
当我现在尝试解析像这样的字符列时
CREATE TABLE“TEST”。“TEST2”( “IBMSNAP_OPERATION”CHAR(1));
Antlr导致错误
extraneous input '(1)' expecting {'FOREIGN KEY', 'CONSTRAINT', 'LIKE', 'UNIQUECONSTRAINT', 'UNIQUE', 'COLDATALINKOPTIONS', 'CHECKCONSTRAINT', 'CHECK', 'DEFAULT', 'GENERATED BY DEFAULT', 'NOT NULL', '"', 'WITH', 'PRIMARY KEY', 'REFCLAU', 'GENERATED ALWAYS', NAME, ')', ','}
但是尝试
CREATE TABLE "TEST"."TEST2" (
"IBMSNAP_OPERATION" CHAR(3));
工作得很好! 谁知道这个奇怪的问题?
THX!
答案 0 :(得分:0)
问题是你的语法包含内联文字'(1)'
,例如:
col_type_graphic:
'GRAPHIC' ('(1)'...
正因为如此,词法分析器会将(1)
视为单个令牌,并且无法再按(
NUMBER
)
解析CHAR
col_type_graphic
'K'|'M'|'G'
1}}。
这不是你唯一的问题,你的语法中有很多这些问题。例如,规则K
包含M
,它会将字母G
,K
,fragment
声明为特殊代币(将其视为特殊关键字)和例如,一旦您尝试命名列K
,就会遇到问题。
解决方案是您必须将此类规则作为{{1}}规则提取到词法分析器语法中,然后词法分析器将知道{{1}}本身不是词法标记,只与规则的其余部分结合使用。但这不是微不足道的,因为周围的规则不是词法而是解析器规则,因此您必须将其拆分为词法子规则和大解析器规则。
总而言之,在Antlr中运行此语法并生成有意义的解析树将是一项相当大的工作量。它的问题在于它包含许多不是关键字的文字,但是当你将它们定义为文字时,Antlr(以及许多其他解析器生成器)会将它们视为关键字。