使用 pyparsing 正确扫描注释

时间:2012-09-20 10:23:06

标签: python pyparsing

我正在使用 pyparsing 在 “CREATE TABLE” 语句之后添加分号(“;”)。如果输入如下:

CREATE TABLE A ( B VARCHAR(255) )

程序会给出如下输出:

CREATE TABLE A ( B VARCHAR(255) );

当语句后面带有注释时就会出现问题,例如:

CREATE TABLE A ( B VARCHAR(255) )
--Comment

此时程序给出的输出是:

CREATE TABLE A ( B VARCHAR(255) )
--Comment
;

以下是代码:

import pyparsing as par

alphanumsword = par.Word(par.alphanums + "_")
element = "(" + alphanumsword + ")" | alphanumsword
row = par.OneOrMore(element)
rows = row + par.OneOrMore("," + row) | row

semicolon = par.Literal(";")
comment1 = par.Literal("--") + par.restOfLine + par.LineEnd()

createtable = par.CaselessLiteral("create") + par.CaselessLiteral("table") 
              + alphanumsword + "("     + rows + ")" + ~semicolon

createtable.ignore(comment1)
createtable.ignore(par.cStyleComment)

text = \
"""
CREATE TABLE  PERSON
(
    /* Comment */
    /*
       Comment
    */
   ID VARCHAR(255),
   NAME VARCHAR(255), -- Comment
   --- Comment
   ADDRESS VARCHAR(255) NULL, -- Comment
   CONSTRAINT PK_PERSON PRIMARY KEY (ID)
)

-- Comment

CREATE TABLE A ( B VARCHAR(255) )
"""

text_list = list(text)
offset = 0

for t,s,e in createtable.scanString(text):
    print "(", t, ",", s, ",", e, ")"

    print "||", text[s:e], "||"

    text_list.insert(e + offset, ';')
    offset += 1

print "".join(text_list)

1 个答案:

答案 0 :(得分:2)

使用~( semicolon | comment1 )代替~( semicolon )

# Answer's version of the script (Python 2): scan SQL text for CREATE TABLE
# statements without a terminating semicolon and insert ';' after each one.
import pyparsing as par

# An identifier-like word: letters, digits and underscores.
alphanumsword = par.Word(par.alphanums + "_")

# `+` binds tighter than `|`, so an element is either a parenthesised word
# such as "(255)" or a bare word.
element = "(" + alphanumsword + ")" | alphanumsword
# Alternative definition left disabled by the author:
#element = alphanumsword + "(" + alphanumsword + ")" | alphanumsword

# One column/constraint definition: a run of elements.
row = par.OneOrMore(element)

# Either several comma-separated rows, or a single row.
rows = row + par.OneOrMore("," + row) | row

semicolon = par.Literal(";")

# SQL line comment: "--" up to and including the end of the line.
comment1 = par.Literal("--") + par.restOfLine + par.LineEnd()

# CREATE TABLE <name> ( <rows> ) followed by a negative lookahead (`~`).
# This is the answer's change: the lookahead covers `semicolon | comment1`
# instead of `semicolon` alone. In the sample output shown after this script,
# statements already ending in ';' are not matched, and the semicolon is
# inserted before a trailing `--` comment rather than after it.
createtable = par.CaselessLiteral("create") + par.CaselessLiteral("table")  \
              + alphanumsword + "("     + rows + ")" + ~( semicolon | comment1 )

# Skip both SQL line comments and /* ... */ comments while matching.
createtable.ignore(comment1)
createtable.ignore(par.cStyleComment)


# Sample input mixing comments with statements that do and do not already
# end in a semicolon.
text = \
"""
--- this is comment

CREATE TABLE  PERSON
(
    /* Comment */
    /*
       Comment
    */
   ID VARCHAR(255),
   NAME VARCHAR(255), -- Comment
   --- Comment
   ADDRESS VARCHAR(255) NULL, -- Comment
   CONSTRAINT PK_PERSON PRIMARY KEY (ID)
)

-- Comment

CREATE TABLE A ( B VARCHAR(255) ) --comment
CREATE TABLE A ( B VARCHAR(255) );
CREATE TABLE A ( B VARCHAR(255) )
CREATE TABLE A ( B VARCHAR(255) );

--Commentasfkjaskfj
"""

# Work on a list of characters so semicolons can be inserted by index.
text_list = list(text)
# Number of semicolons inserted so far; every insertion shifts all later
# positions of the original text by one.
offset = 0

# scanString yields (tokens, start, end) for every match found in the text.
for t,s,e in createtable.scanString(text):
    print "(***", t, ",", s, ",", e, "**)"

    # Show the matched slice and its offsets for debugging.
    print "||\n", text[s:e], "\n||"
    print "s: %d, e: %d" % (s, e)

    # Insert the semicolon at the match end, adjusted for earlier insertions.
    text_list.insert(e + offset , ';')
    offset += 1

    print "******************************"

# Print the text with all semicolons inserted.
print "".join(text_list)

**输出**

(*** ['create', 'table', 'PERSON', '(', 'ID', 'VARCHAR', '(', '255', ')', ',', 'NAME', 'VARCHAR', '(', '255', ')', ',', 'ADDRESS', 'VARCHAR', '(', '255', ')', 'NULL', ',', 'CONSTRAINT', 'PK_PERSON', 'PRIMARY', 'KEY', '(', 'ID', ')', ')'] , 22 , 243 **)
||
CREATE TABLE  PERSON
(
    /* Comment */
    /*
       Comment
    */
   ID VARCHAR(255),
   NAME VARCHAR(255), -- Comment
   --- Comment
   ADDRESS VARCHAR(255) NULL, -- Comment
   CONSTRAINT PK_PERSON PRIMARY KEY (ID)
) 
||
s: 22, e: 243
******************************
(*** ['create', 'table', 'A', '(', 'B', 'VARCHAR', '(', '255', ')', ')'] , 257 , 290 **)
||
CREATE TABLE A ( B VARCHAR(255) ) 
||
s: 257, e: 290
******************************
(*** ['create', 'table', 'A', '(', 'B', 'VARCHAR', '(', '255', ')', ')'] , 336 , 369 **)
||
CREATE TABLE A ( B VARCHAR(255) ) 
||
s: 336, e: 369
******************************

--- this is comment

CREATE TABLE  PERSON
(
    /* Comment */
    /*
       Comment
    */
   ID VARCHAR(255),
   NAME VARCHAR(255), -- Comment
   --- Comment
   ADDRESS VARCHAR(255) NULL, -- Comment
   CONSTRAINT PK_PERSON PRIMARY KEY (ID)
);

-- Comment

CREATE TABLE A ( B VARCHAR(255) ); --comment
CREATE TABLE A ( B VARCHAR(255) );
CREATE TABLE A ( B VARCHAR(255) );
CREATE TABLE A ( B VARCHAR(255) );

--Commentasfkjaskfj