我试图用python正则表达式解析以下内容。
import (
"github.com/user/qrt"
"fmt"
"github.com/user/zyx"
)
import "abcdef"
import "abzdef"
理想情况下,单个正则表达式会产生:
圆括号中的所有内容作为一个单独的分组;每条单行 import 语句中的项目也各自作为一个分组。
下面是我目前针对这两种 import 语句分别写出的正则表达式(见冒号右侧的部分):
# import (...) : r'import\s*(\()(.*?)(\))'
# import ".." : r'import\s*(\")(.*?)(\")'
我认为可以利用第一个分组是否匹配,借助类似下面的条件表达式来判断当前解析的是 import (...) 形式还是 import "..." 形式:(?(id)yes|no) —— 如果名为 id 的分组已经匹配,则尝试匹配 yes,否则尝试匹配 no。
答案 0 :(得分:1)
这样的东西?
import re
# Sample Go source: one parenthesised import group followed by two
# single-line imports.
test = """import (
"github.com/user/qrt"
"fmt"
"github.com/user/zyx"
)
import "abcdef"
import "abzdef"
"""

# One import statement. Group 1 is either the rest of the line (single-line
# form) or the whole parenthesised group including both parentheses.
rx = re.compile(r'import\s+([^(]+?$|\([^)]+\))', re.MULTILINE)

# One quoted path. The negated character class keeps each match to a single
# string, so several paths written on one line are reported separately
# instead of being merged into one greedy match.
rx2 = re.compile(r'"[^"\n]*"')


def iter_import_paths(source):
    """Yield every import path found in *source*, in order of appearance.

    Paths inside a parenthesised group are yielded one by one, still wrapped
    in their double quotes. For a single-line import the captured remainder
    of the line is yielded unchanged.
    """
    for statement in rx.finditer(source):
        imp = statement.group(1)
        if imp.startswith('('):
            # Grouped form: pull each quoted path out of the group.
            for quoted in rx2.finditer(imp):
                yield quoted.group(0)
        else:
            yield imp


for path in iter_import_paths(test):
    print(path)
输出:
"github.com/user/qrt"
"fmt"
"github.com/user/zyx"
"abcdef"
"abzdef"
编辑:出于好玩,我试着写了一个模拟的递归下降解析器。它会接受不合法的语法;不过这只是一个思路,而且用起来很简单,直接迭代即可。
import re
# Sample Go source: one parenthesised import group followed by two
# single-line imports.
test = """import (
"github.com/user/qrt"
"fmt"
"github.com/user/zyx"
)
import "abcdef"
import "abzdef"
"""

# Parser states used by RecursiveDescent:
#   BEGIN   - waiting for the next `import` keyword
#   IMPORT  - an `import` keyword was just consumed
#   DESCENT - inside a parenthesised import group
BEGIN, IMPORT, DESCENT = 1, 2, 3
class Lexer(object):
    """Split source text into the tokens the import parser understands.

    Iterating over a Lexer yields, in order of appearance, each `import`
    keyword, each double-quoted string (quotes included) and each opening or
    closing parenthesis. All other text is skipped.
    """

    # Compiled once for all instances. The word boundaries stop identifiers
    # that merely contain "import" (e.g. `important`) from being reported as
    # the keyword.
    _TOKEN_RX = re.compile(r'(\bimport\b|".*?"|\(|\))')

    def __init__(self, text):
        """Store *text*, the source string to tokenize."""
        self._rx = self._TOKEN_RX
        self._text = text

    def __iter__(self):
        """Yield each token of the stored text as a string."""
        for match in self._rx.finditer(self._text):
            yield match.group(1)
class RecursiveDescent(object):
    """Small state machine that yields import paths from a token stream.

    The parser walks the tokens produced by *lexer* and yields every token
    that appears in import position: the token right after `import` for the
    single-line form, or every token between `(` and `)` for the grouped
    form. It stops at the first token that does not start a new import
    statement.
    """

    # Current parser state; one of BEGIN, IMPORT or DESCENT.
    state = BEGIN

    def __init__(self, lexer):
        """Store *lexer*, an iterable of token strings."""
        self._lexer = lexer

    def __iter__(self):
        """Yield each imported path token in order of appearance."""
        # Start every pass from a clean state so that re-iterating an
        # instance does not resume from wherever the previous pass stopped.
        self.state = BEGIN
        for token in self._lexer:
            if self.state == BEGIN:
                if token != 'import':
                    # Most likely the beginning of the program proper. End
                    # the generator with a plain return: raising
                    # StopIteration inside a generator is turned into
                    # RuntimeError on Python 3.7+ (PEP 479).
                    return
                self.state = IMPORT
            elif self.state == IMPORT:
                if token == '(':
                    self.state = DESCENT
                else:
                    # Single-line import: this token is the path itself.
                    self.state = BEGIN
                    yield token
            elif self.state == DESCENT:
                if token == ')':
                    self.state = BEGIN
                else:
                    yield token
# Run the parser over the sample source and print one import path per line.
parser = RecursiveDescent(Lexer(test))
for import_path in parser:
    print(import_path)
输出
"github.com/user/qrt"
"fmt"
"github.com/user/zyx"
"abcdef"
"abzdef"