在Python中使用特定的正则表达式提取文本块

时间:2019-07-09 22:17:48

标签: python regex

假设我有一个文件,其文本如下:

module combfn1789(clk, i0, i1, i2, i3, o);
  input clk, i0, i1, i2, i3; 
  output o;
  wire clk, i0, i1, i2, i3;
  wire o;
  wire UNCONNECTED788, n_0, n_1, n_2, n_3, n_4;
  Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));
  Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2
       (n_2), .Z (n_4));
  Q_INV g187(.A (i3), .Z (n_3));
  Q_INV g188(.A (i0), .Z (n_2));
  Q_INV g189(.A (i1), .Z (n_1));
  Q_INV g190(.A (i2), .Z (n_0));
endmodule;

module combfn1(clk, i0, i1, i2, i3, o);
  input clk, i0, i1, i2, i3;
  output o;
  wire clk, i0, i1, i2, i3;
  wire o;
  wire UNCONNECTED0, n_0, n_1;
  Q_FDP0I0 o_reg(.CK (clk), .D (n_1), .Q (o), .QN (UNCONNECTED0));
  Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));
  Q_INV g60(.A (i0), .Z (n_0));
endmodule

我只对文本的一部分感兴趣,所以我试图编写一个python程序来隔离以下内容:

combfn1789
Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));
Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2
      (n_2), .Z (n_4));
Q_INV g187(.A (i3), .Z (n_3));
Q_INV g188(.A (i0), .Z (n_2));
Q_INV g189(.A (i1), .Z (n_1));
Q_INV g190(.A (i2), .Z (n_0));

combfn1
Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));
Q_INV g60(.A (i0), .Z (n_0));

我最初的想法是使用re.search提取以Q_开头的行。不幸的是,这种方法无法提取模块名称(例如combfn1789)。我不确定如何编写一个正则表达式,同时提取以Q_开头的行和模块名称。

1 个答案:

答案 0 :(得分:1)

下面的表达式(或其修改后的版本)可能会返回所需的输出或与之接近的输出:

module\s+\K([^)(]+)|(Q_[\s\S]*?;)

使用re.finditer进行测试(Python的re模块不支持\K,因此下面的代码去掉了\K,改为通过捕获组取出模块名称):
import re

# Two alternatives, one capture group each:
#   group 1 - the module name: the text after "module" and whitespace, up to
#             the first parenthesis;
#   group 2 - one cell instantiation: from "Q_" to the nearest ";" ([\s\S]
#             also matches newlines, so a wrapped statement is kept whole).
regex = r"module\s+([^)(]+)|(Q_[\s\S]*?;)"

test_str = ("module combfn1789(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3; \n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED788, n_0, n_1, n_2, n_3, n_4;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));\n"
    "  Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2\n"
    "       (n_2), .Z (n_4));\n"
    "  Q_INV g187(.A (i3), .Z (n_3));\n"
    "  Q_INV g188(.A (i0), .Z (n_2));\n"
    "  Q_INV g189(.A (i1), .Z (n_1));\n"
    "  Q_INV g190(.A (i2), .Z (n_0));\n"
    "endmodule;\n\n"
    "module combfn1(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3;\n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED0, n_0, n_1;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_1), .Q (o), .QN (UNCONNECTED0));\n"
    "  Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));\n"
    "  Q_INV g60(.A (i0), .Z (n_0));\n"
    "endmodule")

# No flags on purpose: the pattern has no ^/$ anchors for re.MULTILINE to act
# on, and Verilog is case-sensitive, so re.IGNORECASE could only add false
# positives (e.g. a lowercase "q_" inside another identifier).
matches = re.finditer(regex, test_str)

# Build the report first so it can be inspected as data, then print it.
report_lines = []
for match_num, match in enumerate(matches, start=1):
    report_lines.append(
        "Match {matchNum} was found at {start}-{end}: {match}".format(
            matchNum=match_num,
            start=match.start(),
            end=match.end(),
            match=match.group(),
        )
    )

    for group_num, group in enumerate(match.groups(), start=1):
        # Only one side of the alternation takes part in a given match; the
        # other group is None with span (-1, -1), so it is skipped instead of
        # being reported as "found at -1--1: None".
        if group is None:
            continue
        report_lines.append(
            "Group {groupNum} found at {start}-{end}: {group}".format(
                groupNum=group_num,
                start=match.start(group_num),
                end=match.end(group_num),
                group=group,
            )
        )

for line in report_lines:
    print(line)

使用re.findall进行测试:
import re

# Alternation with one capture group per branch: group 1 is the module name
# (text between "module" + whitespace and the first parenthesis), group 2 is a
# whole "Q_...;" cell instantiation, which may span several lines.
regex = r"module\s+([^)(]+)|(Q_[\s\S]*?;)"

test_str = ("module combfn1789(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3; \n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED788, n_0, n_1, n_2, n_3, n_4;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));\n"
    "  Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2\n"
    "       (n_2), .Z (n_4));\n"
    "  Q_INV g187(.A (i3), .Z (n_3));\n"
    "  Q_INV g188(.A (i0), .Z (n_2));\n"
    "  Q_INV g189(.A (i1), .Z (n_1));\n"
    "  Q_INV g190(.A (i2), .Z (n_0));\n"
    "endmodule;\n\n"
    "module combfn1(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3;\n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED0, n_0, n_1;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_1), .Q (o), .QN (UNCONNECTED0));\n"
    "  Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));\n"
    "  Q_INV g60(.A (i0), .Z (n_0));\n"
    "endmodule")

# With two capture groups, findall yields one (group 1, group 2) tuple per
# match; the group that did not take part in the match is the empty string.
pattern = re.compile(regex)
found = pattern.findall(test_str)
print(found)

演示

该表达式在演示页面(this demo)的右上角有详细说明,便于您进一步探索或简化/修改它;如果愿意,还可以在此链接(this link)中逐步观察它如何与某些示例输入匹配。