在以下代码中,与Perl的类似实现相比,第13和14行花了很长时间。请提出任何想法。通过代码处理的总行数为323898。
import sys
import re
def readSpf(file1):
with open(file1) as fp:
lines = fp.read().split("\n")
h1 = {}
h3 = {}
for line in lines:
if re.search("^C", line):
l2 = re.split("\s+", line)
h1[l2[1]] = h1.get(l2[1], "0.0") + "+" + l2[3]
h1[l2[2]] = h1.get(l2[2], "0.0") + "+" + l2[3]
h3[l2[1]] = h3.get(l2[1], "") + "" + l2[2] + "###" + l2[3] + "###"
h3[l2[2]] = h3.get(l2[2], "") + "" + l2[1] + "###" + l2[3] + "###"
readSpf(sys.argv[1])
样本输入文件
C56818_1870 Xcancel|Xtxdac_cancel_cap_8<0>|intp ln_X44|X86|X1|18
3.05871e-17 C56818_1871 Xcancel|Xtxdac_cancel_cap_8<0>|intp Xcancel|Xtxdac_cancel_cap_8<1>|intp 2.34185e-16 C56818_1872 Xcancel|Xtxdac_cancel_cap_8<0>|intp Xcancel|Xtxdac_cancel_cap_8<0>|intn 3.26685e-16 C56818_1873 Xcancel|Xtxdac_cancel_cap_8<0>|intp ln_X44|X86|X1|12 3.75498e-17 C56818_1874 Xcancel|Xtxdac_cancel_cap_8<0>|intp ln_X44|X86|X1|14
3.67933e-17 C56818_1875 Xcancel|Xtxdac_cancel_cap_8<0>|intp ln_X44|X86|X1|16 3.16223e-17 Cg56818_1876 Xcancel|Xtxdac_cancel_cap_8<0>|intp gnd! 2.13745e-16 C56819_1877 Xcancel|Xtxdac_cancel_cap_9<4>|intn Xcancel|Xcap4<2>|intn 2.35712e-16 Cg56819_1878 Xcancel|Xtxdac_cancel_cap_9<4>|intn gnd! 2.49288e-16 Cg56820_1879 ln_X44|X87|X0|11 gnd! 1.19409e-17 Cg56821_1880 ln_X44|X87|X0|12 gnd! 1.25134e-17 C56822_1881 ln_X44|X87|X0|13 ln_X44|X103|X1|13 8.94447e-18 Cg56822_1882 ln_X44|X87|X0|13 gnd!
1.04407e-17 C56823_1883 ln_X44|X87|X0|14 ln_X44|X103|X1|14 8.94447e-18 Cg56823_1884 ln_X44|X87|X0|14 gnd! 1.31103e-17 Cg56824_1885 ln_X44|X87|X0|15 gnd! 7.14434e-18 Cg56825_1886 ln_X44|X87|X0|16 gnd!
8.37446e-18 C56826_1887 ln_X44|X87|X0|17 ln_X44|X103|X1|17 6.12758e-18 Cg56826_1888 ln_X44|X87|X0|17 gnd! 8.65347e-18 C56827_1889 ln_X44|X87|X0|18 ln_X44|X103|X1|18 6.12758e-18 Cg56827_1890 ln_X44|X87|X0|18 gnd! 1.17519e-17 C56828_1891 Xcancel|Xtxdac_cancel_cap_9<4>|intp Xcancel|Xtxdac_cancel_cap_9<4>|intn 3.22766e-16 C56828_1892 Xcancel|Xtxdac_cancel_cap_9<4>|intp ln_X44|X87|X0|12 3.77404e-17 C56828_1893 Xcancel|Xtxdac_cancel_cap_9<4>|intp ln_X44|X87|X0|14
3.79354e-17 C56828_1894 Xcancel|Xtxdac_cancel_cap_9<4>|intp ln_X44|X87|X0|16 3.16881e-17 C56828_1895 Xcancel|Xtxdac_cancel_cap_9<4>|intp ln_X44|X87|X0|18 3.19942e-17 C56828_1896 Xcancel|Xtxdac_cancel_cap_9<4>|intp Xcancel|Xcap4<2>|intp
1.17334e-16 C56828_1897 Xcancel|Xtxdac_cancel_cap_9<4>|intp Xcancel|Xtxdac_cancel_cap_8<1>|intp 4.26524e-16 Cg56828_1898 Xcancel|Xtxdac_cancel_cap_9<4>|intp gnd! 2.1848e-16 C56829_1899 Xcancel|Xcap4<0>|intn Xcancel|Xtxdac_cancel_cap_9<4>|intn 3.10177e-16