您好,我编写了下面这个脚本:它逐行读取输入文件,把第 4 列替换为一个递增的 ID,并在其前面再插入一次第 2 列的值。脚本运行正常,但我想知道是否有更好、更高效的写法。
import sys
def make_id(count):
    """Return the replacement ID for the 1-based record number ``count``.

    The ID is ``"Bra1"`` followed by ``count`` zero-padded to six digits,
    e.g. ``Bra1000001`` for 1 and ``Bra1000010`` for 10.  Using ``zfill``
    keeps the ID ten characters wide for every count up to 999999, rather
    than only for one- and two-digit counts.
    """
    return "Bra1" + str(count).zfill(6)


def renumber(fh_in, fh_out):
    """Rewrite whitespace-separated records from ``fh_in`` into ``fh_out``.

    Each output row is tab-separated and contains, in order: input fields
    0-2, field 1 repeated, the generated ID (taking the place of field 3),
    then fields 4-11.  Fields beyond index 11 are not written.  Blank lines
    are skipped and do not consume an ID.

    Args:
        fh_in: iterable of text lines (e.g. an open text file).
        fh_out: object with a ``write(str)`` method.

    Returns:
        The number of records written.

    Raises:
        ValueError: if a non-blank line has fewer than 12 fields.
    """
    count = 0
    for line_no, line in enumerate(fh_in, 1):
        # split() with no argument already ignores leading/trailing whitespace.
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 12:
            raise ValueError(
                "line {}: expected at least 12 fields, got {}".format(
                    line_no, len(fields)
                )
            )
        count += 1
        row = fields[:3] + [fields[1], make_id(count)] + fields[4:12]
        # One write per record instead of one per column.
        fh_out.write("\t".join(row) + "\n")
    return count


def main(argv):
    """Run the conversion: ``argv[1]`` is the input path, ``argv[2]`` the output path."""
    if len(argv) < 3:
        sys.exit("usage: {} INPUT OUTPUT".format(argv[0]))
    # Context managers guarantee both files are flushed and closed,
    # even if a malformed line raises part-way through.
    with open(argv[1], "r") as fh_in, open(argv[2], "w") as fh_out:
        renumber(fh_in, fh_out)


if __name__ == "__main__":
    main(sys.argv)
输入
A01 158188 158533 Contig545|m.1503 228 + 158188 158533 255,0,0 1 345 0
A01 272139 272465 comp285432_c0_seq1|m.9436 230 - 272139 272465 255,0,0 1 326 0
A01 339617 340806 TCONS_00003584|m.11226 157 - 339617 340806 255,0,0 5 95,127,68,50,432 0,190,467,619,757
A01 888838 889347 Contig1477|m.3679 92 - 888838 889347 255,0,0 1 509 0
A01 1165488 1165761 comp3043338_c0_seq1|m.9546 228 - 1165488 1165761 255,0,0 1 273 0
A01 1167009 1167386 Contig1598|m.4011 238 - 1167009 1167386 255,0,0 1 377 0
A01 1234347 1234827 comp260850_c0_seq1|m.10201 85 + 1234347 1234827 255,0,0 1 480 0
输出
A01 158188 158533 158188 Bra1000001 228 + 158188 158533 255,0,0 1 345 0
A01 272139 272465 272139 Bra1000002 230 - 272139 272465 255,0,0 1 326 0
A01 339617 340806 339617 Bra1000003 157 - 339617 340806 255,0,0 5 95,127,68,50,432 0,190,467,619,757
A01 888838 889347 888838 Bra1000004 92 - 888838 889347 255,0,0 1 509 0
A01 1165488 1165761 1165488 Bra1000005 228 - 1165488 1165761 255,0,0 1 273 0
A01 1167009 1167386 1167009 Bra1000006 238 - 1167009 1167386 255,0,0 1 377 0
A01 1234347 1234827 1234347 Bra1000007 85 + 1234347 1234827 255,0,0 1 480 0
答案 0 :(得分:1)
对于大量的 fh_out.write 调用,您可以改写为:
# Assemble all 13 output columns first (fields 0-2, field 1 again, the new
# ID, then fields 4-11), then emit the row with a single write call.
columns = line[:3] + [line[1], test] + line[4:12]
output = '\t'.join(columns) + '\n'
fh_out.write(output)