第一个 open() 的输出是第二个 open() 的输入。我需要创建一个临时文件,或者用某个对象来保存第一步的输出。这两个操作我基本上是分开写的,但很难把它们组合在一起。现在的情况是两个输出被连接在同一个文件中:output1 + output2,而我只想在最终文件中保留 output2。
import csv
import sys
from collections import Counter
# Output file (second command-line argument). This handle stays open until
# the last line of the script and is used by both steps below.
ofile = open(sys.argv[2], 'wb')
writer = csv.writer(ofile, delimiter='\t')
# Step 1: copy the rows of the input file (first command-line argument) that
# contain at least four distinct cell values; all other rows are skipped.
with open(sys.argv[1], 'rb') as ifile:
    reader = csv.reader(ifile, delimiter='\t')
    for line in reader:
        freqs = Counter(line)  # cell value -> occurrences within this row
        if len(freqs.items()) < 4:
            continue
        else: writer.writerow(line)
# Step 2: read the output file back and substitute a numeric code for every
# symbol in findlist.
# NOTE(review): ofile has been neither flushed nor closed at this point, and
# the substituted text is written through that same handle, i.e. after the
# rows from step 1. That is consistent with the "output1 + output2" result
# described in the question.
with open(sys.argv[2], 'rb') as ifile2:
    findlist = ['A', 'G', 'C', 'T', 'Y', 'R', 'W', 'S', 'K', 'M', 'X', 'N', '-']
    replacelist = ['2', '19', '5', '29', '17', '7', '11', '13', '23', '3', '0', '0', '0']
    s = ifile2.read()
    # Plain substring replacement over the whole text, one symbol at a time
    # (not limited to whole cells).
    for item, replacement in zip(findlist, replacelist):
        s = s.replace(item, replacement)
    ofile.write(s)
ofile.close()
答案 0 :(得分:2)
我已经重新组织了您的代码,以便更容易重用:
import csv
import sys
from collections import Counter
def load_csv(fname, **kwargs):
    """Read a delimited text file and return all of its rows.

    Args:
        fname: Path of the file to read.
        **kwargs: Passed through unchanged to ``csv.reader`` (for example
            ``delimiter='\\t'``).

    Returns:
        A list with one entry per row; each row is a list of strings.
    """
    # On Python 3 the csv module needs a text-mode file opened with
    # newline='' so that it can handle line endings itself; a file opened
    # in binary mode ('rb') makes csv.reader raise an error.
    with open(fname, newline='') as inf:
        return list(csv.reader(inf, **kwargs))


def save_csv(fname, data, header=None, **kwargs):
    """Write rows to a delimited text file, replacing any existing content.

    Args:
        fname: Path of the file to write.
        data: Iterable of rows (a generator is fine); each row is an
            iterable of cell values.
        header: Optional row written before ``data``; skipped when ``None``.
        **kwargs: Passed through unchanged to ``csv.writer`` (for example
            ``delimiter='\\t'``).
    """
    # Text mode with newline='' for the same reason as in load_csv: the csv
    # writer emits its own line terminator and cannot write str to a file
    # opened in binary mode ('wb') on Python 3.
    with open(fname, 'w', newline='') as outf:
        out_csv = csv.writer(outf, **kwargs)
        if header is not None:
            out_csv.writerow(header)
        out_csv.writerows(data)
def main(in_fname, out_fname):
    """Filter a tab-separated file and recode its cells as numeric codes.

    Rows of ``in_fname`` with fewer than four distinct cell values are
    dropped. In the remaining rows, every cell that exactly equals a key of
    the code table is replaced by its code; all other cells are kept as they
    are. The result is written, tab-separated, to ``out_fname``.
    """
    codes = {
        'A': '2', 'G': '19', 'C': '5', 'T': '29', 'Y': '17', 'R': '7',
        'W': '11', 'S': '13', 'K': '23', 'M': '3', 'X': '0', 'N': '0',
        '-': '0',
    }
    rows = load_csv(in_fname, delimiter='\t')
    # The filtered rows are never written out on their own: they are recoded
    # in memory, so only the final result reaches out_fname.
    recoded = (
        [codes.get(cell, cell) for cell in row]
        for row in rows
        if len(set(row)) >= 4
    )
    save_csv(out_fname, recoded, delimiter='\t')
if __name__ == "__main__":
    # Both the input path and the output path must be given on the command
    # line; otherwise only the usage hint is printed.
    if len(sys.argv) >= 3:
        main(sys.argv[1], sys.argv[2])
    else:
        print('Usage: myprog.py input.csv output.csv')
答案 1 :(得分:0)
如果您不希望第一个文件中的数据显示在输出文件中,则应删除相应的行
else: writer.writerow(line)