我有以下代码从pdb文件打印预定义序列。现在我想为每个chain_id保存单独的输出文件。
如何为每个chain_id保存单独的输出?
预期输出:
我想为每个chain_id保存输出文件。
如果输入文件名是 1AHI.PDB，并且该文件中有四个链 ID（A、B、C、D），那么我想要得到以下输出文件：1AHIA.txt、1AHIB.txt、1AHIC.txt、1AHID.txt。
这将适用于每个输入文件。我的输入目录中有 2000 多个输入文件。
代码:
*收到回答后的编辑*
错误:
from glob import glob
# Directory scanned for input '*.pdb' files (used as the glob prefix in test()).
in_loc = r'C:/Users/Documents/NAD/NAD/result/test_result_file/'
# Directory the generated '.txt' files are written to by test().
out_loc = r'C:/Users/Documents/NAD/NAD/result/test_result_file/Final_result/'
def test():
    """Filter every ``*.pdb`` file found in ``in_loc`` into a ``.txt`` file in ``out_loc``.

    For each input file the lines returned by ``filter_file`` are written to
    ``<out_loc>/<input stem>.txt``.  A failure while processing one file is
    printed and the loop moves on to the next file, so one bad input does not
    stop the whole batch.

    The output directory ``out_loc`` must already exist.
    """
    # Local import so the snippet stays self-contained.
    import os

    for pdb_path in glob(in_loc + '*.pdb'):
        # Build the output path from the file's base name instead of doing
        # substring replacement on the full path: replacing 'pdb' would also
        # rewrite a directory name containing it, would miss an upper-case
        # '.PDB' extension, and swapping the in_loc prefix silently does
        # nothing when glob returns a different path separator (which could
        # make the output path equal to the input path).
        stem = os.path.splitext(os.path.basename(pdb_path))[0]
        formatted_file = os.path.join(out_loc, stem + '.txt')

        try:
            # Filter first, so that a failing input does not leave an empty
            # output file behind; ``with`` closes both files on every path.
            with open(pdb_path, 'r') as in_f:
                out_chain_list = filter_file(in_f)
            with open(formatted_file, 'w') as out_f:
                out_f.writelines(out_chain_list)
        except Exception as e:
            print('Exception for file: ', pdb_path, '\n', e)
def _rotated_windows(chain_list, order):
    """Return the five rotated four-line groups for the tail of *chain_list*.

    The last five entries of *chain_list* are re-ordered by *order*; then,
    five times, the first four entries are emitted followed by a ``'\\n'``
    separator and the sequence is rotated left by one position.

    Returns an empty list when *chain_list* is empty.  Raises ``IndexError``
    when fewer than five entries are available.
    NOTE(review): the tail is taken as-is, so a chain with fewer than five
    matching atoms would pick up the separator or lines of the previous
    chain -- confirm inputs always provide five atoms per chain.
    """
    if not chain_list:
        return []
    tail = chain_list[-5:]
    ring = [tail[i] for i in order]
    rows = []
    for _ in range(5):
        rows += ring[:4]
        rows.append('\n')
        ring = ring[-4:] + ring[:1]  # rotate left by one
    return rows


def filter_file(in_f):
    """Collect the selected HETATM lines of a PDB file as rotated groups.

    Args:
        in_f: iterable of text lines (for example an open file).

    Returns:
        A flat list of strings: for every run of consecutive matching lines
        with the same chain id, five groups of four lines, each group followed
        by a ``'\\n'`` entry.  Empty when no line matches.

    Raises:
        IndexError: if a HETATM line has fewer than four whitespace-separated
            fields, or fewer than five entries are available for a chain.
    """
    atom_ids = ['C4B', 'O4B', 'C1B', 'C2B', 'C3B']
    chain_ids = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    order = [0, 1, 4, 3, 2]
    previous_chain_id = None
    chain_list = []
    out_chain_list = []

    for line in in_f:
        # Remove the single space after the record name so that lines such as
        # 'HETATM 1234 ...' and 'HETATM12345 ...' split into the same tokens.
        # NOTE(review): with more than one space after 'HETATM' the serial
        # stays a separate token, the atom name is no longer token 1 and the
        # line is skipped -- confirm this is intended.
        if line.startswith('HETATM '):
            line = line.replace('HETATM ', 'HETATM')
        if not line.startswith('HETATM'):
            continue

        line_list = line.split()
        chain_id = line_list[3][0]
        atom_id = line_list[1]
        if atom_id not in atom_ids or chain_id not in chain_ids:
            continue

        if chain_id != previous_chain_id:
            # A new chain starts: flush the chain collected so far.
            out_chain_list += _rotated_windows(chain_list, order)
            chain_list.append('\n')
        chain_list.append(line)
        previous_chain_id = chain_id

    # Flush the last chain, which no chain change has triggered yet.
    out_chain_list += _rotated_windows(chain_list, order)
    return out_chain_list
# Run the batch conversion as soon as the script is executed.
test()
答案 0 :(得分:0)
您当前的代码只为每个输入文件打开一个输出文件。但您想要的是为每个 out_chain 项生成一个输出文件，而每个输入文件中可能有多个 out_chain 项。因此，您需要在处理 out_chain 项的内部循环中打开和关闭输出文件。下面是一种实现方式：
// NOTE(review): this Java fragment does not match the surrounding answer,
// which describes opening one output file per chain in Python; it looks like
// it was pasted from an unrelated post -- confirm and replace.
// Loops over the indices of v and copies elements of v into a float array.
for(int i = 0; i < v.size; i++){
// NOTE(review): the array is re-created on every iteration, and this
// statement has no terminating ';'.
float[] vertices = new float[v.size()]
// NOTE(review): for i == 0 this reads and writes index -1 -- confirm bounds.
vertices[i - 1] = v.get(i - 1);
vertices[i] = v.get(i);
}
// Passes v itself (not the vertices array declared inside the loop) to sr.polygon.
sr.polygon(v);
答案 1 :(得分:0)
您可以修改 `filter_file`，使其返回一个以 `chain_id` 为键的字典。如果 `out_chain_dict` 的格式为 `{'chain_id': out_chain_list}`，就可以轻松地为每个 `chain_id` 创建一个单独的文件：
def test():
    """Write one ``<stem><chain_id>.txt`` file per chain for every input PDB file.

    Every ``*.pdb`` file found in ``in_loc`` is passed to ``filter_file``,
    which is expected to return a dict mapping a chain id to the list of
    output lines for that chain.  Each entry is written to its own file in
    ``out_loc`` (for example ``1AHI.pdb`` with chains A and B produces
    ``1AHIA.txt`` and ``1AHIB.txt``).  If filtering a file fails, the
    exception is printed and the file is skipped.

    The output directory ``out_loc`` must already exist.
    """
    # Local import so the snippet stays self-contained.
    import os

    for pdb_path in glob(in_loc + '*.pdb'):
        # Derive the stem from the base name instead of replacing '.pdb' in
        # the full path: the replacement misses an upper-case '.PDB' (all
        # chains would then overwrite one file), and swapping the in_loc
        # prefix silently does nothing when glob returns a different path
        # separator.
        stem = os.path.splitext(os.path.basename(pdb_path))[0]

        try:
            with open(pdb_path, 'r') as in_f:
                out_chain_dict = filter_file(in_f)
        except Exception as e:
            print('Exception for file: ', pdb_path, '\n', e)
            continue

        for chain_id, out_chain_list in out_chain_dict.items():
            formatted_file = os.path.join(out_loc, stem + chain_id + '.txt')
            with open(formatted_file, 'w') as out_f:
                out_f.writelines(out_chain_list)
对 `filter_file` 的修改:
def _rotated_windows(chain_list, order):
    """Return the five rotated four-line groups for the tail of *chain_list*.

    The last five entries of *chain_list* are re-ordered by *order*; then,
    five times, the first four entries are emitted followed by a ``'\\n'``
    separator and the sequence is rotated left by one position.

    Returns an empty list when *chain_list* is empty.  Raises ``IndexError``
    when fewer than five entries are available.
    NOTE(review): the tail is taken as-is, so a chain with fewer than five
    matching atoms would pick up the separator or lines of the previous
    chain -- confirm inputs always provide five atoms per chain.
    """
    if not chain_list:
        return []
    tail = chain_list[-5:]
    ring = [tail[i] for i in order]
    rows = []
    for _ in range(5):
        rows += ring[:4]
        rows.append('\n')
        ring = ring[-4:] + ring[:1]  # rotate left by one
    return rows


def filter_file(in_f):
    """Group the selected HETATM lines of a PDB file by chain id.

    Args:
        in_f: iterable of text lines (for example an open file).

    Returns:
        A dict mapping each chain id to a flat list of strings: five groups
        of four lines, each group followed by a ``'\\n'`` entry.  If a chain
        id occurs in several separate runs, the groups of every run are
        appended to the same list.  Empty dict when no line matches.

    Raises:
        IndexError: if a HETATM line has fewer than four whitespace-separated
            fields, or fewer than five entries are available for a chain.
    """
    atom_ids = ['C4B', 'O4B', 'C1B', 'C2B', 'C3B']
    chain_ids = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    order = [0, 1, 4, 3, 2]
    previous_chain_id = None
    chain_list = []
    out_chain_dict = {}

    for line in in_f:
        # Remove the single space after the record name so that lines such as
        # 'HETATM 1234 ...' and 'HETATM12345 ...' split into the same tokens.
        # NOTE(review): with more than one space after 'HETATM' the serial
        # stays a separate token, the atom name is no longer token 1 and the
        # line is skipped -- confirm this is intended.
        if line.startswith('HETATM '):
            line = line.replace('HETATM ', 'HETATM')
        if not line.startswith('HETATM'):
            continue

        line_list = line.split()
        chain_id = line_list[3][0]
        atom_id = line_list[1]
        if atom_id not in atom_ids or chain_id not in chain_ids:
            continue

        if chain_id != previous_chain_id:
            # The lines collected so far belong to the chain that just ended,
            # so they must be stored under previous_chain_id.  Storing them
            # under the new chain_id would shift every chain's output into
            # the next chain's file and leave the first chain empty.
            if previous_chain_id is not None:
                out_chain_dict.setdefault(previous_chain_id, []).extend(
                    _rotated_windows(chain_list, order))
            chain_list.append('\n')
        chain_list.append(line)
        previous_chain_id = chain_id

    # Flush the last chain; skipped when no line matched at all, so an input
    # without matching atoms yields an empty dict instead of a NameError.
    if previous_chain_id is not None:
        out_chain_dict.setdefault(previous_chain_id, []).extend(
            _rotated_windows(chain_list, order))
    return out_chain_dict