我有一个带有几千个结构的PDB文件,我想将前十个结构的alpha碳的位置坐标保存为numpy数组。我可以使用下面的代码将具有单个结构的PDB文件解析为数组,但不能将其扩展到具有许多结构的文件。
from Bio.PDB.PDBParser import PDBParser
import numpy
# Parse a single-structure PDB file and collect the alpha-carbon (CA)
# coordinates of chain A of its first model into an (n_residues, 3) array.
pdb_filename ='./1fqy.pdb'
parser = PDBParser(PERMISSIVE=1)
structure = parser.get_structure("1fqy", pdb_filename)
model = structure[0]
chain = model["A"]
# Size the array from the residues actually present instead of a hard-coded
# row count; residues that carry no CA atom are skipped rather than raising
# KeyError.
ca_coords = [
    residue1['CA'].get_coord()
    for residue1 in chain
    if residue1.has_id('CA')
]
# reshape keeps the (n, 3) layout even when no CA atom was found.
S1coor = numpy.array(ca_coords, dtype=float).reshape(-1, 3)
答案 0 :(得分:1)
from Bio.PDB.PDBParser import PDBParser
import numpy , tempfile ,os , re
# Kept unchanged for backward compatibility with code that uses these patterns.
models_re = re.compile("MODEL")
pdb_re = re.compile(r"MODEL(.*?)ENDMDL", re.DOTALL)
# One MODEL record through its matching ENDMDL record. Both keywords are
# anchored to the start of a line so that the word "MODEL" appearing inside
# other records (REMARK, TITLE, ...) is not mistaken for a model start.
_model_block_re = re.compile(r"^MODEL\b.*?^ENDMDL", re.DOTALL | re.MULTILINE)


def PDB_parse(pdb_file_handle):
    """Split a multi-model PDB file into one text chunk per model.

    Args:
        pdb_file_handle: Path of the PDB file to read (despite the name, this
            is passed to ``open`` and is therefore a path, not a handle).

    Returns:
        A list of strings in file order, each spanning from a ``MODEL``
        record through its ``ENDMDL`` record inclusive. A trailing ``MODEL``
        without an ``ENDMDL`` is ignored; a file without models yields ``[]``.
    """
    # Context manager so the file is closed even if reading fails.
    with open(pdb_file_handle, "r") as pdb_file:
        text = pdb_file.read()
    return [match.group() for match in _model_block_re.finditer(text)]
# Path of the multi-model PDB file to split and parse.
# NOTE(review): the original snippet never defined this name or `parser`;
# the path mirrors the single-structure example -- adjust as needed.
pdb_file_handle = './1fqy.pdb'
parser = PDBParser(PERMISSIVE=1)

# One (n_residues, 3) array of alpha-carbon coordinates per model, in file
# order. Slice the PDB_parse result (e.g. [:10]) to keep only the first models.
array_all_structure = []
for model_text in PDB_parse(pdb_file_handle):
    # Each model is spilled to its own temporary file for the parser.
    # Text mode is required because model_text is a str, and leaving the
    # `with` block closes (and flushes) the file before it is parsed.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".pdb", delete=False) as temp_file:
        temp_file.write(model_text)
    try:
        structure = parser.get_structure("1fqy", temp_file.name)
    finally:
        # Remove the temporary file even when parsing fails.
        os.remove(temp_file.name)
    model = structure[0]
    chain = model["A"]
    # Size the array from the residues actually present instead of a
    # hard-coded row count; residues without a CA atom are skipped.
    ca_coords = [
        residue1['CA'].get_coord()
        for residue1 in chain
        if residue1.has_id('CA')
    ]
    S1coor = numpy.array(ca_coords, dtype=float).reshape(-1, 3)
    # Store the coordinates themselves (the original appended a loop counter).
    array_all_structure.append(S1coor)
也许这样的思路会有所帮助:先把PDB文件中的各个模型(MODEL … ENDMDL)分离出来,然后再逐个读取并解析。