我正在尝试复现 Tavanaei 发布在 https://github.com/tavanaei/Cancer-Suppressor-Gene-Deep-Learning 的代码,并将其用作比较蛋白质模型的 CNN。
我自己完成了第一部分:一段将 PDB(Protein Data Bank)文件转换为 CSV 文件的 Python 代码,输出格式如下:
charged,Polar,Hydrophobic,Hydrophobic,Moderate,Hydrophillic,polar,Aromatic,Aliphatic,Acid,Basic,negative charge,Neutral,positive charge,Pka_NH2,P_ka_COOH,x,y
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,13,22
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,9,22
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,4,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,5,23
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,5,23
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,2,25
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,5,27
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,3,28
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,6,29
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,6,30
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,0,32
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,2,30
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,0,27
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,1,26
0,1,0,0,1,0,0,0,0,0,1,0,0,1,0.085858586,0.009735744,3,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,7,19
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,8,21
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,10,19
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,11,21
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,21
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,17,24
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,20,26
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,23,25
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438,27,27
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,29,29
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,29,29
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,31,26
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,28,24
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,27,26
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,30,27
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,31,23
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,28,22
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,28,22
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,26,20
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,26,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,22,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,17,18
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,12,21
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,12,21
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,10,23
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,7,24
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,4,24
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,4,26
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,4,26
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,4,22
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,4,22
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,4,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,10,18
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,10,17
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,17,12
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,17,11
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,19,6
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,19,0
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,17,1
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,17,1
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,19,4
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,19,4
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,15,6
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,16,9
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,13,9
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,13,9
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438,10,11
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,10,11
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,8,13
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,14
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,5,16
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,6,18
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,7,24
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,9,23
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,9,23
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,13,27
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,16,28
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,14,32
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,14,32
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,16,34
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,16,34
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,16,34
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438,17,33
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,14,35
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,11,34
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,30
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,13,32
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,13,32
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,9,33
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,9,33
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,29
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,10,28
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,10,28
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,29
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,10,32
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,7,31
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,9,29
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,10,32
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,7,33
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,7,29
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,10,30
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,33
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,33
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,6,31
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,6,31
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,8,29
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,9,26
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,12,25
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,15,23
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,15,24
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,15,24
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,18,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,19,22
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,20,18
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,21,17
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,25,17
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,28,21
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,24,22
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,24,22
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,23,25
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,23,25
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,23,25
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,23,24
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,25,27
0,1,0,0,1,0,0,0,0,0,1,0,0,1,0.085858586,0.009735744,24,29
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,21,29
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,21,29
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,22,29
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,25,31
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,25,32
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,25,32
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,23,39
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,21,37
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,21,33
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,21,33
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,19,32
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,19,32
这些文件是用我编写的下面这段代码生成的(这是我写的第一个 Python 程序,所以效率非常低;我以前用 Java 编程):
import math
import os
# Converts fixed-column PDB files (saved with a ".txt" extension) into three
# CSV-like text files per structure: <name>.pdb_xy.txt, <name>.pdb_xz.txt and
# <name>.pdb_yz.txt.  Each file holds, for one 2-D projection, the outermost
# C-alpha atom found in every 1-degree direction around the structure's
# bounding-box centre, written as a fixed residue feature row followed by the
# two integer coordinates shifted so that the smallest value is 0.

# Feature columns shared by all three outputs; the two axis names are appended
# per file.  #!!! need change for gene annotations
_FEATURE_HEADER = (
    'charged,Polar,Hydrophobic,Hydrophobic,Moderate,Hydrophillic,polar,'
    'Aromatic,Aliphatic,Acid,Basic,negative charge,Neutral,positive charge,'
    'Pka_NH2,P_ka_COOH'
)

# Three-letter residue name -> pre-formatted feature row (16 comma-separated values).
_RESIDUE_FEATURES = {
    "LYS": "1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1",
    "ARG": "1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567",
    "ASP": "1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395",
    "GLU": "1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388",
    "GLN": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747",
    "ASN": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438",
    "HIS": "0,1,0,0,1,0,0,0,0,0,1,0,0,1,0.085858586,0.009735744",
    "SER": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029",
    "THR": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106",
    "TYR": "0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209",
    "CYS": "0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0",
    "MET": "0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773",
    "TRP": "0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.297979798,0.093184979",
    "ALA": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056",
    "ILE": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517",
    "LEU": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338",
    "PHE": "0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391",
    "VAL": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594",
    "PRO": "0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976",
    "GLY": "0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697",
}

_AXIS_NAMES = "xyz"
# Index pairs into an (x, y, z) triple, one pair per output file.
_PROJECTIONS = ((0, 1), (0, 2), (1, 2))
# Suffixes of the files this script writes.  They also end in ".txt", so they
# must be excluded from the inputs or a second run would overwrite them with
# header-only files.
_OUTPUT_SUFFIXES = tuple(
    ".pdb_" + _AXIS_NAMES[a] + _AXIS_NAMES[b] + ".txt" for a, b in _PROJECTIONS
)


def _read_alpha_carbons(path):
    """Return ``[(residue_name, (x, y, z)), ...]`` for each C-alpha ATOM record.

    Coordinates are rounded to the nearest integer.  Fields are read by fixed
    PDB column: atom name in columns 13-16, residue name in 18-20, and x/y/z in
    31-38 / 39-46 / 47-54 (0-based slices 30:38, 38:46, 46:54).  Reading the
    full 8-character field keeps the sign of values such as ``-100.000``.

    Raises ValueError if a matching record has a non-numeric coordinate field.
    """
    residues = []
    with open(path, "r") as pdb_file:
        for line in pdb_file:
            # Test the actual fields rather than searching the whole line, so
            # unrelated text containing "CA" is not mistaken for a C-alpha.
            if not line.startswith("ATOM") or line[12:16].strip() != "CA":
                continue
            coords = tuple(
                int(round(float(line[start:start + 8]))) for start in (30, 38, 46)
            )
            residues.append((line[17:20], coords))
    return residues


def _select_outline(centred, a, b):
    """Return indices of the outermost point per 1-degree direction.

    ``centred`` is a sequence of centred (x, y, z) triples; ``a`` and ``b`` are
    the axis indices spanning the projection plane.  Angles use ``atan2`` so
    opposite directions fall in different bins (0..359).  On equal distance the
    earlier point is kept.  Indices are returned in ascending (input) order.
    """
    best = {}  # angle in whole degrees -> (squared radius, index)
    for index, point in enumerate(centred):
        angle = int(round(math.degrees(math.atan2(point[b], point[a])))) % 360
        radius_sq = point[a] ** 2 + point[b] ** 2
        if angle not in best or radius_sq > best[angle][0]:
            best[angle] = (radius_sq, index)
    return sorted(index for _, index in best.values())


def _write_projection(path, residues, kept, a, b, minimums):
    """Write the header and one row per kept residue for projection (a, b).

    Rows are separated by a leading newline, so the file has no trailing
    newline.  Residues without a feature row are reported and skipped so that
    every written row has the same number of columns.
    """
    with open(path, "w") as out:
        out.write(_FEATURE_HEADER + "," + _AXIS_NAMES[a] + "," + _AXIS_NAMES[b])
        for index in kept:
            residue, coords = residues[index]
            features = _RESIDUE_FEATURES.get(residue)
            if features is None:
                print("error" + str(residue))
                continue
            out.write(
                "\n%s,%d,%d"
                % (features, coords[a] - minimums[a], coords[b] - minimums[b])
            )


def convert_directory(directory):
    """Convert every ``*.txt`` PDB file in ``directory`` into xy/xz/yz files.

    Output files are written into ``directory`` and named after the first four
    characters of the input file name.  Files produced by this script and files
    containing no C-alpha ATOM records are skipped.
    """
    for filename in sorted(os.listdir(directory)):
        print(filename)
        path = os.path.join(directory, filename)
        if not filename.endswith(".txt") or filename.endswith(_OUTPUT_SUFFIXES):
            continue
        if not os.path.isfile(path):
            continue

        residues = _read_alpha_carbons(path)
        if not residues:
            print("no CA atoms in " + filename)
            continue

        # Per-axis extents taken from the data itself, so negative or very
        # large coordinates are handled correctly.
        columns = list(zip(*[coords for _, coords in residues]))
        minimums = [min(column) for column in columns]
        maximums = [max(column) for column in columns]
        centres = [(lo + hi) / 2.0 for lo, hi in zip(minimums, maximums)]
        centred = [
            tuple(value - centre for value, centre in zip(coords, centres))
            for _, coords in residues
        ]

        name = filename[0:4]
        for a, b in _PROJECTIONS:
            kept = _select_outline(centred, a, b)
            out_name = name + ".pdb_" + _AXIS_NAMES[a] + _AXIS_NAMES[b] + ".txt"
            _write_projection(
                os.path.join(directory, out_name), residues, kept, a, b, minimums
            )


if __name__ == "__main__":
    directory = os.getcwd()
    print(directory)
    convert_directory(directory)
现在我卡在了这些 CSV 文件上:Tavanaei 上传到 GitHub 的 Lua 代码我无法复现。我希望有人能就如何另行利用这些 CSV 文件给我一些建议,例如改用其他类型的机器学习方法。Tavanaei 的代码对我来说太复杂、难以读懂,因为它是用 Lua 编写的,而且其中使用的路径非常不清晰。因此,我一直在考虑如何以更简单的方式重新实现 CNN,以便应用于我的 CSV 文件。我的计划是比较与阿尔茨海默病和帕金森病相关的蛋白质,找出两者之间的相似之处。如果您有任何建议,我将不胜感激。