蛋白质分析的深度学习用途

时间:2021-03-04 19:55:29

标签: csv github conv-neural-network

我正在尝试复现 Tavanaei 在此处发布的代码: https://github.com/tavanaei/Cancer-Suppressor-Gene-Deep-Learning ,并将其作为 CNN 用于比较蛋白质模型。

我自己完成了第一部分,一个将 PDB(Protein Data Bank) 文件转换为 CSV 文件的 python 代码,格式如下:

charged,Polar,Hydrophobic,Hydrophobic,Moderate,Hydrophillic,polar,Aromatic,Aliphatic,Acid,Basic,negative charge,Neutral,positive charge,Pka_NH2,P_ka_COOH,x,y
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,13,22
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,9,22
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,4,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,5,23
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,5,23
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,2,25
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,5,27
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,3,28
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,6,29
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,6,30
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,0,32
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,2,30
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,0,27
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,1,26
0,1,0,0,1,0,0,0,0,0,1,0,0,1,0.085858586,0.009735744,3,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,7,19
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,8,21
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,10,19
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,11,21
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,21
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,17,24
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,20,26
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,23,25
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438,27,27
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,29,29
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,29,29
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,31,26
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,28,24
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,27,26
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,30,27
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,31,23
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,28,22
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,28,22
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,26,20
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,26,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,22,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,17,18
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,12,21
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,12,21
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,10,23
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,7,24
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,4,24
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,4,26
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,4,26
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,4,22
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,4,22
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,4,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,6,20
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,10,18
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,10,17
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,15
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,17,12
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,17,11
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,19,6
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,19,0
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,17,1
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,17,1
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,19,4
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,19,4
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,15,6
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,16,9
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,13,9
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,13,9
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438,10,11
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,10,11
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,8,13
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,14
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,5,16
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,6,18
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,7,24
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,9,23
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,9,23
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,13,27
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395,16,28
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,14,32
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,14,32
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,16,34
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,16,34
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,16,34
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438,17,33
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,14,35
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,11,34
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,13,30
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,13,32
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,13,32
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,9,33
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,9,33
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,29
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,10,28
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,10,28
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,29
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,10,32
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976,7,31
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338,9,29
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594,10,32
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,7,33
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,7,29
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,10,30
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,33
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,8,33
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,6,31
1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388,6,31
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567,8,29
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,9,26
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,12,25
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,12,25
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,15,23
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,15,24
0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0,15,24
0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391,18,22
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056,19,22
0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209,20,18
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,21,17
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747,25,17
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,28,19
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,28,21
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,24,22
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,24,22
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,23,25
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,23,25
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,23,25
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,23,24
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,25,27
0,1,0,0,1,0,0,0,0,0,1,0,0,1,0.085858586,0.009735744,24,29
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,21,29
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106,21,29
0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773,22,29
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,25,31
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,25,32
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,25,32
0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029,23,39
1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1,21,37
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,21,33
0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697,21,33
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,19,32
0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517,19,32

这些文件是用我编写的下面这段代码生成的(这是我写的第一个 Python 程序,所以效率非常低;我以前使用 Java 编程):


import math
import os
# Converts PDB text files (*.txt) in a directory into three CSV files per
# protein (<name>.pdb_xy.txt, <name>.pdb_xz.txt, <name>.pdb_yz.txt).  Each CSV
# holds one row per retained alpha carbon: a fixed 16-value feature string for
# the residue type followed by the two in-plane integer coordinates, shifted so
# that the smallest coordinate on each axis becomes 0.

# Feature string written for each three-letter residue name.  The values are
# kept verbatim because downstream consumers read the columns as-is.
AMINO_FEATURES = {
    "LYS": "1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.747474747,1",
    "ARG": "1,0,0,0,0,1,0,0,0,0,1,0,0,1,0.146464646,0.065368567",
    "ASP": "1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.404040404,0.02364395",
    "GLU": "1,0,0,0,0,1,0,0,0,1,0,1,0,0,0.439393939,0.066759388",
    "GLN": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.166666667,0.063977747",
    "ASN": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0.043115438",
    "HIS": "0,1,0,0,1,0,0,0,0,0,1,0,0,1,0.085858586,0.009735744",
    "SER": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.176767677,0.069541029",
    "THR": "0,1,0,0,0,1,1,0,0,0,0,0,1,0,0.161616162,0.061196106",
    "TYR": "0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.156565657,0.068150209",
    "CYS": "0,1,0,0,1,0,1,0,0,0,0,0,1,0,1,0",
    "MET": "0,1,0,0,1,0,1,0,0,0,0,0,1,0,0.207070707,0.079276773",
    "TRP": "0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.297979798,0.093184979",
    "ALA": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.484848485,0.084840056",
    "ILE": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.54040404,0.089012517",
    "LEU": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.404040404,0.090403338",
    "PHE": "0,0,1,1,0,0,0,1,0,0,0,0,1,0,0.222222222,0.121001391",
    "VAL": "0,0,1,1,0,0,0,0,1,0,0,0,1,0,0.464646465,0.080667594",
    "PRO": "0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.909090909,0.038942976",
    "GLY": "0,0,1,1,0,0,0,0,0,0,0,0,1,0,0.404040404,0.087621697",
}

# !!! need change for gene annotations
HEADER_PREFIX = (
    'charged,Polar,Hydrophobic,Hydrophobic,Moderate,Hydrophillic,polar,'
    'Aromatic,Aliphatic,Acid,Basic,negative charge,Neutral,positive charge,'
    'Pka_NH2,P_ka_COOH'
)

# 0-based slices of the x, y and z fields of a fixed-width PDB ATOM record
# (1-based columns 31-38, 39-46 and 47-54).  Each field is 8 characters wide;
# a narrower slice would cut off the sign of values such as "-100.400".
COORD_SLICES = ((30, 38), (38, 46), (46, 54))

# (label, first axis index, second axis index) for every projection plane.
PLANES = (("xy", 0, 1), ("xz", 0, 2), ("yz", 1, 2))

# Output names also end in ".txt"; they are listed here so that a second run
# does not read its own results back in as input and overwrite them.
OUTPUT_SUFFIXES = tuple(".pdb_" + label + ".txt" for label, _, _ in PLANES)


def parse_alpha_carbons(lines):
    """Extract the alpha carbons from the lines of a PDB file.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        A list of ``(residue_name, (x, y, z))`` tuples in file order, with the
        coordinates rounded to the nearest integer.

    Raises:
        ValueError: if a matching record has a non-numeric coordinate field.
    """
    residues = []
    for line in lines:
        # The record type sits at the start of the line and the atom name in
        # columns 13-16.  Testing those fields (rather than searching the whole
        # line) keeps atoms with a serial number of 1000 or more and ignores
        # "CA" appearing anywhere else in the record.
        if not line.startswith("ATOM"):
            continue
        if line[12:16].strip() != "CA":
            continue
        point = tuple(
            int(round(float(line[start:end]))) for start, end in COORD_SLICES
        )
        residues.append((line[17:20], point))
    return residues


def select_outermost(coords, axis_a, axis_b):
    """Pick, for every whole-degree direction, the point farthest from centre.

    The points are projected onto the plane spanned by ``axis_a`` and
    ``axis_b`` and centred on the midpoint of their bounding box.  Points that
    share the same rounded polar angle compete; the one with the largest
    squared distance wins, and the earlier point wins a tie.

    Args:
        coords: sequence of ``(x, y, z)`` tuples.
        axis_a: index (0-2) of the coordinate used as the horizontal axis.
        axis_b: index (0-2) of the coordinate used as the vertical axis.

    Returns:
        The indices into ``coords`` of the retained points, in ascending order.
    """
    if not coords:
        return []
    values_a = [point[axis_a] for point in coords]
    values_b = [point[axis_b] for point in coords]
    centre_a = (max(values_a) + min(values_a)) / 2
    centre_b = (max(values_b) + min(values_b)) / 2

    best = {}  # angle in degrees -> (squared radius, index of the point)
    for index, point in enumerate(coords):
        delta_a = point[axis_a] - centre_a
        delta_b = point[axis_b] - centre_b
        # atan2 distinguishes all four quadrants and handles delta_a == 0;
        # "% 360" folds -180 and 180 (the same direction) into one bin.
        theta = round(math.degrees(math.atan2(delta_b, delta_a))) % 360
        radius_sq = delta_a ** 2 + delta_b ** 2
        if theta not in best or radius_sq > best[theta][0]:
            best[theta] = (radius_sq, index)
    return sorted(index for _, index in best.values())


def projection_rows(residues, axis_a, axis_b):
    """Build the CSV data rows for one projection plane.

    Args:
        residues: list as returned by ``parse_alpha_carbons``.
        axis_a: index of the first coordinate written to each row.
        axis_b: index of the second coordinate written to each row.

    Returns:
        A list of strings (no line terminators), one per retained residue.
        Residues whose name has no entry in ``AMINO_FEATURES`` are reported on
        stdout and left out, so that every row has the same number of columns.
    """
    if not residues:
        return []
    coords = [point for _, point in residues]
    min_a = min(point[axis_a] for point in coords)
    min_b = min(point[axis_b] for point in coords)

    rows = []
    for index in select_outermost(coords, axis_a, axis_b):
        amino, point = residues[index]
        features = AMINO_FEATURES.get(amino)
        if features is None:
            print("error" + str(amino))
            continue
        rows.append("{},{},{}".format(
            features, point[axis_a] - min_a, point[axis_b] - min_b))
    return rows


def main(directory=None):
    """Convert every PDB ``*.txt`` file in ``directory`` to projection CSVs.

    Args:
        directory: folder to scan and to write into; defaults to the current
            working directory.

    For an input whose name starts with the four characters ``NAME`` this
    writes ``NAME.pdb_xy.txt``, ``NAME.pdb_xz.txt`` and ``NAME.pdb_yz.txt``.
    Files without any alpha carbon, and files produced by an earlier run, are
    skipped.
    """
    if directory is None:
        directory = os.getcwd()
    print(directory)
    for filename in os.listdir(directory):
        print(filename)
        if not filename.endswith(".txt") or filename.endswith(OUTPUT_SUFFIXES):
            continue
        name = filename[0:4]
        with open(os.path.join(directory, filename), "r") as work_data:
            residues = parse_alpha_carbons(work_data)
        if not residues:
            print("no alpha carbons found in " + filename)
            continue
        for label, axis_a, axis_b in PLANES:
            header = HEADER_PREFIX + "," + label[0] + "," + label[1]
            rows = projection_rows(residues, axis_a, axis_b)
            out_path = os.path.join(directory, name + ".pdb_" + label + ".txt")
            # "with" guarantees the output is flushed and closed.
            with open(out_path, "w") as out_file:
                out_file.write("\n".join([header] + rows))


if __name__ == "__main__":
    main()

现在我被这些 CSV 文件难住了,因为我无法复现 Tavanaei 上传到他 GitHub 上的 Lua 代码。我希望有人能就这些 CSV 文件还可以改作何用给我一些建议,例如用于其他类型的机器学习。Tavanaei 的代码是用 Lua 编写的,对我来说太复杂、难以读懂,而且代码中使用的路径写得非常含糊。因此,我一直在考虑如何以更简单的方式重新实现 CNN,以便应用于我的 CSV 文件。我的计划是比较与阿尔茨海默病和帕金森病相关的蛋白质,找出两者之间的相似之处。如果您有任何建议,我将不胜感激。

0 个答案:

没有答案