pdb文件中两个原子之间的距离计算

时间:2014-02-04 00:04:48

标签: awk distance

我有一个pdb文件,如下所示。

ATOM     76  N   ARG     6      -6.350  -3.503   0.748 -0.3479  1.5500
ATOM     77  H   ARG     6      -6.002  -3.513   1.699  0.2747  1.3000
ATOM     78  CA  ARG     6      -7.756  -3.896   0.534 -0.2637  1.7000
ATOM     79  HA  ARG     6      -8.024  -4.587   1.334  0.1560  1.3000
ATOM     80  CB  ARG     6      -8.589  -2.608   0.737 -0.0007  1.7000
ATOM     81  HB2 ARG     6      -8.208  -2.089   1.620  0.0327  1.3000
ATOM     82  HB3 ARG     6      -8.439  -1.940  -0.114  0.0327  1.3000
ATOM     83  CG  ARG     6     -10.086  -2.834   0.970  0.0390  1.7000
ATOM     84  HG2 ARG     6     -10.274  -3.897   1.124  0.0285  1.3000
ATOM     85  HG3 ARG     6     -10.378  -2.310   1.876  0.0285  1.3000
ATOM     86  CD  ARG     6     -10.953  -2.295  -0.177  0.0486  1.7000
ATOM     87  HD2 ARG     6     -10.985  -1.204  -0.114  0.0687  1.3000
ATOM     88  HD3 ARG     6     -10.524  -2.559  -1.143  0.0687  1.3000
ATOM     89  NE  ARG     6     -12.315  -2.833  -0.075 -0.5295  1.5500
ATOM     90  HE  ARG     6     -13.010  -2.260   0.390  0.3456  1.3000
ATOM     91  CZ  ARG     6     -12.694  -4.065  -0.346  0.8076  1.7000
ATOM     92  NH1 ARG     6     -13.877  -4.453   0.009 -0.8627  1.5500
ATOM     93 1HH1 ARG     6     -14.040  -5.443   0.120  0.4478  1.3000
ATOM     94 2HH1 ARG     6     -14.432  -3.807   0.560  0.4478  1.3000
ATOM     95  NH2 ARG     6     -11.920  -4.944  -0.916 -0.8627  1.5500
ATOM     96 1HH2 ARG     6     -12.283  -5.850  -1.134  0.4478  1.3000
ATOM     97 2HH2 ARG     6     -10.979  -4.672  -1.190  0.4478  1.3000
ATOM     98  C   ARG     6      -8.062  -4.673  -0.777  0.7341  1.7000
ATOM     99  O   ARG     6      -9.133  -4.539  -1.331 -0.5894  1.5000
......
ATOM    172  N   S1P    12     -14.038  -6.148   4.609 -0.4157  1.5500
ATOM    173  H   S1P    12     -13.159  -6.030   4.131  0.2719  1.3000
ATOM    174  CA  S1P    12     -14.531  -4.998   5.067  1.8687  1.7000
ATOM    175  HA  S1P    12     -15.438  -5.137   5.650 -0.3423  1.3000
ATOM    176  CB  S1P    12     -13.509  -4.203   5.988 -0.2136  1.7000
ATOM    177  2HB S1P    12     -14.138  -4.223   6.879  0.0612  1.3000
ATOM    178  3HB S1P    12     -12.848  -5.033   6.243  0.0612  1.3000
ATOM    179  OG  S1P    12     -12.482  -3.545   5.746 -0.5829  1.5000
ATOM    180  P   S1P    12     -11.815  -3.403   4.409  1.4551  1.8500
ATOM    181  O3P S1P    12     -10.346  -3.231   4.357 -0.8194  1.5000
ATOM    182  O1P S1P    12     -11.897  -4.712   3.815 -0.9362  1.5000
ATOM    183  H1P S1P    12     -10.980  -4.847   3.540  0.5898  0.8000
ATOM    184  O2P S1P    12     -12.533  -2.620   3.418 -0.7440  1.5000
ATOM    185  H2P S1P    12     -13.423  -2.977   3.402  0.5973  0.8000
ATOM    186  C   S1P    12     -14.940  -4.030   3.844 -0.6568  1.7000
ATOM    187  O   S1P    12     -14.809  -4.386   2.691 -0.1942  1.5000

我想做的是想找到文件中两个原子之间的距离。

我感兴趣的两个原子是

ATOM     91  CZ  ARG     6     -12.694  -4.065  -0.346  0.8076  1.7000

ATOM    180  P   S1P    12     -11.815  -3.403   4.409  1.4551  1.8500

例如,行中的x y z坐标为-11.815 -3.403 4.409

我环顾四周,但只有我能找到的脚本是计算所有原子的距离或两个不同文件之间的距离。

先谢谢你。

2 个答案:

答案 0 :(得分:2)

你可以从这里开始:

 awk '$2=="91"{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
      $2=="180" {x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
      END{print sqrt((x1-x2)^2 + (y1-y2)^2 + (z1-z2)^2)}' file    # calculate the distance.

4.88067

不确定为什么^在GAWK 3.1.5中不受支持,这个怎么样?

 awk '$2=="91"{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
      $2=="180" {x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
      END{print sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2))}' file    # calculate the distance.

答案 1 :(得分:0)

这是一个包含建议的awk命令的完整脚本。将pdb文件更改为您的下面,并将其放在与pdb文件相同的目录中。

#!/usr/bin/env bash

echo " "
echo "Use one letter abbreviations. Case doesn't matter." 
echo "Example: A 17 CA or n 162 cg"

echo " - - - - - - - - - - - - - - - - - -"
#------------First Selection------------

read -e -p "What first atom? " sel1
echo " "

# echo $sel1
sel1caps=${sel1^^}
# echo "sel1caps="$sel1caps

arr1=($sel1caps)
# echo "arr1[0]= "${arr1[0]}
# echo "arr1[1]= "${arr1[1]}
# echo "arr1[2]= "${arr1[2]}

#To convert one to three letters

if [ ${arr1[0]} = A ] ; then
    SF1=ALA
elif [ ${arr1[0]} = H ] ; then
    SF1=HIS
elif [ ${arr1[0]} = R ] ; then
    SF1=ARG
elif [ ${arr1[0]} = K ] ; then
    SF1=LYS
elif [ ${arr1[0]} = I ] ; then
    SF1=ILE
elif [ ${arr1[0]} = F ] ; then
    SF1=PHE 
elif [ ${arr1[0]} = L ] ; then
    SF1=LEU
elif [ ${arr1[0]} = W ] ; then
    SF1=TRP
elif [ ${arr1[0]} = M ] ; then
    SF1=MET
elif [ ${arr1[0]} = P ] ; then
    SF1=PRO 
elif [ ${arr1[0]} = C ] ; then
    SF1=CYS 
elif [ ${arr1[0]} = N ] ; then
    SF1=ASN
elif [ ${arr1[0]} = V ] ; then
    SF1=VAL
elif [ ${arr1[0]} = G ] ; then
    SF1=GLY 
elif [ ${arr1[0]} = S ] ; then
    SF1=SER 
elif [ ${arr1[0]} = Q ] ; then
    SF1=GLN 
elif [ ${arr1[0]} = Y ] ; then
    SF1=TYR 
elif [ ${arr1[0]} = D ] ; then
    SF1=ASP
elif [ ${arr1[0]} = E ] ; then
    SF1=GLU 
elif [ ${arr1[0]} = T ] ; then
    SF1=THR 
else
    echo "use one letter codes"
    echo "exiting"
    exit
fi

# echo "SF1 ="$SF1

#If there is nothing printing for line 1, check the expression for your pdb file. The spaces may need adjustment at the end.
line1=$(grep -E "${arr1[2]} *?${SF1}(.*?) ${arr1[1]}     " 1A23.pdb)
# echo $line1

ar_l1=($line1)
# echo "ar_l1[1]="${ar_l1[1]}

echo " - - - - - - - - - - - - - - - - - -"
#------------Second Selection------------

read -e -p "What second atom? " sel2
echo " "

# echo $sel2

sel2caps=${sel2^^}
# echo "sel2caps="$sel2caps

arr2=($sel2caps)
# echo "arr2[0]= "${arr2[0]}
# echo "arr2[1]= "${arr2[1]}
# echo "arr2[2]= "${arr2[2]}

#To convert one to three letters

if [ ${arr2[0]} = A ] ; then
    SF2=ALA
elif [ ${arr2[0]} = H ] ; then
    SF2=HIS
elif [ ${arr2[0]} = R ] ; then
    SF2=ARG
elif [ ${arr2[0]} = K ] ; then
    SF2=LYS
elif [ ${arr2[0]} = I ] ; then
    SF2=ILE
elif [ ${arr2[0]} = F ] ; then
    SF2=PHE 
elif [ ${arr2[0]} = L ] ; then
    SF2=LEU
elif [ ${arr2[0]} = W ] ; then
    SF2=TRP
elif [ ${arr2[0]} = M ] ; then
    SF2=MET
elif [ ${arr2[0]} = P ] ; then
    SF2=PRO 
elif [ ${arr2[0]} = C ] ; then
    SF2=CYS 
elif [ ${arr2[0]} = N ] ; then
    SF2=ASN
elif [ ${arr2[0]} = V ] ; then
    SF2=VAL
elif [ ${arr2[0]} = G ] ; then
    SF2=GLY 
elif [ ${arr2[0]} = S ] ; then
    SF2=SER 
elif [ ${arr2[0]} = Q ] ; then
    SF2=GLN 
elif [ ${arr2[0]} = Y ] ; then
    SF2=TYR 
elif [ ${arr2[0]} = D ] ; then
    SF2=ASP
elif [ ${arr2[0]} = E ] ; then
    SF2=GLU 
elif [ ${arr2[0]} = T ] ; then
    SF2=THR 
else
    echo "use one letter codes"
    echo "exiting"
    exit
fi

# echo "SF2 ="$SF2


line2=$(grep -E "${arr2[2]} *?${SF2}(.*?) ${arr2[1]}     " 1A23.pdb)
# echo $line2

ar_l2=($line2)
# echo "ar_l2[1]="${ar_l2[1]}
# echo "ar_l2[1]="${ar_l2[1]}

atom1=${ar_l1[1]}
atom2=${ar_l2[1]}
echo " "
echo "==========================="
# 6, 7, 8 are column numbers in the pdb file. 
# If there are multiple molecules it should be 7, 8, 9.
awk '$2=='$atom1'{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
     $2=='$atom2'{x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
     END{print sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2))}' 1A23.pdb    # calculate the distance.

echo "Angstroms"
echo "==========================="