
时间:2015-08-30 08:17:31

标签: python pandas matplotlib






from rdkit.Chem import AllChem as Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import PandasTools
from rdkit.Chem import Draw
import pybel
import pandas as pd
import matplotlib
import matplotlib.pyplot as p
import matplotlib.ticker as plticker
%matplotlib inline 

#commandline application to calculate properties
output = !/Applications/ChemAxon/MarvinBeans/bin/evaluate /Users/username/Desktop/SampleFiles/Fragments.sdf -g -e "field('IDNUMBER'); molString('smiles'); logp(); logd('7.4'); apka('1'); bpka('1'); atomCount(); mass(); acceptorcount(); donorcount(); topologicalPolarSurfaceArea(); rotatablebondcount(); refractivity(); ASAHydrophobic('7.4'); ASAPolar('7.4'); atomCount()-atomCount('1');aromaticAtomCount()/(atomCount()-atomCount('1'))"

[line.split(';') for line in output]

cols = ['ID', 'smiles', 'logP', 'logD', 'apKa', 'bpKa', 'atomCount', 'mass', 'HBA', 'HBD', 'TPSA', 'RBC', 'MR', 'ASAh', 'ASAp', 'HAC', 'FractionAromatic']
df = pd.DataFrame([line.split(';') for line in output], columns=cols)
df = df.convert_objects(convert_numeric=True)

# Pull the calculated columns out of the dataframe as individual Series and
# derive the upper bounds / ranges that the categorical plots rely on.

myLogP = df['logP']
myLogD = df['logD']
myMass = df['mass']
myTPSA = df['TPSA']
myRBC = df['RBC']
myRBCmax = max(myRBC) +1
myHBA = df['HBA']
myHBAmax = max(myHBA) +1
myHBD = df['HBD']
myHBDmax = max(myHBD) +1
myHAC = df['HAC']
# NOTE: despite its name this is the full range min(HAC)..max(HAC), not a maximum.
myHACmax= range(min(myHAC), max(myHAC) + 1)

myFraromatic = df['FractionAromatic']

# pyplot is imported under the alias `p` at the top of the file, so `plt`
# would be an undefined name here.
fig, axes = p.subplots(nrows=5, ncols=4)
# A 5x4 grid yields 20 Axes, so exactly 20 names are needed (ax16 was missing,
# which made the unpacking fail with a ValueError).
(ax0, ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9,
 ax10, ax11, ax12, ax13, ax14, ax15, ax16, ax17, ax18, ax19) = axes.flat
axis_font = {'fontname':'Arial', 'size':'14'}
title_font = {'fontname':'Arial', 'size':'14', 'color' :'blue'}

# Tick locator with a spacing of 1.0.
loc = plticker.MultipleLocator(base=1.0)

ax0.hist(myLogP, histtype='bar')
ax0.set_title('LogP', title_font)
ax0.set_xlabel('Range of LogP', axis_font)

ax1.hist(myLogD, histtype='bar')
ax1.set_title('LogD', title_font)
ax1.set_xlabel('Range of LogD', axis_font)
ax1.set_ylabel('Count', axis_font)

ax2.hist(myMass, histtype='bar', color = 'red')
ax2.set_title('Mass', title_font)
ax2.set_xlabel('Range of MWt', axis_font)
ax2.set_ylabel('Count', axis_font)

ax3.hist(myTPSA,  histtype='bar', color = 'yellow')
ax3.set_title('TPSA', title_font)
ax3.set_xlabel('Range of TPSA', axis_font)
ax3.set_ylabel('Count', axis_font)


#ax8 'AZBN' is a categorical text field 

ax9.hist(myFraromatic, bins= 10, histtype='bar')
ax9.set_title('Aromatic', title_font)
ax9.set_xlabel('Fraction of Aromatic atoms', axis_font)
ax9.set_ylabel('Count', axis_font)

#further categorical plots

# Enlarge the figure so the 20 subplots are readable.
fig.set_size_inches(20, 15)



1 个答案:

答案 0 :(得分:0)

您应该贴出自己尝试过的代码和一些示例数据,否则很难判断哪种做法最好。不过,我认为您可以试试下面这种方法:直接使用 matplotlib API 而不是完全依赖 pandas,这样可以更好地控制每个子图中的内容:

from matplotlib import pyplot as plt

# Create a 5x4 grid of subplots; `axes` is a numpy array of pyplot Axes.
fig, axes = plt.subplots(5, 4)
# Flatten the grid and wrap it in an iterator so each column can take the
# next free Axes in turn.
axes = iter(axes.ravel())

# Split the columns by dtype: categoricals get bar charts of their value
# counts, everything else is drawn with the default pandas plot.
categoricals = df.columns[df.dtypes == 'category']
numeric = df.columns[df.dtypes != 'category']

for col in categoricals:
    # Use the built-in next(): iterators have no .next() method on Python 3.
    ax = df[col].value_counts().plot(kind='bar', ax=next(axes))
    # do other stuff with ax, formatting etc.
    # the plot method returns the axis used for the plot for further manipulation

for col in numeric:
    ax = df[col].plot(ax=next(axes))
    # etc.
