Grouped Barplot Python

时间:2017-09-06 12:30:08

标签: python pandas seaborn

我需要帮助:我想制作分组条形图,但没有成功。我先读取了文件夹中所有的 tab 文件,然后想用其中的两列('special_name' 和 'ab')作为 x 轴。'special_name' 列中只有 3 个不同的名称,但每个名称下都有 4 个或 7 个 'ab' 值。

Example tab file
Names  names_id  first second special_name   ab
lili      1         a      b    Tm           a
Katrin    2         c      d    Tm           u
Paul      3         e      f    ui           f
bob       4         g      h    ui           b
tina      5         i      j    ac           a

y轴应该告诉我每个“ab”的计数频率。

%matplotlib inline
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import sys
import os
import glob
import seaborn as sns
import pandas as pd
import ggplot
from ggplot import aes

# Apply the seaborn "whitegrid" style with the "pastel" palette to all plots.
sns.set(style= "whitegrid", palette="pastel", color_codes=True )

# Input folder holding the tab-separated files, and the folder for the PNGs.
tab_folder = 'myData'
out_folder ='myData/plots'
# Every file in tab_folder whose name starts with "R" and ends with ".tab".
tab = glob.glob('%s/R*.tab'%(tab_folder))

# Read each matched file and count the rows per (special_name, ab) pair.
for i, tab_file in enumerate(tab):
    folder,file_name=os.path.split(tab_file)
    # Sample id: file name without its 4-character ".tab" suffix and with
    # any "DD" removed.
    s_id=file_name[:-4].replace('DD','')
    df=pd.DataFrame.from_csv(tab_file, sep='\t')
    # One row per (special_name, ab) combination, with its row count in 'count'.
    df_2 = df.groupby(['special_name','ab']).size().reset_index(name='count')

    # Attempt at a grouped bar plot.
    # NOTE(review): `ggplot` is bound to the module by `import ggplot`, and
    # `geom_bar` is not imported anywhere in the visible snippet; the result
    # of this expression is also discarded rather than drawn or saved.
    ggplot(df_2, aes(x=('special_name'), y=('count'), fill=('ab'))) + geom_bar(stat='identity',position='dodge')

    # NOTE(review): `ax` is not defined in the visible snippet -- presumably
    # created in an earlier notebook cell; confirm.
    ax.set_title(s_id)
    ax.set_xlabel('')
    ax.set_ylabel('')

# NOTE(review): this runs once, after the loop, so only the current pyplot
# figure is saved, under the name of the last file processed.
png_t = '%s/%s.b.png'%(out_folder,s_id)
plt.savefig(png_t, dpi = 500)

我的代码不再报错了,但我得到的只是空白的网格图……哪里出了问题?

当我尝试ggplot.ggplot()时:

AttributeError:                                 Traceback (most recent call last)
<ipython-input-33-03dc98f5428a> in <module>()
    100 
    101     #barplots 
--> 102     ggplot.ggplot(df_2, aes(x=('special_name'), y=('count'), fill=('ab'))) + geom_bar(stat='identity',position='dodge')

AttributeError: type object 'ggplot' has no attribute 'ggplot'

1 个答案:

答案 0 :(得分:0)

我发现了我的错误! 我不需要ggplot。我只需要修改我的df_2

%matplotlib inline
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import sys
import os
import glob
import seaborn as sns
import pandas as pd
import ggplot
from ggplot import aes

# Apply the seaborn "whitegrid" style with the "pastel" palette to all plots.
# NOTE(review): color_codes=True presumably remaps matplotlib's one-letter
# color codes ('r', 'g', 'b', ...) to this palette -- confirm in the seaborn docs.
sns.set(style= "whitegrid", palette="pastel", color_codes=True )

# Input folder holding the tab-separated files, and the folder for the PNGs.
tab_folder = 'myData'
out_folder = 'myData/plots'
# Every file in tab_folder whose name starts with "R" and ends with ".tab".
tab = glob.glob('%s/R*.tab' % tab_folder)


def count_ab_by_special_name(df):
    """Return a ``special_name`` x ``ab`` table of row counts.

    Args:
        df: DataFrame containing the columns ``special_name`` and ``ab``.

    Returns:
        DataFrame indexed by ``special_name`` with one column per distinct
        ``ab`` value. Each cell holds the number of rows of ``df`` with that
        combination; combinations that never occur are NaN.
    """
    counts = (
        df.groupby(['special_name', 'ab'])
        .size()
        .reset_index(name='count')
    )
    return pd.pivot_table(
        counts, index='special_name', columns='ab', values='count'
    )


# Draw and save one grouped horizontal bar chart per input file.
for tab_file in tab:
    file_name = os.path.basename(tab_file)
    # Sample id: file name without its ".tab" suffix and with any "DD" removed.
    s_id = file_name[:-4].replace('DD', '')
    # DataFrame.from_csv was removed from pandas; these read_csv arguments
    # reproduce its defaults (first column as index, dates parsed).
    df = pd.read_csv(tab_file, sep='\t', index_col=0, parse_dates=True)

    # A fresh figure per file, so plots from different files never overlap.
    fig, ax = plt.subplots()
    table = count_ab_by_special_name(df)
    table.plot(kind='barh', color=['r', 'g', 'b', 'k', 'm', 'y'], ax=ax)

    ax.set_title(s_id)
    ax.set_xlabel('')
    ax.set_ylabel('')

    # Save inside the loop so every file gets its own PNG, then release
    # the figure to keep memory flat across many files.
    png_t = '%s/%s.b.png' % (out_folder, s_id)
    fig.savefig(png_t, dpi=500)
    plt.close(fig)