我通过将类别转换为数字来绘制分类数据(灵感来自另一个 Stack Overflow 问题)。是否可以在 Bokeh 中实现类似的效果(类似这个示例)?seaborn(sns)的主要优点是只需对数据帧做最少的处理。从上面的示例来看,如果我自己编写一个函数,为坐标轴添加标签会是一个相当大的问题。我最初的目标是创建一个热图:一个轴取数据帧的索引,另一个轴取数据帧的列,如图所示。
我目前的代码:
import gzip, sys, getopt, shlex, subprocess, os
from pylatex import Document, Section, Subsection, Table, Math, TikZ, Axis, Plot, Figure, Package
from pylatex.utils import italic, escape_latex
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib.patches as mpatches
from pylatex import Document, Section, Figure, SubFigure, NoEscape
import os
import pandas as pd
import seaborn as sns
# Render a sample-by-module PASS/WARN/FAIL heatmap from 'postq.csv' and save
# it as 'output.pdf'.
df = pd.read_csv('postq.csv')

# Pivot the long table (one row per sample/module pair) into a wide one:
# one row per sample, one column per module.  If a (sample, tot.seq, module)
# group holds several rows, their statuses are joined with ', '.
df = (
    df.groupby(['sample', 'tot.seq', 'module'])['status']
    .apply(', '.join)
    .unstack()
    .reset_index()
    .rename_axis(None, axis=1)
)

# Numeric codes for the categorical statuses; the heatmap colours follow
# these codes, so the legend below must be built in the same (value) order.
mapping = {'PASS': 1, 'WARN': 2, 'FAIL': 3}
module_columns = [
    'Basic Statistics',
    'Per base sequence quality',
    'Per tile sequence quality',
    'Per sequence quality scores',
    'Per base sequence content',
    'Per sequence GC content',
    'Per base N content',
    'Sequence Length Distribution',
    'Sequence Duplication Levels',
    'Overrepresented sequences',
    'Adapter Content',
    'Kmer Content',
]
df = df.replace({column: mapping for column in module_columns})

# Keyword form: the positional ``axis`` argument of ``drop`` was removed in
# pandas 2.0.
df = df.drop(columns='tot.seq')
df = df.set_index('sample')

# ``replace`` may leave the columns as object dtype (depending on the pandas
# version); the heatmap needs numeric data.  Float keeps NaN for any module
# missing from a sample.
df = df.astype(float)

f, ax = plt.subplots()

# Pin the colour scale to the full range of status codes so the cell colours
# match the legend even when the data lacks one of the statuses.
hm = sns.heatmap(
    data=df,
    cmap="Pastel2",
    ax=ax,
    cbar=False,
    vmin=min(mapping.values()),
    vmax=max(mapping.values()),
)

# Shrink the heatmap horizontally to leave room for the legend on the right.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.7, box.height])

# A separate, invisible axes that only hosts the legend.
legend_ax = f.add_axes([.7, .5, 1, .1])
legend_ax.axis('off')

# One colour per status code, sampled at evenly spaced points of the colormap
# (the codes 1, 2, 3 are evenly spaced between vmin and vmax).  Named
# ``legend_colors`` so it does not shadow the ``matplotlib.colors`` import.
legend_colors = plt.cm.Pastel2(np.linspace(0, 1, len(mapping)))
patches = [mpatches.Patch(facecolor=c, edgecolor=c) for c in legend_colors]

# Order labels by their numeric code (PASS, WARN, FAIL), not alphabetically,
# so each label lines up with the colour of its code.
labels = sorted(mapping, key=mapping.get)
legend = legend_ax.legend(patches, labels, handlelength=0.8, loc='lower left')
for t in legend.get_texts():
    t.set_ha("left")

f.savefig('output.pdf', bbox_inches='tight')
用作输入的csv文件片段:
"sample","module","status","tot.seq","seq.length","pct.gc","pct.dup"
"ERR435936_cleaned_1","Basic Statistics","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Per base sequence quality","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Per tile sequence quality","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Per sequence quality scores","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Per base sequence content","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Per sequence GC content","WARN","19823396","62",51,51.06
"ERR435936_cleaned_1","Per base N content","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Sequence Length Distribution","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Sequence Duplication Levels","FAIL","19823396","62",51,51.06
"ERR435936_cleaned_1","Overrepresented sequences","WARN","19823396","62",51,51.06
"ERR435936_cleaned_1","Adapter Content","PASS","19823396","62",51,51.06
"ERR435936_cleaned_1","Kmer Content","FAIL","19823396","62",51,51.06
"ERR435936_cleaned_2","Basic Statistics","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Per base sequence quality","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Per tile sequence quality","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Per sequence quality scores","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Per base sequence content","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Per sequence GC content","FAIL","19823396","62",51,43.33
"ERR435936_cleaned_2","Per base N content","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Sequence Length Distribution","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Sequence Duplication Levels","WARN","19823396","62",51,43.33
"ERR435936_cleaned_2","Overrepresented sequences","FAIL","19823396","62",51,43.33
"ERR435936_cleaned_2","Adapter Content","PASS","19823396","62",51,43.33
"ERR435936_cleaned_2","Kmer Content","FAIL","19823396","62",51,43.33
"ERR435937_cleaned_1","Basic Statistics","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Per base sequence quality","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Per tile sequence quality","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Per sequence quality scores","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Per base sequence content","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Per sequence GC content","WARN","23937659","62",50,48.53
"ERR435937_cleaned_1","Per base N content","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Sequence Length Distribution","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Sequence Duplication Levels","WARN","23937659","62",50,48.53
"ERR435937_cleaned_1","Overrepresented sequences","WARN","23937659","62",50,48.53
"ERR435937_cleaned_1","Adapter Content","PASS","23937659","62",50,48.53
"ERR435937_cleaned_1","Kmer Content","FAIL","23937659","62",50,48.53