我正在尝试根据一个 pandas 数据框(DataFrame)绘制出下面这样的图表。
我不确定该如何将 seaborn 和 pandas 结合起来使用。
这是我要使用的数据框:
import numpy as np
import pandas as pd

# Three numeric columns with different locations and spreads.
data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                     'b': np.random.randn(1000),
                     'c': np.random.rand(1000) + 10},
                    columns=['a', 'b', 'c'])
# Blank out a different number of rows per column so each column ends up with
# a different count of valid points (900 / 200 / 1000).
# Assign through .loc instead of chained indexing (data.a[...] = ...), which
# may write to a temporary copy and leave `data` untouched; use np.nan because
# the np.NaN alias was removed in NumPy 2.0.
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan
请注意,由于各列的有效数据点数量明显不同,且各个分布的“y 轴尺度”也不一样,因此需要对频率(即直方图的高度)进行标准化。
# Quick look at `data` using the histogram plot built into pandas.
# NOTE(review): the trailing semicolon is presumably there to hide the echoed
# return value in a notebook; it has no effect in a plain script.
data.plot.hist();
下面是 seaborn 的示例代码,它生成了我最初参考的那张图。
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# White theme with a fully transparent axes background (RGBA alpha = 0).
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 150 normal draws from a seeded generator, labelled
# A, B, C in rotation (50 of each).
rs = np.random.RandomState(1979)
x = rs.randn(150)
groups = np.tile(list("ABC"), 50)
df = pd.DataFrame(dict(x=x, g=groups))

# Initialize the FacetGrid object: one short, wide row per value of "g",
# each coloured from the cubehelix palette.
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=5, height=1, palette=pal)

# Draw the densities in a few steps: a filled KDE, a white KDE outline on
# top of it, and a horizontal baseline at y=0.
# NOTE(review): newer seaborn releases deprecate `shade`/`bw` in favour of
# `fill`/`bw_adjust` -- confirm against the installed version before changing.
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write *label* in bold near the left edge of the current axes.

    The text is positioned in axes coordinates (x=0, y=0.3), so it lands in
    the same relative spot regardless of the data limits.

    Parameters
    ----------
    x
        Unused; present only so the function matches the call signature used
        when it is mapped over a grid with a data column.
    color
        Colour applied to the text.
    label
        The string to draw.
    """
    text_style = dict(fontweight="bold", color=color,
                      ha="left", va="center")
    current_axes = plt.gca()
    current_axes.text(0, .3, label,
                      transform=current_axes.transAxes, **text_style)
# Run the `label` helper once per facet
g.map(label, "x")
# Set the subplots to overlap (negative hspace shrinks the gap between rows
# below zero)
g.fig.subplots_adjust(hspace=-.0025)
# Remove axes details that don't play well with overlap:
# facet titles, y-axis ticks, and the bottom/left spines
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
答案 0 :(得分:2)
下面是一个用于创建 KDE 图网格(即“joyplot”)的函数,数据框的每一列对应一个子图。
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
def joyplot_from_dataframe(data, cmap=None):
    """Draw a vertical stack of KDE plots ("joyplot"), one row per column.

    Each column of *data* gets its own subplot. All subplots share one x axis
    but keep independent y scales, so every density fills its own row no
    matter how many valid points the column has.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric columns. NaN entries are dropped per column before the
        density is estimated.
    cmap : matplotlib colormap or str, optional
        Colormap (or registered colormap name) the row colours are sampled
        from. Defaults to ``"Blues"``.

    Returns
    -------
    fig : matplotlib.figure.Figure
    axes : numpy.ndarray
        1-D array of Axes, one per column, in column order.

    Raises
    ------
    ValueError
        If *data* has no columns.
    """
    n = len(data.columns)
    if n == 0:
        raise ValueError("data must contain at least one column")

    # Common evaluation grid: the overall data range widened by 20 % on each
    # side so the tails of the densities are not cut off.
    mi, ma = np.nanmin(data.values), np.nanmax(data.values)
    pad = (ma - mi) / 5
    x = np.linspace(mi - pad, ma + pad, 1000)

    # plt.get_cmap accepts both names and Colormap instances and exists in
    # all Matplotlib versions; plt.cm.get_cmap was removed in Matplotlib 3.9.
    cmap = plt.get_cmap("Blues" if cmap is None else cmap)
    # Start at 0.2 to skip the palest end of the colormap.
    colors = cmap(np.linspace(.2, 1, n))

    # squeeze=False always yields a 2-D array, so a single-column frame works
    # too (the default would return a bare Axes that cannot be iterated).
    fig, axes = plt.subplots(nrows=n, sharex=True, squeeze=False)
    axes = axes[:, 0]

    for c, ax, color in zip(data.columns, axes, colors):
        y = data[c].values
        y = y[~np.isnan(y)]
        kde = gaussian_kde(y)
        ax.fill_between(x, kde(x), color=color)

        # Strip everything except a coloured baseline under each density.
        ax.yaxis.set_visible(False)
        for spine in ["left", "right", "top"]:
            ax.spines[spine].set_visible(False)
        ax.spines["bottom"].set_linewidth(2)
        ax.spines["bottom"].set_color(color)
        ax.margins(y=0)
        ax.tick_params(bottom=False)

    return fig, axes
用法如下:
import pandas as pd

# Three numeric columns with different locations and spreads.
data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                     'b': np.random.randn(1000),
                     'c': np.random.rand(1000) + 10},
                    columns=['a', 'b', 'c'])
# Blank out a different number of rows per column. Assign through .loc rather
# than chained indexing (data.a[...] = ...), which may write to a temporary
# copy; use np.nan because the np.NaN alias was removed in NumPy 2.0.
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan

joyplot_from_dataframe(data)
plt.show()