我想为数据框中指定的列绘制条形图,并按另一列的条件(此处为 score < 5)将数据分成两组。下面的代码可以实现这一点,但肯定还有更符合数据框(pandas)风格的写法吧?比如类似 df.makeCoolBarPlots() 这样的方法?
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
# Example data: seven rows with a score, an age and an IQ column.
df = pd.DataFrame({
    'score': [1, 6, 2, 3, 1, 9, 5],
    'age': [10, 16, 33, 12, 8, 24, 18],
    'IQ': [89, 120, 88, 94, 103, 110, 102],
})

# Split the rows by the criterion on 'score': 5 or more passes, below 5 fails.
df_pass = df[df['score'] >= 5]
df_fail = df[df['score'] < 5]

fieldsOfInterest = ['age', 'IQ']
ind = np.arange(2)  # x positions of the two bars (pass, fail)

# One figure per field: the mean of each group as a bar, the group's standard
# deviation as the error bar, and the t-test p-value between groups as title.
for fieldOfInterest in fieldsOfInterest:
    # Select the column once per group instead of re-indexing for every use.
    passed = df_pass[fieldOfInterest]
    failed = df_fail[fieldOfInterest]

    plt.figure()
    plt.bar(ind, [passed.mean(), failed.mean()], yerr=[passed.std(), failed.std()])
    stat, p = stats.ttest_ind(passed, failed)
    plt.title("p={:0.3f}".format(p))
    plt.xticks(ind, ('pass', 'fail'))
    plt.ylabel(fieldOfInterest)
    plt.show()
答案 0 :(得分:0)
# Label every row by the score criterion. np.where expresses the two-way split
# directly. np.select without an explicit default falls back to the integer 0,
# which recent NumPy versions refuse to combine with string choices.
# NOTE: a missing score compares False and would therefore be labelled "fail".
df["group"] = np.where(df["score"].ge(5), "pass", "fail")

# Group the rows by that label.
gb = df.groupby("group")

for col in ["age", "IQ"]:
    # p-value of the t-test between the groups, plus per-group mean and std.
    _, p = stats.ttest_ind(*[grp[col] for _, grp in gb])
    means = gb[col].mean()
    errors = gb[col].std()

    # Plot with pandas' own bar plot, using the std as error bars.
    fig, ax = plt.subplots()
    means.plot.bar(yerr=errors, ax=ax)
    ax.set(ylabel=col, title="p={:.3f}".format(p))
结果: