接着我之前的问题(previous question),请看以下代码:
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
import seaborn as sns
# Draw three columns of standard-normal samples, plot a histogram of one
# column and overlay the PDF of a normal distribution fitted to that column.
length = 1000
np.random.seed(100)  # fixed seed so the figure is reproducible
dictOne = {
    "A": np.random.randn(length),
    "B": np.random.randn(length),
    "C": np.random.randn(length),
}
df2 = pd.DataFrame(dictOne)
column = 'B'
fig, ax = plt.subplots()
# density=False draws raw counts per bin, while the curve plotted below is a
# probability density, so the two are on different vertical scales.
df2[df2[column] > -999].hist(column, alpha=0.5, density=False, ax=ax, bins=100)
param = stats.norm.fit(df2[column].dropna())  # Fit a normal distribution to the data
# Use the Series methods rather than agg([min, max]): passing the Python
# builtins to agg is deprecated in recent pandas releases.
x = np.linspace(df2[column].min(), df2[column].max(), 1000)  # x-values
# Evaluate the fitted PDF on the x grid itself so both tuple members line up
# point for point (previously the PDF was evaluated at the raw samples).
pdf_fitted = (x, stats.norm.pdf(x, *param))
plt.plot(*pdf_fitted, color='r')
上述代码会产生如下两张图:
第一张图:当 density = False 时
和
第二张图:当 density = True
时。
我的问题是,如何将第一张图中的直方图与第二张图中的曲线相结合,以显示随机数每个值的真实计数?我需要使用两种不同的y轴,一种用于计数,另一种用于pdf吗?
答案 0 :(得分:0)
您可以将密度(pdf)乘以直方图各分箱计数的总和,再乘以分箱宽度(bin width);这样就能得到与计数相同量级的绝对值。
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np; np.random.seed(100)
# Plot a count histogram of one column and overlay the fitted normal PDF,
# rescaled from density units to counts so both share a single y-axis.
length = 1000
df2 = pd.DataFrame(np.random.randn(length, 3), columns=list("ABC"))
column = 'B'
df = df2[df2[column] > -999]
# Clean the column once so the histogram, the fit and the x range are all
# computed from exactly the same sample.
values = df[column].dropna()
fig, ax = plt.subplots()
# h holds the per-bin counts and edges the bin boundaries.
h, edges, _ = ax.hist(values, alpha=0.5, density=False, bins=100)
param = stats.norm.fit(values)  # Fit a normal distribution to the data
# Series.min()/max() instead of agg([min, max]): passing the Python builtins
# to agg is deprecated in recent pandas releases.
x = np.linspace(values.min(), values.max(), 1000)  # x-values
binwidth = np.diff(edges).mean()
# density * (total count) * (bin width) converts the PDF to expected counts
# per bin, which is the scale of the histogram bars.
ax.plot(x, stats.norm.pdf(x, *param) * h.sum() * binwidth, color='r')
plt.show()