我想做对数正态分布。我使用的是对数x轴。但是,我无法正确地缩放概率密度函数。我在论坛上发现了一篇帖子,找到了我在那里找不到的完整答案(Scaling and fitting to a log-normal distribution using a logarithmic axis in python)。随着参数的变化" s"图形具有不同的形状。是否可以获得明确正确的分布形状?谢谢你的帮助。
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
from math import log
# Import data
data = pd.read_excel('data2018.xlsx')
# Create bins (log)
classes = 16
s=10
bins_log10 = np.logspace(np.log10(data['diff'].min()), np.log10(data['diff'].max()), classes + 1)
bins_log10_s = np.logspace(np.log10(data['diff'].min()), np.log10(data['diff'].max()), (classes + 1) * s)
# Plot histogram
plt.style.use('ggplot')
counts, bins, _ = plt.hist(data['diff'], bins=bins_log10, edgecolor='black', linewidth=1,
label="Histogram")
# Calculation of bin centers and multiplied them
restored = [[d] * int(counts[n]) for n, d in enumerate((bins[1:] + bins[:-1]) / 2)]
# Flatten the result
restored = [item for sublist in restored for item in sublist]
# Calculate of fitting parameters. shape = sigma, log(scale) = mu
shape, loc, scale = stats.lognorm.fit(restored, floc=0)
# Calculate centers and length log_bins
cen_log_bins = (bins_log10[1:] + bins_log10[:-1]) / 2
len_log_bins = (bins_log10[1:] - bins_log10[:-1])
samples_fit_log_cntr = stats.lognorm.pdf(cen_log_bins, shape, loc=loc, scale=scale)
plt.plot(cen_log_bins,samples_fit_log_cntr * len_log_bins * counts.sum(), ls='dashed',label='PDF with centers', linewidth=2)
# Smooth pdf
bins_log10_cntr_s = (bins_log10_s[1:] + bins_log10_s[:-1]) / 2
samples_fit_log_cntr = stats.lognorm.pdf(bins_log10_cntr_s, shape, loc=loc, scale=scale)
bins_log_cntr = bins_log10_s[1:] - bins_log10_s[:-1]
plt.plot(bins_log10_cntr_s, samples_fit_log_cntr * bins_log_cntr * counts.sum() * s,color='blue',label='Smooth PDF with centers', linewidth=2)
plt.title("Fit results: $\mu = %.2f, \sigma$ = %.2f" % (log(scale), shape))
plt.xscale('log')
plt.legend()
plt.tight_layout()
plt.show()