在具有共享x轴的子图中拟合对数正态分布的直方图

时间:2016-07-21 13:50:13

标签: python matplotlib gaussian data-fitting

我有三个长度不同的数组,比如 standx、standy、standz,它们只包含正值。我想以类似于 this plot 的方式绘制它们的直方图分布,即共享 x 轴(参见下面 EDIT 之后的图)。但我希望 x 轴采用对数刻度,并且三个图使用相同的分箱大小(后一个条件暂时可以放宽)。

然后我想在对数空间中用高斯函数拟合这些分布(即对数正态分布)。但不知为何我总是把拟合搞砸,得到的高斯曲线并不能重现分布(它通常比实际分布更平坦,或者出现其他奇怪的行为)。

最后更新:这是我目前得到的结果,拟合曲线并不符合预期。

import pyfits
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.optimize import curve_fit
import pylab as py

def gaussian(x, a, mean, sigma):
    """Evaluate an unnormalised Gaussian bell curve.

    Args:
        x: Point(s) at which to evaluate the curve (scalar or NumPy array).
        a: Peak height, i.e. the value returned at ``x == mean``.
        mean: Position of the peak.
        sigma: Width parameter; only its square enters the formula.

    Returns:
        ``a * exp(-(x - mean)**2 / (2 * sigma**2))``, with the same shape as
        ``x``.
    """
    offset = x - mean
    two_variance = 2 * sigma**2
    return a * np.exp(-(offset**2 / two_variance))

# Question snippet: draw a histogram of `standx` on the top subplot using
# log-spaced bins and overlay three candidate curves. `standx` is defined
# outside this snippet; it is used like a NumPy array (`.min()`, `.max()`).
f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
# Edges of a 100-bin linear histogram; `bins` is not used again in this snippet.
bins = np.histogram(standx, bins = 100)[1]

# Counts over the same 100 linearly spaced bins (recomputed from scratch).
num_1, bins_1  = np.histogram(standx, np.histogram(standx, bins = 100)[1])
# 100 log-spaced edges spanning the data range, i.e. 99 bins for ax1.hist below.
bins_01 = np.logspace( np.log10( standx.min()  ), np.log10(standx.max() ), 100 )
# 100 linearly spaced abscissae between the first and last log-spaced edge.
x_fit = py.linspace(bins_01[0], bins_01[-1], 100)
# Least-squares fit of `gaussian` to the linear-bin counts, in linear x.
# NOTE(review): nothing here is log-transformed, so this is not a fit in log
# space as the question text intends -- confirm.
popt, pcov = curve_fit(gaussian, x_fit, num_1, p0=[1, np.mean(standx), np.std(standx)])
# Fitted curve sampled at the log-spaced edges; not used again in this snippet.
y_fit = gaussian(bins_01, *popt)
counts, edges, patches = ax1.hist(standx, bins_01, facecolor='blue', alpha=0.5) # bins=100
# Total area of the drawn histogram (sum of bin width * count).
area = sum(np.diff(edges)*counts)

# calculate length of each bin (required for scaling PDF to histogram)
# NOTE(review): the buffer has x_fit.size (100) entries but only counts.size
# (99) are filled, so the last entry stays 0.
bins_log_len = np.zeros( x_fit.size )
for ii in range( counts.size):
    bins_log_len[ii] = edges[ii+1]-edges[ii]

# Create an array of length num_bins containing the center of each bin.
# (`centers` is not used again in this snippet.)
centers = 0.5*(edges[:-1] + edges[1:])
# Make a fit to the samples.
# Log-normal fit with the location parameter held at 0 (floc=0).
shape, loc, scale = stats.lognorm.fit(standx, floc=0)
# get pdf-values for same intervals as histogram
# (evaluated at the 100 bin edges, not at the bin centers)
samples_fit_log = stats.lognorm.pdf( bins_01, shape, loc=loc, scale=scale )
# overplot fitted and scaled PDF into histogram

new_x = np.linspace(np.min(standx), np.max(standx), 100)
# NOTE(review): loc/scale are log-space quantities (np.log(scale), shape) but
# new_x holds untransformed data values -- presumably np.log(new_x) was meant.
pdf = stats.norm.pdf(new_x, loc=np.log(scale), scale=shape)
ax1.plot(new_x, pdf*sum(counts), 'k-')
# Log-normal pdf * bin width * number of samples, plotted at the bin edges.
ax1.plot(bins_01, np.multiply(samples_fit_log,    bins_log_len)*sum(counts), 'g--', label='PDF using histogram bins', linewidth=2 )
ax1.set_xscale('log')
# Normal pdf built from the fitted mean/sigma, scaled by the histogram area;
# the fitted amplitude popt[0] is not used here.
ax1.plot(x_fit, stats.norm.pdf(x_fit, popt[1], popt[2])*area,'r--',linewidth=2,label='Fit: $\mu$=%.3f , $\sigma$=%.3f'%(popt[1],popt[2]) )
ax1.legend(loc='best', frameon=False, prop={'size':15})

# And similar for the ax2, ax3 plots

以下是得到的结果图: enter image description here

即使直方图分布已经降为零,顶部图中拟合高斯曲线的左翼仍然高于零。我在这里做错了什么?

编辑2:以下是重现图中顶部图的数据示例。

[ 101.51694114  118.85313212   91.69531845   90.26532237   90.28341631  105.12906896  262.7891152   486.49418076  161.05389372  163.73690191  166.77302778  222.02090477  126.19058434   86.05609479   88.91853857  193.97923929  239.15533093  106.52112332   60.84555301   88.45753752  123.02881537  124.81366349   27.19285691  104.71247832  146.07595491  106.56780994  118.54743181  182.01683537  155.86798209  212.47778143  154.97126376   91.52202431  112.49359451  164.37672439  173.27686471  209.55033453  224.81250249  117.96784525  241.48515315   90.20163858  242.82090455  195.16391416  157.28399949  236.17969925   52.60286058  153.19747048  220.8835675   160.28413028  183.82540253   78.87306634   87.7934009    29.2185999   129.05052788  105.9416127   104.47906222  303.81976836  231.82568094  234.7277374   133.87567039   84.21624497   83.77612409  100.3160127    66.60196186   93.82032598   98.88012693  235.07139859   44.74506772   90.43154857   97.83903455   56.6958664   87.39357325   80.4975729    44.50914276   80.04352253  122.69702279  181.73622079  114.35305809   72.8500753    92.97985176  167.82181244 23.89170096  277.56842175  120.27960673  188.24283156   87.85287841  104.65666064   55.56738985  113.74158901  160.78501265  144.7793944 146.26352811   72.42916164   81.58934891   82.03941082  140.62209553   98.12528712   27.80664138   60.33766399   69.16640959   76.58721414  129.2027075    92.0469369    58.65284569   74.47532813  272.38073082   25.6830871   120.49394762  153.67903201  108.99329823   73.31596785  158.44313205  108.06319404  149.67655877  100.98970685  183.89276773  259.99372599  146.67345963  151.87414015   56.50412433   68.30454916   87.91449416  136.98367718   85.89559447  146.20528695  137.48987622  119.43868024  127.65423602   95.12679396   74.19057758   37.78992221  124.93823546   76.83988791  156.26098736   52.77456371   74.56009299   72.83196226  126.33366119  114.75476007   71.07015661  203.58334989  115.37482779  112.41575426   52.67146874   
34.41173382   91.43309873   84.56022527   97.52863818   64.69175291   98.82649613  110.33549604  88.73162329   63.33406042   67.50249703   51.80125226   93.77331898 134.86070329  104.78906904  180.36527776   96.10291219   73.86951609 61.85057464   85.4873267    19.49122558   94.90673405   54.70439619   44.11875268   77.00669426  106.03192447   72.14576138   32.88507942 43.71636039   69.09934896  164.33347129  184.71203014   91.85472367 112.8524319   130.65249146   93.07362972   82.04078274   77.55368682 37.01401147   95.27927068   45.84825324   78.97197286   56.51405138 55.6592834   123.75173665  146.25507348  100.94836797  148.27354976 75.66748311  249.42155118  103.90381969   96.81010983   94.77583435 68.77485119   23.38673989   88.64533289   67.76195191  177.0339476 103.49888373  101.77976527  121.43646273  150.67473968  134.80596161 110.43357052  109.31380389   46.4057108   202.95885552  368.77902191 151.79275675   84.19636911   72.80008013   46.03038795   57.46082639 53.41813204  178.14381109  135.27764511   76.58440241   71.31719469 60.19553618   27.25850013   32.44469416   22.57373214   36.81684014 27.31495127   70.17993686  142.8763359   135.88971259   72.97332852 86.41262044   64.57571923  143.87039206  155.27256205  110.78974448 151.27678795  147.15253312   52.58800732  104.08482961   79.94199525 122.04554796  110.58938546   50.32322361   77.34908774  111.69467931 166.33807553   72.91820982   79.81368763   57.5947018   103.52493188 163.77297985  144.02647916  113.26699317  147.49539845   85.72692319 30.22168157  116.74761705   74.51974655   80.10030241   75.37240728 63.55822184  243.37524675  231.9249136   113.26550804   72.43832113 55.14416523  120.54661712  147.10974035   72.92975739   69.32965749 120.95141745   37.68729105   66.24036939  203.91863535   55.8913402 95.73112443   96.24012717  176.62058262   79.31680757  162.42756296 78.39239957  169.11233776  100.20872299   62.93332374   30.91932801 38.07484721   54.18812526  172.53322492   89.52425567   
84.25552157 130.99786509   94.25222458   60.10524134   62.86851886   76.52525125 59.58721735   92.13854969  174.06688353  138.10744182  194.01223744 151.1429943   140.01681885   47.14387464   11.84490967    6.96245414 47.70510341  101.54753328  108.36307095  157.82389186  166.39075768 151.60755493   65.70209698  143.84160067  126.19604257  102.22278009 45.26080872  108.46101698  158.36097588  141.08731145   83.69653695 118.36827104  118.32749524  143.7909344    37.68873242  115.57921476 139.13432742  138.44656014   94.29691791   94.18191872   85.85732773 51.69086583   66.97353588   59.40691006   95.74665069   92.3880327 75.95646049   18.87321191   54.9681136   114.54996764  131.89699216 123.48381482   72.87593216  139.98739954  122.6154045   143.29503576 271.88908663  262.73039299  155.66868313  101.36700756  216.940961 84.36613486   74.54262361  170.46092396   74.96294713   80.65423117 123.18869993   90.12445866   63.49877742  118.44434098  308.95279788 255.71401823  162.75657523  153.1426693    18.39821795   13.24170647 112.97427259  220.2135291   102.58993152   43.24075783   54.34572251 106.78667036  113.02930818   84.60049337  125.86238265   37.77423088 59.49255685  118.06299299  113.96271631   24.43862174   57.94269235 50.87677692  116.38177017  177.47487286  110.86615691  108.23451165 170.39527188  326.17663873  183.0187635    91.91273324  101.3131493 35.39369149  122.47551828  148.65749349   95.25557961   57.29064772 70.35810775   69.1915958    81.80452845  125.35745323   71.86708276 109.91184751   93.73739808   98.42700723   76.31195397   95.91546147 177.6087925   170.84268012   82.02914243   93.76613621   78.39962097 104.58703334   36.59546855  116.05663747  116.3494942    68.79781642 109.93397594  151.25008586  172.46504215   85.93646199   51.43955677 42.28647472   66.93113746   60.77211697   96.28259636   82.22735049 49.54423262  178.94159839   93.76859479   45.54744672   94.4599803 71.19930623  104.09904187   75.79761794   69.93849545  130.88921733 
126.67404755   81.1833829    62.33448081   84.5987729   152.13563736 96.85621001  276.75452386  139.3158367   171.07567204  173.5501148 148.58205472   43.75713099   80.5508343    51.58395044   95.91107361 129.91845099  124.15592207  137.38840679   92.28611414  120.2618697 187.74571371   22.86841981  119.45375294  105.22286286   80.31061238 62.40199987  167.05483245   47.33392878  166.50472376  153.6375309 88.34718903  135.61514556  119.43909776  128.71538875  140.71852651 169.89867936  219.83340846  143.79419523   47.90655796  179.50489278 146.87141422   52.42075947   57.91783746   68.93906889   37.94645557 88.17616503  112.79640294  103.59258333  134.18698633  116.95667835 70.14118921   56.32427154  125.85321223   61.04903197   43.4000049 87.08489101   40.89691119   79.42038892  106.29486574   74.89994892 104.88572333  152.7553574   172.16266051  117.84344965   89.89983418 73.36633027  101.8498084    71.1734305    63.86839788   52.28033569 87.30368207   58.4308207    54.05836602  149.96873987   54.83900084 64.84848435  309.27088231  138.21289193  122.33905816   89.70053273 39.84886492   98.53375932   95.2274298    92.20005886   90.92608997 81.77090328  104.50069549   78.80647072  131.17258666  163.53527862]

1 个答案:

答案 0 :(得分:1)

我认为你应该对经过对数变换的分箱进行拟合,并像你原来的做法那样乘以每个分箱的宽度来校正比例。

enter image description here

def gaussian(x, a, mean, sigma):
    """Return the value of a Gaussian with amplitude ``a`` at ``x``.

    The curve is not normalised: its maximum, reached at ``x == mean``, is
    ``a``. ``x`` may be a scalar or a NumPy array; ``sigma`` appears only
    squared.
    """
    exponent = -((x - mean)**2 / (2 * sigma**2))
    bell = np.exp(exponent)
    return a * bell

# Repeated here so the answer snippet can be run on its own.
from scipy import stats
from scipy.optimize import curve_fit

# Histogram of `standx` on log-spaced bins with three overlaid model curves:
#   red   - least-squares Gaussian fitted to the bin counts in log10(x)
#   green - maximum-likelihood log-normal, expressed through its pdf in x
#   black - the same maximum-likelihood model, expressed as a normal in ln(x)
# `standx` must be a NumPy array of strictly positive values.
f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)

# Log-spaced edges give bins of equal width in log10(x).
bins_01 = np.logspace(np.log10(standx.min()), np.log10(standx.max()), 100)
counts, edges, patches = ax1.hist(standx, bins_01, facecolor='blue', alpha=0.5)

# Center and width of every bin in linear units (one entry per bin).
centers = 0.5 * (edges[:-1] + edges[1:])
bins_log_len = np.diff(edges)

# log transform the bins! The fit is done against the bin centers in log10(x),
# using the counts of the very histogram that is displayed.
log_edges = np.log10(edges)
bins_log = 0.5 * (log_edges[:-1] + log_edges[1:])

# The starting values must be in the same (log10) units as the abscissa;
# linear-space mean/std would start the optimiser far away from the solution.
log_data = np.log10(standx)
popt, pcov = curve_fit(
    gaussian,
    bins_log,
    counts,
    p0=[counts.max(), log_data.mean(), log_data.std()],
)
# The fitted amplitude is already in counts, so no rescaling is needed.
y_fit = gaussian(bins_log, *popt)

# Make a fit to the samples (log-normal with the location fixed at 0).
shape, loc, scale = stats.lognorm.fit(standx, floc=0)
n_samples = counts.sum()

# Expected counts per bin ~ N * pdf(x) * bin width, with pdf and width both
# taken in linear x and evaluated once per bin.
samples_fit_log = stats.lognorm.pdf(centers, shape, loc=loc, scale=scale)
ax1.plot(centers, samples_fit_log * bins_log_len * n_samples, 'g--',
         label='PDF using histogram bins', linewidth=2)

# Same model in ln(x): ln(x) is normal with mean ln(scale) and std `shape`,
# so both the abscissa and the bin width have to be taken in ln(x) as well.
pdf = stats.norm.pdf(np.log(centers), loc=np.log(scale), scale=shape)
ax1.plot(centers, pdf * np.diff(np.log(edges)) * n_samples, 'k-')

# mu and sigma in the label are in log10(x) units; sigma enters the model
# only squared, so its sign is arbitrary and the magnitude is reported.
ax1.plot(10**bins_log, y_fit, 'r--', linewidth=2,
         label=r'Fit: $\mu$=%.3f , $\sigma$=%.3f' % (popt[1], abs(popt[2])))
ax1.set_xscale('log')
ax1.legend(loc='best', frameon=False, prop={'size':15})

# And similar for the ax2, ax3 plots