使用高斯图作为pyplot中的误差线

时间:2018-11-25 19:21:46

标签: python numpy matplotlib

我想知道是否可以在 matplotlib 中绘制这样一种散点图:每个点的误差线都用一个高斯分布来表示。每个散点都来自多次重复测量,并带有一个标准偏差,我想在图中突出这一点。我认为可以通过在每个点上叠加一个单独的、按比例缩放的小图来实现,但我在 StackOverflow 和网上其他地方都没有找到相关帮助。

如果不清楚,我深表歉意,这是我第一次在线寻求帮助。

非常感谢您。

1 个答案:

答案 0 :(得分:1)

我手头有一些代码,其功能与您描述的类似(编辑:我对代码做了大幅清理和改进)。该代码提供了一个 gaussianScatter 函数,可以生成如下所示的图(颜色条和从每个点延伸出的误差线都是可选的):

enter image description here

您必须试用样式,以使其适应您的需求,但这应该可以帮助您入门。

这是我用来生成上述示例图的代码:

import numpy as np

# number of demo points
N = 10
# two rows of random integers in [0, 10): row 0 is passed as x, row 1 as y
testpoints = np.random.randint(0, 10, size=(2, N))
# two rows of uniform samples in [.25, .75): row 0 is passed as xerr, row 1 as yerr
testnoise = np.random.uniform(.25, .75, size=(2, N))

# unpack the rows into (x, y, xerr, yerr); request the colorbar and the error bars, and forward c='C3' to the point plot
fig,ax = gaussianScatter(*testpoints, *testnoise, docbar=True, doerrbar=True, c='C3')

这是 gaussianScatter 的完整实现:

import numpy as np
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy.stats as sts

def cmapwhite(cmap, p=.05):
    """Return a copy of a named cmap whose smallest values blend into white.

    cmap: str name of a registered matplotlib colormap
    p: fraction of the returned colormap occupied by the white blend; must satisfy 0 < p <= 1

    returns: a ListedColormap named ``cmap + '_white'``
    raises: ValueError if p is outside (0, 1]
    """
    if not 0 < p <= 1:
        raise ValueError('p must satisfy 0 < p <= 1, got {!r}'.format(p))

    N = 256
    # number of samples taken from the original cmap, chosen so that the N
    # white-blend samples make up (approximately) a fraction p of the total
    Nold = int((1 - p)*N/p)

    # plt.cm.get_cmap was removed in matplotlib 3.9; plt.get_cmap exists in old and new releases
    old = plt.get_cmap(cmap)

    # per channel: hold at 1 (white) over the first half, then ramp linearly to
    # the first color of the original cmap. old(0) is RGBA; zip drops the alpha.
    cnames = ('red', 'green', 'blue')
    wdict = {cname: [[0, 1, 1],
                     [.5, 1, 1],
                     [1, c, c]] for cname, c in zip(cnames, old(0))}
    white = LinearSegmentedColormap(cmap + '_white', segmentdata=wdict, N=N)

    # white blend first, followed by the whole original cmap
    colorComb = np.vstack((
        white(np.linspace(0, 1, N)),
        old(np.linspace(0, 1, Nold))
    ))
    return ListedColormap(colorComb, name=cmap + '_white')

def datalimits(*data, err=None, pad=None):
    """Return the (min, max) extent covered by one or more data sequences.

    data: one or more array-likes (lists, tuples or numpy arrays)
    err: optional scalar or array-like, broadcast against every entry of data;
        when given, the extent covers ``d - err`` through ``d + err``
    pad: optional fraction of the extent's width that is added as a margin on both sides

    returns: dmin, dmax
    """
    # coerce up front so plain Python sequences support .min()/.max() and arithmetic
    arrays = [np.asarray(d) for d in data]

    if err is not None:
        err = np.asarray(err)
        dmin = min((d - err).min() for d in arrays)
        dmax = max((d + err).max() for d in arrays)
    else:
        dmin = min(d.min() for d in arrays)
        dmax = max(d.max() for d in arrays)

    if pad is not None:
        spad = pad*(dmax - dmin)
        dmin, dmax = dmin - spad, dmax + spad

    return dmin, dmax

def getcov(xerr, yerr):
    """Build the 2x2 axis-aligned covariance matrix for per-axis errors.

    xerr, yerr: standard deviation of the noise along x and y

    returns: 2x2 ndarray with the variances on the diagonal and zero covariance
    """
    # a covariance matrix holds variances on its diagonal, so the standard
    # deviations are squared; this keeps the gaussian's width consistent with
    # error bars drawn from the same xerr/yerr values
    cov = np.array([
        [xerr**2, 0],
        [0, yerr**2]
    ])

    return cov

def mvpdf(x, y, xerr, yerr, xlim, ylim):
    """Evaluate a 2D normal PDF centred on (x, y) over a rectangular grid.

    x, y: the centre (mean) of the distribution
    (xy)err: per-axis noise, forwarded to getcov to build the covariance matrix
    (xy)lim: arguments unpacked into np.linspace (e.g. (start, stop)); they set the grid's extent along each axis

    returns: X, Y, PDF. X and Y are the meshgrid coordinate arrays, PDF the density at each grid node.
    """
    xs = np.linspace(*xlim)
    ys = np.linspace(*ylim)
    X, Y = np.meshgrid(xs, ys)

    # the multivariate pdf expects the (x, y) components along the last axis
    grid = np.stack((X, Y), axis=-1)

    dist = sts.multivariate_normal(mean=[x, y], cov=getcov(xerr, yerr))

    return X, Y, dist.pdf(grid)

def gaussianScatter(x, y, xerr, yerr, xlim=None, ylim=None, cmap='Blues', fig=None, ax=None, docbar=False, doerrbar=False, doclines=False, donorm=False, cfkwargs=None, **pltkwargs):
    """Scatter plot in which the error of every point is drawn as a 2D gaussian.

    x,y: sequence of coordinates to be plotted
    (x,y)err: sequence of error/noise associated with each plotted coordinate
    (x,y)lim: sequence of (start, end). Determines extents of data displayed in plot.
        Defaults to the data range widened by twice the error.
    cmap: str of named cmap, or cmap instance. A str is passed through cmapwhite first.
    fig: the figure to be plotted on. If omitted, the figure of `ax` is used, or a new figure is created.
    ax: the axes to be plotted on. If omitted, new axes filling `fig` are created.
    docbar: add a color bar
    doerrbar: plot the error bars associated with each point as lines
    doclines: plot the contour lines of the gaussians
    donorm: normalize each plotted gaussian so that its largest value is 1
    cfkwargs: a dict of arguments that will be passed to the `contourf` function used to plot the gaussians
    pltkwargs: a dict of arguments that will be passed to the `plot` function used to plot the xy points

    returns: fig, ax
    raises: ValueError if no points are given
    """
    # accept lists/tuples/scalars as well as arrays; with a plain list,
    # `2*xerr` below would repeat the list instead of scaling the errors
    x, y, xerr, yerr = (np.atleast_1d(a) for a in (x, y, xerr, yerr))
    if x.size == 0:
        raise ValueError('gaussianScatter needs at least one point')

    if xlim is None: xlim = datalimits(x, err=2*xerr)
    if ylim is None: ylim = datalimits(y, err=2*yerr)
    if cfkwargs is None: cfkwargs = {}

    if ax is None:
        if fig is None:
            fig = plt.figure(figsize=(8,8))
        ax = fig.add_axes([0, 0, 1, 1])
    elif fig is None:
        # honour a caller-supplied axes instead of replacing it with a new figure
        fig = ax.figure

    if isinstance(cmap, str):
        cmap = cmapwhite(cmap)

    cfDefault = {'cmap': cmap, 'levels': 100}
    pltDefault = {'marker': '.', 'ms': 20, 'ls': 'None', 'c': 'C1'}

    # evaluate one gaussian per point; every call uses the same xlim/ylim,
    # so the X, Y grids left over from the last iteration apply to all PDFs
    PDFs = []
    for _x,_y,_xeta,_yeta in zip(x, y, xerr, yerr):
        X, Y, PDF = mvpdf(_x, _y, _xeta, _yeta, xlim, ylim)
        PDFs.append(PDF)

    if donorm:
        # norm the individual PDFs to the range [0, 1]
        normed = []
        for PDF in PDFs:
            lo, span = PDF.min(), PDF.max() - PDF.min()
            # a constant PDF would give 0/0 = NaN, which np.max below would
            # spread over the whole combined grid; map it to zeros instead
            normed.append((PDF - lo)/span if span > 0 else np.zeros_like(PDF))
        PDFs = normed

    # combine PDFs by treating them like 3D structures. At each xy point, we pick the "tallest" one
    PDFcomb = np.max(PDFs, axis=0)

    # plot the filled contours that will represent the gaussians; caller options override the defaults
    cfDefault.update(cfkwargs)
    cfs = ax.contourf(X, Y, PDFcomb, **cfDefault)

    if doclines:
        # plot and label the contour lines of the 2D gaussian
        cs = ax.contour(X, Y, PDFcomb, levels=6, colors='w', alpha=.5)
        ax.clabel(cs, fmt='%.3f', fontsize=12)

    # plot scatter; caller options override the defaults
    pltDefault.update(pltkwargs)
    if doerrbar:
        ax.errorbar(x, y, xerr=xerr, yerr=yerr, **pltDefault)
    else:
        ax.plot(x, y, **pltDefault)

    # ensure that x vs y scaling doesn't disrupt the transforms applied to the 2D gaussian
    ax.set_aspect('equal', 'box')

    if docbar:
        # create the colorbar in its own axes attached to the right of the plot
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.2)
        cbar = fig.colorbar(cfs, ax=ax, cax=cax, format='%.2f')
        cbar.set_ticks(np.linspace(0, PDFcomb.max(), num=6))

    return fig,ax