使用matplotlib绘制经过缩放和旋转的二元高斯分布

时间:2018-11-19 04:19:53

标签: python pandas matplotlib covariance gaussian

我正在尝试使用matplotlib绘制(plot)二元高斯分布(bivariate gaussian distribution)。我想使用两个scatter点(组A)和(组B)的xy坐标来执行此操作。

我想通过调整COV矩阵(matrix)来调整分布(distribution),以考虑每个组的velocity,以及它们与一个用作参考点的额外xy坐标之间的距离。

我已经计算出每组xy坐标与参考点的距离。距离表示为radius,标记为[GrA_Rad]、[GrB_Rad]。

因此,它们离参考点越远,radius越大。我还计算了velocity,标记为[GrA_Vel]、[GrB_Vel]。每个组的direction表示为orientation,标记为[GrA_Rotation]、[GrB_Rotation]。

关于我希望如何针对velocity和距离(radius)调整distribution的问题:

我希望使用SVD。具体来说,如果我有每个scatter的rotation angle,它就提供了direction。velocity可用于描述scaling matrix,即[GrA_Scaling]、[GrB_Scaling]。因此,可以使用此scaling matrix在x-direction上扩展radius,并在y-direction上收缩radius。这表示COV matrix。

最后,通过将组的位置(x,y)平移velocity的一半来找到distribution的mean值。

简单来说:将radius应用于每个组的scatter点。通过radius和velocity调整COV矩阵。因此,使用scaling matrix在x-direction上展开radius,并在y-direction上收缩。direction是根据rotation angle测量的。然后通过将组位置(x,y)平移velocity的一半来确定distribution mean的值。

下面是这些变量的df:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation

d = ({
    'Time' : [1,2,3,4,5,6,7,8],
    'GrA_X' : [10,12,17,16,16,14,12,8],
    'GrA_Y' : [10,12,13,7,6,7,8,8],
    'GrB_X' : [5,8,13,16,19,15,13,5],
    'GrB_Y' : [6,15,12,7,8,9,10,8],
    'Reference_X' : [6,8,14,18,13,11,16,15],
    'Reference_Y' : [10,12,8,12,15,12,10,8],
    'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],
    'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],
    'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
    'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],
    'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
    'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],
    'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135],
    'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],
    })

df = pd.DataFrame(data = d)

我已经为每个xy坐标创建了animated plot。

1 个答案:

答案 0 :(得分:3)

更新

该问题已更新,并且变得更加清晰。我已更新代码以使其匹配。这是最新的输出:

enter image description here

除了样式外,我认为这与OP描述的内容相符。

以下是用于生成上图的代码:

# Two-row toy dataset for group A. Row 0 has zero velocity, scaling and
# rotation; row 1 has velocity 10, scaling 0.5 and a 45 degree rotation.
dfake = {
    'GrA_X': [15, 15],
    'GrA_Y': [15, 15],
    'Reference_X': [15, 3],
    'Reference_Y': [15, 15],
    'GrA_Rad': [15, 25],
    'GrA_Vel': [0, 10],
    'GrA_Scaling': [0, 0.5],
    'GrA_Rotation': [0, 45],
}

dffake = pd.DataFrame(dfake)

# one panel per row of the toy dataset, side by side, filling the whole figure
fig, axs = plt.subplots(1, 2, figsize=(16, 8))
fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
for row_index, panel in enumerate(axs):
    plotone(dffake, 'A', row_index, xlim=(0, 30), ylim=(0, 30), fig=fig, ax=panel)
plt.show()

以及我使用的plotone函数的完整实现在下面的代码块中。如果您只想了解用于生成和转换2D高斯PDF的数学运算,请查看mvpdf函数(以及它依赖的rot和getcov函数):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts

def rot(theta):
    """Return the 2x2 rotation matrix [[cos, -sin], [sin, cos]] for an angle.

    theta: The rotation angle, in degrees
    """
    angle = np.deg2rad(theta)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    return np.array([[cos_a, -sin_a], [sin_a, cos_a]])

def getcov(radius=1, scale=1, theta=0):
    """Build a rotated 2x2 covariance matrix.

    radius: Multiplies both axis variances
    scale: The x variance is multiplied by (scale + 1) and the y variance is divided by (scale + 1)
    theta: The rotation, in degrees, applied to the axis-aligned covariance

    returns: R @ D @ R.T, where D is the diagonal covariance and R is rot(theta)
    """
    stretch = scale + 1
    # axis-aligned covariance, before any rotation
    diagonal = np.diag([radius*stretch, radius/stretch])

    rotation = rot(theta)
    return rotation @ diagonal @ rotation.T

def mvpdf(x, y, xlim, ylim, radius=1, velocity=0, scale=0, theta=0):
    """Evaluate the PDF of a transformed 2D gaussian on a regular grid.

    x, y: The starting point. The PDF is centered half of `velocity` away from it, in the direction given by `theta`
    (xy)lim: Unpacked into np.linspace to build each grid axis, e.g. (start, stop)
    radius: Multiplies both variances of the covariance matrix (see getcov)
    velocity: The PDF center is displaced from (x, y) by velocity/2
    scale: The x variance is multiplied by (scale + 1) and the y variance divided by (scale + 1), before rotation
    theta: The rotation in degrees, applied to both the covariance and the displacement

    returns: X, Y, PDF. X and Y hold the grid coordinates, PDF the density at each grid point.
    """
    # coordinate grids, one axis per linspace
    grid_x, grid_y = np.meshgrid(np.linspace(*xlim), np.linspace(*ylim))

    # the mean sits half the velocity vector away from the xy point
    mean = rot(theta) @ (velocity/2, 0) + (x, y)

    # covariance with the radius/scale/rotation transforms applied
    covariance = getcov(radius=radius, scale=scale, theta=theta)

    # stack the grids on a trailing axis, the layout the multivariate pdf expects
    points = np.stack([grid_x, grid_y], axis=2)
    density = sts.multivariate_normal(mean, covariance).pdf(points)

    return grid_x, grid_y, density

def plotmv(x, y, xlim=None, ylim=None, radius=1, velocity=0, scale=0, theta=0, xref=None, yref=None, fig=None, ax=None):
    """Plot an xy point with an appropriately transformed 2D gaussian around it.
    Also plots other related data like the reference point.

    x, y: The point to plot
    (xy)lim: The extent handed to mvpdf for the PDF grid. Defaults to 5 units either side of the point
    radius, velocity, scale, theta: Forwarded to mvpdf. velocity and theta (degrees) also set the length and direction of the arrow drawn from the point
    (xy)ref: The reference point. Only drawn when both are supplied
    fig, ax: The figure/axes to draw on. If only ax is given, the figure that owns it is used. If only fig is given, its current axes are used. If neither is given, a new 8x8 figure is created

    returns: fig, ax
    """
    if xlim is None: xlim = (x - 5, x + 5)
    if ylim is None: ylim = (y - 5, y + 5)

    if ax is None:
        if fig is None:
            fig = plt.figure(figsize=(8,8))
        ax = fig.gca()
    elif fig is None:
        # honor a caller-supplied axes instead of replacing it with a new figure's axes
        fig = ax.figure

    # plot the xy point
    ax.plot(x, y, '.', c='C0', ms=20)

    if xref is not None and yref is not None:
        # plot the reference point, if supplied
        ax.plot(xref, yref, '.', c='w', ms=12)

    # plot the arrow leading from the xy point
    if velocity > 0:
        dx, dy = rot(theta) @ (velocity, 0)
        ax.arrow(x, y, dx, dy,
                 width=.4, length_includes_head=True, ec='C0', fc='C0')

    # fetch the PDF of the 2D gaussian
    X, Y, PDF = mvpdf(x, y, xlim=xlim, ylim=ylim, radius=radius, velocity=velocity, scale=scale, theta=theta)

    # normalize PDF by shifting and scaling, so that the smallest value is 0 and the largest is 1
    normPDF = PDF - PDF.min()
    normPDF = normPDF/normPDF.max()

    # plot and label the contour lines of the 2D gaussian
    cs = ax.contour(X, Y, normPDF, levels=6, colors='w', alpha=.5)
    ax.clabel(cs, fmt='%.3f', fontsize=12)

    # plot the filled contours of the 2D gaussian. Set levels high for smooth contours.
    # NOTE(review): vmin=-.9 is below the normalized data range; presumably a styling
    # choice to keep the lowest values off the dark end of the colormap -- confirm
    cfs = ax.contourf(X, Y, normPDF, levels=50, cmap='viridis', vmin=-.9, vmax=1)

    # create the colorbar and put its ticks at 0 -> 1
    cbar = fig.colorbar(cfs, ax=ax)
    cbar.set_ticks([0, .2, .4, .6, .8, 1])

    # add some labels
    ax.grid()
    ax.set_xlabel('X distance (M)')
    ax.set_ylabel('Y distance (M)')

    # ensure that x vs y scaling doesn't disrupt the transforms applied to the 2D gaussian
    ax.set_aspect('equal', 'box')

    return fig, ax

def fetchone(df, l, i, **kwargs):
    """Collect the plotting arguments for one xy point.

    df: The DataFrame holding the 'Gr<l>_*' and 'Reference_*' columns
    l: The group letter substituted into the 'Gr%s_...' column names
    i: The row label, looked up with df.loc
    kwargs: Extra entries that are added to, or override, the looked-up values

    returns: dict with keys x, y, radius, velocity, scale, theta, xref, yref (plus any kwargs)
    """
    group = 'Gr%s' % l
    columns = {
        'x': group + '_X',
        'y': group + '_Y',
        'radius': group + '_Rad',
        'velocity': group + '_Vel',
        'scale': group + '_Scaling',
        'theta': group + '_Rotation',
        'xref': 'Reference_X',
        'yref': 'Reference_Y',
    }

    ret = {}
    for argname, column in columns.items():
        ret[argname] = df.loc[i, column]

    # caller-supplied overrides win over the values read from the frame
    ret.update(kwargs)
    return ret

def plotone(df, l, i, xlim=None, ylim=None, fig=None, ax=None, **kwargs):
    """Plot a single row of the dataset for one group.

    df, l, i, kwargs: Passed to fetchone to look up (and optionally override) the point's data
    xlim, ylim, fig, ax: Passed straight through to plotmv

    returns: whatever plotmv returns
    """
    # everything plotmv needs to know about this datapoint
    plot_args = fetchone(df, l, i, **kwargs)

    # add the layout arguments and draw
    plot_args.update(xlim=xlim, ylim=ylim, fig=fig, ax=ax)
    return plotmv(**plot_args)

旧答案-2

我已将答案调整为与OP发布的示例相符:

enter image description here

以下是产生以上图像的代码:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts

def rot(theta):
    """Return the 2x2 rotation matrix [[cos, -sin], [sin, cos]] for an angle.

    theta: The rotation angle, in degrees
    """
    radians = np.deg2rad(theta)
    c = np.cos(radians)
    s = np.sin(radians)
    return np.array([[c, -s], [s, c]])

def getcov(radius=1, scale=1, theta=0):
    """Build a rotated 2x2 covariance matrix.

    radius: Multiplies both axis variances
    scale: The x variance is multiplied by (scale + 1) and the y variance is divided by (scale + 1)
    theta: The rotation, in degrees, applied to the axis-aligned covariance

    returns: R @ D @ R.T, where D is the diagonal covariance and R is rot(theta)
    """
    stretch = scale + 1
    # axis-aligned covariance, before any rotation
    diagonal = np.diag([radius*stretch, radius/stretch])

    rotation = rot(theta)
    return rotation @ diagonal @ rotation.T

def datalimits(*data, pad=.15):
    """Return the overall (low, high) range of several datasets, widened by padding.

    data: One or more objects exposing .min() and .max() (e.g. arrays or Series)
    pad: The fraction of the overall span added below the minimum and above the maximum

    returns: (low, high)
    """
    lowest = min(values.min() for values in data)
    highest = max(values.max() for values in data)
    margin = pad*(highest - lowest)
    return lowest - margin, highest + margin

# Sample data: one row per time step, with per-group (A and B) columns and a
# shared reference point.
d = {
    'Time': [1, 2, 3, 4, 5, 6, 7, 8],
    # xy coordinates of each group
    'GrA_X': [10, 12, 17, 16, 16, 14, 12, 8],
    'GrA_Y': [10, 12, 13, 7, 6, 7, 8, 8],
    'GrB_X': [5, 8, 13, 16, 19, 15, 13, 5],
    'GrB_Y': [6, 15, 12, 7, 8, 9, 10, 8],
    # xy coordinates of the reference point
    'Reference_X': [6, 8, 14, 18, 13, 11, 16, 15],
    'Reference_Y': [10, 12, 8, 12, 15, 12, 10, 8],
    # per-group radius, velocity, scaling and rotation
    'GrA_Rad': [8.3, 8.25, 8.2, 8, 8.15, 8.15, 8.2, 8.3],
    'GrB_Rad': [8.3, 8.25, 8.3, 8.4, 8.6, 8.4, 8.3, 8.65],
    'GrA_Vel': [0, 2.8, 5.1, 6.1, 1.0, 2.2, 2.2, 4.0],
    'GrB_Vel': [0, 9.5, 5.8, 5.8, 3.16, 4.12, 2.2, 8.2],
    'GrA_Scaling': [0, 0.22, 0.39, 0.47, 0.07, 0.17, 0.17, 0.31],
    'GrB_Scaling': [0, 0.53, 0.2, 0.2, 0.06, 0.1, 0.03, 0.4],
    'GrA_Rotation': [0, 45, 23.2, -26.56, -33.69, -36.86, -45, -135],
    'GrB_Rotation': [0, 71.6, 36.87, 5.2, 8.13, 16.70, 26.57, 90],
}

df = pd.DataFrame(d)

# tunables: padding around the data (as a fraction of its span), number of
# contour lines, and number of filled-contour levels
limitpad = .5
clevels = 5
cflevels = 50

# padded plot extents covering the points of both groups
xmin, xmax = datalimits(df['GrA_X'], df['GrB_X'], pad=limitpad)
ymin, ymax = datalimits(df['GrA_Y'], df['GrB_Y'], pad=limitpad)

X, Y = np.meshgrid(np.linspace(xmin, xmax), np.linspace(ymin, ymax))
# grid coordinates stacked on a trailing axis; built once and reused for every PDF
XY = np.stack([X, Y], 2)

fig = plt.figure(figsize=(10,6))
ax = plt.gca()

Zs = []
for l, color in zip('AB', ('red', 'yellow')):
    xcol, ycol = 'Gr%s_X'%l, 'Gr%s_Y'%l

    # plot all of the points from a single group
    ax.plot(df[xcol], df[ycol], '.', c=color, ms=15, label=l)

    # one 2D gaussian per row, centered on the group's point for that row
    Zrows = [
        sts.multivariate_normal(
            [row[xcol], row[ycol]],
            getcov(radius=row['Gr%s_Rad'%l], scale=row['Gr%s_Scaling'%l], theta=row['Gr%s_Rotation'%l]),
        ).pdf(XY)
        for _, row in df.iterrows()
    ]

    # sum the group's gaussians into a single surface
    Zs.append(np.sum(Zrows, axis=0))

# NOTE: the reference points (Reference_X / Reference_Y) are not drawn in this version

# create Z from the difference of the sums of the 2D Gaussians from group A and group B
Z = Zs[0] - Zs[1]

# normalize Z by shifting and scaling, so that the smallest value is 0 and the largest is 1
normZ = Z - Z.min()
normZ /= normZ.max()

# plot and label the contour lines
cs = ax.contour(X, Y, normZ, levels=clevels, colors='w', alpha=.5)
ax.clabel(cs, fmt='%2.1f', colors='w')

# plot the filled contours. Set levels high for smooth contours
cfs = ax.contourf(X, Y, normZ, levels=cflevels, cmap='viridis', vmin=0, vmax=1)

# create the colorbar and put its ticks at 0 -> 1
cbar = fig.colorbar(cfs, ax=ax)
cbar.set_ticks([0, .2, .4, .6, .8, 1])

# keep x and y on the same scale so the rotations are not visually distorted
ax.set_aspect('equal', 'box')

旧答案-1

要确切地说出您要追求的目标有点困难。可以通过其协方差矩阵缩放和旋转多元高斯分布。这是一个根据您的数据执行此操作的示例:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts

def rot(theta):
    """Return the 2x2 rotation matrix [[cos, -sin], [sin, cos]] for an angle.

    theta: The rotation angle, in degrees
    """
    phi = np.deg2rad(theta)
    cos_phi, sin_phi = np.cos(phi), np.sin(phi)
    top_row = [cos_phi, -sin_phi]
    bottom_row = [sin_phi, cos_phi]
    return np.array([top_row, bottom_row])

def getcov(scale, theta):
    """Build a rotated 2x2 covariance matrix with unit base variance.

    scale: The x variance is (scale + 1) and the y variance is 1/(scale + 1)
    theta: The rotation, in degrees, applied to the axis-aligned covariance

    returns: R @ D @ R.T, where D is the diagonal covariance and R is rot(theta)
    """
    stretch = scale + 1
    # axis-aligned covariance, before any rotation
    diagonal = np.diag([stretch, 1/stretch])

    rotation = rot(theta)
    return rotation @ diagonal @ rotation.T

# Sample data: one row per time step, with per-group (A and B) columns and a
# shared reference point.
d = {
    'Time': [1, 2, 3, 4, 5, 6, 7, 8],
    # xy coordinates of each group
    'GrA_X': [10, 12, 17, 16, 16, 14, 12, 8],
    'GrA_Y': [10, 12, 13, 7, 6, 7, 8, 8],
    'GrB_X': [5, 8, 13, 16, 19, 15, 13, 5],
    'GrB_Y': [6, 15, 12, 7, 8, 9, 10, 8],
    # xy coordinates of the reference point
    'Reference_X': [6, 8, 14, 18, 13, 11, 16, 15],
    'Reference_Y': [10, 12, 8, 12, 15, 12, 10, 8],
    # per-group radius, velocity, scaling and rotation
    'GrA_Rad': [8.3, 8.25, 8.2, 8, 8.15, 8.15, 8.2, 8.3],
    'GrB_Rad': [8.3, 8.25, 8.3, 8.4, 8.6, 8.4, 8.3, 8.65],
    'GrA_Vel': [0, 2.8, 5.1, 6.1, 1.0, 2.2, 2.2, 4.0],
    'GrB_Vel': [0, 9.5, 5.8, 5.8, 3.16, 4.12, 2.2, 8.2],
    'GrA_Scaling': [0, 0.22, 0.39, 0.47, 0.07, 0.17, 0.17, 0.31],
    'GrB_Scaling': [0, 0.53, 0.2, 0.2, 0.06, 0.1, 0.03, 0.4],
    'GrA_Rotation': [0, 45, 23.2, -26.56, -33.69, -36.86, -45, -135],
    'GrB_Rotation': [0, 71.6, 36.87, 5.2, 8.13, 16.70, 26.57, 90],
}

df = pd.DataFrame(d)
# overall data extents across both groups
xvalues = df[['GrA_X', 'GrB_X']].values
yvalues = df[['GrA_Y', 'GrB_Y']].values
xmin, xmax = xvalues.min(), xvalues.max()
ymin, ymax = yvalues.min(), yvalues.max()

# widen the grid by 10% of the data span on every side
xpad = (xmax - xmin)*.1
ypad = (ymax - ymin)*.1
X, Y = np.meshgrid(
    np.linspace(xmin - xpad, xmax + xpad),
    np.linspace(ymin - ypad, ymax + ypad)
)
# grid coordinates stacked on a trailing axis; built once and reused for every PDF
XY = np.stack([X, Y], 2)

# one subplot per row of the data, stacked vertically
nrows = df.shape[0]
fig, axs = plt.subplots(nrows, sharex=True, figsize=(4, 4*nrows))
fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=-.82)

for (_, row), ax in zip(df.iterrows(), axs):
    for c in 'AB':
        x, y = row['Gr%s_X'%c], row['Gr%s_Y'%c]

        # gaussian centered on the group's point, scaled and rotated via its covariance
        cov = getcov(scale=row['Gr%s_Scaling'%c], theta=row['Gr%s_Rotation'%c])
        Z = sts.multivariate_normal([x, y], cov).pdf(XY)

        ax.contour(X, Y, Z)
        ax.plot(x, y, 'x')

    # keep x and y on the same scale so the rotations are not visually distorted
    ax.set_aspect('equal', 'box')

这将输出:

enter image description here