Question

有人知道如何使用 Python 在同一张图上绘制多个高斯分布吗？我有一些服从正态分布的数据，它们的均值和标准差各不相同，需要把它们画在一起。目前我只能画出一条曲线。请尽量讲得简单一些，我刚开始使用 Python。非常感谢！

Answer 1

让我们假设平均值mu和标准偏差sigma有3种不同的组合。您可以随意选择，但出于示例目的，我使用了3。

from matplotlib import pyplot as mp
import numpy as np

def gaussian(x, mu, sig):
    """Evaluate the normal probability density N(x | mu, sig).

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a
            NumPy array (evaluated element-wise).
        mu: Mean of the distribution.
        sig: Standard deviation of the distribution.

    Returns:
        The density value(s), with the same shape as ``x``.
    """
    # Use the ``sig`` parameter itself; the body previously referenced an
    # undefined name ``sigma`` and only worked if a global of that name existed.
    return 1./(np.sqrt(2*np.pi)*sig)*np.exp(-0.5 * (1./sig*(x - mu))**2)

# Sample points shared by every curve.
x_values = np.linspace(-8, 8, 100)

for mu, sig in [(0.5, 0.1), (1.0, 0.2), (1.5, 0.3)]: #(mu,sigma)
    curve = gaussian(x_values, mu, sig)
    # Only the y-values are passed, so the x-axis shows sample indices.
    mp.plot(curve)

mp.show()

在此行中定义mu和sigma，您可以根据需要添加任意数量的组合：

for mu, sig in [(0.5, 0.1), (1.0, 0.2), (1.5, 0.3)]: #(mu,sigma)

在我的情况下是

mu = 0.5，sigma = 0.1
mu = 1.0，sigma = 0.2
mu = 1.5，sigma = 0.3

结果：

*编辑

%matplotlib inline
from matplotlib import pyplot as mp
import numpy as np

def gaussian(x, mu, sig):
    """Evaluate the normal probability density N(x | mu, sig).

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a
            NumPy array (evaluated element-wise).
        mu: Mean of the distribution.
        sig: Standard deviation of the distribution.

    Returns:
        The density value(s), with the same shape as ``x``.
    """
    # Read the standard deviation from the ``sig`` argument rather than from
    # a module-level ``sigma`` that happens to be set by the calling loop.
    return 1./(np.sqrt(2*np.pi)*sig)*np.exp(-0.5 * (1./sig*(x - mu))**2)

# Sample points shared by every curve.
x_values = np.linspace(-4, 6, 100)

for mu, sigma in [(1, 2), (0.5, 1), (0, 0.5)]: #(mu,sigma)
    curve = gaussian(x_values, mu, sigma)
    # Only the y-values are passed, so the x-axis shows sample indices.
    mp.plot(curve)
    mp.xlim(0, 110)  # set x-axis limits
    mp.ylim(0, 1)  # set y-axis limits

mp.show()

结果：

Answer 2

@dejanmarich 提出的解决方案有一个小问题。 x 轴上的值与数据分布中的实际值不对应。为了解决这个问题，我们不应该生成任意的线性间隔范围。

相反，我们希望绘制 x 从下限到上限的图，使均值位于中间。以下代码片段实现了这一目标：

#!/usr/bin/python

import numpy as np
import matplotlib.pyplot as plt

class Gaussian:
  """Helper for drawing a single Gaussian (normal) density with pyplot.

  ``plot`` issues module-level ``plt`` calls, so several calls followed by
  one ``show()`` overlay several curves on the same figure.
  """

  @staticmethod
  def plot(mean, std, lower_bound=None, upper_bound=None, resolution=None,
    title=None, x_label=None, y_label=None, legend_label=None, legend_location="best"):
    """Draw the density N(x | mean, std) and return the ``plt`` module.

    Args:
      mean: Mean of the distribution.
      std: Standard deviation of the distribution.
      lower_bound: Left end of the x-range; defaults to ``mean - 4*std``.
      upper_bound: Right end of the x-range; defaults to ``mean + 4*std``.
      resolution: Number of sample points; defaults to 100.
      title: Figure title; defaults to "Gaussian Distribution".
      x_label: Label of the x-axis; defaults to "x".
      y_label: Label of the y-axis; defaults to "N(x|μ,σ)".
      legend_label: Legend entry; defaults to "μ=<mean>, σ=<std>".
      legend_location: ``loc`` argument forwarded to ``plt.legend``.

    Returns:
      The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    X = Gaussian._sample_points(mean, std, lower_bound, upper_bound, resolution)
    dist_X = Gaussian._distribution(X, mean, std)

    title        = title        or "Gaussian Distribution"
    x_label      = x_label      or "x"
    y_label      = y_label      or "N(x|μ,σ)"
    legend_label = legend_label or "μ={}, σ={}".format(mean, std)

    plt.title(title)

    plt.plot(X, dist_X, label=legend_label)

    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend(loc=legend_location)

    return plt

  @staticmethod
  def _sample_points(mean, std, lower_bound=None, upper_bound=None, resolution=None):
    """Return the evenly spaced x-values over which the density is drawn."""
    lower_bound = ( mean - 4*std ) if lower_bound is None else lower_bound
    upper_bound = ( mean + 4*std ) if upper_bound is None else upper_bound
    # Honour a caller-supplied resolution; it used to be overwritten with 100.
    resolution  = 100 if resolution is None else resolution

    return np.linspace(lower_bound, upper_bound, resolution)

  @staticmethod
  def _distribution(X, mean, std):
    """Evaluate the normal probability density at ``X`` (scalar or array)."""
    return 1./(np.sqrt(2*np.pi)*std)*np.exp(-0.5 * (1./std*(X - mean))**2)

一旦定义了类，我们可以通过以下方式绘制高斯分布：

figure = Gaussian.plot(0.5, 1)
figure.show()

# Or, for multiple plots:

for mean, std in [(1, 2), (0.5, 1), (0, 0.5)]:
  plot = Gaussian.plot(mean, std)
plot.show()

这里 x 的范围既可以显式指定，也可以自动计算：自动计算时取均值两侧各 4 倍标准差（mean ± 4·std），使图形以均值为中心。

这是一个非常简单的类，可以对其进行扩展以解决以下两个问题：1) 除非同时绘制多条曲线，否则无论参数如何，我们看到的（似乎）总是同一张图（因为坐标轴会随分布自动缩放）；2) 同时绘制多个分布时，曲线的尾部会被截断。

问题 1) 很容易修复：在 plot 方法中添加 plt.ylim(0,1) 即可。问题 2) 则要求我们在绘制时把所有曲线的形状都考虑进去。

为此，我们可以将类更改为 builder class。我们首先聚合所有绘图信息，并在准备绘制所有绘图时才计算 x 范围。

以下类实现了这个目标：

#!/usr/bin/python

import numpy as np
import matplotlib.pyplot as plt

class GaussianPlot:
  """Builder that collects Gaussian curves and draws them on one figure.

  Curves are registered with ``plot`` (chainable). Nothing is drawn until
  ``show`` is called; only then is the shared x-range determined, so every
  curve is sampled over the same interval.
  """

  def __init__(self, title="Gaussian Distribution", x_label="x", y_label=None,
    y_limit=None, lower_bound=None, upper_bound=None, 
    with_grid=True, fill_below=True, legend_location="best"):
    """Store the figure-wide settings and start with no registered curves.

    Args:
      title: Figure title.
      x_label: Label of the x-axis.
      y_label: Label of the y-axis; when falsy it is derived from ``x_label``.
      y_limit: Upper y-axis limit (lower limit is 0); ``None`` leaves it unset.
      lower_bound: Left end of the x-range; ``None`` means compute from curves.
      upper_bound: Right end of the x-range; ``None`` means compute from curves.
      with_grid: Whether ``plt.grid()`` is called for the figure.
      fill_below: Whether the area under each curve is shaded.
      legend_location: ``loc`` argument forwarded to ``plt.legend``.
    """
    self.plots = []

    self.title = title
    self.legend_location = legend_location
    self.with_grid = with_grid
    self.fill_below = fill_below

    self.x_label = x_label
    self.y_label = y_label or "N({}|μ,σ)".format(x_label)
    self.y_limit = y_limit

    self.lower_bound = lower_bound
    self.upper_bound = upper_bound

  def plot(self, mean, std, resolution=None, legend_label=None):
    """Register one curve to be drawn later and return ``self`` for chaining.

    Args:
      mean: Mean of the distribution.
      std: Standard deviation of the distribution.
      resolution: Number of sample points; a falsy value selects a default.
      legend_label: Legend entry; a falsy value selects "μ=<mean>, σ=<std>".
    """
    entry = {
      "mean":         mean,
      "std":          std,
      "resolution":   resolution,
      "legend_label": legend_label
    }
    self.plots.append(entry)

    return self

  def show(self):
    """Create the figure, draw every registered curve and display it."""
    self._prepare_figure()
    self._draw_plots()

    plt.legend(loc=self.legend_location)
    plt.show()

  def _prepare_figure(self):
    """Open a new figure and apply title, axis labels, y-limit and grid."""
    plt.figure()
    plt.title(self.title)

    plt.xlabel(self.x_label)
    plt.ylabel(self.y_label)

    if self.y_limit is not None:
      plt.ylim(0, self.y_limit)

    if self.with_grid:
      plt.grid()

  def _draw_plots(self):
    """Resolve the shared x-range and draw each registered curve over it."""
    if self.lower_bound is not None:
      x_min = self.lower_bound
    else:
      x_min = self._compute_lower_bound()

    if self.upper_bound is not None:
      x_max = self.upper_bound
    else:
      x_max = self._compute_upper_bound()

    for entry in self.plots:
      self._draw_plot(
        x_min, x_max,
        entry["mean"], entry["std"],
        entry["resolution"], entry["legend_label"],
      )

  def _draw_plot(self, lower_bound, upper_bound, mean, std, resolution, legend_label):
    """Draw a single curve between ``lower_bound`` and ``upper_bound``."""
    if not resolution:
      # Default: at least 100 points, otherwise 10 per unit of the
      # (integer-truncated) width of the x-range.
      resolution = max(100, int(upper_bound - lower_bound)*10)
    if not legend_label:
      legend_label = "μ={}, σ={}".format(mean, std)

    xs = np.linspace(lower_bound, upper_bound, resolution)
    density = self._distribution(xs, mean, std)

    if self.fill_below:
      plt.fill_between(xs, density, alpha=0.1)
    plt.plot(xs, density, label=legend_label)

  def _compute_lower_bound(self):
    """Return the smallest ``mean - 4*std`` over all registered curves."""
    left_edges = [entry["mean"] - 4*entry["std"] for entry in self.plots]
    return np.min(left_edges)

  def _compute_upper_bound(self):
    """Return the largest ``mean + 4*std`` over all registered curves."""
    right_edges = [entry["mean"] + 4*entry["std"] for entry in self.plots]
    return np.max(right_edges)

  def _distribution(self, X, mean, std):
    """Evaluate the normal probability density at ``X`` (scalar or array)."""
    scale = 1./(np.sqrt(2.*np.pi)*std)
    z = (X - mean)/std
    return scale*np.exp(-np.power(z, 2.)/2)

与其更简单的前身相比，该类的使用方式略有不同：

gaussian_plot = (
  GaussianPlot()
  .plot(1, 2)
  .plot(0.5, 1)
  .plot(0, 0.5)
)

gaussian_plot.show()

可以查看 __init__ 和 plot 的各个参数来自定义图的外观。另请注意，如果某条高斯曲线不够平滑，可以单独调整该曲线的分辨率（resolution 参数）。

例如，上图中的分布 (μ=0, σ=0.5) 可以从更高的分辨率中受益：

gaussian_plot = (
  GaussianPlot()
  .plot(1, 2)
  .plot(0.5, 1)
  .plot(0, 0.5, resolution=400)
)

gaussian_plot.show()

绘制多个单变量正态分布

2 个答案: