Question

据我所知，seaborn 的回归图总是带有置信带（下图中的虚线带）。我希望 seaborn 中有一个函数可以计算预测带（图中的虚线带）。有这样的选项吗？

Answer 1

下面的示例代码说明了这一点：

import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 

# Synthetic data: x is 0..99; y follows a line of slope 3.2 plus uniform
# noise drawn from [0, 100).
x = np.arange(100)
y = np.arange(100)*3.2 + np.random.rand(100)*100 

# NOTE(review): _RegressionPlotter has a leading underscore, i.e. it is a
# private seaborn class rather than public API -- confirm it exists with this
# signature in the seaborn version being used.
rp = sns.regression._RegressionPlotter(x,y)
# fit_regression is unpacked into three values here: the x grid, the fitted
# values on that grid, and a 2-row array of band limits.
# Presumably the grid spans x_range -- verify against the seaborn source.
grid, yhat, err_bands = rp.fit_regression(x_range=(-10,110))
# Fitted line, raw points, and the band between rows 0 and 1 of err_bands
# drawn as a half-transparent fill.
plt.plot(grid,yhat)
plt.scatter(x,y)
plt.fill_between(grid,err_bands[0,:],err_bands[1,:],alpha=.5)

此处 fit_regression 方法同时计算误差带和预测值。如果使用默认的关键字参数，则由下面这个函数（取自源代码）计算预测值。

def fit_fast(self, grid):
    """Fit a straight line by least squares and predict on ``grid``.

    An intercept column of ones is prepended to both ``self.x`` and ``grid``;
    the coefficients come from the pseudo-inverse of the design matrix.

    Returns a pair ``(yhat, yhat_boots)``. ``yhat`` holds the predictions on
    ``grid``. ``yhat_boots`` is ``None`` when ``self.ci`` is ``None``;
    otherwise it holds the predictions obtained from each set of
    coefficients returned by ``algo.bootstrap``.
    """
    def _solve(design, response):
        # Least-squares coefficients via the Moore-Penrose pseudo-inverse.
        pseudo_inverse = np.linalg.pinv(design)
        return pseudo_inverse.dot(response)

    design = np.c_[np.ones(len(self.x)), self.x]
    response = self.y
    grid_design = np.c_[np.ones(len(grid)), grid]

    coefs = _solve(design, response)
    yhat = grid_design.dot(coefs)

    if self.ci is not None:
        # Refit on resampled data, then predict on the grid with each
        # refitted coefficient set.
        boot_coefs = algo.bootstrap(design, response, func=_solve,
                                    n_boot=self.n_boot, units=self.units)
        boot_preds = grid_design.dot(boot_coefs.T)
        return yhat, boot_preds.T

    return yhat, None

这是用于获得置信区间的 bootstrap（自助法）函数（同样来自源代码）：

def bootstrap(*args, **kwargs):
    """Resample equal-length arrays with replacement and apply a statistic.

    Positional arguments are the arrays to resample. They are coerced with
    ``np.asarray`` and indexed with the same random rows along axis 0, so
    corresponding entries stay paired across arrays.

    Keyword arguments:
        n_boot: number of resamples (default 10000).
        func: callable applied to each resample (default ``np.mean``).
        axis: forwarded to ``func`` as ``axis=`` when not ``None``.
        units: when not ``None``, the work is handed to
            ``_structured_bootstrap``.
        smooth: when truthy, the work is handed to ``_smooth_bootstrap``.
        random_seed: seed for the ``np.random.RandomState`` that draws the
            resampling indices.

    Returns:
        An array with one ``func`` result per resample, or whatever the
        delegated helper returns.

    Raises:
        ValueError: if the input arrays do not all have the same length.
    """
    # All inputs must share one length so they can be indexed together.
    if len(set(map(len, args))) > 1:
        raise ValueError("All input arrays must have the same length")
    n_obs = len(args[0])

    # Options, with their defaults.
    option = kwargs.get
    n_boot = option("n_boot", 10000)
    func = option("func", np.mean)
    axis = option("axis", None)
    units = option("units", None)
    smooth = option("smooth", False)
    random_seed = option("random_seed", None)
    func_kwargs = {} if axis is None else {"axis": axis}

    # Random generator used to draw the row indices.
    rs = np.random.RandomState(random_seed)

    # Work on ndarrays from here on.
    arrays = [np.asarray(a) for a in args]
    if units is not None:
        units = np.asarray(units)

    # Specialised variants are handled by helpers.
    if smooth:
        return _smooth_bootstrap(arrays, n_boot, func, func_kwargs)
    if units is not None:
        return _structured_bootstrap(arrays, n_boot, units, func,
                                     func_kwargs, rs)

    def _one_replicate():
        # Draw n_obs row indices with replacement and evaluate the statistic.
        rows = rs.randint(0, n_obs, n_obs)
        return func(*(a.take(rows, axis=0) for a in arrays), **func_kwargs)

    return np.array([_one_replicate() for _ in range(int(n_boot))])

seaborn 会计算预测带（而不是置信带）吗？

1 个答案: