pandas autocorrelation_plot:如何更改置信区间

时间:2020-05-19 02:36:19

标签: python pandas plot confidence-interval autocorrelation

我有下面这组时间序列数据,并为它绘制了自相关图。

import matplotlib.pyplot as plt  # was missing: `plt` is used below
import numpy as np
from pandas.plotting import autocorrelation_plot

# Observed time series (96 samples) whose autocorrelation is plotted below.
data = np.array([ 37.3    ,  11.9    ,  43.3    ,  23.6    ,  30.2    ,   8.4    ,
         9.1    ,  10.3    ,  74.7    ,   4.4    ,  45.7    ,  18.6    ,
        13.9    ,   7.5    ,  30.4    ,  15.5    ,  11.6    ,   3.2    ,
         6.8    ,   3.     ,   9.7    ,   4.     ,   9.8    ,  14.6    ,
         6.5    ,   7.2    ,   6.1    ,  10.3    ,   7.9    ,   3.4    ,
         3.4    ,   6.4    ,  38.5    ,  10.     ,   6.1    ,  11.7    ,
        16.9    ,   4.1    ,   8.9    ,   8.1    ,   7.6    ,  13.2    ,
        11.7    ,   7.1    ,  42.6    ,   7.2    ,  17.9    ,  42.2    ,
        18.5    ,   7.1    ,  42.1    ,  10.     , 100.1    ,   4.5    ,
        42.52905,   4.8081 ,  15.66435,  12.0056 ,   6.744  ,  96.7745 ,
        13.8    ,   8.2    ,   2.3    ,  14.8    ,  21.5    ,  11.3    ,
        10.2    ,  12.6    ,  42.7    ,  18.     ,  26.8    ,  31.9    ,
        22.047  ,   6.057  ,  20.8    ,  49.014  ,  20.788  ,   7.198  ,
         9.993  ,  19.393  ,  44.9456 ,  13.912  ,  11.404  ,  38.367  ,
        34.792  ,   7.99425,   5.37   ,  11.358  ,  16.519  ,   7.337  ,
         5.717  ,   7.248  ,  50.1475 ,  19.277  ,  41.596  ,  66.106  ])

# Create the figure first; autocorrelation_plot is called without `ax`, so it
# draws on whatever axes pyplot currently considers active.
fig, ax = plt.subplots(figsize=(8, 5))
autocorrelation_plot(data)
plt.show()

enter image description here

如您所见,在默认的置信区间下,任何滞后(lag)处都没有显示出显著的自相关。根据此处的文档,图中的水平线对应于95%和99%的置信带,其中虚线是99%的置信带。

现在我需要检查其他CI下的ACF,但找不到如何更改CI。

2 个答案:

答案 0 :(得分:1)

您可以复制 pandas 的源代码,并把置信水平改为通过参数传入:

def autocorrelation(series, lower=0.95, upper=0.99, ax=None, **kwds):
    """Plot the autocorrelation of ``series`` with configurable confidence bands.

    For every lag ``h`` in ``1..n`` the lag-``h`` autocovariance divided by the
    lag-0 autocovariance is plotted.  Horizontal lines are drawn at
    ``+/- z / sqrt(n)``, where ``z`` is the two-sided standard-normal quantile
    of the requested confidence level.

    Parameters
    ----------
    series : sequence of numbers
        The observations; converted with ``np.asarray``.
    lower : float, default 0.95
        Confidence level of the solid band.  Must satisfy ``0 <= lower < 1``.
    upper : float, default 0.99
        Confidence level of the dashed band.  Must satisfy ``0 <= upper < 1``.
    ax : matplotlib axes, optional
        Axes to draw on.  When omitted, the current pyplot axes are used and
        their limits are set to ``x in (1, n)`` and ``y in (-1, 1)``.
    **kwds
        Forwarded to ``ax.plot``; a ``label`` entry also triggers a legend.

    Returns
    -------
    The axes that were drawn on.

    Raises
    ------
    ValueError
        If ``lower`` or ``upper`` is outside ``[0, 1)``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded.
    from scipy.stats import norm

    for level in (lower, upper):
        # A level such as 95 (instead of 0.95) would otherwise turn into
        # NaN/inf band positions without any error.
        if not 0.0 <= level < 1.0:
            raise ValueError(
                "confidence levels must be in [0, 1), got %r" % (level,)
            )

    n = len(series)
    data = np.asarray(series)
    if ax is None:
        # matplotlib is only needed when the caller did not supply axes.
        import matplotlib.pyplot as plt

        # ``plt.gca`` no longer accepts keyword arguments in current
        # Matplotlib, so the limits are set on the axes afterwards.
        ax = plt.gca()
        ax.set_xlim(1, n)
        ax.set_ylim(-1.0, 1.0)
    mean = np.mean(data)
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        # Lag-h autocovariance normalised by the lag-0 autocovariance.
        return ((data[: n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0

    x = np.arange(n) + 1
    y = [r(loc) for loc in x]

    # Two-sided standard-normal quantiles for the requested levels.
    z_lower, z_upper = norm.ppf((1.0 + np.array([lower, upper])) / 2.0)

    ax.axhline(y=z_upper / np.sqrt(n), linestyle="--", color="grey")
    ax.axhline(y=z_lower / np.sqrt(n), color="grey")
    ax.axhline(y=0.0, color="black")
    ax.axhline(y=-z_lower / np.sqrt(n), color="grey")
    ax.axhline(y=-z_upper / np.sqrt(n), linestyle="--", color="grey")
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation")
    ax.plot(x, y, **kwds)
    if "label" in kwds:
        ax.legend()
    ax.grid()
    return ax


# Example: draw the bands at the 60% and 90% confidence levels.
autocorrelation(data, lower=0.6, upper=0.9)

输出:

enter image description here

答案 1 :(得分:0)

我认为无法更改CI。根据此处的源代码,95%和99%置信区间对应的Z分数是硬编码的。

def autocorrelation_plot(series, ax=None, **kwds):
    """Plot the autocorrelation of ``series`` with fixed 95% / 99% bands.

    For every lag ``h`` in ``1..n`` the lag-``h`` autocovariance divided by the
    lag-0 autocovariance is plotted.  The z-scores of the two confidence bands
    are hard-coded, so the confidence levels cannot be changed by the caller.

    Parameters
    ----------
    series : sequence of numbers
        The observations; converted with ``np.asarray``.
    ax : matplotlib axes, optional
        Axes to draw on.  When omitted, the current pyplot axes are used and
        their limits are set to ``x in (1, n)`` and ``y in (-1, 1)``.
    **kwds
        Forwarded to ``ax.plot``; a ``label`` entry also triggers a legend.

    Returns
    -------
    The axes that were drawn on.
    """
    n = len(series)
    data = np.asarray(series)
    if ax is None:
        # matplotlib is only needed when the caller did not supply axes.
        import matplotlib.pyplot as plt

        # ``plt.gca`` no longer accepts keyword arguments in current
        # Matplotlib, so the limits are set on the axes afterwards.
        ax = plt.gca()
        ax.set_xlim(1, n)
        ax.set_ylim(-1.0, 1.0)
    mean = np.mean(data)
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        # Lag-h autocovariance normalised by the lag-0 autocovariance.
        return ((data[: n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0

    x = np.arange(n) + 1
    y = [r(loc) for loc in x]
    # Hard-coded z-scores used for the 95% (solid) and 99% (dashed) bands.
    z95 = 1.959963984540054
    z99 = 2.5758293035489004
    ax.axhline(y=z99 / np.sqrt(n), linestyle="--", color="grey")
    ax.axhline(y=z95 / np.sqrt(n), color="grey")
    ax.axhline(y=0.0, color="black")
    ax.axhline(y=-z95 / np.sqrt(n), color="grey")
    ax.axhline(y=-z99 / np.sqrt(n), linestyle="--", color="grey")
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation")
    ax.plot(x, y, **kwds)
    if "label" in kwds:
        ax.legend()
    ax.grid()
    return ax