假设我们有一个名为 "series" 的时间序列对象。我知道使用 autocorrelation_plot() 方法可以很容易地绘制该序列对象在各滞后阶数(lag)下的自相关图。
以下是代码:
from matplotlib import pyplot

# ``pandas.tools.plotting`` was the historical home of this helper; newer
# pandas releases expose it from ``pandas.plotting`` instead. Try the current
# location first and fall back so the snippet runs on either generation.
try:
    from pandas.plotting import autocorrelation_plot
except ImportError:
    from pandas.tools.plotting import autocorrelation_plot

# Draw the autocorrelation of ``series`` on the current matplotlib axes and
# display the resulting window.
autocorrelation_plot(series)
pyplot.show()
这是 pandas 绘制出的图:
有没有办法使用 Bokeh 服务器绘制出相同的图?
答案 0 :(得分:6)
是的,有。我编写的代码可以为您提供与pandas autocorrelation_plot()方法相同的结果。
以下是代码:
from bokeh.layouts import column
from bokeh.plotting import figure, curdoc
import timeseries_model_creator # to get data
import numpy as np
# Build the project-local data loader. NOTE(review): the variable shares its
# name with the class it is created from, so after this line the bare name
# refers to the object returned by the call, not to the class.
TimeSeriesModelCreator = timeseries_model_creator.TimeSeriesModelCreator()
# presumably read_csv() returns the time series to analyse -- confirm against
# the timeseries_model_creator module, which is not shown here.
series = TimeSeriesModelCreator.read_csv() # time series object
def get_autocorrelation_plot_params(series):
    """Compute the values needed to draw an autocorrelation plot.

    Args:
        series: A sized sequence of numbers (anything ``len`` and
            ``np.asarray`` accept).

    Returns:
        A tuple ``(n, x, y, z95, z99)`` where ``n`` is the number of
        observations, ``x`` is the array of lags ``1..n``, ``y`` is a list
        holding the autocorrelation coefficient for each lag in ``x``, and
        ``z95`` / ``z99`` are the constants the caller divides by
        ``sqrt(n)`` to draw the reference bands.
    """
    n = len(series)
    data = np.asarray(series)
    mean = np.mean(data)
    # Lag-0 autocovariance; every coefficient is normalised by it.
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        """Return the autocorrelation coefficient at lag ``h``."""
        return ((data[:n - h] - mean) *
                (data[h:] - mean)).sum() / float(n) / c0

    x = np.arange(n) + 1
    # Materialise a list: on Python 3 ``map`` yields a one-shot iterator,
    # which would be exhausted after the first glyph that consumes it.
    y = [r(h) for h in x]
    z95 = 1.959963984540054
    z99 = 2.5758293035489004
    return n, x, y, z95, z99
# Gather the lags, coefficients and band constants for the series.
n, x, y, z95, z99 = get_autocorrelation_plot_params(series)

auto_correlation_plot2 = figure(
    title='Time Series Auto-Correlation',
    x_axis_label="Lag",
    y_axis_label="Autocorrelation",
    plot_width=1000,
    plot_height=500,
)

# Horizontal reference lines, drawn in this order: +z99 band (dashed),
# +z95 band, the zero axis, -z95 band, -z99 band (dashed).
_root_n = np.sqrt(n)
for _level, _style in (
    (z99 / _root_n, dict(line_dash='dashed', line_color='grey')),
    (z95 / _root_n, dict(line_color='grey')),
    (0.0, dict(line_color='black')),
    (-z95 / _root_n, dict(line_color='grey')),
    (-z99 / _root_n, dict(line_dash='dashed', line_color='grey')),
):
    auto_correlation_plot2.line(x, y=_level, **_style)

# The autocorrelation curve itself, with optional markers on each lag.
auto_correlation_plot2.line(x, y, line_width=2)
auto_correlation_plot2.circle(x, y, fill_color="white", size=8)  # optional

# Register the figure with the current Bokeh document.
curdoc().add_root(column(auto_correlation_plot2))
这是 Bokeh 绘制出的图:
答案 1 :(得分:2)
我基本上是照搬了 Ayberk 的答案,只是把它封装成了函数
def acf(series):
    """Return the autocorrelation coefficients of ``series`` for lags 1..n.

    Lag 0 is skipped, so the values line up with a lag axis that starts at 1.

    Args:
        series: A sized sequence of numbers (anything ``len`` and
            ``np.asarray`` accept).

    Returns:
        A ``pandas.Series`` of ``len(series)`` floats rounded to three
        decimals. It keeps the default integer index, so position ``i``
        holds the coefficient for lag ``i + 1``.
    """
    n = len(series)
    data = np.asarray(series)
    mean = np.mean(data)
    # Lag-0 autocovariance; every coefficient is normalised by it.
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        """Return the unrounded autocorrelation coefficient at lag ``h``."""
        return ((data[:n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0

    # Start at lag 1: lag 0 is always 1.0 and would shift every coefficient
    # one position to the right when plotted against lags 1..n.
    lags = np.arange(n) + 1
    # A list (not ``map``) so the values are materialised on Python 3 too.
    acf_coeffs = pd.Series([r(h) for h in lags], dtype=float).round(decimals=3)
    # Adding 0 turns any -0.0 produced by rounding into a plain 0.0.
    acf_coeffs = acf_coeffs + 0
    return acf_coeffs
def significance(series):
    """Return the two reference-band heights for a series of this length.

    Args:
        series: Any sized object; only ``len(series)`` is used.

    Returns:
        A tuple ``(z95, z99)``: the constants 1.959963984540054 and
        2.5758293035489004, each divided by ``sqrt(len(series))``.
    """
    root_n = np.sqrt(len(series))
    critical_values = (1.959963984540054, 2.5758293035489004)
    return tuple(z / root_n for z in critical_values)
def bok_autocor(series):
    """Build a Bokeh figure plotting the autocorrelation of ``series``.

    The values returned by ``acf(series)`` are drawn against lag numbers
    ``1..len(series)``, together with a zero line and the positive and
    negative bands returned by ``significance(series)``.

    Args:
        series: The sized sequence passed on to ``significance`` and ``acf``.

    Returns:
        The Bokeh figure; the caller is responsible for showing it.
    """
    lags = pd.Series(range(1, len(series) + 1), dtype=float)
    z95, z99 = significance(series)
    coeffs = acf(series)

    p = figure(
        title='Time Series Auto-Correlation',
        x_axis_label="Lag",
        y_axis_label="Autocorrelation",
        plot_width=1000,
        plot_height=500,
    )

    # Horizontal reference lines, in drawing order.
    reference_lines = [
        (z99, {'line_dash': 'dashed', 'line_color': 'grey'}),
        (z95, {'line_color': 'grey'}),
        (0.0, {'line_color': 'black'}),
        (-z99, {'line_dash': 'dashed', 'line_color': 'grey'}),
        (-z95, {'line_color': 'grey'}),
    ]
    for level, style in reference_lines:
        p.line(lags, level, **style)

    # The autocorrelation curve goes on top of the reference lines.
    p.line(lags, coeffs, line_width=2)
    return p
# Hand ``series`` to bok_autocor through its ``.pipe`` method and display the
# figure that comes back. NOTE(review): presumably ``series`` is a pandas
# object, where ``series.pipe(f)`` is equivalent to ``f(series)`` -- confirm.
show(series.pipe(bok_autocor))
acf 函数是我在别处找到的(原文中的 "here" 是一个链接),据其说明它出自 pandas 的 autocorrelation_plot 函数,看起来与 Ayberk 使用的代码很接近。