使用statsmodels拟合向量自回归(VAR)模型

时间:2018-01-25 12:06:40

标签: python statsmodels

考虑以下信号和响应:

import numpy as np


def s1(t, delay):
    """Return a noisy unit step that switches on strictly after `delay`.

    `t` must be a NumPy array (its `.shape` is used to size the noise).
    The result is 0 where `t <= delay` and 1 where `t > delay`, plus
    Gaussian noise with standard deviation 0.01 drawn from NumPy's global
    random state.
    """
    step = (t > delay).astype(int)
    noise = np.random.normal(scale=0.01, size=t.shape)
    return step + noise


def s2(t, delay):
    """Return a noisy sine wave of period 36 that starts after `delay`.

    `t` must be a NumPy array (its `.shape` is used to size the noise).
    The sine is phase-shifted so that it starts from zero at `t == delay`
    and is gated to zero wherever `t <= delay`. Gaussian noise with
    standard deviation 0.01 (NumPy's global random state) is added on top.
    """
    phase = 2 * np.pi * (t - delay) / 36
    gate = (t > delay).astype(int)
    noise = np.random.normal(scale=0.01, size=t.shape)
    return np.sin(phase) * gate + noise


def response(signal, delay):
    """Return a noisy copy of `signal` shifted right by `delay` samples.

    The first `delay` samples of the result are zero (no past signal means
    no response) and the remainder is the start of `signal`. Gaussian noise
    with standard deviation 0.01 (NumPy's global random state) is added.

    Parameters:
        signal: 1-D array-like time series.
        delay: non-negative integer number of samples to shift by. A delay
            of 0 returns the signal itself plus noise; a delay of at least
            `len(signal)` returns pure noise around zero.

    Returns:
        A float array with the same length as `signal`.

    Raises:
        ValueError: if `delay` is negative.
    """
    signal = np.asarray(signal)
    if delay < 0:
        raise ValueError('delay must be non-negative, got %r' % (delay,))

    length = len(signal)
    # Clamp so the padded result never grows longer than the input.
    shift = min(delay, length)
    # Slice with an explicit end index: `signal[:-delay]` would be empty
    # for delay == 0 instead of being the whole signal.
    delayed_signal = np.append(np.zeros(shape=shift), signal[:length - shift])
    return delayed_signal + np.random.normal(scale=0.01, size=signal.shape)


# Sample both input signals on a unit-spaced grid of 256 time steps.
t = np.arange(256)
x1 = s1(t, delay=12)
x2 = s2(t, delay=36)
# The response is the sum of the two signals, each with its own delay.
y = response(x1, delay=24) + response(x2, delay=48)

# Fill the top three rows of a five-row figure: both inputs and the response.
# NOTE(review): `plt` is not imported anywhere in this snippet; it is
# presumably `matplotlib.pyplot` -- confirm the import exists in the real file.
plt.figure(1, figsize=(10, 10))
for panel, label, series in (
        (511, 'signal 1', x1),
        (512, 'signal 2', x2),
        (513, 'response', y),
):
    plt.subplot(panel)
    plt.ylabel(label)
    plt.plot(t, series, '.')

我想使用向量自回归(VAR)和Python来恢复以下关系:信号`y`对`x1`的延迟响应为24,对`x2`的延迟响应为48。我希望用Lasso来减少相关参数的数量,并且我不想把`x1`和`x2`建模为它们自身和`y`的函数,只需把`y`建模为`x1`和`x2`的函数。

如何在Python中完成?

1 个答案:

答案 0 :(得分:0)

我能够使用sklearn实现这一点,如下所示(继续上面的代码):

def build_matrix(x):
    """
    Expand a 1-D signal into a square matrix of its own past values.

    Row `t` holds the samples observed strictly before time `t`, right-aligned
    and left-padded with zeros (no past signal is treated as zero signal).
    For a signal of length `n`, column `j` of row `t` is therefore `x[t - (n - j)]`
    when that index exists and 0 otherwise.

    For example, for 3 times the matrix is:
    ```
    [0, 0   , 0   ] (-> y[0])
    [0, 0   , x[0]] (-> y[1])
    [0, x[0], x[1]] (-> y[2])
    ```

    So a coefficient fitted to the last column measures the influence of
    `x[t - 1]` on `y[t]`, the one before it the influence of `x[t - 2]`, and
    so on. There is no column for `x[t]` itself: the current sample is never
    used to explain `y[t]`.
    """
    n_samples = len(x)
    rows = [
        np.append(np.zeros(n_samples - n_past), x[:n_past])
        for n_past in range(n_samples)
    ]
    return np.array(rows)


class VARClassifier:
    """
    Wrap a linear estimator so it is fitted on lagged copies of several signals.

    Each input signal is expanded with `build_matrix` into a matrix of its
    past values; the matrices are stacked side by side and handed to the
    wrapped estimator. The fitted coefficients can then be read back, one
    block per signal, through `response_functions`.

    Attributes not defined on this wrapper are looked up on the wrapped
    estimator.
    """
    def __init__(self, classifier):
        # The wrapped estimator; it must provide `fit`, `predict` and,
        # once fitted, `coef_`.
        self.classifier = classifier
        # Both are set by `fit`.
        self.number_of_signals_ = None
        self.time_len_ = None

    def _transform_x(self, x):
        """Turn a sequence of 1-D signals into one design matrix.

        Every signal must have the length seen in `fit`. The result has one
        row per time step and `time_len_` columns per signal.
        """
        for x_i in x:
            assert len(x_i) == self.time_len_, \
                'Each of the elements must have the length used in `fit`'
            assert len(x_i.shape) == 1, 'Each of the elements must be a time-series (1D)'
        return np.concatenate(tuple(build_matrix(x_i) for x_i in x), axis=1)

    def fit(self, x, y):
        """Fit the wrapped estimator on the lagged signals `x` against `y`.

        `x` is a sequence of equally long 1-D arrays. Returns whatever the
        wrapped estimator's `fit` returns.
        """
        self.number_of_signals_ = len(x)
        self.time_len_ = len(x[0])
        return self.classifier.fit(self._transform_x(x), y)

    def predict(self, x):
        """Predict with the wrapped estimator from the lagged signals `x`."""
        return self.classifier.predict(self._transform_x(x))

    @property
    def response_functions(self):
        """Per-signal fitted coefficients, ordered by increasing lag.

        Entry `k` of each returned array is the coefficient of the sample
        `k + 1` steps in the past (see `build_matrix`: the last column is
        lag 1 and there is no lag-0 column).
        NOTE(review): assumes `coef_` is 1-D with
        `number_of_signals_ * time_len_` entries -- confirm for the
        estimator in use.
        """
        # ::-1 because the coefficients are reversed, see `build_matrix`
        return [self.classifier.coef_[i*self.time_len_:(i+1)*self.time_len_][::-1]
                for i in range(self.number_of_signals_)]

    def __getattr__(self, item):
        """Delegate unknown attributes to the wrapped estimator.

        Only called when normal lookup fails. `classifier` itself and dunder
        names are never delegated: looking up `self.classifier` here while
        it is unset (e.g. during copy/unpickle) would recurse forever.
        """
        if item == 'classifier' or (item.startswith('__') and item.endswith('__')):
            raise AttributeError(item)
        # `getattr` works for any object; calling `classifier.__getattr__`
        # directly fails unless the estimator happens to define that hook.
        return getattr(self.classifier, item)

# Fit a Lasso-regularised model of `y` on the lagged copies of x1 and x2.
# NOTE(review): `Lasso` and `plt` are not imported anywhere in this snippet;
# presumably `sklearn.linear_model.Lasso` and `matplotlib.pyplot` -- confirm
# the imports exist in the real file.
classifier = VARClassifier(Lasso(alpha=0.1))
classifier.fit((x1, x2), y)

r1, r2 = classifier.response_functions

# Entry k of a response function is the coefficient for the sample k + 1
# steps in the past (`build_matrix` has no lag-0 column), so the delay axis
# starts at 1. Plotting against `t` (which starts at 0) would show every
# peak one step too early.
delays = np.arange(1, len(r1) + 1)

plt.subplot(514)
plt.xlabel('delay')
plt.ylabel('fitted response\nfunction 1')
plt.plot(delays, r1, '.')

plt.subplot(515)
plt.xlabel('delay')
plt.ylabel('fitted response\nfunction 2')
plt.plot(delays, r2, '.')

plt.savefig('fit_var.png')

然而,这种做法无法获得statsmodels所提供的各种好处(例如置信区间、p值、模型指标等)。