我正在用 Python 做一些模拟。下面是我的代码:首先设定模拟参数并计算各阶矩(协方差、期望收益和 beta),随后定义函数 gen_data:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.sandbox.distributions import multivariate as mt
# Simulation setup: draws the population moments (covariances, expected
# returns and betas) once at module level. The function gen_data defined
# further down reads Sigmah1, eta1, Sigmau, Er, betag1, betah1, Ch2 and Sigmar
# from here. Random draws are made in a fixed order, so seeding np.random
# beforehand makes the whole setup reproducible.

# Simulation parameters
n = 100  # number of assets
T = 360  # time-series size
p = 100  # number of controls
b = 4  # number of relevant controls

# Risk price of newly proposed factor
lambdag1 = 0.5
# Other risk prices (b x 1)
lambdah1 = 0.5 * np.random.multivariate_normal(
    np.zeros(b), cov=0.1 * np.identity(b)
).reshape((b, 1))
# Zero-beta rate
gamma_0 = 0.5

# Simulating moments (covariances, expected returns and betas)
# Cross-sectional residuals (n x 1)
Ce1 = 0.5 * np.random.multivariate_normal(
    mean=np.zeros(n), cov=np.identity(n)
).reshape((n, 1))
# Ch1 (n x b)
Ch1 = 0.1 * np.random.multivariate_normal(
    mean=np.zeros(n), cov=np.identity(n), size=b
).T
# Cg1 (n x 1): common level xi1 + loading on Ch1 + residual
xi1 = 0.5 * np.random.normal()
chi1 = 0.5 * np.random.multivariate_normal(
    mean=np.zeros(b), cov=np.identity(b)
).reshape((1, b))
Cg1 = xi1 * np.ones((n, 1)) + Ch1.dot(chi1.T) + Ce1
# Cz1 (n x 1)
eta1 = np.random.multivariate_normal(
    np.zeros(b), cov=np.identity(b)
).reshape((1, b))
Cz1 = Cg1 - Ch1.dot(eta1.T)

# Cross-section of expected returns (n x 1)
Er = gamma_0 * np.ones((n, 1)) + Cg1 * lambdag1 + Ch1.dot(lambdah1)

# Computing the betas
# Scalar used as the variance of the shocks z1 when deriving betag1.
# NOTE(review): gen_data draws z1 with an identity covariance (unit variance)
# while 0.5 is used here -- confirm this mismatch is intended.
rhoz1 = 0.5
# Covariance matrix of the important controls. Some correlation is desired to
# challenge the LASSO (too much "independence" would be unrealistic), so a
# non-random matrix is used: Sigmah1[i, j] = 0.5 * rhoh1 ** |i - j|.
rhoh1 = 0.5
_idx_b = np.arange(b)
Sigmah1 = 0.5 * rhoh1 ** np.abs(np.subtract.outer(_idx_b, _idx_b))
Sigmah1_inv = np.linalg.inv(Sigmah1)
# Use orthogonality conditions to derive the betas
betag1 = Cz1 / rhoz1
betah1 = Ch1.dot(Sigmah1_inv) - betag1.dot(eta1)

# Creating Sigmau (n x n): Sigmau[i, j] = rhou ** |i - j|
rhou = 0.5
_idx_n = np.arange(n)
Sigmau = rhou ** np.abs(np.subtract.outer(_idx_n, _idx_n))

# Defining covariances for h2
theta0 = np.zeros((1, p - b))
red_fac = int(np.floor(0.5 * (p - b)))  # number of redundant factors in h2
# theta1 is (b + 1) x (p - b): random loadings for the first red_fac columns,
# zeros for the remaining ones.
theta1 = 0.5 * np.hstack((
    np.random.multivariate_normal(
        mean=np.zeros(b + 1), cov=np.identity(b + 1), size=red_fac
    ).T,
    np.zeros((b + 1, p - b - red_fac)),
))
Cepsilon = np.random.multivariate_normal(
    mean=np.zeros(p - b), cov=0.5 * np.identity(p - b), size=n
)
# The 0.1 factor shrinks the covariances so that magnitudes agree. This is
# important for LASSO.
Ch2 = 0.1 * (np.ones((n, 1)).dot(theta0) + np.hstack((Cg1, Ch1)).dot(theta1) + Cepsilon)

# Implied covariance matrix for returns (n x n)
Sigmar = betag1.dot(Cz1.T) + Ch1.dot(Sigmah1_inv).dot(Ch1.T) + Sigmau
phi = np.linalg.inv(Sigmar).dot(Ch2)
现在我调用它,检查一切是否正常:
def gen_data(n, p, T, b):
    """Simulate one sample of returns and factors from the precomputed moments.

    The function reads the module-level objects Sigmah1, eta1, Sigmau, Er,
    betag1, betah1, Ch2 and Sigmar, so the arguments must be the same
    dimensions that were used to build them. In particular ``b`` must be an
    integer: unpacking the result into a variable named ``b`` at the call site
    rebinds it to an array and makes a later call fail.

    Args:
        n: number of assets.
        p: total number of controls (factors in H).
        T: number of time periods.
        b: number of relevant controls.

    Returns:
        A tuple ``(R, G, H)``, in that order:
        R (n x T): matrix of returns for n assets and T periods.
        G (1 x T): evolution of the newly proposed factor over T periods.
        H (p x T): matrix of p factors over T periods (H1 stacked on H2).
    """
    # Important controls (b x T)
    H1 = np.random.multivariate_normal(mean=np.zeros(b), cov=Sigmah1, size=T).T
    # Shocks z1t (1 x T) and the factor g_t stacked in G
    Z1 = np.random.multivariate_normal(mean=np.zeros(T), cov=np.identity(T)).reshape(1, T)
    G = eta1.dot(H1) + Z1
    # Return shocks, drawn from a multivariate Student-t with 5 degrees of freedom.
    # Reference for multivariate Student-t: https://github.com/statsmodels/statsmodels/blob/master/statsmodels/sandbox/distributions/multivariate.py
    U = mt.multivariate_t_rvs(m=np.zeros(n), S=Sigmau, df=5, n=T).T
    # Expected returns repeated across the T periods (computed once, used twice)
    expected = Er.dot(np.ones((1, T)))
    # Finally simulating the returns
    R = expected + betag1.dot(G) + betah1.dot(H1) + U
    # Other factors: projection of demeaned returns plus standard normal noise
    noise = np.random.multivariate_normal(
        mean=np.zeros(p - b), cov=np.identity(p - b), size=T
    ).T
    H2 = Ch2.T.dot(np.linalg.inv(Sigmar)).dot(R - expected) + noise
    H = np.vstack((H1, H2))
    return R, G, H
到目前为止一切顺利:所有输出的维度都符合预期。但是,当我再次运行上述调用时,就开始报错。具体来说,执行下面这行代码会出现错误:
# NOTE: gen_data returns (R, G, H); unpacking into `b` rebinds the global b
# (originally 4) to the array G, so running this line a second time passes an
# array instead of an integer as the last argument.
a, b, c = gen_data(n, p, T, b)
首先,我看不懂这条错误消息。其次,我不明白为什么它第一次运行正常,第二次却失败了,这说不通。如果这个问题很蠢,请见谅,我是 Python 新手。有什么想法吗?先谢谢了!
答案 0 :(得分:0)
a, b, c = gen_data(n, p, T, b)
运行这行代码后,`b` 就失去了原来的值 4,因为你把函数中间的那个返回值(即 `G`)赋给了它。
因此,问题大概在于:函数第二次运行时,`b` 已经是一个不合适的值(一个数组,而不是整数 4)。(我不了解 numpy,所以无法具体解释那条错误消息。)解决办法是用不与参数重名的变量来接收返回值,例如 `R, G, H = gen_data(n, p, T, b)`。