Question

我试图使用数据扩充来做一些贝叶斯算子代码。如果我循环输出矩阵的行，我可以让它工作，但我想对它进行矢量化并一次性完成所有操作（可能更快）。

import numpy as np
from numpy import random
import statsmodels.api as sm
from scipy import stats
from scipy.stats import norm, truncnorm
##################################
### Create some simulated data ###
num_leg = 50
num_bills = 20

a = np.random.uniform(-1,1,num_bills).reshape(num_bills, 1)
b = np.random.uniform(-2,2,num_bills).reshape(num_bills, 1)
x = np.random.standard_normal(num_leg).reshape(num_leg, 1)
ystar_base = a + np.dot(b,x.T)
epsilon = np.random.standard_normal(num_leg * num_bills).reshape(num_bills, num_leg)
ystar = ystar_base + epsilon
y = 1*(ystar >0)

### Initialize some stuff I need ###
avec = [0]*num_bills # These are bill parameters
bvec = [0]*num_bills
betavec = [np.matrix(zip(avec,bvec))]
xvec = [0]*num_leg # these are legislator parameters 
_ones = np.ones(num_leg)

def init_y(mat): # initialize a latent y matrix
    if mat==1: return truncnorm.rvs(0,10000)
    else: return truncnorm.rvs(-10000,0)

vectorize_y = np.vectorize(init_y)
latent_y = np.matrix(vectorize_y(y))

burn = 500 # How long to run the MCMC
runs = 500

### define the functions ###
def sample_params(xnow,ynow): # This is the function I'd like to vectorize
    if type(xnow) == list:
        xnow = np.array(xnow)
    if type(ynow) == list:
        ynow = np.array(ynow)

    ynow = ynow.T  #reshape(ynow.shape[0],1)
    sigma = np.linalg.inv(np.dot(xnow.T,xnow)) ###This is the line that produces an error###
    xy = np.dot(xnow.T,ynow)
    mu = np.dot(sigma, xy) # this is just (x'x)inv x'y
    return np.random.multivariate_normal(np.array(mu).flatten(), sigma)

vecparams = np.vectorize(sample_params)

def get_mu(xnow, bnow): # getting the updated mean to draw the latent ys
    if type(xnow) == list:
        xnow = np.array(xnow)
    if type(bnow) == list:
        bnow = np.array(bnow)
    mu = np.dot(xnow,bnow.T)
    mu = np.matrix(mu)
    return mu

def sample_y(mu, ynow): # generate latent y matrix
    if ynow==1: 
        a, b = (0 - mu),(10000-mu)
    else: 
        a, b = (-10000 - mu),(0-mu)
    return truncnorm.rvs(a,b)

vector_sample = np.vectorize(sample_y) # I'd like to be able to do something like this

### Here's the MCMC loop with the internal loop over rows(bills)
for i in range(burn+runs):
    this_beta = []
    this_x = []
    this_y = []
    for j in range(num_bills): #I'd like to get rid of this loop
        ex = zip(x_ones, x)
        newbeta = sample_params(ex, latent_y[j])
        this_beta.append(newbeta)
    #ex = np.array(zip(x_ones, x))
    #this_beta = vecparams(ex, latent_y[:,]) # and call the vectorized function here
    betavec.append(this_beta)

    #Note, I can vectorize the latent outputs easily enough here
    mean = get_mu(ex, betavec[-1])
    latent_y = np.matrix(vector_sample(mean, np.matrix(y).T).T.reshape(latent_y.shape[0], latent_y.shape[1]))

### Now a bit of code to check to see if I've recovered what I want ###
test_beta = [zip(*(z)) for z in betavec[burn:]]
test_a = np.array([z[0] for z in test_beta])
test_b = np.array([z[1] for z in test_beta])

amean = test_a.sum(axis = 0)/float(runs)
bmean = test_b.sum(axis = 0)/float(runs)

print 'a mean'
print np.corrcoef([amean, np.array(a)])
print
print 'b mean'
print np.corrcoef([bmean, np.array(b)])

如果我注释掉循环并使用上面注释掉的行，我会在前面指出的行（定义sigma的行）中收到以下错误：

LinAlgError: 0-dimensional array given. Array must be at least two-dimensional

使用numpy矢量化

0 个答案: