I have a function in an optimization routine that runs very slowly. I previously had some small loops in the function (see here), and 3 iterations took roughly 15 minutes to run. After making the changes below and profiling the function, 3 iterations now take over an hour; removing the for loops actually increased the runtime. Summary of the biggest time consumers:
Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2703 3949.306    1.461 4299.286    1.591 foo.py:212(loc_probs)
    24387  230.388    0.009  230.390    0.009 {method 'reduce' of 'numpy.ufunc' objects}
     2704   75.005    0.028   75.005    0.028 {method 'dot' of 'numpy.ndarray' objects}
     2703   44.783    0.017   44.783    0.017 {built-in method numpy.core.multiarray.where}
     2703    6.897    0.003 4308.789    1.594 foo.py:258(bc_mod)
        3    3.600    1.200    3.600    1.200 {method 'execute' of 'psycopg2.extensions.cursor' objects}
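As an aside on methodology: listings in this format come from Python's built-in cProfile, sorted by internal time. A minimal sketch of reproducing one (foo.prof is a scratch-file name of my choosing; I assume the script is foo.py, as in the listing):

import pstats

# Generate the stats file first with: python -m cProfile -o foo.prof foo.py
stats = pstats.Stats('foo.prof')
stats.sort_stats('time').print_stats(10)  # 'time' = internal time, as in the header above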
And the full function:
def loc_probs(params):
    MU = np.multiply(arr_nest, params[-1 * NEST_LEN:])
    params[-1 * NEST_LEN] = 1
    # Get the utility function
    v = v_3d.dot(params[:-1*NEST_LEN])
    # Create a 3d matrix of records x alts x nests
    # Take exponential over the nests
    v_mu_3d = np.exp(v[:,:,None] * MU)
    # Component 1
    # Sum over the alts
    p1_3d = v_mu_3d / v_mu_3d.sum(axis=1)[:,None,:]
    # Component 2
    # Sum over the alts
    num = v_mu_3d.sum(axis=1)
    # Divide over alts by MU for each nest
    num = np.where(MU > 0, num[:, None, :] / MU, 0)
    # Sum over the nests
    p2_3d = num / num.sum(axis=2)[:, :, None]
    p = p1_3d * p2_3d
    p = p.sum(axis=2)
    # Pass a 2d matrix of probabilities to the main function (rows x alts)
    return p
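Reading the two components off the code (my own summary, not something stated in the original): writing V_i for the utility of alternative i and mu_ik for entry (i, k) of MU, each row of the returned matrix is

P(i \mid k) = \frac{e^{\mu_{ik} V_i}}{\sum_j e^{\mu_{jk} V_j}}, \qquad
n_{ik} = \begin{cases} \bigl(\sum_j e^{\mu_{jk} V_j}\bigr) / \mu_{ik}, & \mu_{ik} > 0, \\ 0, & \text{otherwise}, \end{cases} \qquad
p_i = \sum_k P(i \mid k)\, \frac{n_{ik}}{\sum_{k'} n_{ik'}}.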
As far as I can tell, this is vectorized in numpy and should run relatively fast (compared to an implementation using loops etc.). Under the hood, numpy appears to be doing something inefficient that I am not familiar with. I have also tried a for-loop approach in cython, but that gave me a time similar to my original implementation (~15 minutes). The actual optimization originally took 45 iterations to converge (about 6 hours), so 1 hour for 3 iterations is not acceptable.
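One thing worth quantifying (my own observation, using the shapes of the minimal program below): with about 20,738 records, 31 alternatives and 14 nests, every records x alts x nests array holds roughly 9 million float64 values (~72 MB), and the vectorized version allocates several such temporaries per call (the broadcasted product, its exponential, the masked divide, p1_3d, p2_3d, their product). A minimal sketch of fusing the hot line into one preallocated buffer (buf is my name, not part of the original code), reusing the names from the function above:

buf = np.empty((v_3d.shape[0], ALT_LEN, NEST_LEN))  # allocate once, before the optimizer runs

# Inside loc_probs, replacing v_mu_3d = np.exp(v[:,:,None] * MU):
np.multiply(v[:, :, None], MU, out=buf)  # broadcasted product written in place
np.exp(buf, out=buf)                     # exponential in place, no fresh temporary
v_mu_3d = buf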
Edit: here is the version with the for loops (note: MU is very sparse):
def loc_probs(params):
    MU_sp = sp_arr_nest.multiply(params[-1 * NEST_LEN:])
    MU = MU_sp.todense()
    # Fix first nest parameter
    params[-1 * NEST_LEN] = 1
    # Get the utility function
    v = v_3d.dot(params[:-1*NEST_LEN])
    # Create a 3d matrix of records x alts x nests
    v_mu_3d = np.zeros((v.shape[0], ALT_LEN, NEST_LEN))
    for k in range(NEST_LEN):
        v_mu_3d[:,:,k] = v * MU[:,k]
    v_mu_3d = np.exp(v_mu_3d)
    # Component 1
    p1_3d = np.zeros((v.shape[0], ALT_LEN, NEST_LEN))
    for k in range(NEST_LEN):
        num = v_mu_3d[:,:,k]
        denom = v_mu_3d[:,:,k].sum(axis=1)
        denom = denom[:,np.newaxis]
        p1_3d[:, :, k] = num / denom
    # Component 2
    p2_3d = np.zeros((v.shape[0], ALT_LEN, NEST_LEN))
    for j in range(ALT_LEN):
        num = v_mu_3d[:,:,:].sum(axis=1)
        num = np.log(num)
        temp_MU = MU[j,:]
        # ne is numexpr: fused where(cond, a, b) over the nest dimension
        num = ne.evaluate('where(temp_MU > 0, num / temp_MU, 0)')
        denom = num.sum(axis=1)
        denom = denom[:, np.newaxis]
        p2_3d[:, j, :] = num / denom
    p = p1_3d * p2_3d
    p = p.sum(axis=2)
    # Pass a 2d matrix of probabilities to the main function (rows x alts)
    return p
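For context, the ne above is presumably numexpr. A self-contained sketch of the masked divide it performs, with made-up shapes standing in for the real ones:

import numexpr as ne
import numpy as np

num = np.random.rand(1000, 14)   # stand-in for the per-record nest sums
temp_MU = np.random.rand(1, 14)
temp_MU[0, ::2] = 0              # sparse, like MU above

# Plain numpy evaluates num / temp_MU for every element first
# (warning about divide-by-zero at the zeros) and then masks:
out_np = np.where(temp_MU > 0, num / temp_MU, 0)

# numexpr compiles the whole expression and evaluates it in one fused,
# blocked (and multithreaded) pass, without the full-size temporary:
out_ne = ne.evaluate('where(temp_MU > 0, num / temp_MU, 0)')

assert np.allclose(out_np, out_ne)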
And here is the profile for fetching the input data from the database and calling the loc_probs() function once on a static set of parameters:
Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        3    3.581    1.194    3.581    1.194 {method 'execute' of 'psycopg2.extensions.cursor' objects}
        1    1.502    1.502    1.656    1.656 foo.py:212(loc_probs)
        3    0.867    0.289    0.867    0.289 {method 'fetchall' of 'psycopg2.extensions.cursor' objects}
      200    0.579    0.003    0.579    0.003 {built-in method numpy.core.multiarray.array}
      817    0.127    0.000    0.127    0.000 {built-in method marshal.loads}
       19    0.098    0.005    0.098    0.005 {method 'reduce' of 'numpy.ufunc' objects}
        1    0.074    0.074    7.811    7.811 foo.py:10(<module>)
  140/110    0.070    0.001    0.102    0.001 {built-in method _imp.create_dynamic}
2605/2561    0.055    0.000    0.176    0.000 {built-in method builtins.__build_class__}
        1    0.055    0.055    0.055    0.055 {built-in method numpy.core.multiarray.concatenate}
   1249/1    0.052    0.000    7.811    7.811 {built-in method builtins.exec}
Here is a minimal program (limited to 1 iteration by 'maxiter': 1):
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy import sparse
HEAD_LEN = 4
NAICS_LEN = 9
dat_ASC = np.random.randint(1,32,size=(642878))
dummyASC = pd.get_dummies(dat_ASC, drop_first=True).values
ASC_LEN = dummyASC.shape[1]
ALT_LEN = ASC_LEN + 1
chosen = np.random.randint(0,2,size=(642878))
chosen = chosen.reshape((-1, ALT_LEN))
chosen_spar = sparse.csr_matrix(chosen)
dat_CT = np.random.randint(0,6,size=(642878))
VAR_LEN = 1
dat_SALES = np.random.randint(1,100,size=(642878))
VAR_LEN += 1
dat_AREA = np.random.randint(0,100,size=(642878,1))
dat_NAICS = np.random.randint(1,5,size=(642878))
dummyNAICS = pd.get_dummies(dat_NAICS, drop_first=True).values
dummyNAICS = np.multiply(dummyNAICS, dat_AREA)
DUM_LEN = dummyNAICS.shape[1]
VAR_LEN += DUM_LEN
# Params: 30 ASC coefficients (one per alt, minus the base), the 5 variable
# coefficients, then the 14 nest parameters (starting values of 1)
params = np.array((0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                   0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1))
# Define the nests as a matrix to be multiplied by the utility matrix (alts x nest alts)
NEST_LEN = 14
arr_nest = np.random.randint(0,2,size=(ALT_LEN, NEST_LEN))
# Build the utility function
v_ASC = dummyASC.reshape((-1, ALT_LEN, dummyASC.shape[1]))
v_CT = dat_CT.reshape((-1, ALT_LEN, 1))
v_SALES = dat_SALES.reshape((-1, ALT_LEN, 1))
v_NAICS = dummyNAICS.reshape((-1, ALT_LEN, dummyNAICS.shape[1]))
# Create a 3d matrix of records x alts x attributes
v_3d = np.concatenate((v_ASC, v_CT, v_SALES, v_NAICS), axis=2)
def loc_probs(params):
    MU = np.multiply(arr_nest, params[-1 * NEST_LEN:])
    params[-1 * NEST_LEN] = 1
    # Get the utility function
    v = v_3d.dot(params[:-1*NEST_LEN])
    # Take exponential over the nests
    v_mu_3d = np.exp(v[:,:,None] * MU)
    # Component 1
    # Sum over the alts
    p1_3d = v_mu_3d / v_mu_3d.sum(axis=1)[:,None,:]
    # Component 2
    # Sum over the alts
    num = v_mu_3d.sum(axis=1)
    # Divide over alts by MU for each nest
    num = np.where(MU > 0, num[:, None, :] / MU, 0)
    # Sum over the nests
    p2_3d = num / num.sum(axis=2)[:, :, None]
    p = p1_3d * p2_3d
    p = p.sum(axis=2)
    # Pass a 2d matrix of probabilities to the main function (rows x alts)
    return p
def bc_mod(params):
    LOC_PROB = loc_probs(params)
    LP_BR = np.log(LOC_PROB)
    llfun = chosen_spar.multiply(LP_BR)
    llfun = llfun.sum()
    return -1 * llfun
# Run minimization
res = minimize(bc_mod, params, method='BFGS', options = {'maxiter':1,'disp': True})
print(res.x)
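To time loc_probs in isolation from the optimizer, a small benchmark can be appended (my own sketch, not part of the original program; astype(float) hands the function a private copy, since loc_probs writes into params in place):

import timeit

t = timeit.timeit(lambda: loc_probs(params.astype(float)), number=3)
print('3 standalone calls to loc_probs: %.1f s' % t)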