I have a function in an optimization routine that runs very slowly. I previously had some small loops in the function (see here), and 3 iterations took roughly 15 minutes to run. After making the changes below and profiling the function, 3 iterations now take over an hour; removing the for loops actually increased the runtime. Summary of the biggest time consumers:
Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2703 3949.306    1.461 4299.286    1.591 foo.py:212(loc_probs)
    24387  230.388    0.009  230.390    0.009 {method 'reduce' of 'numpy.ufunc' objects}
     2704   75.005    0.028   75.005    0.028 {method 'dot' of 'numpy.ndarray' objects}
     2703   44.783    0.017   44.783    0.017 {built-in method numpy.core.multiarray.where}
     2703    6.897    0.003 4308.789    1.594 foo.py:258(bc_mod)
        3    3.600    1.200    3.600    1.200 {method 'execute' of 'psycopg2.extensions.cursor' objects}
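As an aside on methodology: listings in this format come from Python's built-in cProfile, sorted by internal time. A minimal sketch of reproducing one (foo.prof is a scratch-file name of my choosing; I assume the script is foo.py, as in the listing):

import pstats

# Generate the stats file first with: python -m cProfile -o foo.prof foo.py
stats = pstats.Stats('foo.prof')
stats.sort_stats('time').print_stats(10)  # 'time' = internal time, as in the header above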
And the full function:
def loc_probs(params):
    MU = np.multiply(arr_nest, params[-1 * NEST_LEN:])
    params[-1 * NEST_LEN] = 1
    # Get the utility function
    v = v_3d.dot(params[:-1*NEST_LEN])
    # Create a 3d matrix of records x alts x nests
    # Take exponential over the nests
    v_mu_3d = np.exp(v[:,:,None] * MU)
    # Component 1
    # Sum over the alts
    p1_3d = v_mu_3d / v_mu_3d.sum(axis=1)[:,None,:]
    # Component 2
    # Sum over the alts
    num = v_mu_3d.sum(axis=1)
    # Divide over alts by MU for each nest
    num = np.where(MU > 0, num[:, None, :] / MU, 0)
    # Sum over the nests
    p2_3d = num / num.sum(axis=2)[:, :, None]
    p = p1_3d * p2_3d
    p = p.sum(axis=2)
    # Pass a 2d matrix of probabilities to the main function (rows x alts)
    return p
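Reading the two components off the code (my own summary, not something stated in the original): writing V_i for the utility of alternative i and mu_ik for entry (i, k) of MU, each row of the returned matrix is

P(i \mid k) = \frac{e^{\mu_{ik} V_i}}{\sum_j e^{\mu_{jk} V_j}}, \qquad
n_{ik} = \begin{cases} \bigl(\sum_j e^{\mu_{jk} V_j}\bigr) / \mu_{ik}, & \mu_{ik} > 0, \\ 0, & \text{otherwise}, \end{cases} \qquad
p_i = \sum_k P(i \mid k)\, \frac{n_{ik}}{\sum_{k'} n_{ik'}}.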
As far as I can tell, this is vectorized in numpy and should run relatively fast (compared to an implementation using loops etc.). Under the hood, numpy appears to be doing something inefficient that I am not familiar with. I have also tried a for-loop approach in cython, but that gave me a time similar to my original implementation (~15 minutes). The actual optimization originally took 45 iterations to converge (about 6 hours), so 1 hour for 3 iterations is not acceptable.
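One thing worth quantifying (my own observation, using the shapes of the minimal program below): with about 20,738 records, 31 alternatives and 14 nests, every records x alts x nests array holds roughly 9 million float64 values (~72 MB), and the vectorized version allocates several such temporaries per call (the broadcasted product, its exponential, the masked divide, p1_3d, p2_3d, their product). A minimal sketch of fusing the hot line into one preallocated buffer (buf is my name, not part of the original code), reusing the names from the function above:

buf = np.empty((v_3d.shape[0], ALT_LEN, NEST_LEN))  # allocate once, before the optimizer runs

# Inside loc_probs, replacing v_mu_3d = np.exp(v[:,:,None] * MU):
np.multiply(v[:, :, None], MU, out=buf)  # broadcasted product written in place
np.exp(buf, out=buf)                     # exponential in place, no fresh temporary
v_mu_3d = buf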
Edit: here is the version with the for loops (note: MU is very sparse):
def loc_probs(params):
    MU_sp = sp_arr_nest.multiply(params[-1 * NEST_LEN:])
    MU = MU_sp.todense()
    # Fix first nest parameter
    params[-1 * NEST_LEN] = 1
    # Get the utility function
    v = v_3d.dot(params[:-1*NEST_LEN])
    # Create a 3d matrix of records x alts x nests
    v_mu_3d = np.zeros((v.shape[0], ALT_LEN, NEST_LEN))
    for k in range(NEST_LEN):
        v_mu_3d[:,:,k] = v * MU[:,k]
    v_mu_3d = np.exp(v_mu_3d)
    # Component 1
    p1_3d = np.zeros((v.shape[0], ALT_LEN, NEST_LEN))
    for k in range(NEST_LEN):
        num = v_mu_3d[:,:,k]
        denom = v_mu_3d[:,:,k].sum(axis=1)
        denom = denom[:,np.newaxis]
        p1_3d[:, :, k] = num / denom
    # Component 2
    p2_3d = np.zeros((v.shape[0], ALT_LEN, NEST_LEN))
    for j in range(ALT_LEN):
        num = v_mu_3d[:,:,:].sum(axis=1)
        num = np.log(num)
        temp_MU = MU[j,:]
        # ne is numexpr: fused where(cond, a, b) over the nest dimension
        num = ne.evaluate('where(temp_MU > 0, num / temp_MU, 0)')
        denom = num.sum(axis=1)
        denom = denom[:, np.newaxis]
        p2_3d[:, j, :] = num / denom
    p = p1_3d * p2_3d
    p = p.sum(axis=2)
    # Pass a 2d matrix of probabilities to the main function (rows x alts)
    return p
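For context, the ne above is presumably numexpr. A self-contained sketch of the masked divide it performs, with made-up shapes standing in for the real ones:

import numexpr as ne
import numpy as np

num = np.random.rand(1000, 14)   # stand-in for the per-record nest sums
temp_MU = np.random.rand(1, 14)
temp_MU[0, ::2] = 0              # sparse, like MU above

# Plain numpy evaluates num / temp_MU for every element first
# (warning about divide-by-zero at the zeros) and then masks:
out_np = np.where(temp_MU > 0, num / temp_MU, 0)

# numexpr compiles the whole expression and evaluates it in one fused,
# blocked (and multithreaded) pass, without the full-size temporary:
out_ne = ne.evaluate('where(temp_MU > 0, num / temp_MU, 0)')

assert np.allclose(out_np, out_ne)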
And here is the profile for fetching the input data from the database and calling the loc_probs() function once on a static set of parameters:
Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        3    3.581    1.194    3.581    1.194 {method 'execute' of 'psycopg2.extensions.cursor' objects}
        1    1.502    1.502    1.656    1.656 foo.py:212(loc_probs)
        3    0.867    0.289    0.867    0.289 {method 'fetchall' of 'psycopg2.extensions.cursor' objects}
      200    0.579    0.003    0.579    0.003 {built-in method numpy.core.multiarray.array}
      817    0.127    0.000    0.127    0.000 {built-in method marshal.loads}
       19    0.098    0.005    0.098    0.005 {method 'reduce' of 'numpy.ufunc' objects}
        1    0.074    0.074    7.811    7.811 foo.py:10(<module>)
  140/110    0.070    0.001    0.102    0.001 {built-in method _imp.create_dynamic}
2605/2561    0.055    0.000    0.176    0.000 {built-in method builtins.__build_class__}
        1    0.055    0.055    0.055    0.055 {built-in method numpy.core.multiarray.concatenate}
   1249/1    0.052    0.000    7.811    7.811 {built-in method builtins.exec}
Here is a minimal program (limited to 1 iteration by 'maxiter': 1):
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy import sparse
HEAD_LEN = 4
NAICS_LEN = 9
dat_ASC = np.random.randint(1,32,size=(642878))
dummyASC = pd.get_dummies(dat_ASC, drop_first=True).values
ASC_LEN = dummyASC.shape[1]
ALT_LEN = ASC_LEN + 1
chosen = np.random.randint(0,2,size=(642878))
chosen = chosen.reshape((-1, ALT_LEN))
chosen_spar = sparse.csr_matrix(chosen)
dat_CT = np.random.randint(0,6,size=(642878))
VAR_LEN = 1
dat_SALES = np.random.randint(1,100,size=(642878))
VAR_LEN += 1
dat_AREA = np.random.randint(0,100,size=(642878,1))
dat_NAICS = np.random.randint(1,5,size=(642878))
dummyNAICS = pd.get_dummies(dat_NAICS, drop_first=True).values
dummyNAICS = np.multiply(dummyNAICS, dat_AREA)
DUM_LEN = dummyNAICS.shape[1]
VAR_LEN += DUM_LEN
# Params: 30 ASC coefficients (one per alt, minus the base), the 5 variable
# coefficients, then the 14 nest parameters (starting values of 1)
params = np.array((0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                   0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1))
# Define the nests as a matrix to be multiplied by the utility matrix (alts x nest alts)
NEST_LEN = 14
arr_nest = np.random.randint(0,2,size=(ALT_LEN, NEST_LEN))
# Build the utility function
v_ASC = dummyASC.reshape((-1, ALT_LEN, dummyASC.shape[1]))
v_CT = dat_CT.reshape((-1, ALT_LEN, 1))
v_SALES = dat_SALES.reshape((-1, ALT_LEN, 1))
v_NAICS = dummyNAICS.reshape((-1, ALT_LEN, dummyNAICS.shape[1]))
# Create a 3d matrix of records x alts x attributes
v_3d = np.concatenate((v_ASC, v_CT, v_SALES, v_NAICS), axis=2)
def loc_probs(params):
    MU = np.multiply(arr_nest, params[-1 * NEST_LEN:])
    params[-1 * NEST_LEN] = 1
    # Get the utility function
    v = v_3d.dot(params[:-1*NEST_LEN])
    # Take exponential over the nests
    v_mu_3d = np.exp(v[:,:,None] * MU)
    # Component 1
    # Sum over the alts
    p1_3d = v_mu_3d / v_mu_3d.sum(axis=1)[:,None,:]
    # Component 2
    # Sum over the alts
    num = v_mu_3d.sum(axis=1)
    # Divide over alts by MU for each nest
    num = np.where(MU > 0, num[:, None, :] / MU, 0)
    # Sum over the nests
    p2_3d = num / num.sum(axis=2)[:, :, None]
    p = p1_3d * p2_3d
    p = p.sum(axis=2)
    # Pass a 2d matrix of probabilities to the main function (rows x alts)
    return p
def bc_mod(params):
    LOC_PROB = loc_probs(params)
    LP_BR = np.log(LOC_PROB)
    llfun = chosen_spar.multiply(LP_BR)
    llfun = llfun.sum()
    return -1 * llfun
# Run minimization
res = minimize(bc_mod, params, method='BFGS', options = {'maxiter':1,'disp': True})
print(res.x)
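To time loc_probs in isolation from the optimizer, a small benchmark can be appended (my own sketch, not part of the original program; astype(float) hands the function a private copy, since loc_probs writes into params in place):

import timeit

t = timeit.timeit(lambda: loc_probs(params.astype(float)), number=3)
print('3 standalone calls to loc_probs: %.1f s' % t)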