我正在用 Python/NumPy 开发一个基于代理(agent-based)的劳动力市场模型。该模型侧重于工人与公司的匹配过程,其中每个工人和每家公司都用一个 l 维位串来表征。位串彼此接近的工人和公司会被匹配在一起。
目前,模型可以正常运行并产生正确的输出,但速度非常慢:20 次迭代大约需要 77 秒(我在一台配备 i5 处理器和 8GB 内存的 MacBook Pro 上运行该模型)。相比之下,我最初用 R 编写的同一模型,20 次迭代只需约 0.5 秒。这看起来很奇怪,因为我读到的所有资料都说,在循环和其他通用编程操作上,Python 应该比 R 快得多。
我花了很多时间尝试优化代码并研究numpy的问题。另外,我尝试在Sage中运行模型,但没有发现任何差异。
下面附上代码的关键部分。如果代码本身有问题,或者我忽略了 NumPy 的某些用法或其他方面的问题,请告诉我。
谢谢,
Daniel Scheer
代码:
from __future__ import division
from numpy import*
import numpy as np
import time
import math as math
# --- Simulation size ---------------------------------------------------
NUM_WORKERS = 1000       # number of rows in the `workers` table
NUM_FIRMS = 65           # number of rows in the `firms` table
ITERATIONS = 20          # passes of the main hiring loop
SKILLSET_LENGTH = 50     # bits in each worker/firm skill string

# --- Reproducibility ---------------------------------------------------
RANDOM_SEED = 1          # base seed; multiples of it seed the initial draws

# --- Matching thresholds -----------------------------------------------
HIRING_THRESHOLD = 0.4     # closeness must exceed this for a hire
INTERVIEW_THRESHOLD = 0.2  # not referenced in the visible part of this file

# --- Production function parameters (used by production()) -------------
CONS_RETURN = 1              # coefficient of the linear term
INC_RETURN = 1               # scale applied inside the power term
RETURN_COEFF = 1.8           # exponent of the power term
PRODUCTIVITY_FACTOR = 0.001  # multiplier turning closeness scores into productivity
#"corr" function computes closeness between worker i and firm j
def corr(x, y):
    """Return the closeness of two skill vectors.

    The element-wise absolute differences between ``x`` and ``y`` are
    summed, divided by ``SKILLSET_LENGTH`` and subtracted from 1, so two
    identical vectors score exactly 1.
    """
    total_gap = np.sum(np.abs(x - y))
    mismatch_share = total_gap / SKILLSET_LENGTH
    return 1 - mismatch_share
#"skill_evolve" function randomly changes a segment of the firm's skill demand bit string
def skill_evolve(start, end, start1, q, j, firms):
    """Return a fresh random 0/1 vector of length ``end - start1``.

    The global NumPy random generator is reseeded with ``q * j`` before
    drawing, so the result is fully determined by ``q``, ``j`` and the
    requested length. ``start`` and ``firms`` are accepted but not used;
    nothing is written back into ``firms`` here.
    """
    segment_length = end - start1
    np.random.seed(q * j)
    draws = np.random.uniform(0, 1, segment_length)
    # Rounding uniform(0, 1) draws yields bits that are 0.0 or 1.0.
    return np.around(draws)
#"production" function computes firm output
def production(prod):
    """Return the output produced at productivity level ``prod``.

    Output is the sum of a linear term, ``CONS_RETURN * prod``, and a
    power term, ``(INC_RETURN * prod) ** RETURN_COEFF``. The power term
    is evaluated with ``math.pow``, so ``prod`` must be a scalar.
    """
    linear_term = CONS_RETURN * prod
    power_term = math.pow(INC_RETURN * prod, RETURN_COEFF)
    return linear_term + power_term
#"hire_unemp" function loops through unemployed workers and matches them with firms
def hire_unemp(j):
    """Let firm ``j`` hire at most one currently unemployed worker.

    Applicants are visited in the order of the module-level ``applicants``
    table. The first worker who is unemployed (``workers[row, 3] == 0``),
    whose closeness to the firm exceeds ``HIRING_THRESHOLD`` and who passes
    the production comparison below is hired; the ``workers`` and ``firms``
    tables are updated in place and the search stops.

    Columns written on a hire:
      workers[row, 3]  id of the hiring firm
      workers[row, 4]  closeness between worker and firm
      workers[row, 5]  (closeness + workers[row, 1]) * PRODUCTIVITY_FACTOR
      workers[row, 7]  firm output divided by firm headcount
      firms[j, 1]      headcount, incremented by one
      firms[j, 2]      running sum of workers[row, 5] over hires
      firms[j, 3]      production(firms[j, 2])
    """
    # Worker ids are stored as floats (1-based); NumPy requires integer
    # indices, so convert them to 0-based integer row numbers once.
    applicant_rows = applicants[:NUM_WORKERS, 0].astype(int) - 1

    # The firm's skill bits do not change during this call, so slice once.
    # The slice is exactly SKILLSET_LENGTH wide; the previous "+1" end was
    # only harmless because slicing clips at the array edge.
    firm_skills = firms[j, 4:4 + SKILLSET_LENGTH]

    for worker_row in applicant_rows:
        # Cheap test first: skip employed workers before computing closeness.
        if workers[worker_row, 3] != 0:
            continue

        correlation = corr(workers[worker_row, 9:9 + SKILLSET_LENGTH], firm_skills)
        if not (correlation > HIRING_THRESHOLD):
            continue

        contribution = correlation * PRODUCTIVITY_FACTOR
        diluted = (firms[j, 2] + contribution) / (firms[j, 1] + 1)
        if not (production(contribution) >= production(diluted)):
            continue

        workers[worker_row, 3] = firms[j, 0]
        workers[worker_row, 4] = correlation
        workers[worker_row, 5] = (correlation + workers[worker_row, 1]) * PRODUCTIVITY_FACTOR
        firms[j, 1] = firms[j, 1] + 1
        firms[j, 2] = firms[j, 2] + workers[worker_row, 5]
        firms[j, 3] = production(firms[j, 2])
        workers[worker_row, 7] = firms[j, 3] / firms[j, 1]
        break  # one hire per firm per call
#"hire_emp" function loops through employed workers and matches them with firms
def hire_emp(j):
    """Let firm ``j`` poach at most one worker who is currently employed.

    Applicants are visited in the order of the module-level ``applicants``
    table. The first worker who is employed (``workers[row, 3] > 0``),
    whose closeness to the firm exceeds ``HIRING_THRESHOLD`` and for whom
    the firm's offer beats the value stored in ``workers[row, 7]`` moves to
    firm ``j``. The previous employer's headcount, productivity sum and
    output are reduced, then the worker and firm ``j`` are updated exactly
    as in ``hire_unemp``. All updates are in place.

    NOTE(review): the original condition had a misplaced parenthesis and
    evaluated ``production(<comparison result>)``, so the offer was never
    passed through ``production``. The comparison is now
    ``production(offer_productivity) > workers[row, 7]``, mirroring the
    form used in ``hire_unemp``. This changes which workers move.
    """
    # Worker ids are stored as floats (1-based); NumPy requires integer
    # indices, so convert them to 0-based integer row numbers once.
    applicant_rows = applicants[:NUM_WORKERS, 0].astype(int) - 1

    # Exactly SKILLSET_LENGTH columns; the previous "+1" end was only
    # harmless because slicing clips at the array edge.
    firm_skills = firms[j, 4:4 + SKILLSET_LENGTH]

    for worker_row in applicant_rows:
        # Cheap test first: skip unemployed workers before computing closeness.
        if not (workers[worker_row, 3] > 0):
            continue

        correlation = corr(workers[worker_row, 9:9 + SKILLSET_LENGTH], firm_skills)
        if not (correlation > HIRING_THRESHOLD):
            continue

        offer_productivity = (firms[j, 2] + correlation * PRODUCTIVITY_FACTOR) / (firms[j, 1] + 1)
        if not (production(offer_productivity) > workers[worker_row, 7]):
            continue

        # Remove the worker from the current employer (firm ids are 1-based).
        otherfirm_row = int(workers[worker_row, 3]) - 1
        firms[otherfirm_row, 1] = firms[otherfirm_row, 1] - 1
        remaining = firms[otherfirm_row, 2] - workers[worker_row, 5]
        # Clamp at zero. A plain conditional is used instead of max()
        # because `from numpy import *` may shadow the builtin.
        firms[otherfirm_row, 2] = remaining if remaining > 0 else 0.0
        firms[otherfirm_row, 3] = production(firms[otherfirm_row, 2])

        # Attach the worker to firm j.
        workers[worker_row, 3] = firms[j, 0]
        workers[worker_row, 4] = correlation
        workers[worker_row, 5] = (correlation + workers[worker_row, 1]) * PRODUCTIVITY_FACTOR
        firms[j, 1] = firms[j, 1] + 1
        firms[j, 2] = firms[j, 2] + workers[worker_row, 5]
        firms[j, 3] = production(firms[j, 2])
        workers[worker_row, 7] = firms[j, 3] / firms[j, 1]
        break  # one hire per firm per call
# ---------------------------------------------------------------------------
# Worker table: one row per worker, 9 bookkeeping columns followed by
# SKILLSET_LENGTH skill bits.
#   col 0: worker id (1-based)
#   col 1: uniform(0, 1) draw, added to closeness when computing productivity
#   col 2: initialised to 5 (not read in the visible code)
#   col 3: id of the employing firm, 0 while unemployed
#   col 4: closeness to the employing firm
# ---------------------------------------------------------------------------
workers = np.zeros((NUM_WORKERS, 9 + SKILLSET_LENGTH))
workers[:, 0] = np.arange(1, NUM_WORKERS + 1)
np.random.seed(RANDOM_SEED * 1)
workers[:, 1] = np.random.uniform(0, 1, NUM_WORKERS)
workers[:, 2] = 5
workers[:, 3] = 0
workers[:, 4] = 0
np.random.seed(RANDOM_SEED * 2)
workers[:, 9:9 + SKILLSET_LENGTH] = np.around(
    np.random.uniform(0, 1, (NUM_WORKERS, SKILLSET_LENGTH)))

# ---------------------------------------------------------------------------
# Firm table: one row per firm, 4 bookkeeping columns (id, headcount,
# productivity sum, output) followed by SKILLSET_LENGTH skill bits.
# ---------------------------------------------------------------------------
np.random.seed(RANDOM_SEED * 3)
firms = np.zeros((NUM_FIRMS, 4 + SKILLSET_LENGTH))
firms[:, 0] = np.arange(1, NUM_FIRMS + 1)
firms[:, 4:] = np.around(np.random.uniform(0, 1, (NUM_FIRMS, SKILLSET_LENGTH)))

start_full = time.time()
for q in range(ITERATIONS):
    # Draw a fresh random visiting order for this iteration; `applicants`
    # is a snapshot of `workers` sorted by that random key (last column).
    np.random.seed(q)
    ordering = np.random.uniform(0, 1, NUM_WORKERS).reshape(-1, 1)
    applicants = np.hstack((workers, ordering))
    applicants = applicants[applicants[:, -1].argsort()]

    # Hire workers from unemployment.
    # Explicit loops are used instead of map(): on Python 3 map() is lazy,
    # so calling it only for side effects would do nothing.
    start_time = time.time()
    for j in range(NUM_FIRMS):
        hire_unemp(j)
    print("Iteration unemp %2d: %2.5f seconds" % (q, time.time() - start_time))

    # Hire workers from employment.
    start_time = time.time()
    for j in range(NUM_FIRMS):
        hire_emp(j)
    print("Iteration emp %2d: %2.5f seconds" % (q, time.time() - start_time))