我正在使用 numpy.loadtxt() 从一个目录中读取一系列文件,并把数据加载到两个数组中。
每个文件都是一个双列 csv 文件,各文件的行数不同。
我注意到代码非常慢,98% 的时间都花在读取文本文件的这一行上: [yout1,yout2]=numpy.loadtxt('training_set/mod_data/'+fnamelst[riter],delimiter=',',usecols=(0,1), dtype=int,unpack=True)
我已经使用line_profiler对代码进行了分析,并附加了输出。
有没有办法让这一步更快?
感谢。
import numpy as numpy
import scipy.io
from math import log
from scipy.sparse import coo_matrix
import time
# Randomly sample pairs (i, riter) of data files and record, for every sampled
# pair, the inner product of the two files' sparse column vectors.
#
# Outputs (module-level arrays, filled from index 0 up to `iter`):
#   Indexi[k], Indexj[k] -- the file indices of the k-th sampled pair
#   SampMatrix[k]        -- the inner product of the two sparse columns


def _load_sparse_column(path, n_rows):
    """Read a two-column comma-separated file into an (n_rows, 1) sparse column.

    Column 0 of the file supplies the row indices and column 1 the values.
    """
    rows, vals = numpy.loadtxt(path, delimiter=',', usecols=(0, 1),
                               dtype=int, unpack=True)
    # A file with a single row makes loadtxt return 0-d values; promote them
    # so that len() and the sparse constructor work for every file length.
    rows = numpy.atleast_1d(rows)
    vals = numpy.atleast_1d(vals)
    # Every entry lives in column 0; use an integer index array rather than
    # the float array that numpy.zeros produces by default.
    cols = numpy.zeros(len(rows), dtype=int)
    return coo_matrix((vals, (rows, cols)), shape=(n_rows, 1), dtype=int)


mat = scipy.io.loadmat('netf_proc_data.mat', variable_names='nMc')
nMc = mat['nMc']
frosq = numpy.sum(nMc)
un = 2649429 + 1
# `m` must already be defined when this code runs; it bounds the inner loop
# over `fnamelst` / `P`.  (The original note here read: m=17770.)
# NOTE: `iter` shadows the builtin; the name is kept because code outside this
# section may read the final count.
iter = 0
# `samples` is deliberately left as the float returned by numpy.ceil so that
# the division in `P` below stays a true division; only the buffer length is
# converted to int, because array sizes must be integers.
samples = numpy.ceil(8 * log(m))
n_slots = 2 * int(samples)
# Three independent buffers.  A chained assignment (a = b = c = zeros(...))
# would bind all three names to one array, so each write would overwrite the
# other two results.
Indexi = numpy.zeros(n_slots)
Indexj = numpy.zeros(n_slots)
SampMatrix = numpy.zeros(n_slots)
# NOTE(review): the number of sampled pairs is random and nothing below bounds
# it by n_slots -- confirm that 2*ceil(8*log(m)) slots are always sufficient.
fnamelst = numpy.loadtxt('training_set/print.txt', usecols=(0,), dtype='str')
for i in range(0, 1):
    x = _load_sparse_column('training_set/mod_data/' + fnamelst[i], un)
    # The transpose does not depend on the inner loop; compute it once.
    x_row = x.T
    # sampling
    P = samples * (nMc[i] * numpy.ones((m, 1)) + nMc) / (2 * frosq)
    for riter in range(0, m):
        indices = numpy.random.binomial(1, P[riter])
        if indices == 1:
            # The line profile recorded for this code attributes 98% of the
            # runtime to this per-sample file load.
            y = _load_sparse_column('training_set/mod_data/' + fnamelst[riter], un)
            Indexi[iter] = i
            Indexj[iter] = riter
            SampMatrix[iter] = x_row.dot(y)[0, 0]
            iter = iter + 1
    # end
    print(i)
#end
计时器单位:3.66606e-07 s
总时间:91.5677秒
15 def read_netf(m):
16 1 10 10.0 0.0 t0 = time.time()
17 1 3733 3733.0 0.0 mat = scipy.io.loadmat('netf_proc_data.mat',variable_names='nMc')
18 1 7 7.0 0.0 nMc=mat['nMc']
19 1 313 313.0 0.0 frosq=numpy.sum(nMc)
20 1 5 5.0 0.0 un=2649429+1
21 #m=17770
22 1 4 4.0 0.0 iter=0
23 1 42 42.0 0.0 samples= numpy.ceil(8*log(m))
24 1 116 116.0 0.0 Indexi=Indexj=SampMatrix=numpy.zeros(2*samples)
25 1 935968 935968.0 0.4 fnamelst=numpy.loadtxt('training_set/print.txt',usecols=(0,), dtype='str')
26
27 2 20 10.0 0.0 for i in range(0,1):
28 1 46857 46857.0 0.0 [xout1,xout2]=numpy.loadtxt('training_set/mod_data/'+fnamelst[i],delimiter=',',usecols=(0,1), dtype=int,unpack=True)
29 1 652 652.0 0.0 x=coo_matrix((xout2,(xout1,numpy.zeros(len(xout1)))),shape=(un,1),dtype=int)
30
31 # sampling
32 1 766 766.0 0.0 P=samples*(nMc[i]*numpy.ones((m,1))+nMc)/(2*frosq)
33 17771 59955 3.4 0.0 for riter in range(0,m):
34 17770 191466 10.8 0.1 indices=numpy.random.binomial(1,P[riter])
35 17770 68157 3.8 0.0 if indices==1:
36 48 244680827 5097517.2 98.0 [yout1,yout2]=numpy.loadtxt('training_set/mod_data/'+fnamelst[riter],delimiter=',',usecols=(0,1), dtype=int,unpack=True)
37 48 131806 2746.0 0.1 y=coo_matrix((yout2,(yout1,numpy.zeros(len(yout1)))),shape=(un,1),dtype=int)
38 48 760 15.8 0.0 Indexi[iter]=i
39 48 230 4.8 0.0 Indexj[iter]=riter
40 48 3649053 76021.9 1.5 SampMatrix[iter]=x.T.dot(y)[0,0]
41 48 470 9.8 0.0 iter=iter+1
42 # end
43 1 241 241.0 0.0 print i
44 #end
45 1 6 6.0 0.0 t1 = time.time()
46 1 3 3.0 0.0 total = t1-t0
47 1 3 3.0 0.0 return(total)