hdf5数组的点积

时间:2016-03-23 09:53:45

标签: python hdf5 h5py

我正在创建一个神经网络,其输入是hdf5数组X,权重矩阵是W1。我想按如下方式计算这两者的点积,并把结果存储到另一个hdf5数组中。

f = h5py.File('z2.hdf5')
self.f['z2'] = np.dot(X,self.W1)

但是上面这行代码抛出了MemoryError。该怎么解决?如何计算hdf5数组的点积?

1 个答案:

答案 0 :(得分:2)

来自 http://dask.pydata.org/en/latest/array-overview.html :

""" Dask Array使用分块算法实现了NumPy ndarray接口的一个子集,把大型数组切分成许多小数组。这使我们可以利用所有内核,对大于内存的数组进行计算。"""

""" dask.array库支持numpy的以下接口:

...

张量收缩/点积/矩阵乘法,tensordot"""

下面是一个用于说明的可运行示例,可以尝试不同的维度,比较numpy与dask的表现。

import dask as dk
import tables
import numpy as np
from time import time

# Dimensions of the two test matrices, (lenx, leny) and (leny, lenx).
lenx = 300
leny = 100000

# Directory prefix (including the trailing slash) for every file path below.
outpath = "/tmp/"

# The file name encodes both dimensions in units of 100, zero-padded to at
# least three digits.
fname = "t{0:03d}_{1:03d}.h5".format(lenx // 100, leny // 100)

def write_test_file():
    """Create the HDF5 test file containing two random float64 matrices.

    Opens ``outpath + fname`` in write mode and stores two CArray nodes
    under the root group:

    * ``pressure`` with shape ``(lenx, leny)``
    * ``lines`` with shape ``(leny, lenx)``

    Both are filled from ``np.random.random`` and written with zlib
    level-6 compression and the shuffle filter. The elapsed time of the
    second phase (generating and writing ``lines``) and of the first phase
    (writing and flushing ``pressure``) are printed, in that order.
    """
    atom = tables.Float64Atom()
    filters = tables.Filters(complevel=6, complib='zlib', shuffle=True)
    # The context manager closes the file even when generating or writing
    # one of the arrays raises, so no handle is leaked.
    with tables.open_file(outpath+fname, "w") as h5file:
        pres = np.random.random((lenx, leny))
        print("Writing data")
        t01 = time()
        h5file.create_carray(h5file.root, 'pressure', atom, (lenx, leny),
                             filters=filters, obj=pres)
        h5file.flush()
        # Drop the first matrix before allocating the second one so both
        # are not held in memory at the same time.
        del pres
        t02 = time()
        lines = np.random.random((leny, lenx))
        h5file.create_carray(h5file.root, "lines", atom, (leny, lenx),
                             filters=filters, obj=lines)
        t03 = time()
        print("Data written", t03-t02, t02-t01)

def numpy_dot_test():
    """Compute the dot product of the stored matrices in memory with NumPy.

    Opens ``outpath + fname`` read-only, converts the ``lines`` and
    ``pressure`` nodes to NumPy arrays and multiplies them as
    ``lines . pressure``. Prints both node shapes, the shape and mean of
    the result, and the elapsed time of the multiplication followed by
    the time spent opening the file and looking up the nodes.

    NOTE(review): the operand order here is ``lines . pressure`` while
    ``dask_dot_test`` computes ``pressure . lines`` -- confirm which
    product is intended before comparing the timings.
    """
    print("Open data")
    t1 = time()
    # The context manager closes the file even if the conversion or the
    # multiplication below raises (e.g. MemoryError on large inputs).
    with tables.open_file(outpath+fname, mode="r") as h5open:
        pressureObject = h5open.get_node("/", "pressure")
        print(pressureObject.shape)
        linesObject = h5open.get_node("/", "lines")
        print(linesObject.shape)
        t2 = time()
        # np.array(...) materialises each node as a full in-memory array
        # before the dot product is taken.
        ohoo = np.array(linesObject).dot(np.array(pressureObject))
        t3 = time()
        print(ohoo.shape, np.mean(ohoo))
        print("matmul time:", t3-t2, t2-t1)

def dask_dot_test():
    """Compute ``pressure . lines`` with dask and store the result in HDF5.

    Wraps the ``/pressure`` and ``/lines`` datasets of ``outpath + fname``
    as dask arrays, takes their dot product and writes it to the dataset
    ``testout`` in ``outpath + 'output.h5'``. Prints the shapes of both
    dask arrays, then the elapsed time of the dot product plus write
    followed by the time spent wrapping the datasets.
    """
    import h5py
    import dask.array as da
    # Open explicitly read-only: the default mode differs between h5py
    # versions, and the input file is never modified here. The context
    # manager closes the file even if the computation raises.
    with h5py.File(outpath+fname, "r") as h5open2:
        t21 = time()
        # from_array only wraps the datasets; chunks sets the block shape
        # dask uses when it later reads from them.
        d1 = da.from_array(h5open2["/pressure"], chunks=(100, lenx))
        d2 = da.from_array(h5open2["/lines"], chunks=(leny, 100))
        t22 = time()
        print('d1,d2', d1.shape, d2.shape)
        # to_hdf5 evaluates the dot product and writes it to the output
        # file, so the input file must still be open at this point.
        d1.dot(d2).to_hdf5(outpath+'output.h5', 'testout')
        t23 = time()
        print('ohoo', t23-t22, t22-t21)


# Generate the test file first; both dot-product tests read it back.
write_test_file()
# The in-memory NumPy variant is left disabled; uncomment to compare it.
# numpy_dot_test()
dask_dot_test()