How can I add two h5 files (say 1.h5 and 2.h5) and store the result in a new h5 file with the same structure? I do not want to merge them; I want mathematical, element-wise addition of their contents. I tried the following:
import h5py
f = h5py.File('1.h5','r')
f1=h5py.File('2.h5','r')
f+f1
but I get the following error:
TypeError: unsupported operand type(s) for +: 'File' and 'File'
Here is what I get from f.visititems(lambda name, obj: print(name, obj)):
conv2d_37 <HDF5 group "/conv2d_37" (1 members)>
conv2d_37/conv2d_37 <HDF5 group "/conv2d_37/conv2d_37" (2 members)>
conv2d_37/conv2d_37/bias:0 <HDF5 dataset "bias:0": shape (32,), type "<f4">
conv2d_37/conv2d_37/kernel:0 <HDF5 dataset "kernel:0": shape (2, 2, 1, 32), type "<f4">
conv2d_38 <HDF5 group "/conv2d_38" (1 members)>
conv2d_38/conv2d_38 <HDF5 group "/conv2d_38/conv2d_38" (2 members)>
conv2d_38/conv2d_38/bias:0 <HDF5 dataset "bias:0": shape (32,), type "<f4">
conv2d_38/conv2d_38/kernel:0 <HDF5 dataset "kernel:0": shape (2, 2, 32, 32), type "<f4">
conv2d_39 <HDF5 group "/conv2d_39" (1 members)>
conv2d_39/conv2d_39 <HDF5 group "/conv2d_39/conv2d_39" (2 members)>
conv2d_39/conv2d_39/bias:0 <HDF5 dataset "bias:0": shape (64,), type "<f4">
conv2d_39/conv2d_39/kernel:0 <HDF5 dataset "kernel:0": shape (2, 2, 32, 64), type "<f4">
conv2d_40 <HDF5 group "/conv2d_40" (1 members)>
conv2d_40/conv2d_40 <HDF5 group "/conv2d_40/conv2d_40" (2 members)>
conv2d_40/conv2d_40/bias:0 <HDF5 dataset "bias:0": shape (64,), type "<f4">
conv2d_40/conv2d_40/kernel:0 <HDF5 dataset "kernel:0": shape (2, 2, 64, 64), type "<f4">
dense_19 <HDF5 group "/dense_19" (1 members)>
dense_19/dense_19 <HDF5 group "/dense_19/dense_19" (2 members)>
dense_19/dense_19/bias:0 <HDF5 dataset "bias:0": shape (256,), type "<f4">
dense_19/dense_19/kernel:0 <HDF5 dataset "kernel:0": shape (7744, 256), type "<f4">
dense_20 <HDF5 group "/dense_20" (1 members)>
dense_20/dense_20 <HDF5 group "/dense_20/dense_20" (2 members)>
dense_20/dense_20/bias:0 <HDF5 dataset "bias:0": shape (2,), type "<f4">
dense_20/dense_20/kernel:0 <HDF5 dataset "kernel:0": shape (256, 2), type "<f4">
dropout_28 <HDF5 group "/dropout_28" (0 members)>
dropout_29 <HDF5 group "/dropout_29" (0 members)>
dropout_30 <HDF5 group "/dropout_30" (0 members)>
flatten_10 <HDF5 group "/flatten_10" (0 members)>
max_pooling2d_19 <HDF5 group "/max_pooling2d_19" (0 members)>
max_pooling2d_20 <HDF5 group "/max_pooling2d_20" (0 members)>
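Put differently, for a single dataset from the listing above (for example conv2d_37/conv2d_37/bias:0), what I want is effectively the following; this is only a sketch, and the output file name is arbitrary:

import h5py

f   = h5py.File('1.h5', 'r')
f1  = h5py.File('2.h5', 'r')
out = h5py.File('new.h5', 'w')

path = 'conv2d_37/conv2d_37/bias:0'
# element-wise sum of the two (32,) bias vectors
out[path] = f[path][...] + f1[path][...]

I would like to do this for every dataset, keeping the group structure identical.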
Code copied from the comments (it was unreadable there):
import h5py
import numpy as np

# getdatasets is the recursive helper defined in the answer below

data     = h5py.File('1.h5', 'r')
new_data = h5py.File('new.hdf5', 'w')

datasets = getdatasets('/', data)

groups = list(set([i[::-1].split('/',1)[1][::-1] for i in datasets]))
groups = [i for i in groups if len(i) > 0]

idx    = np.argsort(np.array([len(i.split('/')) for i in groups]))
groups = [groups[i] for i in idx]

for group in groups:
    new_data.create_group(group)

for path in datasets:
    group = path[::-1].split('/',1)[1][::-1]
    if len(group) == 0: group = '/'

data1     = h5py.File('2.h5', 'r')
datasets1 = getdatasets('/', data1)

groups1 = list(set([i[::-1].split('/',1)[1][::-1] for i in datasets1]))
groups1 = [i for i in groups1 if len(i) > 0]

idx1    = np.argsort(np.array([len(i.split('/')) for i in groups1]))
groups1 = [groups1[i] for i in idx1]

for path in datasets1:
    group1 = path[::-1].split('/',1)[1][::-1]
    if len(group1) == 0:
        group1 = '/'

for key in datasets:
    new_data[key] = data[key][...] + data1[key][...]
Answer 0 (score: 1)
I do not fully understand what problem you are running into, but I do have a working implementation that does exactly what you want:
import h5py
import numpy as np
# write example files
# -------------------
for name in ['1.hdf5', '2.hdf5']:
data = h5py.File(name,'w')
data['A'] = np.arange(25).reshape(5,5)
data.close()
# support function
# ----------------
def getdatasets(key,archive):
if key[-1] != '/': key += '/'
out = []
for name in archive[key]:
path = key + name
if isinstance(archive[path], h5py.Dataset):
out += [path]
else:
out += getdatasets(path,archive)
return out
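# (for the example files written above, getdatasets('/', data) returns ['/A'])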
# perform copying
# ---------------
# open both source-files and the destination
data1 = h5py.File('1.hdf5' ,'r')
data2 = h5py.File('2.hdf5' ,'r')
new_data = h5py.File('new.hdf5','w')
# get datasets
datasets = sorted(getdatasets('/', data1))
datasets2 = sorted(getdatasets('/', data2))
# check consistency of datasets
# - number
if len(datasets) != len(datasets2):
raise IOError('files not consistent')
# - item-by-item
for a,b in zip(datasets, datasets2):
if a != b:
raise IOError('files not consistent')
# get the group-names from the lists of datasets
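# (i[::-1].split('/',1)[1][::-1] strips the last path component, i.e. it keeps the
#  parent group: e.g. 'conv2d_37/conv2d_37/bias:0' -> 'conv2d_37/conv2d_37')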
groups = list(set([i[::-1].split('/',1)[1][::-1] for i in datasets]))
groups = [i for i in groups if len(i)>0]
# sort groups based on depth
idx = np.argsort(np.array([len(i.split('/')) for i in groups]))
groups = [groups[i] for i in idx]
# create all groups that contain a dataset
for group in groups:
new_data.create_group(group)
# copy (add) datasets
for path in datasets:
# - get group name
group = path[::-1].split('/',1)[1][::-1]
# - minimum group name
if len(group) == 0: group = '/'
# - copy data
new_data[path] = data1[path][...] + data2[path][...]
# verify
# ------
# copy (add) datasets
for path in datasets:
print(new_data[path][...])
# close all files
# ---------------
new_data.close()
data1.close()
data2.close()
This indeed gives twice the arange used in the example:
[[ 0 2 4 6 8]
[10 12 14 16 18]
[20 22 24 26 28]
[30 32 34 36 38]
[40 42 44 46 48]]
I really do think this question has already been answered here; the explanation is there.
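As a side note, the same addition can be written more compactly with visititems (the function used in the question to inspect the structure). This is only a sketch under the assumption that both files have identical layouts; the output file name is arbitrary:

import h5py

with h5py.File('1.h5', 'r') as f1, h5py.File('2.h5', 'r') as f2, \
     h5py.File('sum.h5', 'w') as out:

    def add_item(name, obj):
        # datasets: store the element-wise sum; groups: just mirror them
        if isinstance(obj, h5py.Dataset):
            out[name] = obj[...] + f2[name][...]
        else:
            out.require_group(name)

    f1.visititems(add_item)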