我正在尝试加载一份远超出计算机内存容量的超大数据集,加载时出现了内存错误(MemoryError)。数据存放在 HDF5 文件中,总量超过 45GB。是否有办法以较小的块(分批)加载这些数据?以下是我目前用于加载训练数据的函数。
def _load_split_group(h5file, dset_name, label, size_test1, size_test2):
    """Load one group dataset, move channels last, and split train/valid.

    Reads ``dset_name`` from the open h5py file ``h5file``, moves axis 1
    to the last position (two swapaxes, same as the main arrays), then
    applies the same two-stage train_test_split. ``random_state=42`` on
    every split keeps row selection aligned across all groups.

    Returns:
        (group_train, group_valid) ndarrays.
    """
    group = np.asarray(h5file.get(dset_name))
    print("{}: {}".format(label, group.shape))
    # Reorder (N, C, H, W)-style layout to channels-last via two swaps.
    # NOTE(review): assumes 4-D input — confirm against the HDF5 schema.
    group = np.swapaxes(group, 3, 1)
    group = np.swapaxes(group, 1, 2)
    print("{}: {}".format(label, group.shape))
    group_train, _ = train_test_split(
        group, test_size=size_test1, random_state=42)
    del group  # free the full array before the second split
    group_train, group_valid = train_test_split(
        group_train, test_size=size_test2, random_state=42)
    print("{}_train: {}".format(label, group_train.shape))
    print("{}_valid: {}".format(label, group_valid.shape))
    return group_train, group_valid


def data_loader(MODE):
    """Load train/validation arrays from two HDF5 files.

    Args:
        MODE: only ``'train'`` is handled; any other value returns None
            (same as the original behaviour).

    Returns:
        16-tuple of ndarrays:
        (x_train_orig, x_valid_orig, x_affine_train, x_affine_valid,
         x_data_train, x_data_valid, mv_x_group_train, mv_x_group_valid,
         mv_y_group_train, mv_y_group_valid, pv_x_group_train,
         pv_x_group_valid, pv_y_group_train, pv_y_group_valid,
         init_label_train, init_label_valid)
    """
    size_test1 = 0.10  # fraction discarded as the held-out test split
    size_test2 = 0.14  # fraction of the remainder used for validation
    hf = h5py.File('/srv/data_largeangle/temp_patches_all.hdf5', 'r')
    f = h5py.File('/srv/data_largeangle/group_data_final_tb.hdf5', 'r')
    if MODE == 'train':
        init_label = np.asarray(f.get('init_label'))
        x_data = np.asarray(f.get('x_data'))
        # Move axis 1 (channels) to the last position.
        x_data = np.swapaxes(x_data, 3, 1)
        x_data = np.swapaxes(x_data, 1, 2)
        sizeidx = init_label.shape[0]
        x_data_train, _, init_label_train, _ = train_test_split(
            x_data, init_label, test_size=size_test1, random_state=42)
        x_data_train, x_data_valid, init_label_train, init_label_valid = \
            train_test_split(x_data_train, init_label_train,
                             test_size=size_test2, random_state=42)
        # `del` (instead of rebinding to 0.0) drops the references so the
        # multi-GB arrays can be reclaimed before the next big load.
        del x_data, init_label

        # Slice the h5py Datasets directly: only the first `sizeidx` rows
        # are read from disk, instead of materialising the full 45 GB
        # datasets with np.asarray() and then slicing in memory.
        x_affine_data = hf['patches'][0:sizeidx]
        x_orig_data = hf['patches_orig'][0:sizeidx]
        x_train_orig, _, x_affine_train, _ = train_test_split(
            x_orig_data, x_affine_data, test_size=size_test1, random_state=42)
        # BUG FIX: the original assigned `x_train_orig` twice on this line,
        # so `x_valid_orig` (returned below) was never bound -> NameError.
        x_train_orig, x_valid_orig, x_affine_train, x_affine_valid = \
            train_test_split(x_train_orig, x_affine_train,
                             test_size=size_test2, random_state=42)
        del x_affine_data, x_orig_data
        print("Affine Train: {}".format(x_affine_train.shape))
        print("Affine Valid: {}".format(x_affine_valid.shape))

        # The four group datasets follow an identical load/swap/split
        # pattern; the helper also fixes the mislabelled pv_x print.
        mv_x_group_train, mv_x_group_valid = _load_split_group(
            f, 'mvx_group', 'mv_x_group', size_test1, size_test2)
        mv_y_group_train, mv_y_group_valid = _load_split_group(
            f, 'mvy_group', 'mv_y_group', size_test1, size_test2)
        pv_x_group_train, pv_x_group_valid = _load_split_group(
            f, 'pvx_group', 'pv_x_group', size_test1, size_test2)
        pv_y_group_train, pv_y_group_valid = _load_split_group(
            f, 'pvy_group', 'pv_y_group', size_test1, size_test2)

        # All data has been copied into numpy arrays; safe to close.
        hf.close()
        f.close()
        return (x_train_orig, x_valid_orig, x_affine_train, x_affine_valid,
                x_data_train, x_data_valid,
                mv_x_group_train, mv_x_group_valid,
                mv_y_group_train, mv_y_group_valid,
                pv_x_group_train, pv_x_group_valid,
                pv_y_group_train, pv_y_group_valid,
                init_label_train, init_label_valid)