I am trying to limit the maximum size in bytes of an HDF5 file (stored on my PC).
The number of datasets in my application is variable, since I can add or remove sensors from my test setup. The type of data coming from each sensor also varies and contains different inputs (integers, floats, booleans, etc.).
So I am looking for a way to limit the size of the HDF5 file. As requested, I have included a simplified working example below. Any ideas would be greatly appreciated.
import numpy as np
import ctypes
import h5py
import time
# ------------------------------------------------------------------------------
class ExampleA(ctypes.Structure):
    _pack_ = 8
    _fields_ = [('fUnderrange', ctypes.c_float),
                ('fOverrange', ctypes.c_float),
                ('bError', ctypes.c_bool),
                ('TXPDO', ctypes.c_int)]

class ExampleB(ctypes.Structure):
    _pack_ = 8
    _fields_ = [('bStatus', ctypes.c_bool),
                ('nValue', ctypes.c_ushort),
                ('fValue2', ctypes.c_float)]
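
#Side note (sanity check, not part of the logging logic): numpy can build a
#structured dtype straight from these ctypes definitions, which is what makes
#the np.frombuffer calls further down work. Assumes a numpy version that
#accepts ctypes Structures.
print(np.dtype(ExampleA))            #field names, offsets and padding
print(np.dtype(ExampleA).itemsize)   #bytes per ExampleA record
print(np.dtype(ExampleB).itemsize)   #bytes per ExampleB record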
#Generating some random data, usually comes from sensors
#You can ignore this part for now
def generate_data():
    sample_A = ExampleA()
    sample_B = ExampleB()
    bytes_A = bytes()
    bytes_B = bytes()
    for i in range(10):
        sample_A.fUnderrange = np.random.random()        #random float [0,1]
        sample_A.fOverrange = 1 + 5*np.random.random()   #random float [1,6]
        sample_A.bError = 0                              #No errors
        sample_A.TXPDO = np.random.randint(0, 201)       #random int [0,200]
        bytes_A += sample_A
    for i in range(8):
        sample_B.bStatus = 1                             #Status ok
        sample_B.nValue = np.random.randint(0, 31)       #random int [0,30]
        sample_B.fValue2 = 12 + 3*np.random.random()     #random float [12,15]
        bytes_B += sample_B
    return bytes_A, bytes_B
#Converting the data to a numpy ndarray
#Have a look at what the data looks like using these variables
rawdata_A, rawdata_B = generate_data()
data_A = np.frombuffer(rawdata_A, dtype=ExampleA)
data_B = np.frombuffer(rawdata_B, dtype=ExampleB)
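
#Side note: one batch of data_A holds 10 records and one batch of data_B holds
#8 records, so data_A.nbytes and data_B.nbytes give the (uncompressed) number
#of bytes each write adds, which is useful when thinking in a byte budget
#rather than a row budget.
print(data_A.shape, data_A.nbytes)   #10 records, 10 * itemsize bytes
print(data_B.shape, data_B.nbytes)   #8 records, 8 * itemsize bytes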
# ------------------------------------------------------------------------------
#Create the hdf5 file
path = "..\\LogfileTest.h5"
d = {'sensor_A': data_A,
     'sensor_B': data_B}
maxrows = 10000
#Create the file and a dataset for each sensor
with h5py.File(path, 'w') as f:
    for name, data in d.items():
        dset = f.create_dataset(name, shape=data.shape, maxshape=(maxrows,),
                                dtype=data.dtype, compression='gzip')
        dset.attrs['tracker'] = 0  #Keep track of where the last row of data was written
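
#Optional check of the actual size on disk right after creation. Because of
#gzip compression, chunking and HDF5 metadata, this will not match the raw
#data size exactly.
import os   #would normally go at the top with the other imports
print("File size after creation:", os.path.getsize(path), "bytes")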
# ------------------------------------------------------------------------------
#Simulate a loop, getting new data each time
for i in range(30):
    time.sleep(0.1)  #New data comes in every 0.1 seconds
    #Generate some random data
    rawdata_A, rawdata_B = generate_data()
    newdata_A = np.frombuffer(rawdata_A, dtype=ExampleA)
    newdata_B = np.frombuffer(rawdata_B, dtype=ExampleB)
    #Open the hdf5 file
    with h5py.File(path, 'r+') as f:
        #Write the data to the file
        for (name, data) in zip(d.keys(), [newdata_A, newdata_B]):
            #Open the correct dataset and check where the last line was written
            dset = f[name]
            tracker = dset.attrs['tracker']
            #Check if the data still fits in the file
            newsize = tracker + len(data)
            if newsize < dset.maxshape[0]:
                dset.resize((newsize,))
                #Write the new data after the tracker
                dset[tracker:tracker+len(data)] = data[:]
                #Increment the tracker
                dset.attrs['tracker'] += len(data)
            else:
                #Reset the tracker and start from the top of the file.
                #I won't bore you with that code for now, but I'm looking for a
                #system that checks the maximum size of the file instead of the
                #maximum number of rows.
                pass
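
For reference, this is roughly the kind of check I have in mind instead of the maxrows comparison. It is only a rough sketch: the byte budget MAXBYTES is a made-up number, the helper name file_is_full is mine, and it relies on h5py's low-level f.id.get_filesize(), which reports the current size of the HDF5 file in bytes (os.path.getsize(path) should give a similar number once the file is flushed). It also only reacts after the budget has been reached; it does not predict how many bytes the next compressed write will add.

import h5py

MAXBYTES = 50 * 1024**2   #example budget: 50 MiB, purely illustrative

def file_is_full(f, maxbytes=MAXBYTES):
    #Coarse guard: compare the current HDF5 file size in bytes to the budget.
    #f is an open h5py.File; get_filesize() is part of h5py's low-level API.
    return f.id.get_filesize() >= maxbytes

#Inside the logging loop, the row-based check could then become roughly:
#    with h5py.File(path, 'r+') as f:
#        if file_is_full(f):
#            ...reset the trackers / roll over to a new file...
#        else:
#            ...resize and write as before...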