I am trying to limit the maximum size in bytes of an HDF5 file (stored on my PC).
The number of datasets in my application is variable, since I can add or remove sensors from my test setup. The type of data coming from each sensor also varies and contains different inputs (integers, floats, booleans, etc.).
So I am looking for a way to limit the size of the HDF5 file. As requested, I have included a simplified working example below. Any ideas would be greatly appreciated.
import numpy as np
import ctypes
import h5py
import time
# ------------------------------------------------------------------------------
class ExampleA(ctypes.Structure):
    _pack_ = 8
    _fields_ = [('fUnderrange', ctypes.c_float),
                ('fOverrange', ctypes.c_float),
                ('bError', ctypes.c_bool),
                ('TXPDO', ctypes.c_int)]

class ExampleB(ctypes.Structure):
    _pack_ = 8
    _fields_ = [('bStatus', ctypes.c_bool),
                ('nValue', ctypes.c_ushort),
                ('fValue2', ctypes.c_float)]
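
#Side note (sanity check, not part of the logging logic): numpy can build a
#structured dtype straight from these ctypes definitions, which is what makes
#the np.frombuffer calls further down work. Assumes a numpy version that
#accepts ctypes Structures.
print(np.dtype(ExampleA))            #field names, offsets and padding
print(np.dtype(ExampleA).itemsize)   #bytes per ExampleA record
print(np.dtype(ExampleB).itemsize)   #bytes per ExampleB record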
#Generating some random data, usually comes from sensors
#You can ignore this part for now
def generate_data():
    sample_A = ExampleA()
    sample_B = ExampleB()
    bytes_A = bytes()
    bytes_B = bytes()
    for i in range(10):
        sample_A.fUnderrange = np.random.random()        #random float [0,1]
        sample_A.fOverrange = 1 + 5*np.random.random()   #random float [1,6]
        sample_A.bError = 0                              #No errors
        sample_A.TXPDO = np.random.randint(0, 201)       #random int [0,200]
        bytes_A += sample_A
    for i in range(8):
        sample_B.bStatus = 1                             #Status ok
        sample_B.nValue = np.random.randint(0, 31)       #random int [0,30]
        sample_B.fValue2 = 12 + 3*np.random.random()     #random float [12,15]
        bytes_B += sample_B
    return bytes_A, bytes_B
#Converting the data to a numpy ndarray
#Have a look at what the data looks like using these variables
rawdata_A, rawdata_B = generate_data()
data_A = np.frombuffer(rawdata_A, dtype=ExampleA)
data_B = np.frombuffer(rawdata_B, dtype=ExampleB)
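
#Side note: one batch of data_A holds 10 records and one batch of data_B holds
#8 records, so data_A.nbytes and data_B.nbytes give the (uncompressed) number
#of bytes each write adds, which is useful when thinking in a byte budget
#rather than a row budget.
print(data_A.shape, data_A.nbytes)   #10 records, 10 * itemsize bytes
print(data_B.shape, data_B.nbytes)   #8 records, 8 * itemsize bytes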
# ------------------------------------------------------------------------------
#Create the hdf5 file
path = "..\\LogfileTest.h5"
d = {'sensor_A': data_A,
     'sensor_B': data_B}
maxrows = 10000
#Create the file and a dataset for each sensor
with h5py.File(path, 'w') as f:
    for name, data in d.items():
        dset = f.create_dataset(name, shape=data.shape, maxshape=(maxrows,),
                                dtype=data.dtype, compression='gzip')
        dset.attrs['tracker'] = 0  #Keep track of where the last row of data was written
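
#Optional check of the actual size on disk right after creation. Because of
#gzip compression, chunking and HDF5 metadata, this will not match the raw
#data size exactly.
import os   #would normally go at the top with the other imports
print("File size after creation:", os.path.getsize(path), "bytes")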
# ------------------------------------------------------------------------------
#Simulate a loop, getting new data each time
for i in range(30):
    time.sleep(0.1)  #New data comes in every 0.1 seconds
    #Generate some random data
    rawdata_A, rawdata_B = generate_data()
    newdata_A = np.frombuffer(rawdata_A, dtype=ExampleA)
    newdata_B = np.frombuffer(rawdata_B, dtype=ExampleB)
    #Open the hdf5 file
    with h5py.File(path, 'r+') as f:
        #Write the data to the file
        for (name, data) in zip(d.keys(), [newdata_A, newdata_B]):
            #Open the correct dataset and check where the last line was written
            dset = f[name]
            tracker = dset.attrs['tracker']
            #Check if the data still fits in the file
            newsize = tracker + len(data)
            if newsize < dset.maxshape[0]:
                dset.resize((newsize,))
                #Write the new data after the tracker
                dset[tracker:tracker+len(data)] = data[:]
                #Increment the tracker
                dset.attrs['tracker'] += len(data)
            else:
                #Reset the tracker and start from the top of the file.
                #I won't bore you with that code for now, but I'm looking for a
                #system that checks the maximum size of the file instead of the
                #maximum number of rows.
                pass
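
For reference, this is roughly the kind of check I have in mind instead of the maxrows comparison. It is only a rough sketch: the byte budget MAXBYTES is a made-up number, the helper name file_is_full is mine, and it relies on h5py's low-level f.id.get_filesize(), which reports the current size of the HDF5 file in bytes (os.path.getsize(path) should give a similar number once the file is flushed). It also only reacts after the budget has been reached; it does not predict how many bytes the next compressed write will add.

import h5py

MAXBYTES = 50 * 1024**2   #example budget: 50 MiB, purely illustrative

def file_is_full(f, maxbytes=MAXBYTES):
    #Coarse guard: compare the current HDF5 file size in bytes to the budget.
    #f is an open h5py.File; get_filesize() is part of h5py's low-level API.
    return f.id.get_filesize() >= maxbytes

#Inside the logging loop, the row-based check could then become roughly:
#    with h5py.File(path, 'r+') as f:
#        if file_is_full(f):
#            ...reset the trackers / roll over to a new file...
#        else:
#            ...resize and write as before...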