我有一个很大的二进制数据文件(8GB),它按稀疏矩阵的方式存储(非零数据、行索引、列索引)。出于速度考虑,我想用C来读取/导入它;矩阵构造完成后,为了便于显示,我再想通过ctypes接口把它导入Python。目前我已经只用Python实现了全部代码,并且可以正常工作,但我不满意,因为它消耗大量内存。这就是为什么我想分两步(先用C读取,再导入Python)来解决这个问题。
有人知道如何在C语言中构造稀疏矩阵,并通过ctypes将其以Python的稀疏矩阵格式导入吗?
谢谢。
下面是我的纯Python代码,它读取二进制文件、导入数据,并将其作为稀疏矩阵返回。
<!-- language: lang-py -->
def Sparse_read(binFileName, DoseGridSize, NbrSpots, max_buffered_values=10000000):
    """Read a run-length encoded beamlet file into a sparse matrix.

    The file is read as ``NbrSpots`` consecutive spot records. Each record is
    laid out as follows (all fields 4 bytes, little-endian):

    * spot header: ``int32`` number of non-zero voxels, ``int32`` BeamID,
      ``int32`` LayerID, ``float32`` x coordinate, ``float32`` y coordinate;
    * one or more runs, each made of ``int32`` run length, ``int32`` index of
      the first voxel of the run, then ``run length`` ``float32`` values.
      Runs are read until the accumulated run lengths reach the number of
      non-zero voxels announced in the spot header (at least one run header
      is always read, even when that number is 0).

    Args:
        binFileName: Path of the binary file to read.
        DoseGridSize: Indexable with at least three entries; the product of
            the first three is the number of matrix rows.
        NbrSpots: Number of spot records to read; one matrix column each.
        max_buffered_values: Number of buffered non-zero values above which
            the pending columns are converted to a CSC block. This bounds the
            size of the temporary coordinate arrays.

    Returns:
        A scipy sparse matrix of shape ``(NbrVoxels, NbrSpots)`` holding
        ``float32`` values, with voxel indices as rows and spots as columns.

    Raises:
        IOError: If the file cannot be opened (a message is printed first).
        struct.error: If the file ends before all announced data was read.
        ValueError: If a run length is negative, if the runs of a spot do not
            add up to its announced number of non-zero voxels, or if a voxel
            index falls outside the matrix.
    """
    NbrVoxels = DoseGridSize[0] * DoseGridSize[1] * DoseGridSize[2]

    try:
        fid = open(binFileName, 'rb')
    except IOError:
        print('Unable to open file ', binFileName)
        # Propagate: there is nothing to read, continuing would only fail
        # later with an unrelated error on the unbound file handle.
        raise

    # Integers are decoded little-endian, consistently with the float fields.
    spot_header = struct.Struct('<iiiff')
    run_header = struct.Struct('<ii')

    blocks = []            # finished CSC blocks, stacked once at the end
    data_chunks = []       # float32 arrays, one per run of the pending columns
    row_chunks = []        # matching voxel (row) indices
    values_per_col = []    # number of values in each pending column
    buffered_values = 0

    with fid:
        for i in range(NbrSpots):
            NonZeroVoxels, BeamID, LayerID, xcoord, ycoord = spot_header.unpack(
                fid.read(spot_header.size))
            print(f"Spot {i}: BeamID={BeamID} LayerID={LayerID} "
                  f"Position=({xcoord};{ycoord})")

            ReadVoxels = 0
            while True:
                run_length, first_index = run_header.unpack(
                    fid.read(run_header.size))
                if run_length < 0:
                    raise ValueError(
                        f"Spot {i}: negative run length {run_length}")
                ReadVoxels += run_length
                if run_length:
                    raw = fid.read(4 * run_length)
                    if len(raw) != 4 * run_length:
                        raise struct.error(
                            f"Spot {i}: unexpected end of file inside a run")
                    # Decode the whole run at once instead of value by value.
                    data_chunks.append(np.frombuffer(raw, dtype='<f4'))
                    row_chunks.append(
                        first_index + np.arange(run_length, dtype=np.int64))
                if ReadVoxels >= NonZeroVoxels:
                    break

            if ReadVoxels != NonZeroVoxels:
                raise ValueError(
                    f"Spot {i}: header announces {NonZeroVoxels} non-zero "
                    f"voxels but its runs contain {ReadVoxels}")

            values_per_col.append(NonZeroVoxels)
            buffered_values += NonZeroVoxels

            if buffered_values > max_buffered_values:
                blocks.append(
                    _csc_block(data_chunks, row_chunks, values_per_col, NbrVoxels))
                data_chunks, row_chunks, values_per_col = [], [], []
                buffered_values = 0

    # Convert whatever columns are still pending.
    if values_per_col:
        blocks.append(
            _csc_block(data_chunks, row_chunks, values_per_col, NbrVoxels))

    if not blocks:
        # No spot requested: return an empty matrix with the right row count.
        return sp.csc_matrix((NbrVoxels, 0), dtype=np.float32)
    return sp.hstack(blocks)


def _csc_block(data_chunks, row_chunks, values_per_col, n_rows):
    """Build one CSC block with one column per entry of ``values_per_col``."""
    if data_chunks:
        data = np.concatenate(data_chunks)
        rows = np.concatenate(row_chunks)
    else:
        data = np.empty(0, dtype=np.float32)
        rows = np.empty(0, dtype=np.int64)
    n_cols = len(values_per_col)
    # Column index of every value: column k repeated values_per_col[k] times.
    cols = np.repeat(np.arange(n_cols, dtype=np.int64), values_per_col)
    return sp.csc_matrix((data, (rows, cols)), shape=(n_rows, n_cols),
                         dtype=np.float32)