我希望能够把一个scipy稀疏矩阵拆分成若干块,之后再把这些块重新合并成原来的矩阵。
例如,从下面这个稀疏矩阵开始:
# 0 1 0
# 3 0 5
# 0 7 0
将其拆分为6个稀疏矩阵(2行 × 3列的分块):
# 0 | 1 | 0
# 3 | 0 | 5
# --+---+--
# 0 | 7 | 0
换句话说,我想要split_sparse和merge_sparse这两个函数,使下面的测试能够通过:
# Humpty Dumpty sat on a wall: build the 3x3 sparse matrix
#   [[0, 1, 0], [3, 0, 5], [0, 7, 0]]
mat = np.arange(9)
mat[::2] = 0  # zero out every even-indexed entry of the flat array
mat = csr_matrix(mat.reshape(3, 3))

# Humpty Dumpty had a great fall: cut after row 2 and after columns 1 and 2.
row_divs = [2]
col_divs = [1, 2]
split_mat = split_sparse(mat, row_divs, col_divs)


def sparse_eq(x, y):
    """Return True when the difference ``x - y`` has no stored entries."""
    return (x - y).nnz == 0


# All the king's horses and all the king's men: check each of the six blocks.
expected_blocks = {
    (0, 0): [[0], [3]],
    (0, 1): [[1], [0]],
    (0, 2): [[0], [5]],
    (1, 0): [[0]],
    (1, 1): [[7]],
    (1, 2): [[0]],
}
for block_index, dense_block in expected_blocks.items():
    assert sparse_eq(split_mat[block_index], csr_matrix(dense_block))

# Pooled their efforts and put Humpty together again.
assert sparse_eq(merge_sparse(split_mat), mat)
答案 0 :(得分:0)
彼得,一如既往的好问题。下面是你要的答案。
编辑:正如hpaulj指出的,这个解法本身并不依赖于矩阵是稀疏的,它只是利用了scipy已经高效地实现了切片、hstack和vstack这一事实。
from scipy.sparse import hstack, vstack
import numpy as np
def split_sparse(mat, row_divs=(), col_divs=()):
    """Split a sparse matrix into a 2-D grid of sparse sub-matrices.

    Parameters
    ----------
    mat : sparse matrix
        Matrix to split. It must support 2-D slice indexing
        (``mat[r0:r1, c0:c1]``).
    row_divs : iterable of int, optional
        Row indices at which to cut. N divisions produce N+1 block rows.
        Defaults to no cuts.
    col_divs : iterable of int, optional
        Column indices at which to cut. N divisions produce N+1 block
        columns. Defaults to no cuts.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(len(row_divs) + 1, len(col_divs) + 1)``
        whose ``[i, j]`` entry is the slice of ``mat`` lying between the
        i-th pair of row bounds and the j-th pair of column bounds.
    """
    # ``None`` as the outermost bounds lets the first slice start at the
    # matrix edge and the last slice run to the opposite edge. ``list(...)``
    # accepts any iterable (list, tuple, ndarray) and builds a fresh list,
    # so the caller's sequence is never modified.
    row_bounds = [None] + list(row_divs) + [None]
    col_bounds = [None] + list(col_divs) + [None]

    # Each cell holds a whole sparse matrix, hence an explicit object dtype.
    blocks = np.empty((len(row_bounds) - 1, len(col_bounds) - 1), dtype=object)
    for i, (row_start, row_stop) in enumerate(zip(row_bounds, row_bounds[1:])):
        for j, (col_start, col_stop) in enumerate(zip(col_bounds, col_bounds[1:])):
            blocks[i, j] = mat[row_start:row_stop, col_start:col_stop]
    return blocks
def merge_sparse(mat_of_mats):
    """Tile a 2-D grid of sparse matrices back into one sparse matrix.

    Parameters
    ----------
    mat_of_mats : 2-D array (or nested sequence) of sparse matrices
        The blocks must be tileable, i.e.::

            mat_of_mats[i, j1].shape[0] == mat_of_mats[i, j2].shape[0]
            mat_of_mats[i1, j].shape[1] == mat_of_mats[i2, j].shape[1]

        for any valid ``i``, ``i1``, ``i2``, ``j``, ``j1``, ``j2``.

    Returns
    -------
    sparse matrix
        The blocks of each grid row joined side by side with ``hstack``,
        and the resulting rows stacked on top of each other with ``vstack``.
    """
    # Join the blocks of every grid row horizontally, then stack those rows.
    stacked_rows = [hstack(block_row) for block_row in mat_of_mats]
    return vstack(stacked_rows)