
时间:2019-01-24 11:03:06

标签: python-3.x numpy scipy numeric linear

我正在尝试使用 scipy.linalg(或 scipy.sparse.linalg)提供的某种算法来求解一个 1_000_000 x 1_000_000 的稀疏线性方程组,但在把这个矩阵创建为 numpy 二维数组这一步就已经失败了(MemoryError)。

于是我想起了几个学期前一门课程里学到的做法:使用一个 get_value(i, j) 函数,它接收索引 i 和 j,并根据是否满足某些条件返回 1 或 0。这个函数完整地描述了我想要的矩阵。


函数 get_value(i, j) 以索引(indices)作为参数,并返回矩阵中对应元素的值。例如:单位矩阵 [[1, 0], [0, 1]]

可以用下面这种方式来表示:

    def get_value(i, j):
        return 1 if i == j else 0





2 个答案:

答案 0 :(得分:0)


# Matrix dimensions; `expected`, `get_value` and `b` come from outside this snippet.
x, y = expected.shape
# DOK format is used because it allows assigning entries one at a time.
A_dok = scipy.sparse.dok_matrix((x, y))
# Evaluate the entry function at every (row, column) position.
for i in range(x):
    for j in range(y):
        A_dok[i, j] = get_value(i, j)
# Convert to CSR before handing the matrix to the sparse solver.
A_csr = A_dok.tocsr()
# NOTE: from here on `y` no longer holds the column count but the solution of A y = b.
y = scipy.sparse.linalg.spsolve(A_csr, b)


答案 1 :(得分:0)


import numpy as np

def are_equal(i, j):
    """Return 1 where ``i`` equals ``j`` and 0 elsewhere, as ``np.int32``.

    Works for scalar indices as well as for index arrays, in which case the
    comparison is element-wise and an int32 array is returned.
    """
    matches = i == j
    return np.int32(matches)


import numpy as np

def sparse_from_func_chunked(func, shape, chunks=1):
    """Collect the nonzero entries of a function-defined matrix, chunk by chunk.

    The matrix is never materialised as a whole: it is split into ``chunks``
    slabs along its larger dimension, ``func`` is evaluated on the index grid
    of each slab, and only the nonzero values and their positions are kept.

    Parameters
    ----------
    func : callable
        Called as ``func(i_grid, j_grid)`` with two integer index arrays of
        identical shape (as produced by ``np.mgrid``); must return an array
        of that same shape holding the matrix entries.
    shape : tuple of int
        ``(rows, cols)`` of the full matrix.
    chunks : int, optional
        Number of slabs to split the larger dimension into (default 1).

    Returns
    -------
    values : ndarray
        The nonzero entries.
    (indices_i, indices_j) : tuple of ndarray
        Row and column index of each entry in ``values``.

    Raises
    ------
    ValueError
        If ``chunks`` is smaller than 1.
    """
    if chunks < 1:
        # Without this check the failure would surface later as an obscure
        # "need at least one array to concatenate" error.
        raise ValueError("chunks must be a positive integer")
    rows, cols = shape
    # Chunk along largest dimension; the other dimension is a single span
    if rows >= cols:
        ii = np.linspace(0, rows, chunks + 1, dtype=int)
        jj = [0, cols]
    else:
        ii = [0, rows]
        jj = np.linspace(0, cols, chunks + 1, dtype=int)
    # Each list will contain the nonzero values and indices for each chunk
    values = []
    indices_i = []
    indices_j = []
    # Iterate chunks: consecutive boundary pairs delimit each slab
    for i_start, i_end in zip(ii[:-1], ii[1:]):
        for j_start, j_end in zip(jj[:-1], jj[1:]):
            # Evaluate function for chunk grid
            chunk = func(*np.mgrid[i_start:i_end, j_start:j_end])
            # Pick nonzero indices (relative to the chunk origin)
            i_chunk, j_chunk = np.nonzero(chunk)
            # Save chunk sparse data, shifting indices to full-matrix coordinates
            indices_i.append(i_chunk + i_start)
            indices_j.append(j_chunk + j_start)
            values.append(chunk[i_chunk, j_chunk])
    # Concatenate all sparse data
    indices_i = np.concatenate(indices_i)
    indices_j = np.concatenate(indices_j)
    values = np.concatenate(values)
    return values, (indices_i, indices_j)


import scipy.sparse

# Make 10x10 identity matrix in three chunks
values, (i, j) = sparse_from_func_chunked(are_equal, (10, 10), chunks=3)
# Pass the shape explicitly: when it is omitted, coo_matrix infers the
# dimensions from the index arrays, so trailing all-zero rows or columns
# would silently be dropped.
print(scipy.sparse.coo_matrix((values, (i, j)), shape=(10, 10)).todense())
# [[1 0 0 0 0 0 0 0 0 0]
#  [0 1 0 0 0 0 0 0 0 0]
#  [0 0 1 0 0 0 0 0 0 0]
#  [0 0 0 1 0 0 0 0 0 0]
#  [0 0 0 0 1 0 0 0 0 0]
#  [0 0 0 0 0 1 0 0 0 0]
#  [0 0 0 0 0 0 1 0 0 0]
#  [0 0 0 0 0 0 0 1 0 0]
#  [0 0 0 0 0 0 0 0 1 0]
#  [0 0 0 0 0 0 0 0 0 1]]



def sparse_from_func_iter(func, shape):
    """Collect the nonzero entries of a function-defined matrix one by one.

    ``func(i, j)`` is evaluated for every position of a ``rows x cols``
    matrix in row-major order; entries different from zero are recorded.
    The body sticks to plain loops and list appends so that it stays simple
    enough to be handed to a JIT compiler.

    Parameters
    ----------
    func : callable
        Called as ``func(i, j)`` with scalar integer indices; returns the
        matrix entry at that position.
    shape : tuple of int
        ``(rows, cols)`` of the full matrix.

    Returns
    -------
    values : list
        The nonzero entries, in row-major order.
    (indices_i, indices_j) : tuple of list
        Row and column index of each entry in ``values``.
    """
    rows, cols = shape
    values = []
    indices_i = []
    indices_j = []
    for i in range(rows):
        for j in range(cols):
            v = func(i, j)
            # Only nonzero entries belong in the sparse representation
            if v != 0:
                values.append(v)
                indices_i.append(i)
                indices_j.append(j)
    return values, (indices_i, indices_j)


import scipy.sparse

# Build the same 10x10 identity matrix with the element-by-element variant
values, (i, j) = sparse_from_func_iter(are_equal, (10, 10))
# Pass the shape explicitly: when it is omitted, coo_matrix infers the
# dimensions from the index arrays, so trailing all-zero rows or columns
# would silently be dropped.
print(scipy.sparse.coo_matrix((values, (i, j)), shape=(10, 10)).todense())
# [[1 0 0 0 0 0 0 0 0 0]
#  [0 1 0 0 0 0 0 0 0 0]
#  [0 0 1 0 0 0 0 0 0 0]
#  [0 0 0 1 0 0 0 0 0 0]
#  [0 0 0 0 1 0 0 0 0 0]
#  [0 0 0 0 0 1 0 0 0 0]
#  [0 0 0 0 0 0 1 0 0 0]
#  [0 0 0 0 0 0 0 1 0 0]
#  [0 0 0 0 0 0 0 0 1 0]
#  [0 0 0 0 0 0 0 0 0 1]]


# The `%timeit` lines are IPython/Jupyter magics (not plain Python). They time
# both builders on a 1000x1000 identity matrix; the comment after each line is
# the recorded result.
%timeit sparse_from_func_chunked(are_equal, (1_000, 1_000), chunks=100)
# 15.1 ms ± 1.11 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit sparse_from_func_iter(are_equal, (1_000, 1_000))
# 392 ms ± 21.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


import numba as nb

# Compile the element-by-element builder with Numba
sparse_from_func_iter_nb = nb.njit(sparse_from_func_iter)
# The function that produces the matrix entries (passed as `func`) must be
# compiled too
are_equal_nb = nb.njit(are_equal)
# Call once on a 1x1 matrix to trigger compilation of both functions, so that
# compile time is not included in the measurement below
sparse_from_func_iter_nb(are_equal_nb, (1, 1))
# `%timeit` is an IPython/Jupyter magic; the comment after it is the recorded result
%timeit sparse_from_func_iter_nb(are_equal_nb, (1_000, 1_000))
# 637 µs ± 3.07 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

因此,经 Numba 编译的版本实际上比分块版本快得多!当然,前提是用来生成矩阵元素的函数本身也能被 Numba 编译。此外,性能差异可能会随矩阵大小、元素生成函数或分块数量的不同而变化。无论如何,遍历矩阵的全部 1,000,000 x 1,000,000 个元素都将花费相当长的时间(不过也许可以只计算一次,然后把得到的稀疏矩阵保存下来)。