慢速解决方案

Question

我有一些小的对称矩阵，它们是较大对称矩阵的低维表示。我还有一个键向量（key），用来指明高维矩阵中的每个单元应该对应低维矩阵中的哪个单元。

我想用较低维矩阵中对应的值去填充较大的矩阵，从而重建这些较大的矩阵。我相信应该有一个矢量化的方法，但到目前为止我所能想到的只是一个简单的嵌套for循环，这对于这些矩阵来说非常慢（10k+行和列）。

在这个玩具示例中，键是vec1，低维矩阵是source_mat，高维矩阵是target_mat。我需要创建target_mat，其中每个单元格都根据键用source_mat中的相应值填充。

    import pandas as pd
    import numpy as np
    import random

    # Key vector: entry i names the row/column of source_mat that row/column i
    # of target_mat is filled from.
    vec1 = [random.randint(0, 19) for _ in range(100)]

    vec1 = pd.DataFrame(vec1)
    sizevec1 = vec1.shape[0]
    matshape = (sizevec1, sizevec1)
    # The target is square, with one row/column per key entry.
    target_mat = pd.DataFrame(np.zeros(matshape))

    # The elementwise product of a matrix with its transpose is symmetric.
    temp = np.random.random((20, 20))
    source_mat = temp * temp.T

    # Baseline: fill the target one cell at a time.
    for row in range(target_mat.shape[0]):
        # vec1 is a one-column DataFrame, so read the scalar at [row, 0];
        # calling int() on a one-element Series is deprecated in recent pandas.
        source_row = int(vec1.iloc[row, 0])
        for column in range(target_mat.shape[1]):
            source_column = int(vec1.iloc[column, 0])
            # print() calls (not Python 2 print statements) so the script
            # also parses on Python 3.
            print('row is ', row)
            print('column is', column)
            target_mat.iloc[row, column] = source_mat.item(source_row, source_column)

Answer 1

这比你的“快速”答案快3倍。

import random
import time
import numpy as np
# Key vector: 1000 random labels in [0, 19], one per row/column of the target.
vec1 = np.array([random.randint(0, 19) for _ in range(1000)])
sizevec1 = vec1.shape[0]
matshape = (sizevec1, sizevec1)

# Small symmetric source matrix (elementwise product with its own transpose).
temp = np.random.random((20, 20))
source_mat = temp * temp.T

###FasterMethod###

# Zero-initialised square target, one row/column per key entry.
target_mat = np.zeros(matshape)

def matrixops(vec1, source_mat, target_mat):
    """Add the key-expanded ``source_mat`` onto ``target_mat`` and return it.

    For every label pair ``(row, column)`` of ``source_mat``, the value
    ``source_mat[row, column]`` is added to each cell ``[i, j]`` for which
    ``vec1[i] == row`` and ``vec1[j] == column``.  The elapsed wall-clock
    time in seconds is printed before returning.

    Args:
        vec1: integer labels, one per row/column of the target; flattened to
            one dimension before use.
        source_mat: 2-D array indexed by label.
        target_mat: 2-D array of shape ``(len(vec1), len(vec1))``.  It is not
            modified; the sum is built in new arrays.

    Returns:
        The accumulated matrix (``target_mat`` itself when ``source_mat`` has
        no rows or no columns).
    """
    matrixtime = time.time()
    keys = np.asarray(vec1).ravel()
    result = target_mat
    for row in range(source_mat.shape[0]):
        # Shape (n, 1): multiplying by the (1, n) column mask below broadcasts
        # to an n x n outer product.  Two 1-D masks would only multiply
        # elementwise, and `.T` is a no-op on a 1-D array.
        rowmatch = (keys == row)[:, np.newaxis] * 1
        for column in range(source_mat.shape[1]):
            colmatch = (keys == column)[np.newaxis, :] * 1
            match_matrix = rowmatch * colmatch
            result = result + match_matrix * source_mat[row, column]

    print((time.time() - matrixtime))
    return result


if __name__ == "__main__":
    # Keep the result: matrixops returns a new array and leaves its
    # target_mat argument untouched.
    target_mat = matrixops(vec1, source_mat, target_mat)

您的快速版本时间：4.246443033218384 此版本时间：1.4500105381011963

正如我在评论中所说，Cython版本根本没有更快。要让它更快，唯一的办法是把依赖Python GIL的那些行改写成C++风格的操作（就像我对==部分所做的那样：手写类似C++的循环来完成与NumPy函数相同的工作，因为MemoryView不支持这些NumPy函数）。我在这上面花了很多时间，所以把代码发布在这里作为参考：

cimport numpy
from numpy import array, multiply, asarray, ndarray, zeros, dtype, int
cimport cython
from cython cimport view
from cython.parallel cimport prange #this is your OpenMP portion
from openmp cimport omp_get_max_threads #only used for getting the max # of threads on the machine

# Cython port of the pure-Python matrixops: for each (row, column) label pair
# of source_mat, build 0/1 match masks over vec1 and accumulate the weighted
# mask product into target_mat.
# NOTE(review): the accompanying answer states this port was not finished;
# the last two statements of the inner loop still call NumPy's `multiply`.
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef matrixops(int[::1] vec1, double[:,::1] source_mat, double[:,::1] target_mat):
    # Scratch buffers, each with one entry per element of vec1.
    # NOTE(review): `dtype=int` presumably needs to match the C `int` of the
    # memoryview declaration on the target platform -- confirm.
    cdef int[::1] match_matrix =zeros(vec1.shape[0], dtype=int)
    cdef int[::1] rowmatch =zeros(vec1.shape[0], dtype=int)
    cdef int[::1] colmatch =zeros(vec1.shape[0], dtype=int)
    # Only referenced by the commented-out prange line below.
    cdef int maxthreads = omp_get_max_threads()
    cdef int row, column, i
    # here's where you'd substitute
    # for row in prange(source_mat.shape[0], nogil=True, num_threads=maxthreads, schedule='static'): # to use all cores
    for row in range(0,source_mat.shape[0]):
        for column in range(0, source_mat.shape[1]):
        #this is how to avoid the GIL
            # rowmatch[i] is 1 where vec1[i] equals the current row label.
            for i in range(vec1.shape[0]):
                rowmatch[i]=(row==vec1[i])

            # colmatch[i] is 1 where vec1[i] equals the current column label.
            for i in range(vec1.shape[0]):
                colmatch[i]=(column==vec1[i])
            # this part has to be modified to not call Python GIL functions like was done above
            # NOTE(review): rowmatch and colmatch are 1-D, so `.T` presumably
            # does not turn this into an outer product -- confirm.
            match_matrix=multiply(rowmatch,colmatch.T)
            # NOTE(review): this assigns to the local name target_mat and the
            # function has no return statement -- confirm how callers are
            # meant to obtain the result.
            target_mat=target_mat+(multiply(match_matrix,source_mat[row,column]))

以上就是对应的.pyx文件。如果你能成功完成转换，在4核机器上通常可以看到约3倍的加速。抱歉我没能成功做到这一点，但直接使用Python库的版本仍然比你那个“快100倍”的解决方案快3倍。

Answer 2

下面是代码的两个单独更新，这些更新导致了相当大的加速。

首先找出了矢量化解决方案，所以现在一步完成计算。即使在第二次更改后，这也是最快的方法 -

第二 - 将所有pandas数据帧更改为numpy数组。这种变化对for循环代码影响最大 - 现在运行速度提高了几个数量级。

下面的代码对全部3种方法进行了计时：“慢”、“快”和“Xu Mackenzie”，最后一种以想出向量化解决方案的朋友命名 ;-P

#Initialize Variables

import time
import random
import pandas as pd
import numpy as np

n = 13000  # number of rows/columns of the large (target) matrix
k = 2000   # number of rows/columns of the small (source) matrix
i = 0

# Key vector: one label in [0, k-1] per row/column of the target.
vec1 = np.array([random.randint(0, k - 1) for _ in range(n)])
sizevec1 = vec1.shape[0]
matshape = (sizevec1, sizevec1)
target_mat = np.zeros(matshape)

# Symmetric source matrix (elementwise product with its own transpose).
temp = np.random.random((k, k))
source_mat = temp * temp.T

# k x n matrix of zeros, one row per source label and one column per key entry.
transform_mat = np.zeros((source_mat.shape[0], target_mat.shape[0]))

慢速解决方案

# Baseline: copy the source value into every target cell, one cell at a time.
matrixtime = time.time()
n_rows, n_cols = target_mat.shape
for row in range(n_rows):
    source_row = int(vec1[row])  # source row selected by this target row's key
    for column in range(n_cols):
        target_mat[row, column] = source_mat.item(source_row, int(vec1[column]))
print((time.time() - matrixtime))

# Keep this result under its own name and start the next method from zeros.
target_mat_slow = target_mat
target_mat = np.zeros(matshape)

XU MACKENZIE SOLUTION

matrixtime = time.time()

# One-hot encode the key: column i of transform_mat gets a 1 in row vec1[i].
for i, label in zip(range(len(target_mat)), vec1):
    transform_mat[label, i] = 1

# Expand the source with two matrix products:
# (source_mat . transform_mat).T . transform_mat
temp = source_mat.dot(transform_mat)
target_mat_XM = temp.T.dot(transform_mat)

# Reset the working matrix for the next method.
target_mat = np.zeros(matshape)
XM_time = time.time() - matrixtime
print((time.time() - matrixtime))

以前的“快速”解决方案

# Loop over the label pairs of the small matrix and add each source value to
# every target cell whose (row key, column key) equals that pair.
matrixtime = time.time()
for row in range(source_mat.shape[0]):
    # print() call (not a Python 2 print statement), matching the other
    # print((...)) calls in this script.
    print('row is ', row)
    # (n, 1) mask of the target rows whose key equals `row`.  It does not
    # depend on `column`, so it is built once per outer iteration.
    rowmatch = (vec1 == row)[:, np.newaxis] * 1
    # Visit every column label.  Stopping at `row` would leave the diagonal
    # and one triangle of the label pairs unaccumulated, so the result would
    # not match the other two methods.
    for column in range(source_mat.shape[1]):
        # (1, n) mask of the target columns whose key equals `column`.
        colmatch = (vec1 == column)[np.newaxis, :] * 1

        # (n, 1) * (1, n) broadcasts to the n x n indicator of matching cells.
        match_matrix = rowmatch * colmatch
        target_mat = target_mat + (match_matrix * source_mat[row, column])

print((time.time() - matrixtime))
target_mat_fast = target_mat
target_mat = np.zeros(matshape)

等效性测试

# A bare `a == b` expression builds a boolean array and discards it when this
# runs as a script.  Reduce each comparison to a single verdict and print it;
# allclose also tolerates floating-point rounding differences.
print(np.allclose(target_mat_slow, target_mat_fast))
print(np.allclose(target_mat_fast, target_mat_XM))

Answer 3

我设法提出了一个提供非常好的加速的解决方案，特别是对于更大的矩阵。这依赖于循环遍历较小的矩阵并用它的匹配元素填充大矩阵。

我用含有1000个元素的向量vec1测试了这个解决方案，发现它比之前的方法快100倍。

    import random
    import time
    import pandas as pd
    import numpy as np
    # Key vector, stored as a one-column DataFrame of 1000 labels in [0, 19].
    vec1 = pd.DataFrame([random.randint(0, 19) for _ in range(1000)])
    sizevec1 = vec1.shape[0]
    matshape = (sizevec1, sizevec1)

    # Zero-filled square target, one row/column per key entry.
    target_mat = pd.DataFrame(np.zeros(matshape))

    # Symmetric 20 x 20 source matrix.
    temp = np.random.random((20, 20))
    source_mat = temp * temp.T

    ###Slow Method###

    # Baseline: fill the target DataFrame one cell at a time.
    matrixtime = time.time()
    for row in range(target_mat.shape[0]):
        # vec1 is a one-column DataFrame: read the scalar at [row, 0] rather
        # than calling int() on a one-element Series, which recent pandas
        # versions deprecate.
        source_row = int(vec1.iloc[row, 0])
        for column in range(target_mat.shape[1]):
            source_column = int(vec1.iloc[column, 0])
            target_mat.iloc[row, column] = source_mat.item(source_row, source_column)
    print((time.time() - matrixtime))
    target_mat_slow = target_mat



    ###FasterMethod###

    # Start again from a zero-filled DataFrame of the target shape.
    target_mat = pd.DataFrame(np.zeros(matshape))

    matrixtime = time.time()
    n_row_labels, n_col_labels = source_mat.shape
    for row in range(n_row_labels):
        # vec1 is a one-column DataFrame, so each mask is an (n, 1) 0/1 array.
        rowmatch = np.array(vec1 == row) * 1
        for column in range(n_col_labels):
            colmatch = np.array(vec1 == column) * 1

            # (n, 1) * (1, n) broadcasts to the n x n indicator of the cells
            # whose row key is `row` and whose column key is `column`.
            match_matrix = rowmatch * colmatch.T
            target_mat = target_mat + source_mat[row, column] * match_matrix

    print((time.time() - matrixtime))
    target_mat_fast = target_mat

    #Test Equivalence
    # A bare `a == b` expression is discarded when this runs as a script;
    # reduce the comparison to a single verdict and print it.
    print(np.allclose(target_mat_slow, target_mat_fast))

如何使用python中较小矩阵的项来向量化填充较大的矩阵

3 个答案:

慢速解决方案

XU MACKENZIE SOLUTION

以前的“快速”解决方案

等效性测试