在Cython中包装ScaLAPACK函数

时间:2018-02-01 20:29:24

标签: cython lapack blas intel-mkl scalapack

我使用lapackblas库在cython中实现矩阵求逆和乘法矩阵。这是我的代码:

import numpy as np
cimport numpy as np
cimport cython
import ctypes    
cdef extern from "f2pyptr.h": 
     void *f2py_pointer(object) except NULL

import scipy.linalg.lapack 
import scipy.linalg.blas
ctypedef np.float64_t REAL_t

ctypedef void (*dgetri_ptr) (long *N, double *A, long *LDA, long *IPIV, double *WORK, long *LWORK, long *INFO) nogil
cdef dgetri_ptr dgetri = <dgetri_ptr>f2py_pointer(scipy.linalg.lapack.dgetri._cpointer) 

ctypedef void (*dgetrf_ptr) (long *M, long *N, double *A, long *LDA, long *IPIV, long *INFO) nogil
cdef dgetrf_ptr dgetrf = <dgetrf_ptr>f2py_pointer(scipy.linalg.lapack.dgetrf._cpointer) 

cdef void inverse_matrix(REAL_t* A, long n, long info):
     #Computes the inverse of an LU-factored general matrix using sgetri routine.
     #This routine computes the inverse inv(A) of a general matrix A. Before calling this routine, call sgetrf to factorize A
     cdef np.ndarray[long, ndim=1] ipiv   = np.empty(n, dtype=np.int64)
     cdef np.ndarray[double, ndim=1] work = np.empty(n, dtype=np.float64)
     dgetrf(&n, &n, A, &n, &ipiv[0], &info)
     dgetri(&n, A, &n, &ipiv[0], &work[0], &n, &info)


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef REAL_t[::1,:] inverse(REAL_t[::1,:] A):
      if (A.shape[0] !=A.shape[1]):
         raise TypeError("The input array must be a square matrix")
      cdef long N = A.shape[0]
      cdef long info
      A=np.require(A, requirements=['F'])
      inverse_matrix(&A[0,0], N, info)

      if info < 0:
          raise ValueError('illegal value in %d-th argument of internal getrf' % -info)
      elif info > 0:
          raise ValueError("Diagonal number %d is exactly zero. Singular matrix." % -info)
      return A

ctypedef void (*dgetri_ptr) (long *N, double *A, long *LDA, long *IPIV, double *WORK, long *LWORK, long *INFO) nogil
cdef dgetri_ptr dgetri = <dgetri_ptr>f2py_pointer(scipy.linalg.lapack.dgetri._cpointer) 
ctypedef void (*dgemm_ptr) (char *TRANSA, char *TRANSB, long *M, long *N, long *K, double *ALPHA, double *A, long *LDA, double *B, long *LDB, double *BETA, double *C, long *LDC) nogil  
cdef dgemm_ptr dgemm = <dgemm_ptr>f2py_pointer(scipy.linalg.blas.dgemm._cpointer) 
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef REAL_t[::1,:] dot_product(REAL_t[::1,:] a, REAL_t[::1,:] b, char* TRANSA ='N', char* TRANSB='N', REAL_t ALPHA = 1.0, REAL_t BETA  = 0.0):

      cdef REAL_t[::1,:] c_view = np.zeros((a.shape[0], b.shape[1]), dtype=np.float64, order='F') 
      cdef long M, N, K, LDA, LDB, LDC
      cdef char* cN = 'N'
      cdef char* cT = 'T'
      if (a.shape[1] !=b.shape[0]):
         raise TypeError("The numbers of columns in matrix a must be equal to number of rows in matrix b")
      if (TRANSA   == cN):
         M = a.shape[0] #the number  of rows  of the  matrix op( A )  and of the  matrix  C
      elif (TRANSA == cT):
         M = a.shape[1]
      if (TRANSB   == cN):
         N = b.shape[1]
      elif (TRANSB == cT):
         N = b.shape[0]
      K   = a.shape[1]
      LDA = a.shape[0]
      LDB = b.shape[0]
      LDC = a.shape[0]
      dgemm(TRANSA, TRANSB, &M, &N, &K, &ALPHA, &a[0,0], &LDA, &b[0,0], &LDB, &BETA, &c_view[0,0], &LDC)
      return c_view

我想通过拆分计算并在多个处理器上运行来提高代码的速度。 如何通过在cython代码中包含以下函数来扩展此代码并使用scalapack库:

  1. pdgetri使用LU分解来计算分布式矩阵的逆矩阵。
  2. pdgetrf使用行交换的部分旋转计算一般M-by-N分布矩阵的LU分解。
  3. 转换矩阵的
  4. pdtran_
  5. pdgemm_执行矩阵 - 矩阵乘法之一。
  6. 更新我: 为了能够使用pdgetripdgetrf我应该使用以下方法将它们从 fortran 脚本和pdgemm_ c包装起来脚本?

    scalapack_wrap.f90

    subroutine C_PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, IWORK, LIWORK, INFO ) bind(c, NAME='C_PDGETRI')
               use iso_c_binding, only: c_double, c_int
               implicit none
    
               real(c_double), dimension(N, N), intent(in):: A
               real(c_double), dimension(LWORK), intent(in):: WORK
               integer(c_int), dimension(N), intent(in):: DESCA
               integer(c_int), dimension(N), intent(in)::IPIV
               integer(c_int), dimension(LWORK), intent(in):: IWORK
               integer(c_int), intent(in) :: N, IA, JA, LIWORK, LWORK, INFO
               call PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, IWORK, LIWORK, INFO )
    end subroutine C_PDGETRI
    
    subroutine C_PDGETRF(M, N, A, IA, JA, DESCA, IPIV, INFO) bind(c, NAME='C_PDGETRF')
               use iso_c_binding, only: c_double, c_int
               implicit none
    
               real(c_double), dimension(M, N), intent(in):: A
               integer(c_int), dimension(N), intent(in):: DESCA
               integer(c_int), dimension(N), intent(in)::IPIV
               integer(c_int), intent(in) :: M, N, IA, JA, INFO
               call PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO )
    end subroutine C_PDGETRF
    

    scalapack_func.h

    extern void C_PDGETRI( long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, double* WORK, long* LWORK, int* IWORK, long* LIWORK, int* INFO );
    extern void C_PDGETRF( long* M, long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, int* INFO);
    

    scalapack_funcs.pyx

    import numpy as np
    cimport numpy as np
    cimport cython
    cdef extern from "scalapack_func.h":
         void C_PDGETRI( long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, double* WORK, long* LWORK, int* IWORK, long* LIWORK, int* INFO);
         void C_PDGETRF( long* M, long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, int* INFO);
    
    cdef double[:,::1] pdgetri_(double[:,::1] A):
         cdef long N      = A.shape[0]
         cdef long LWORK  = A.shape[1]
         cdef long LIWORK = A.shape[1]
         cdef int INFO    = 0
         cdef int IA      = 0 #the row index in the global array A indicating the first row of sub( A )
         cdef int JA      = 0 #The column index in the global array A indicating the first column of sub( A ).
         cdef double[::1] WORK = np.empty(LWORK, dtype=np.float64)
         cdef int[::1] IWORK   = np.empty(LIWORK, dtype=np.int32)
         cdef int[::1] IPIV    = np.empty(N, dtype=np.int32)
         cdef int[::1] DESCA   = np.empty(N, dtype=np.int32)
         C_PDGETRI(&N, &A[0,0],  &IA, &JA, &DESCA[0], &IPIV[0], &WORK[0], &LWORK, &IWORK[0], &LIWORK, &INFO)
         if INFO < 0:
             raise ValueError('illegal value in %d-th argument of internal pdgetri' % -INFO)
         elif INFO > 0:
             raise ValueError("Diagonal number %d is exactly zero. Singular matrix." % -INFO)
         return A 
    
    cdef double[:,::1] pdgetrf_(double[:,::1] A):
         cdef long M      = A.shape[0] #The number of rows to be operated on,
         cdef long N      = A.shape[1] #The number of columns to be operated on
         cdef int INFO    = 0
         cdef int IA      = 0 #the row index in the global array A indicating the first row of sub( A )
         cdef int JA      = 0 #The column index in the global array A indicating the first column of sub( A ).
         cdef int[::1] IPIV    = np.empty(M, dtype=np.int32)
         cdef int[::1] DESCA   = np.empty(M, dtype=np.int32)
         C_PDGETRF(&M, &N, &A[0,0], &IA, &JA, &DESCA[0], &IPIV[0], &INFO)
         if INFO < 0:
             raise ValueError('illegal value in %d-th argument of internal pdgetrf' % -INFO)
         elif INFO > 0:
             raise ValueError("Diagonal number %d is exactly zero. Singular matrix." % -INFO)
         return A   
    @cython.wraparound(False)
    @cython.boundscheck(False)
    cpdef double[:,::1] inverse(double[:,::1] A):     
          if (A.shape[0] !=A.shape[1]):
              raise TypeError("The input array must be a square matrix")
          A=np.require(A, requirements=['F'])
          pdgetrf_(A)
          pdgetri_(A)
          return A
    

    更新II:

    setup.py

    from distutils.core import setup
    from distutils.extension import Extension
    from Cython.Distutils import build_ext
    # This line only needed if building with NumPy in Cython file.
    from numpy import get_include
    import subprocess
    import os
    from os import system
    import warnings
    import mpi4py
    
    fortran_mod_comp = 'mpif90 /usr/local/scalapack-2.0.2/SRC/pdgetri.f -c -o pdgetri_.o -O3 -fPIC'
    print fortran_mod_comp
    system(fortran_mod_comp)
    
    fortran_mod_comp = 'mpif90 /usr/local/scalapack-2.0.2/SRC/pdgetrf.f -c -o pdgetrf_.o -O3 -fPIC'
    print fortran_mod_comp
    system(fortran_mod_comp)
    
    shared_obj_comp = 'mpif90 scalapack_wrap.f90 -c -o scalapack_wrap.o -O3 -fPIC'
    print shared_obj_comp
    system(shared_obj_comp)
    def runcommand(cmd):
        process = subprocess.Popen(cmd.split(), shell=False, stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT, universal_newlines=True)
        c = process.communicate()
        if process.returncode != 0:
            raise Exception("Something went wrong whilst running the command: %s" % cmd)
        return c[0]
    
    def whichscalapack():
        # Figure out which Scalapack to use
        if 'MKLROOT' in os.environ:
            return 'intelmkl'
        else:
            return 'netlib'
    
    def whichmpi():
        # Figure out which MPI environment this is
        import re
        try:
            mpiv = runcommand('mpirun -V')
            if re.search('Intel', mpiv):
                return 'intelmpi'
            elif re.search('Open MPI', mpiv):
                return 'openmpi'
        except:
            return 'mpich'
        warnings.warn('Unknown MPI environment.')
        return None
    
    scalapackversion = whichscalapack()
    mpiversion = whichmpi()
    if mpiversion == 'openmpi':
        # Fetch the arguments for linking and compiling.
        mpilinkargs = runcommand('mpicc -showme:link').split()
        mpicompileargs = runcommand('mpicc -showme:compile').split()
    
    if scalapackversion == 'intelmkl':
        # Set library includes (taking into account which MPI library we are using)."
        scl_lib = ['mkl_scalapack_lp64', 'mkl_rt', 'mkl_blacs_'+mpiversion+'_lp64', 'iomp5', 'pthread']
        scl_libdir = [os.environ['MKLROOT']+'/lib/intel64' if 'MKLROOT' in os.environ else '']
    elif scalapackversion == 'netlib':
        scl_lib = ['scalapack', 'gfortran']
        scl_libdir = [ os.path.dirname(runcommand('gfortran -print-file-name=libgfortran.a')) ]
    else:
        raise Exception("Scalapack distribution unsupported. Please modify setup.py manually.")
    
    
    ext_modules = [Extension(# module name:
                             'scalapack_funcs',
                             # source file:
                             sources=['scalapack_funcs.pyx'],
                             # Needed if building with NumPy. This includes the NumPy headers when compiling.
                             include_dirs = [get_include(), mpi4py.get_include()],
                             #include libraries 
                             library_dirs=scl_libdir, libraries=scl_lib,
                             #libraries=["scalapack"],
                             #library_dirs=[library_dirs],
                             # other compile args for gcc
                             extra_compile_args=[ "-fopenmp", "-lmkl_lapack95_lp64", "-lmkl_core", "-lmkl_intel_lp64",  "-lmkl_rt", "-lmkl_intel_thread" ] + mpicompileargs,
                             # other files to link to
                             extra_link_args=['pdgetrf_.o', 'pdgetri_.o', 'scalapack_wrap.o','-fopenmp', "-lmkl_lapack95_lp64", "-lmkl_core", "-lmkl_intel_lp64", "-lmkl_rt", "-lmkl_intel_thread"] + mpilinkargs)]
    
    setup(
          cmdclass = {'build_ext': build_ext},
          ext_modules = ext_modules
          )
    

    代码被编译但

    >>> import scalapack_funcs
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ImportError: /opt/intel/mkl/lib/intel64/libmkl_scalapack_lp64.so: undefined symbol: dapply_2hv
    

    正如 danny 所指出的那样,问题可能就是使用scalapack库正确编译代码。但我仍然收到错误消息。有什么建议如何摆脱它?

2 个答案:

答案 0 :(得分:1)

看起来你几乎是对的。添加要从中导入函数的特定头文件,并使用FORTRAN函数C_PDGETRI的正确名称。编译时,请务必添加对"your_fortran_header.h"和相应的,先前编译的FORTRAN静态库的适当引用。

cdef extern from "your_fortran_header.h":
     void C_PDGETRI( long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, double* WORK, long* LWORK, int* IWORK, int* LIWORK, int* INFO )

cdef double[:,::1] pdgetri_(double[:,::1] A not None):
     cdef long N = A.shape[0]
     cdef long LWORK = A.shape[1]
     cdef long LIWORK = A.shape[1]
     cdef int INFO = 0
     cdef long IA = 0 #the row index in the global array A indicating the first row of sub( A )
     cdef long JA = 0 #The column index in the global array A indicating the first column of sub( A ).
     cdef double[::1] WORK = np.empty(LWORK, dtype=np.float64)
     cdef int[::1] IWORK = np.empty(LIWORK, dtype=np.int32)
     cdef int[::1] IPIV = np.empty(N, dtype=np.int32)
     cdef int[::1] DESCA = np.empty(N, dtype=np.int32)
     C_PDGETRI(&N, &A[0,0], &IA, &JA, &DESCA[0], &IPIV[0], &WORK[0], &LWORK, &IWORK[0], &LIWORK, &INFO)
     return A 

答案 1 :(得分:1)

扩展需要链接到它正在使用的库。 undefined symbol表示标题中包含的库未链接到共享对象,并且无法找到该库的符号。

据推测,ilcm来自scalapack,不知道库名是什么。向libraries添加Extension设置及其所需的所有库。

Extension('scalapack_funcs',
          sources=['scalapack_funcs.pyx'],
          libraries=['scalapack'],
          extra_compile_args=['-fPIC', '-O3'],
          extra_link_args=['pdgetri_.o', 'pdgetrf_.o', 'scalapack_wrap.o'])]