调整dgemm示例代码以使用sgemm(scalapack)

时间:2014-12-19 00:37:37

标签: parallel-processing fortran lapack blas scalapack

我需要让以下程序(来自http://www.netlib.org/scalapack/examples/pblas.tgz)与SGEMM合作。我需要更改什么才能使其正常工作?我对Fortran的了解非常有限,我将其视为一个黑盒子,并将其作为我的虚拟集群的基准。

      PROGRAM PDPBLASDRIVER
*
*  -- PBLAS example code --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*    
*     Written by Antoine Petitet, August 1995 (petitet@cs.utk.edu)
*
*     This program shows how to set the matrix descriptors and call
*     the PBLAS routines.
* 
*     .. Parameters ..
      INTEGER            DBLESZ, MEMSIZ, TOTMEM
      PARAMETER          ( DBLESZ = 8, TOTMEM = 400000000,
     $                     MEMSIZ = TOTMEM / DBLESZ )
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
      CHARACTER*80       OUTFILE
      INTEGER            IAM, IASEED, IBSEED, ICSEED, ICTXT, INFO, IPA,
     $                   IPB, IPC, IPW, K, KP, KQ, M, MP, MYCOL, MYROW,
     $                   N, NB, NOUT, NPCOL, NPROCS, NPROW, NQ, WORKSIZ
      DOUBLE PRECISION   BNRM2
*     ..
*     .. Local Arrays ..
      INTEGER            DESCA( DLEN_ ), DESCB( DLEN_ ), DESCC( DLEN_ )
      DOUBLE PRECISION   MEM( MEMSIZ )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO,
     $                   DESCINIT, IGSUM2D, PDMATGEN, PDPBLASINFO,
     $                   PDNRM2, PDGEMV, PDGEMM, PDLAPRNT
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX
*     ..
*     .. Executable Statements ..
*
*     Get starting information
*
      CALL BLACS_PINFO( IAM, NPROCS )
      CALL PDPBLASINFO( OUTFILE, NOUT, M, N, K, NB, NPROW, NPCOL, MEM,
     $                  IAM, NPROCS )
* 
*     Define process grid
*
      CALL BLACS_GET( -1, 0, ICTXT )
      CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Go to bottom of process grid loop if this case doesn't use my
*     process
*
      IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL )
     $   GO TO 20
*
      MP = NUMROC( M, NB, MYROW, 0, NPROW )
      KP = NUMROC( K, NB, MYROW, 0, NPROW )
      KQ = NUMROC( K, NB, MYCOL, 0, NPCOL )
      NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
*
*     Initialize the array descriptor for the matrix A, B and C
*
      CALL DESCINIT( DESCA, M, K, NB, NB, 0, 0, ICTXT, MAX( 1, MP ),
     $               INFO )
      CALL DESCINIT( DESCB, K, N, NB, NB, 0, 0, ICTXT, MAX( 1, KP ),
     $               INFO )
      CALL DESCINIT( DESCC, M, N, NB, NB, 0, 0, ICTXT, MAX( 1, MP ),
     $               INFO )
*
*     Assign pointers into MEM for SCALAPACK arrays, A is
*     allocated starting at position MEM( 1 )
*
      IPA = 1
      IPB = IPA + DESCA( LLD_ )*KQ
      IPC = IPB + DESCB( LLD_ )*NQ
      IPW = IPC + DESCC( LLD_ )*NQ
*
      WORKSIZ = NB
*
*     Check for adequate memory for problem size
*
      INFO = 0
      IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN
         IF( IAM.EQ.0 )
     $      WRITE( NOUT, FMT = 9998 ) 'test', ( IPW+WORKSIZ )*DBLESZ
         INFO = 1
      END IF
*
*     Check all processes for an error
*
      CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 )
      IF( INFO.GT.0 ) THEN
         IF( IAM.EQ.0 )
     $      WRITE( NOUT, FMT = 9999 ) 'MEMORY'
         GO TO 10
      END IF
*
*     Generate random matrices A, B and C
*
      IASEED = 100
      CALL PDMATGEN( ICTXT, 'No transpose', 'No transpose', DESCA( M_ ),
     $               DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
     $               MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ),
     $               DESCA( CSRC_ ), IASEED, 0, MP, 0, KQ, MYROW, MYCOL,
     $               NPROW, NPCOL )
      IBSEED = 200
      CALL PDMATGEN( ICTXT, 'No transpose', 'No transpose', DESCB( M_ ),
     $               DESCB( N_ ), DESCB( MB_ ), DESCB( NB_ ),
     $               MEM( IPB ), DESCB( LLD_ ), DESCB( RSRC_ ),
     $               DESCB( CSRC_ ), IBSEED, 0, KP, 0, NQ, MYROW, MYCOL,
     $               NPROW, NPCOL )
      ICSEED = 300
      CALL PDMATGEN( ICTXT, 'No transpose', 'No transpose', DESCC( M_ ),
     $               DESCC( N_ ), DESCC( MB_ ), DESCC( NB_ ),
     $               MEM( IPC ), DESCC( LLD_ ), DESCC( RSRC_ ),
     $               DESCC( CSRC_ ), ICSEED, 0, MP, 0, NQ, MYROW, MYCOL,
     $               NPROW, NPCOL )

*
**********************************************************************
*     Call Level 3 PBLAS routine
**********************************************************************
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * )
     $         '***********************************************'
         WRITE( NOUT, FMT = * )
     $         'Example of Level 3 PBLAS routine call: (PDGEMM)'
         WRITE( NOUT, FMT = * )
     $         '***********************************************'
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * ) ' Matrix A:'
         WRITE( NOUT, FMT = * )
      END IF
*      CALL PDLAPRNT( M, K, MEM( IPA ), 1, 1, DESCA, 0, 0,
*     $               'A', NOUT, MEM( IPW ) )
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * ) ' Matrix B:'
         WRITE( NOUT, FMT = * )
      END IF
*      CALL PDLAPRNT( K, N, MEM( IPB ), 1, 1, DESCB, 0, 0,
*     $               'B', NOUT, MEM( IPW ) )

      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * ) ' Matrix C:'
         WRITE( NOUT, FMT = * )
      END IF
*      CALL PDLAPRNT( M, N, MEM( IPC ), 1, 1, DESCC, 0, 0,
*     $               'C', NOUT, MEM( IPW ) )
*
      CALL PDGEMM( 'No transpose', 'No transpose', M, N, K, ONE,
     $             MEM( IPA ), 1, 1, DESCA, MEM( IPB ), 1, 1, DESCB,
     $             ONE, MEM( IPC ), 1, 1, DESCC )
*
      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * ) ' C := C + A * B'
         WRITE( NOUT, FMT = * )
      END IF
*      CALL PDLAPRNT( M, N, MEM( IPC ), 1, 1, DESCC, 0, 0,
*     $               'C', NOUT, MEM( IPW ) )
*
   10 CONTINUE
*
      CALL BLACS_GRIDEXIT( ICTXT )
*
   20 CONTINUE
*
*     Print ending messages and close output file
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9997 )
         WRITE( NOUT, FMT = * )
         IF( NOUT.NE.6 .AND. NOUT.NE.0 )
     $      CLOSE ( NOUT )
      END IF
*
      CALL BLACS_EXIT( 0 )
*
 9999 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
 9998 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
     $        I11 )
 9997 FORMAT( 'END OF TESTS.' )
*
      STOP
*
*     End of PDPBLASDRIVER
*
      END

1 个答案:

答案 0 :(得分:3)

Scalapack库使用命名转换来声明单精度或双精度函数。此声明由scalapack函数的第二个字母完成。" PD *"功能意味着双精度功能,而" PS *"意味着单身。

所以,你应该改变

  • DBLESZ = 8DBLESZ = 4
  • 所有DOUBLE PRECISIONREAL
  • ONE = 1.0D+0ONE = 1.0E+0
  • 所有CALL PD*CALL PS*
  • PDPBLASINFOPDMATGEN等辅助功能执行相同的操作。 PDLAPRNT