Oracle® Developer Studio 12.5:性能库用户指南

退出打印视图

更新时间: 2016 年 6 月
 
 

SuperLU 接口

SuperLU 有两个驱动程序例程,即简单和专业,调用这些例程对一般非对称稀疏方程组求解的方式与 SPSOLVE 中的一次调用接口完全相同。可在单精度、双精度、复数和双精度复数数据类型中使用这些例程以及用户可调用的其他 SuperLU 例程。以下各表中列出了所有外部例程的单精度名称。这些例程都有可用的手册页(第 3P 部分)。有关在应用程序中使用的稀疏矩阵数据结构的说明,另请参见 SuperMatrix(3P) 手册页。

表 5  SuperLU 计算例程
例程
说明
sgstrf
计算因式分解
sgssvx
分解和求解(专业驱动程序)
sgssv
分解和求解(简单驱动程序)
sgstrs
计算三角求解
sgsrfs
改进计算得到的解;提供误差界
slangs
计算 1 范数、弗罗贝尼乌斯范数或无穷范数
sgsequ
计算行和列比例
sgscon
估算条件数的倒数
slaqgs
平衡一般稀疏矩阵
表 6  SuperLU 实用程序例程
例程
说明
LUSolveTime
返回在求解阶段所用的时间
LUFactTime
返回在因式分解阶段所用的时间
LUFactFlops
返回因式分解阶段中的浮点运算数
LUSolveFlops
返回求解阶段中的浮点运算数
sQuerySpace
返回有关内存统计数据的信息
sp_ienv
返回指定的计算机相关参数
sPrintPerf
打印由计算例程收集的统计信息
set_default_options
将控制求解器行为的参数设置为默认选项
StatInit
分配并初始化用于存储性能统计信息的结构
StatFree
释放用于存储性能统计信息的结构
Destroy_Dense_Matrix
取消分配密集格式的 SuperMatrix
Destroy_SuperNode_Matrix
取消分配超节点格式的 SuperMatrix
Destroy_CompCol_Matrix
取消分配压缩稀疏列格式的 SuperMatrix
Destroy_CompCol_Permuted
取消分配已排列的压缩稀疏列格式的 SuperMatrix
Destroy_SuperMatrix_Store
取消分配将矩阵存储在 SuperMatrix 中的实际存储
sCopy_CompCol_Matrix
复制压缩稀疏列格式的 SuperMatrix
sCreate_CompCol_Matrix
分配压缩稀疏列格式的 SuperMatrix
sCreate_Dense_Matrix
分配密集格式的 SuperMatrix
sCreate_CompRow_Matrix
分配压缩稀疏行格式的 SuperMatrix
sCreate_SuperNode_Matrix
分配超节点格式的 SuperMatrix
sp_preorder
排列原始稀疏矩阵的列
sp_sgemm
将 SuperMatrix 乘以稠密矩阵

从 C 调用 SuperLU

SuperLU 例程是用 C 编写的。因此,与列和行相关的索引必须从 0 开始。在以下示例中,将调用双精度简单驱动程序 dgssv() 来计算因子 L 和 U,并对解矩阵求解。

示例 6  SuperLU 简单驱动程序
#include <stdio.h>
#include <sunperf.h>
 
#define M 5
#define N 5
 
int main(int argc, char *argv[])
{
    SuperMatrix A, L, U, B1, B2;
    int      perm_r[M];  /* row permutations from partial pivoting */
    int      perm_c[N];  /* column permutation vector */
    int      info, i;
    superlu_options_t options;
    SuperLUStat_t stat;
    trans_t  trans = NOTRANS;
 
    printf("Example code calling SuperLU simple driver to factor a \n");
    printf("general unsymmetric matrix and solve two right-hand-side matrices\n");
 
   /* the matrix in Harwell-Boeing format. */
    int m = M;
    int n = M;
    int nnz = 12;
    double *dp;
   /* nonzeros of A, column-wise */
    double a[] = {1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
                  7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
   /* row index of nonzeros */
    int asub[] = {0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4};
   /* column pointers */
    int xa[]   = {0, 3, 6, 8, 10, 12};
 
    /* Create Matrix A in the format expected by SuperLU */
    dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
 
    int nrhs = 1;
    double rhs1[] = {17.0, 6.0, 13.0, 21.0, 21.0};
    double rhs2[] = {17*.3, 6*.3, 13*.3, 21*.3, 21*.3};
 
    /* right-hand side matrix B1, B2 */
    dCreate_Dense_Matrix(&B1, m, nrhs, rhs1, m, SLU_DN, SLU_D, SLU_GE);
    dCreate_Dense_Matrix(&B2, m, nrhs, rhs2, m, SLU_DN, SLU_D, SLU_GE);
 
    /* set options that control behavior of solver to default parameters */
    set_default_options(&options);
    options.ColPerm = NATURAL;
 
    /* Initialize the statistics variables. */
    StatInit(&stat);
    /* factor input matrix and solve the first right-hand-side matrix */
    dgssv(&options, &A, perm_c, perm_r, &L, &U, &B1, &stat, &info);
 
    printf("\nsolution matrix B1:\n");
    dp = (double *) (((NCformat *)B1.Store)->nzval);
    printf("    i    rhs[i]     expected\n");
    for (i=0; i<M; i++)
      printf("%5d   %7.4lf     %7.4lf\n", i, dp[i], 1.0);
    printf("Factor time  = %8.2e sec\n", stat.utime[FACT]);
    printf("Solve time   = %8.2e sec\n\n\n", stat.utime[SOLVE]);
 
    /* solve the second right-hand-side matrix */
    dgstrs(trans, &L, &U, perm_c, perm_r, &B2, &stat, &info);
 
    printf("solution matrix B2:\n");
    dp = (double *) (((NCformat *)B2.Store)->nzval);
    printf("    i    rhs[i]     expected\n");
    for (i=0; i<M; i++)
      printf("%5d   %7.4lf     %7.4lf\n", i, dp[i], 0.3);
    printf("Solve time   = %8.2e sec\n", stat.utime[SOLVE]);
 
    StatFree(&stat);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B1);
    Destroy_SuperMatrix_Store(&B2);
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
}

运行上例:

my_system% cc -xmemalign=8s simple.c -library=sunperf
my_system% a.out
 
Example code calling SuperLU simple driver to factor a
general unsymmetric matrix and solve two right-hand-side matrices
 
solution matrix B1:
    i    rhs[i]     expected
    0    1.0000      1.0000
    1    1.0000      1.0000
    2    1.0000      1.0000
    3    1.0000      1.0000
    4    1.0000      1.0000
Factor time  = 5.43e-02 sec
Solve time   = 6.76e-03 sec
 
 
solution matrix B2:
    i    rhs[i]     expected
    0    0.3000      0.3000
    1    0.3000      0.3000
    2    0.3000      0.3000
    3    0.3000      0.3000
    4    0.3000      0.3000
Solve time   = 6.76e-03 sec
示例 7  SuperLU 专业驱动程序
#include <stdio.h>
#include <sunperf.h>
 
#define M    5
#define N    5
#define NRHS 1
 
int main(int argc, char *argv[])
{
    SuperMatrix A, L, U, B, X;
    int      perm_r[M];  /* row permutations from partial pivoting */
    int      perm_c[N];  /* column permutation vector */
    int      etree[N];   /* elimination tree */
    double   ferr[NRHS]; /* estimated forward error bound */
    double   berr[NRHS]; /* component-wise relative backward error */
    double   C[N], R[M]; /* column and row scale factors */
    double   rpg, rcond;
    char     equed[1];   /* Specifies the form of equilibration that was done */          
    double   *work, *dp; /* user-supplied workspace */
    int      lwork = 0;  /* 0 for workspace to be allocated by system malloc */
    int      info, i;
    superlu_options_t options;
    SuperLUStat_t stat;
    mem_usage_t    mem_usage;
 
    printf("Example code calling SuperLU expert driver\n\n");
 
    /* the matrix in Harwell-Boeing format. */
    int m = M;
    int n = M;
    int nnz = 12;
   /* nonzeros of A, column-wise */
    double a[] = {1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
                  7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
    /* row index of nonzeros */
    int asub[] = {0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4};
    /* column pointers */
    int xa[]   = {0, 3, 6, 8, 10, 12};
    int nrhs = NRHS;
    double rhs[] = {17.0, 6.0, 13.0, 21.0, 21.0};
 
    /* Create Matrix A in the format expected by SuperLU */
    dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
 
    /* right-hand-side matrix B */
    dCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE);
 
    /*  solution matrix X */
    dCreate_Dense_Matrix(&X, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE);
    set_default_options(&options);
    options.ColPerm = NATURAL;
 
    /* Initialize the statistics variables. */
    StatInit(&stat);
 
    dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork,
           &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info);
    dp = (double *) (((NCformat *)X.Store)->nzval);
    printf("    i    rhs[i]     expected\n");
    for (i=0; i<M; i++)
      printf("%5d   %7.4lf     %7.4lf\n",
             i, dp[i], 1.0);
    printf("Factor time  = %8.2e sec\n", stat.utime[FACT]);
    printf("Solve time   = %8.2e sec\n", stat.utime[SOLVE]);
 
    StatFree(&stat);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
}

运行上例:

my_system% cc -xmemalign=8s expert.c -library=sunperf
my_system% a.out
Example code calling SuperLU expert driver
 
    i    rhs[i]     expected
    0    1.0000      1.0000
    1    1.0000      1.0000
    2    1.0000      1.0000
    3    1.0000      1.0000
    4    1.0000      1.0000
Factor time  = 1.25e-03 sec
Solve time   = 1.70e-04 sec

从 Fortran 调用 SuperLU

从 Fortran 调用 SuperLU 的最简单方式是通过 SPSOLVE 接口。可选择 SuperLU,通过 SPSOLVE 中的初始化例程 DGSSIN() 的输入参数 MTXTYP 对非对称稀疏矩阵求解。一次调用接口例程 DGSSFS() 中也有相同的参数。

下表列出了 MTXTYP 的有效选项。要调用 SuperLU,可选择 's0' 或 'S0' 作为矩阵类型。由于 SPSOLVE 是基于 Fortran 的,因此与输入矩阵相关的所有列和行索引都必须从 1 开始。然而,如果通过 DGSSIN()DGSSFS() 调用 SuperLU(通过设置 MTXTYP = 's0' 或 'S0'),则这些索引必须从 0 开始。

表 7  DGSSIN()DGSSFS() 的矩阵类型选项
选项
矩阵类型
求解器
'sp' 或 'SP'
对称结构,正定值
SPSOLVE
'ss' 或 'SS'
对称结构,对称值
SPSOLVE
'su' 或 'SU'
对称结构,非对称值
SPSOLVE
'uu' 或 'UU'
非对称结构,非对称值
SPSOLVE
's0' 或 'S0'
非对称结构,非对称值
SuperLU

在调用例程 DGSSIN() 之后必须调用例程 DGSSOR(),以执行填充简化排序和符号因式分解。可使用一个字符参数 (ORDMTHD) 选择所需的排序方法。一次调用接口例程 DGSSFS() 中也存在此参数。下表列出了 SPSOLVE 和 SuperLU 的有效排序方法。也可以通过调用 DGSSUO() 而不是 DGSSOR() 来为求解器提供特定的排序方法。输入排列数组必须从 0 开始。

表 8  DGSSOR()DGSSFS() 的矩阵排序选项
选项
排序方法
求解器
'nat' 或 'NAT'
自然排序(无排序)
SPSOLVE、SuperLU
'mmd' 或 'MMD'
A'*A 的最小度(默认值)
SPSOLVE、SuperLU
'gnd' 或 'GND'
一般嵌套分割
SPSOLVE
'spm' 或 'SPM'
基于 A'+A 的最小度排序
SuperLU
'sam' 或 'SAM'
近似最小度列
SuperLU

如上所示,一般嵌套分割方法在 SuperLU 中不可用。另一方面,基于 A'+A 的最小度排序和近似最小度列排序在 SPSOLVE 中不可用。

SuperLU 示例

以下代码示例显示了如何通过标准接口和 SPSOLVE 的一次调用接口选择 SuperLU 来对一般非对称方程组进行因式分解和求解。

示例 8  通过 SPSOLVE 标准接口调用 SuperLU
      program SLU
 
c  This program is an example driver that calls the regular interface of SPSOLVE
c  to invoke SuperLU to factor and solve a general unsymmetric system.
 
      implicit none
      integer           neqns, ier, msglvl, outunt, ldrhs, nrhs, i
      character         mtxtyp*2, pivot*1, ordmthd*3
      double precision  handle(150)
      integer           colstr(6), rowind(12)
      double precision  values(12), rhs(5), xexpct(5)
 
c  Sparse matrix structure and value arrays.  Coefficient matrix
c    is a general unsymmetric sparse matrix.
c    Ax = b, (solve for x) where:
 
c       1.0   0.0   7.0   9.0   0.0      1.0       17.0
c       2.0   4.0   0.0   0.0   0.0      1.0        6.0
c  A =  0.0   5.0   8.0   0.0   0.0  x = 1.0   b = 13.0
c       0.0   0.0   0.0  10.0  11.0      1.0       21.0
c       3.0   6.0   0.0   0.0  12.0      1.0       21.0
 
c  Array indices must be zero-based for calling SuperLU
      data colstr / 0, 3, 6, 8, 10, 12 /
      data rowind / 0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4 /
      data values / 1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
     $              7.0, 8.0, 9.0, 10.0, 11.0, 12.0 /
      data rhs    / 17.0, 6.0, 13.0, 21.0, 21.0 /
      data xexpct / 1.0d0, 1.0d0, 1.0d0, 1.0d0, 1.0d0 /
 
c  initialize solver
      mtxtyp= 's0'
      pivot = 'n'
      neqns  = 5
      outunt = 6
      msglvl = 0
 
c  call regular interface
      call dgssin(mtxtyp, pivot,  neqns , colstr, rowind,outunt, msglvl,
     &            handle, ier)
      if ( ier .ne. 0 ) goto 110
 
c  ordering and symbolic factorization
      ordmthd = 'mmd'
      call dgssor(ordmthd, handle, ier)
      if ( ier .ne. 0 ) goto 110
 
c  numeric factorization
      call dgssfa ( neqns, colstr, rowind, values, handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  solution
      nrhs   = 1
      ldrhs  = 5
      call dgsssl ( nrhs, rhs, ldrhs, handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  deallocate sparse solver storage
      call dgssda ( handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  print values of sol
      write(6,200) 'i', 'rhs(i)', 'expected rhs(i)', 'error'
      do i = 1, neqns
        write(6,300) i, rhs(i), xexpct(i), (rhs(i)-xexpct(i))
      enddo
      stop
 
  110 continue
c call to sparse solver returns an error
      write ( 6 , 400 )
     &      ' example: FAILED sparse solver error number = ', ier
      stop
 
  200 format(4x,a1,3x,a6,3x,a15,4x,a6)
  300 format(i5,3x,f5.2,7x,f5.2,8x,e10.2)     ! i/sol/xexpct values
  400 format(a60,i20)   ! fail message, sparse solver error number
      end

运行上例:

my_system% f95 -dalign slu.f -library=sunperf
my_system% a.out
 
    i   rhs(i)   expected rhs(i)     error
    1    1.00        1.00          0.00E+00
    2    1.00        1.00         -0.33E-15
    3    1.00        1.00          0.22E-15
    4    1.00        1.00         -0.11E-15
    5    1.00        1.00          0.22E-15
 
示例 9  通过一次调用 SPSOLVE 接口调用 SuperLU
program SLU_SINGLE
c  This program is an example driver that calls the regular interface of SPSOLVE
c  to invoke SuperLU to factor and solve a general unsymmetric system.
 
      implicit none
      integer           neqns, ier, msglvl, outunt, ldrhs, nrhs, i
      character         mtxtyp*2, pivot*1, ordmthd*3
      double precision  handle(150)
      integer           colstr(6), rowind(12)
      double precision  values(12), rhs(5), xexpct(5)
 
c  Sparse matrix structure and value arrays.  Coefficient matrix
c    is a general unsymmetric sparse matrix.
c    Ax = b, (solve for x) where:
 
c       1.0   0.0   7.0   9.0   0.0      1.0       17.0
c       2.0   4.0   0.0   0.0   0.0      1.0        6.0
c  A =  0.0   5.0   8.0   0.0   0.0  x = 1.0   b = 13.0
c       0.0   0.0   0.0  10.0  11.0      1.0       21.0
c       3.0   6.0   0.0   0.0  12.0      1.0       21.0
 
c  Array indices must be zero-based for calling SuperLU
      data colstr / 0, 3, 6, 8, 10, 12 /
      data rowind / 0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4 /
      data values / 1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
     $              7.0, 8.0, 9.0, 10.0, 11.0, 12.0 /
      data rhs    / 17.0, 6.0, 13.0, 21.0, 21.0 /
      data xexpct / 1.0d0, 1.0d0, 1.0d0, 1.0d0, 1.0d0 /
 
c  initialize solver
      mtxtyp= 's0'
      pivot = 'n'
      neqns  = 5
      outunt = 6
      msglvl = 0
      ordmthd = 'mmd'
      nrhs = 1
      ldrhs = 5
 
c  One-call routine of SPSOLVE
      call dgssfs (mtxtyp, pivot, neqns , colstr, rowind,
     &             values, nrhs , rhs, ldrhs , ordmthd,
     &             outunt, msglvl, handle, ier)
      if ( ier .ne. 0 ) goto 110
 
c  deallocate sparse solver storage
      call dgssda ( handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  print values of sol
      write(6,200) 'i', 'rhs(i)', 'expected rhs(i)', 'error'
      do i = 1, neqns
        write(6,300) i, rhs(i), xexpct(i), (rhs(i)-xexpct(i))
      enddo
      stop
 
  110 continue
c call to sparse solver returns an error
      write ( 6 , 400 )
     &      ' example: FAILED sparse solver error number = ', ier
      stop

  200 format(4x,a1,3x,a6,3x,a15,4x,a6)
  300 format(i5,3x,f5.2,7x,f5.2,8x,e10.2)     ! i/sol/xexpct values
  400 format(a60,i20)   ! fail message, sparse solver error number
      end

运行上例:

my_system% f95 -dalign slu_single.f -library=sunperf
my_system% a.out

    i   rhs(i)   expected rhs(i)     error
    1    1.00        1.00          0.00E+00
    2    1.00        1.00         -0.33E-15
    3    1.00        1.00          0.22E-15
    4    1.00        1.00         -0.11E-15
    5    1.00        1.00          0.22E-15