SuperLU 接口

语言：

SuperLU 有两个驱动程序例程，即简单和专业，调用这些例程对一般非对称稀疏方程组求解的方式与 SPSOLVE 中的一次调用接口完全相同。可在单精度、双精度、复数和双精度复数数据类型中使用这些例程以及用户可调用的其他 SuperLU 例程。以下各表中列出了所有外部例程的单精度名称。这些例程都有可用的手册页（第 3P 部分）。有关在应用程序中使用的稀疏矩阵数据结构的说明，另请参见 SuperMatrix(3P) 手册页。

表 5 SuperLU 计算例程

例程	说明
`sgstrf`	计算因式分解
`sgssvx`	分解和求解（专业驱动程序）
`sgssv`	分解和求解（简单驱动程序）
`sgstrs`	计算三角求解
`sgsrfs`	改进计算得到的解；提供误差界
`slangs`	计算 1 范数、弗罗贝尼乌斯范数或无穷范数
`sgsequ`	计算行和列比例
`sgscon`	估算条件数的倒数
`slaqgs`	平衡一般稀疏矩阵

表 6 SuperLU 实用程序例程

例程	说明
`LUSolveTime`	返回在求解阶段所用的时间
`LUFactTime`	返回在因式分解阶段所用的时间
`LUFactFlops`	返回因式分解阶段中的浮点运算数
`LUSolveFlops`	返回求解阶段中的浮点运算数
`sQuerySpace`	返回有关内存统计数据的信息
`sp_ienv`	返回指定的计算机相关参数
`sPrintPerf`	打印由计算例程收集的统计信息
`set_default_options`	将控制求解器行为的参数设置为默认选项
`StatInit`	分配并初始化用于存储性能统计信息的结构
`StatFree`	释放用于存储性能统计信息的结构
`Destroy_Dense_Matrix`	取消分配密集格式的 SuperMatrix
`Destroy_SuperNode_Matrix`	取消分配超节点格式的 SuperMatrix
`Destroy_CompCol_Matrix`	取消分配压缩稀疏列格式的 SuperMatrix
`Destroy_CompCol_Permuted`	取消分配已排列的压缩稀疏列格式的 SuperMatrix
`Destroy_SuperMatrix_Store`	取消分配将矩阵存储在 SuperMatrix 中的实际存储
`sCopy_CompCol_Matrix`	复制压缩稀疏列格式的 SuperMatrix
`sCreate_CompCol_Matrix`	分配压缩稀疏列格式的 SuperMatrix
`sCreate_Dense_Matrix`	分配密集格式的 SuperMatrix
`sCreate_CompRow_Matrix`	分配压缩稀疏行格式的 SuperMatrix
`sCreate_SuperNode_Matrix`	分配超节点格式的 SuperMatrix
`sp_preorder`	排列原始稀疏矩阵的列
`sp_sgemm`	将 SuperMatrix 乘以稠密矩阵

从 C 调用 SuperLU

SuperLU 例程是用 C 编写的。因此，与列和行相关的索引必须从 0 开始。在以下示例中，将调用双精度简单驱动程序 dgssv() 来计算因子 L 和 U，并对解矩阵求解。

示例 6 SuperLU 简单驱动程序

#include <stdio.h>
#include <sunperf.h>
 
#define M 5
#define N 5
 
int main(int argc, char *argv[])
{
    SuperMatrix A, L, U, B1, B2;
    int      perm_r[M];  /* row permutations from partial pivoting */
    int      perm_c[N];  /* column permutation vector */
    int      info, i;
    superlu_options_t options;
    SuperLUStat_t stat;
    trans_t  trans = NOTRANS;
 
    printf("Example code calling SuperLU simple driver to factor a \n");
    printf("general unsymmetric matrix and solve two right-hand-side matrices\n");
 
   /* the matrix in Harwell-Boeing format. */
    int m = M;
    int n = M;
    int nnz = 12;
    double *dp;
   /* nonzeros of A, column-wise */
    double a[] = {1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
                  7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
   /* row index of nonzeros */
    int asub[] = {0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4};
   /* column pointers */
    int xa[]   = {0, 3, 6, 8, 10, 12};
 
    /* Create Matrix A in the format expected by SuperLU */
    dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
 
    int nrhs = 1;
    double rhs1[] = {17.0, 6.0, 13.0, 21.0, 21.0};
    double rhs2[] = {17*.3, 6*.3, 13*.3, 21*.3, 21*.3};
 
    /* right-hand side matrix B1, B2 */
    dCreate_Dense_Matrix(&B1, m, nrhs, rhs1, m, SLU_DN, SLU_D, SLU_GE);
    dCreate_Dense_Matrix(&B2, m, nrhs, rhs2, m, SLU_DN, SLU_D, SLU_GE);
 
    /* set options that control behavior of solver to default parameters */
    set_default_options(&options);
    options.ColPerm = NATURAL;
 
    /* Initialize the statistics variables. */
    StatInit(&stat);
    /* factor input matrix and solve the first right-hand-side matrix */
    dgssv(&options, &A, perm_c, perm_r, &L, &U, &B1, &stat, &info);
 
    printf("\nsolution matrix B1:\n");
    dp = (double *) (((NCformat *)B1.Store)->nzval);
    printf("    i    rhs[i]     expected\n");
    for (i=0; i<M; i++)
      printf("%5d   %7.4lf     %7.4lf\n", i, dp[i], 1.0);
    printf("Factor time  = %8.2e sec\n", stat.utime[FACT]);
    printf("Solve time   = %8.2e sec\n\n\n", stat.utime[SOLVE]);
 
    /* solve the second right-hand-side matrix */
    dgstrs(trans, &L, &U, perm_c, perm_r, &B2, &stat, &info);
 
    printf("solution matrix B2:\n");
    dp = (double *) (((NCformat *)B2.Store)->nzval);
    printf("    i    rhs[i]     expected\n");
    for (i=0; i<M; i++)
      printf("%5d   %7.4lf     %7.4lf\n", i, dp[i], 0.3);
    printf("Solve time   = %8.2e sec\n", stat.utime[SOLVE]);
 
    StatFree(&stat);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B1);
    Destroy_SuperMatrix_Store(&B2);
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
}

运行上例：

my_system% cc -xmemalign=8s simple.c -library=sunperf
my_system% a.out
 
Example code calling SuperLU simple driver to factor a
general unsymmetric matrix and solve two right-hand-side matrices
 
solution matrix B1:
    i    rhs[i]     expected
    0    1.0000      1.0000
    1    1.0000      1.0000
    2    1.0000      1.0000
    3    1.0000      1.0000
    4    1.0000      1.0000
Factor time  = 5.43e-02 sec
Solve time   = 6.76e-03 sec
 
 
solution matrix B2:
    i    rhs[i]     expected
    0    0.3000      0.3000
    1    0.3000      0.3000
    2    0.3000      0.3000
    3    0.3000      0.3000
    4    0.3000      0.3000
Solve time   = 6.76e-03 sec

示例 7 SuperLU 专业驱动程序

#include <stdio.h>
#include <sunperf.h>
 
#define M    5
#define N    5
#define NRHS 1
 
int main(int argc, char *argv[])
{
    SuperMatrix A, L, U, B, X;
    int      perm_r[M];  /* row permutations from partial pivoting */
    int      perm_c[N];  /* column permutation vector */
    int      etree[N];   /* elimination tree */
    double   ferr[NRHS]; /* estimated forward error bound */
    double   berr[NRHS]; /* component-wise relative backward error */
    double   C[N], R[M]; /* column and row scale factors */
    double   rpg, rcond;
    char     equed[1];   /* Specifies the form of equilibration that was done */          
    double   *work, *dp; /* user-supplied workspace */
    int      lwork = 0;  /* 0 for workspace to be allocated by system malloc */
    int      info, i;
    superlu_options_t options;
    SuperLUStat_t stat;
    mem_usage_t    mem_usage;
 
    printf("Example code calling SuperLU expert driver\n\n");
 
    /* the matrix in Harwell-Boeing format. */
    int m = M;
    int n = M;
    int nnz = 12;
   /* nonzeros of A, column-wise */
    double a[] = {1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
                  7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
    /* row index of nonzeros */
    int asub[] = {0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4};
    /* column pointers */
    int xa[]   = {0, 3, 6, 8, 10, 12};
    int nrhs = NRHS;
    double rhs[] = {17.0, 6.0, 13.0, 21.0, 21.0};
 
    /* Create Matrix A in the format expected by SuperLU */
    dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
 
    /* right-hand-side matrix B */
    dCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE);
 
    /*  solution matrix X */
    dCreate_Dense_Matrix(&X, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE);
    set_default_options(&options);
    options.ColPerm = NATURAL;
 
    /* Initialize the statistics variables. */
    StatInit(&stat);
 
    dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork,
           &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info);
    dp = (double *) (((NCformat *)X.Store)->nzval);
    printf("    i    rhs[i]     expected\n");
    for (i=0; i<M; i++)
      printf("%5d   %7.4lf     %7.4lf\n",
             i, dp[i], 1.0);
    printf("Factor time  = %8.2e sec\n", stat.utime[FACT]);
    printf("Solve time   = %8.2e sec\n", stat.utime[SOLVE]);
 
    StatFree(&stat);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
}

运行上例：

my_system% cc -xmemalign=8s expert.c -library=sunperf
my_system% a.out
Example code calling SuperLU expert driver
 
    i    rhs[i]     expected
    0    1.0000      1.0000
    1    1.0000      1.0000
    2    1.0000      1.0000
    3    1.0000      1.0000
    4    1.0000      1.0000
Factor time  = 1.25e-03 sec
Solve time   = 1.70e-04 sec

从 Fortran 调用 SuperLU

从 Fortran 调用 SuperLU 的最简单方式是通过 SPSOLVE 接口。可选择 SuperLU，通过 SPSOLVE 中的初始化例程 DGSSIN() 的输入参数 MTXTYP 对非对称稀疏矩阵求解。一次调用接口例程 DGSSFS() 中也有相同的参数。

下表列出了 MTXTYP 的有效选项。要调用 SuperLU，可选择 's0' 或 'S0' 作为矩阵类型。由于 SPSOLVE 是基于 Fortran 的，因此与输入矩阵相关的所有列和行索引都必须从 1 开始。然而，如果通过 DGSSIN() 或 DGSSFS() 调用 SuperLU（通过设置 MTXTYP = 's0' 或 'S0'），则这些索引必须从 0 开始。

表 7 DGSSIN() 和 DGSSFS() 的矩阵类型选项

选项	矩阵类型	求解器
'sp' 或 'SP'	对称结构，正定值	SPSOLVE
'ss' 或 'SS'	对称结构，对称值	SPSOLVE
'su' 或 'SU'	对称结构，非对称值	SPSOLVE
'uu' 或 'UU'	非对称结构，非对称值	SPSOLVE
's0' 或 'S0'	非对称结构，非对称值	SuperLU

在调用例程 DGSSIN() 之后必须调用例程 DGSSOR()，以执行填充简化排序和符号因式分解。可使用一个字符参数 (ORDMTHD) 选择所需的排序方法。一次调用接口例程 DGSSFS() 中也存在此参数。下表列出了 SPSOLVE 和 SuperLU 的有效排序方法。也可以通过调用 DGSSUO() 而不是 DGSSOR() 来为求解器提供特定的排序方法。输入排列数组必须从 0 开始。

表 8 DGSSOR() 和 DGSSFS() 的矩阵排序选项

选项	排序方法	求解器
'nat' 或 'NAT'	自然排序（无排序）	SPSOLVE、SuperLU
'mmd' 或 'MMD'	A'*A 的最小度（默认值）	SPSOLVE、SuperLU
'gnd' 或 'GND'	一般嵌套分割	SPSOLVE
'spm' 或 'SPM'	基于 A'+A 的最小度排序	SuperLU
'sam' 或 'SAM'	近似最小度列	SuperLU

如上所示，一般嵌套分割方法在 SuperLU 中不可用。另一方面，基于 A'+A 的最小度排序和近似最小度列排序在 SPSOLVE 中不可用。

SuperLU 示例

以下代码示例显示了如何通过标准接口和 SPSOLVE 的一次调用接口选择 SuperLU 来对一般非对称方程组进行因式分解和求解。

示例 8 通过 SPSOLVE 标准接口调用 SuperLU

      program SLU
 
c  This program is an example driver that calls the regular interface of SPSOLVE
c  to invoke SuperLU to factor and solve a general unsymmetric system.
 
      implicit none
      integer           neqns, ier, msglvl, outunt, ldrhs, nrhs, i
      character         mtxtyp*2, pivot*1, ordmthd*3
      double precision  handle(150)
      integer           colstr(6), rowind(12)
      double precision  values(12), rhs(5), xexpct(5)
 
c  Sparse matrix structure and value arrays.  Coefficient matrix
c    is a general unsymmetric sparse matrix.
c    Ax = b, (solve for x) where:
 
c       1.0   0.0   7.0   9.0   0.0      1.0       17.0
c       2.0   4.0   0.0   0.0   0.0      1.0        6.0
c  A =  0.0   5.0   8.0   0.0   0.0  x = 1.0   b = 13.0
c       0.0   0.0   0.0  10.0  11.0      1.0       21.0
c       3.0   6.0   0.0   0.0  12.0      1.0       21.0
 
c  Array indices must be zero-based for calling SuperLU
      data colstr / 0, 3, 6, 8, 10, 12 /
      data rowind / 0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4 /
      data values / 1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
     $              7.0, 8.0, 9.0, 10.0, 11.0, 12.0 /
      data rhs    / 17.0, 6.0, 13.0, 21.0, 21.0 /
      data xexpct / 1.0d0, 1.0d0, 1.0d0, 1.0d0, 1.0d0 /
 
c  initialize solver
      mtxtyp= 's0'
      pivot = 'n'
      neqns  = 5
      outunt = 6
      msglvl = 0
 
c  call regular interface
      call dgssin(mtxtyp, pivot,  neqns , colstr, rowind,outunt, msglvl,
     &            handle, ier)
      if ( ier .ne. 0 ) goto 110
 
c  ordering and symbolic factorization
      ordmthd = 'mmd'
      call dgssor(ordmthd, handle, ier)
      if ( ier .ne. 0 ) goto 110
 
c  numeric factorization
      call dgssfa ( neqns, colstr, rowind, values, handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  solution
      nrhs   = 1
      ldrhs  = 5
      call dgsssl ( nrhs, rhs, ldrhs, handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  deallocate sparse solver storage
      call dgssda ( handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  print values of sol
      write(6,200) 'i', 'rhs(i)', 'expected rhs(i)', 'error'
      do i = 1, neqns
        write(6,300) i, rhs(i), xexpct(i), (rhs(i)-xexpct(i))
      enddo
      stop
 
  110 continue
c call to sparse solver returns an error
      write ( 6 , 400 )
     &      ' example: FAILED sparse solver error number = ', ier
      stop
 
  200 format(4x,a1,3x,a6,3x,a15,4x,a6)
  300 format(i5,3x,f5.2,7x,f5.2,8x,e10.2)     ! i/sol/xexpct values
  400 format(a60,i20)   ! fail message, sparse solver error number
      end

运行上例：

my_system% f95 -dalign slu.f -library=sunperf
my_system% a.out
 
    i   rhs(i)   expected rhs(i)     error
    1    1.00        1.00          0.00E+00
    2    1.00        1.00         -0.33E-15
    3    1.00        1.00          0.22E-15
    4    1.00        1.00         -0.11E-15
    5    1.00        1.00          0.22E-15

示例 9 通过一次调用 SPSOLVE 接口调用 SuperLU

program SLU_SINGLE
c  This program is an example driver that calls the regular interface of SPSOLVE
c  to invoke SuperLU to factor and solve a general unsymmetric system.
 
      implicit none
      integer           neqns, ier, msglvl, outunt, ldrhs, nrhs, i
      character         mtxtyp*2, pivot*1, ordmthd*3
      double precision  handle(150)
      integer           colstr(6), rowind(12)
      double precision  values(12), rhs(5), xexpct(5)
 
c  Sparse matrix structure and value arrays.  Coefficient matrix
c    is a general unsymmetric sparse matrix.
c    Ax = b, (solve for x) where:
 
c       1.0   0.0   7.0   9.0   0.0      1.0       17.0
c       2.0   4.0   0.0   0.0   0.0      1.0        6.0
c  A =  0.0   5.0   8.0   0.0   0.0  x = 1.0   b = 13.0
c       0.0   0.0   0.0  10.0  11.0      1.0       21.0
c       3.0   6.0   0.0   0.0  12.0      1.0       21.0
 
c  Array indices must be zero-based for calling SuperLU
      data colstr / 0, 3, 6, 8, 10, 12 /
      data rowind / 0, 1, 4, 1, 2, 4, 0, 2, 0, 3, 3, 4 /
      data values / 1.0, 2.0, 3.0,  4.0, 5.0,  6.0,
     $              7.0, 8.0, 9.0, 10.0, 11.0, 12.0 /
      data rhs    / 17.0, 6.0, 13.0, 21.0, 21.0 /
      data xexpct / 1.0d0, 1.0d0, 1.0d0, 1.0d0, 1.0d0 /
 
c  initialize solver
      mtxtyp= 's0'
      pivot = 'n'
      neqns  = 5
      outunt = 6
      msglvl = 0
      ordmthd = 'mmd'
      nrhs = 1
      ldrhs = 5
 
c  One-call routine of SPSOLVE
      call dgssfs (mtxtyp, pivot, neqns , colstr, rowind,
     &             values, nrhs , rhs, ldrhs , ordmthd,
     &             outunt, msglvl, handle, ier)
      if ( ier .ne. 0 ) goto 110
 
c  deallocate sparse solver storage
      call dgssda ( handle, ier )
      if ( ier .ne. 0 ) goto 110
 
c  print values of sol
      write(6,200) 'i', 'rhs(i)', 'expected rhs(i)', 'error'
      do i = 1, neqns
        write(6,300) i, rhs(i), xexpct(i), (rhs(i)-xexpct(i))
      enddo
      stop
 
  110 continue
c call to sparse solver returns an error
      write ( 6 , 400 )
     &      ' example: FAILED sparse solver error number = ', ier
      stop

  200 format(4x,a1,3x,a6,3x,a15,4x,a6)
  300 format(i5,3x,f5.2,7x,f5.2,8x,e10.2)     ! i/sol/xexpct values
  400 format(a60,i20)   ! fail message, sparse solver error number
      end

运行上例：

my_system% f95 -dalign slu_single.f -library=sunperf
my_system% a.out

    i   rhs(i)   expected rhs(i)     error
    1    1.00        1.00          0.00E+00
    2    1.00        1.00         -0.33E-15
    3    1.00        1.00          0.22E-15
    4    1.00        1.00         -0.11E-15
    5    1.00        1.00          0.22E-15