Oracle® Solaris Studio 12.4:数值计算指南

退出打印视图

更新时间: 2015 年 1 月
 
 

示例

此附录提供如何完成一些常见任务的示例。这些示例使用 Fortran 或 ANSI C 编写,多数依赖于 libmlibsunmath 的当前版本。这些示例已在 Oracle Solaris 10 Update 10 OS 或更高版本上使用 Oracle Solaris Studio 12.4 进行了测试。C 示例是使用 –lsunmath –lm 选项编译的。

A.1 IEEE 运算

以下示例显示了一种检查浮点数的十六进制表示形式的方法。请注意,您还可以使用调试器来查找已存储数据的十六进制表示形式。

以下 C 程序输出双精度的近似 π 值,以及单精度无穷大:

示例 A-1  双精度示例
#include <math.h>
#include <sunmath.h>
 
int main() {
 union {
   float         flt;
   unsigned     un;
 } r;
 union {
   double      dbl;
   unsigned    un[2];
 } d;
 
 /* double precision */
 d.dbl = M_PI;
 (void) printf("DP Approx pi = %08x %08x = %18.17e \n",
     d.un[0], d.un[1], d.dbl);
 
 /* single precision */
 r.flt = infinityf();
 (void) printf("Single Precision %8.7e : %08x \n", 
     r.flt, r.un);
 
 return 0;
}

在基于 SPARC 的系统上,使用 –lsunmath 编译时,上述程序的输出如下所示:

DP Approx pi = 400921fb 54442d18 = 3.14159265358979312e+00 
Single Precision Infinity: 7f800000 

以下 Fortran 程序分别以两种格式输出最小的正规数:

示例 A-2  分别以两种格式输出最小的正归数(续)
 program print_ieee_values
c
c the purpose of the implicit statements is to ensure
c that the floatingpoint pseudo-intrinsic functions
c are declared with the correct type
c
 implicit real*16 (q)
 implicit double precision (d)
 implicit real (r)
 real*16           z
 double precision  x
 real              r
c
 z = q_min_normal()
 write(*,7) z, z
 7 format('min normal, quad: ',1pe47.37e4,/,' in hex ',z32.32)
c
 x = d_min_normal()
 
 write(*,14) x, x
 14 format('min normal, double: ',1pe23.16,' in hex ',z16.16)
c
 r = r_min_normal()
 write(*,27) r, r
 27 format('min normal, single: ',1pe14.7,' in hex ',z8.8)
c
 end

在基于 SPARC 的系统上,对应的输出显示如下:

min normal, quad:   3.3621031431120935062626778173217526026E-4932
 in hex 00010000000000000000000000000000
min normal, double:  2.2250738585072014-308 in hex 0010000000000000
min normal, single:  1.1754944E-38 in hex 00800000

A.2 数学库

本节显示使用数学库中函数的示例。

A.2.1 随机数字生成器

以下示例调用随机数字生成器以生成数字数组,并使用计时函数测量计算指定数字的 EXP 所用的时间:

示例 A-3  随机数字生成器
#ifdef DP
#define GENERIC double precision
#else
#define GENERIC real
#endif
#define SIZE 400000

 program example
c
 implicit GENERIC (a-h,o-z)
 GENERIC x(SIZE), y, lb, ub
 real tarray(2), u1, u2
c
c compute EXP on random numbers in [-ln2/2,ln2/2]
 lb = -0.3465735903
 ub = 0.3465735903
c
c generate array of random numbers
#ifdef DP
 call d_init_addrans()
 call d_addrans(x,SIZE,lb,ub)
#else
 call r_init_addrans()
 call r_addrans(x,SIZE,lb,ub)
#endif
c
c start the clock
 call dtime(tarray)
 u1 = tarray(1)
c
c compute exponentials
 do 16 i=1,SIZE
  y = exp(x(i))
 16 continue
c
c get the elapsed time
 call dtime(tarray)
 u2 = tarray(1)
 print *,'time used by EXP is ',u2-u1
 print *,'last values for x and exp(x) are ',x(SIZE),y
c
 call flush(6)
 end

要编译上述示例,请将源代码放在后缀为 F(不是 f)的文件中,这样编译器将自动调用预处理程序,然后在命令行上指定 –DSP–DDP 以选择单精度或双精度。

此示例显示如何使用 d_addrans 函数来生成均匀分布在用户指定范围中的随机数据块:

示例 A-4  使用 d_addrans 函数
/*
 * test SIZE*LOOPS random arguments to sin in the range
 * [0, threshold] where
 * threshold = 3E30000000000000 (3.72529029846191406e-09)
 */

#include <math.h>
#include <sunmath.h>
#define SIZE 10000
#define LOOPS 100
int main()
{
 double  x[SIZE], y[SIZE];
 int  i, j, n;
 double  lb, ub;
 union {
  unsigned    u[2];
  double    d;
 }  upperbound;

 upperbound.u[0] = 0x3e300000;
 upperbound.u[1] = 0x00000000;

 /* initialize the random number generator */
 d_init_addrans_();

 /* test (SIZE * LOOPS) arguments to sin */
 for (j = 0; j < LOOPS; j++) {

   /*
   * generate a vector, x, of length SIZE, 
   * of random numbers to use as
   * input to the trig functions.
   */
   n = SIZE;
   ub = upperbound.d;
   lb = 0.0;

   d_addrans_(x, &n, &lb, &ub);

   for (i = 0; i < n; i++)
     y[i] = sin(x[i]);

   /* is sin(x) == x?  It ought to, for tiny x. */
   for (i = 0; i < n; i++)
    if (x[i] != y[i])
      printf(
      " OOPS: %d sin(%18.17e)=%18.17e \n",
      i, x[i], y[i]);
 }
 printf(" comparison ended; no differences\n");
 ieee_retrospective_();
 return 0;
}

A.2.2 IEEE 推荐函数

以下 Fortran 示例使用 IEEE 标准推荐的一些函数:

示例 A-5  IEEE 推荐函数
c
c Demonstrate how to call 5 of the more interesting IEEE 
c recommended functions from Fortran. These are implemented 
c with "bit-twiddling", and so are as efficient as you could 
c hope. The IEEE standard for floating-point arithmetic 
c doesn't require these, but recommends that they be 
c included in any IEEE programming environment.
c
c For example, to accomplish 
c  y = x * 2**n, 
c since the hardware stores numbers in base 2,
c shift the exponent by n places. 
c                

c Refer to 
c ieee_functions(3m)
c libm_double(3f)
c libm_single(3f)
c
c The 5 functions demonstrated here are:
c
c ilogb(x): returns the base 2 unbiased exponent of x in 
c   integer format
c signbit(x): returns the sign bit, 0 or 1
c copysign(x,y): returns x with y's sign bit
c nextafter(x,y): next representable number after x, in 
c   the direction y
c scalbn(x,n): x * 2**n
c
c function    double precision      single precision
c --------------------------------------------------------
c ilogb(x)     i = id_ilogb(x)      i = ir_ilogb(r)
c signbit(x)    i = id_signbit(x)      i = ir_signbit(r)
c copysign(x,y)    x = d_copysign(x,y)      r = r_copysign(r,s)
c nextafter(x,y)    z = d_nextafter(x,y)      r = r_nextafter(r,s)
c scalbn(x,n)    x = d_scalbn(x,n)      r = r_scalbn(r,n)
 program ieee_functions_demo
 implicit double precision (d)
 implicit real (r) 
 double precision   x, y, z, direction
 real               r, s, t, r_direction
 integer            i, scale

 print *
 print *, 'DOUBLE PRECISION EXAMPLES:'
 print *

 x = 32.0d0
 i = id_ilogb(x)
 write(*,1) x, i
 1 format(' The base 2 exponent of ', F4.1, ' is ', I2)

 x = -5.5d0
 y = 12.4d0
 z = d_copysign(x,y)
 write(*,2) x, y, z
 2    format(F5.1, ' was given the sign of ', F4.1,
     *   ' and is now ', F4.1)


 x = -5.5d0
 i = id_signbit(x)
 print *, 'The sign bit of ', x, ' is ', i

 x = d_min_subnormal()
 direction = -d_infinity()
 y = d_nextafter(x, direction)
 write(*,3) x
 3 format(' Starting from ', 1PE23.16E3,
     -   ', the next representable number ')
 write(*,4) direction, y
 4 format('    towards ', F4.1, ' is ', 1PE23.16E3)

 x = d_min_subnormal()
 direction = 1.0d0
    y = d_nextafter(x, direction)
 write(*,3) x
 write(*,4) direction, y
 x = 2.0d0
 scale = 3
 y = d_scalbn(x, scale)
 write (*,5) x, scale, y
 5 format(' Scaling ', F4.1, ' by 2**', I1, ' is ', F4.1)
 print *
 print *, 'SINGLE PRECISION EXAMPLES:'
 print *

 r = 32.0
 i = ir_ilogb(r)
 write (*,1) r, i

 r = -5.5
 i = ir_signbit(r)
 print *, 'The sign bit of ', r, ' is ', i

 r = -5.5
 s = 12.4
 t = r_copysign(r,s)
 write (*,2) r, s, t

 r = r_min_subnormal()
 r_direction = -r_infinity()
 s = r_nextafter(r, r_direction)
 write(*,3) r
 write(*,4) r_direction, s

 r = r_min_subnormal()
 r_direction = 1.0e0
 s = r_nextafter(r, r_direction)
 write(*,3) r
 write(*,4) r_direction, s

 r = 2.0
 scale = 3
 s = r_scalbn(r, scale)
 write (*,5) r, scale, y

 print *
 end

此程序的输出显示在以下示例中。

示例 A-6  示例 A-5 的输出
DOUBLE PRECISION EXAMPLES:

The base 2 exponent of 32.0 is  5
-5.5 was given the sign of 12.4 and is now  5.5
The sign bit of    -5.5 is   1
Starting from  4.9406564584124654E-324, the next representable
   number towards -Inf is  0.0000000000000000E+000
Starting from  4.9406564584124654E-324, the next representable
   number towards  1.0 is  9.8813129168249309E-324
Scaling  2.0 by 2**3 is 16.0

SINGLE PRECISION EXAMPLES:

The base 2 exponent of 32.0 is  5
The sign bit of    -5.5 is   1
-5.5 was given the sign of 12.4 and is now  5.5
Starting from  1.4012984643248171E-045, the next representable
   number towards -Inf is  0.0000000000000000E+000
Starting from  1.4012984643248171E-045, the next representable
   number towards  1.0 is  2.8025969286496341E-045
Scaling  2.0 by 2**3 is 16.0

如果使用 f95 编译器和 -f77 兼容性选项,将显示以下附加消息。

 Note: IEEE floating-point exception flags raised: 
    Inexact;  Underflow; 
 IEEE floating-point exception traps enabled: 
    overflow;  division by zero;  invalid operation; 
 See the Numerical Computation Guide, ieee_flags(3M), ieee_handler(3M) 

A.2.3 IEEE 特殊值

以下 C 程序调用多个 ieee_values(3m) 函数:

#include <math.h> 
#include <sunmath.h>
 
int main() 
{ 
 double       x; 
 float       r; 
 
 x = quiet_nan(0); 
 printf("quiet NaN: %.16e = %08x %08x \n", 
    x, ((int *) &x)[0], ((int *) &x)[1]); 
 
 x = nextafter(max_subnormal(), 0.0);                
 printf("nextafter(max_subnormal,0) = %.16e\n",x);
 printf("                           = %08x %08x\n", 
   ((int *) &x)[0], ((int *) &x)[1]); 
 
 r = min_subnormalf(); 
 printf("single precision min subnormal = %.8e = %08x\n",
      r, ((int *) &r)[0]); 
 
 return 0; 
}

在链接时,请记住同时指定 -lsunmath-lm

在基于 SPARC 的系统上,输出显示类似下文:

quiet NaN: NaN = 7ff80000 00000000 
nextafter(max_subnormal,0) = 2.2250738585072004e-308
 = 000fffff fffffffe
single precision min subnormal = 1.40129846e-45 = 00000001

由于 x86 体系结构是“小尾数法”,x86 上的输出略有不同,其中将保留双精度数值的十六进制表示形式的高位字和低位字:

quiet NaN: NaN = ffffffff 7fffffff 
nextafter(max_subnormal,0) = 2.2250738585072004e-308 
                           = fffffffe 000fffff
single precision min subnormal = 1.40129846e-45 = 00000001 

使用 ieee_values 函数的 Fortran 程序应该谨慎声明这些函数类型:

 program print_ieee_values
c
c the purpose of the implicit statements is to insure
c that the floating-point pseudo-instrinsic
c functions are declared with the correct type
c
 implicit real*16 (q)
 implicit double precision (d)
 implicit real (r)
 real*16 z, zero, one
 double precision    x
 real                r
c
 zero = 0.0
 one = 1.0
 z = q_nextafter(zero, one)
 x = d_infinity()
 r = r_max_normal()
c
 print *, z
 print *, x
 print *, r
c
 end

在 SPARC 上,输出显示如下:

6.475175119438025110924438958227646E-4966
Inf
3.4028235E+38

A.2.4 ieee_flags-舍入方向

以下示例演示了如何将舍入模式设置为向零取整

#include <math.h>
#include <sunmath.h>

int main()
{
 int         i;
 double      x, y;
 char        *out_1, *out_2, *dummy;

 /* get prevailing rounding direction */
 i = ieee_flags("get", "direction", "", &out_1);

 x = sqrt(.5);
 printf("With rounding direction %s, \n", out_1);
 printf("sqrt(.5) = 0x%08x 0x%08x = %16.15e\n",
        ((int *) &x)[0], ((int *) &x)[1], x);

 /* set rounding direction */
 if (ieee_flags("set", "direction", "tozero", &dummy) != 0)
   printf("Not able to change rounding direction!\n");
 i = ieee_flags("get", "direction", "", &out_2);

 x = sqrt(.5);
 /*
  * restore original rounding direction before printf, since
  * printf is also affected by the current rounding direction
  */
 if (ieee_flags("set", "direction", out_1, &dummy) != 0)
   printf("Not able to change rounding direction!\n");
 printf("\nWith rounding direction %s,\n", out_2);
 printf("sqrt(.5) = 0x%08x 0x%08x = %16.15e\n",
        ((int *) &x)[0], ((int *) &x)[1], x);

 return 0;
}

上一个简短的舍入方向程序的以下输出显示了在 SPARC 上向零取整的效果:

demo% cc rounding_direction.c -lsunmath -lm
demo% a.out 
With rounding direction nearest, 
sqrt(.5) = 0x3fe6a09e 0x667f3bcd  = 7.071067811865476e-01 

With rounding direction tozero, 
sqrt(.5) = 0x3fe6a09e 0x667f3bcc  = 7.071067811865475e-01 
demo%

上一个简短的舍入方向程序的以下输出显示了在 x86 上向零取整的效果:

demo% cc rounding_direction.c -lsunmath -lm
demo% a.out 
With rounding direction nearest, 
sqrt(.5) = 0x667f3bcd 0x3fe6a09e  = 7.071067811865476e-01 

With rounding direction tozero, 
sqrt(.5) = 0x667f3bcc 0x3fe6a09e  = 7.071067811865475e-01 
demo% 

要在 Fortran 程序中设置向零取整方向,请使用以下示例:

program ieee_flags_demo
character*16 out

i = ieee_flags('set', 'direction', 'tozero', out)
if (i.ne.0) print *, 'not able to set rounding direction'
 
i = ieee_flags('get', 'direction', '', out)
print *, 'Rounding direction is: ', out
 
end

输出如下所示:

demo% f95 ieee_flags_demo.f
demo% a.out
 Rounding direction is: tozero          

如果使用 f95 编译器和 -f77 兼容性选项编译程序,则输出中将包括以下附加消息。

demo% f95 ieee_flags_demo.f -f77
demo% a.out
 Note: Rounding direction toward zero 
 IEEE floating-point exception traps enabled: 
    overflow;  division by zero;  invalid operation; 
 See the Numerical Computation Guide, ieee_flags(3M), ieee_handler(3M) 

A.2.5 C99 浮点环境函数

下一个示例说明了如何使用多个 C99 浮点环境函数。norm 函数计算欧几里得向量范数,并且使用环境函数来处理下溢和溢出。主程序使用缩放的向量调用此函数以确保出现下溢和溢出,如追溯诊断输出所示

示例 A-7  C99 浮点环境函数
#include <stdio.h>
#include <math.h>
#include <sunmath.h>
#include <fenv.h>

/*
*  Compute the euclidean norm of the vector x avoiding
*  premature underflow or overflow
*/
double norm(int n, double *x)
{
    fenv_t  env;
    double  s, b, d, t;
    int     i, f;

    /* save the environment, clear flags, and establish nonstop
       exception handling */
    feholdexcept(&env);

    /* attempt to compute the dot product x.x */
    d = 1.0; /* scale factor */
    s = 0.0;
    for (i = 0; i < n; i++)
        s += x[i] * x[i];

    /* check for underflow or overflow */
    f = fetestexcept(FE_UNDERFLOW | FE_OVERFLOW);
    if (f & FE_OVERFLOW) {
        /* first attempt overflowed, try again scaling down */
        feclearexcept(FE_OVERFLOW);
        b = scalbn(1.0, -640);
        d = 1.0 / b;
        s = 0.0;
        for (i = 0; i < n; i++) {
            t = b * x[i];
            s += t * t;
        }
    }
    else if (f & FE_UNDERFLOW && s < scalbn(1.0, -970)) {
        /* first attempt underflowed, try again scaling up */
        b = scalbn(1.0, 1022);
        d = 1.0 / b;
        s = 0.0;
        for (i = 0; i < n; i++) {
            t = b * x[i];
            s += t * t;
        }
    }

    /* hide any underflows that have occurred so far */
    feclearexcept(FE_UNDERFLOW);

    /* restore the environment, raising any other exceptions
       that have occurred */
    feupdateenv(&env);

    /* take the square root and undo any scaling */
    return d * sqrt(s);
}

int main()
{
    double x[100], l, u;
    int    n = 100;

    fex_set_log(stdout);

    l = 0.0;
    u = min_normal();
    d_lcrans_(x, &n, &l, &u);
    printf("norm: %g\n", norm(n, x));
    l = sqrt(max_normal());
    u = l * 2.0;
    d_lcrans_(x, &n, &l, &u);
    printf("norm: %g\n", norm(n, x));

    return 0;
}

在基于 SPARC 的系统上,编译和运行此程序会生成类似下文的输出:

demo% cc norm.c -lsunmath -lm
demo% a.out
Floating point underflow at 0x000153a8 __d_lcrans_, nonstop mode
  0x000153b4  __d_lcrans_
  0x00011594  main
Floating point underflow at 0x00011244 norm, nonstop mode
  0x00011248  norm
  0x000115b4  main
norm: 1.32533e-307
Floating point overflow at 0x00011244 norm, nonstop mode
  0x00011248  norm
  0x00011660  main
norm: 2.02548e+155

以下代码示例显示 fesetprec 函数在基于 x86 的系统上的效果。此函数在基于 SPARC 的系统上不可用。while 循环通过查找在加 1 时完全舍入的 2 的最大次幂,尝试确定可用精度。显示第一个循环时,在使用扩展精度计算所有中间结果的类似于基于 x86 的系统的体系结构上,此技术并不总是按照预期工作。因此,fesetprec 函数可用于确保所有结果将舍入到所需精度,如第二个循环所示。

示例 A-8  fesetprec 函数 (x86)
#include <math.h>
#include <fenv.h>

int main()
{
    double  x;

    x = 1.0;
    while (1.0 + x != 1.0)
        x *= 0.5;
    printf("%d significant bits\n", -ilogb(x));

    fesetprec(FE_DBLPREC);
    x = 1.0;
    while (1.0 + x != 1.0)
        x *= 0.5;
    printf("%d significant bits\n", -ilogb(x));

    return 0;
}

在 x86 系统上使用 cc A8.c –lm –xarch=386 编译时将生成

64 significant bits
53 significant bit

最后,以下示例显示一种方法,在多线程程序中使用环境函数将浮点模式从父线程传播到子线程,并恢复在子线程与父线程连接时在其中引发的异常标志。有关编写多线程程序的更多信息,请参见Multithreaded Programming Guide

示例 A-9  在多线程程序中使用环境函数
#include <thread.h>
#include <fenv.h>

fenv_t  env;

void * child(void *p)
{
    /* inherit the parent's environment on entry */
    fesetenv(&env);
    ...
    /* save the child's environment before exit */
    fegetenv(&env);
}

void parent()
{
    thread_t tid;
    void *arg;
    ...
    /* save the parent's environment before creating the child */
    fegetenv(&env);
    thr_create(NULL, NULL, child, arg, NULL, &tid);
    ...
    /* join with the child */
    thr_join(tid, NULL, &arg);
    /* merge exception flags raised in the child into the
       parent's environment */
    fex_merge_flags(&env);
    ...
}

A.3 异常和异常处理

A.3.1 ieee_flags-已发生异常

通常情况下,用户程序会检查清除已发生异常位。以下示例是一个检查已发生异常标志的 C 程序。

示例 A-10  检查已发生异常标志
#include <sunmath.h>
#include <sys/ieeefp.h>

int main()
{
 int     code, inexact, division, underflow, overflow, invalid;
 double  x;
 char    *out;

 /* cause an underflow exception */
 x = max_subnormal() / 2.0;

 /* this statement insures that the previous */
 /* statement is not optimized away          */
 printf("x = %g\n",x);

 /* find out which exceptions are raised */
 code = ieee_flags("get", "exception", "", &out);

 /* decode the return value */
 inexact =      (code >> fp_inexact)     & 0x1;
 underflow =    (code >> fp_underflow)   & 0x1;
 division =     (code >> fp_division)    & 0x1;
 overflow =     (code >> fp_overflow)    & 0x1;
 invalid =      (code >> fp_invalid)     & 0x1;

      /* "out" is the raised exception with the highest priority */
 printf(" Highest priority exception is: %s\n", out);
 /* The value 1 means the exception is raised, */
 /* 0 means it isn't.                          */
 printf("%d %d %d %d %d\n", invalid, overflow, division,
  underflow, inexact);
 ieee_retrospective_();
 return 0;
}

运行上一程序的输出如下所示:

demo% a.out 
x = 1.11254e-308
 Highest priority exception is: underflow
0 0 0 1 1
 Note:IEEE floating-point exception flags raised:   
    Inexact;  Underflow; 
 See the Numerical Computation Guide, ieee_flags(3M)

从 Fortran 可以完成相同操作:

示例 A-11  检查已发生异常标志-Fortran
/*
A Fortran example that: 
    *  causes an underflow exception
    *  uses ieee_flags to determine which exceptions are raised
    *  decodes the integer value returned by ieee_flags 
    *  clears all outstanding exceptions
Remember to save this program in a file with the suffix .F, so that
the c preprocessor is invoked to bring in the header file
floatingpoint.h. 
*/
#include <floatingpoint.h> 

    program decode_accrued_exceptions
    double precision   x
    integer            accrued, inx, div, under, over, inv
    character*16       out
    double precision   d_max_subnormal
c Cause an underflow exception
    x = d_max_subnormal() / 2.0

c Find out which exceptions are raised
    accrued = ieee_flags('get', 'exception', '', out)

c Decode value returned by ieee_flags using bit-shift intrinsics
    inx   = and(rshift(accrued, fp_inexact)  , 1)
    under = and(rshift(accrued, fp_underflow), 1)
    div   = and(rshift(accrued, fp_division) , 1)
    over  = and(rshift(accrued, fp_overflow) , 1)
    inv   = and(rshift(accrued, fp_invalid)  , 1)

c The exception with the highest priority is returned in "out"
    print *, "Highest priority exception is ", out

c The value 1 means the exception is raised; 0 means it is not
    print *, inv, over, div, under, inx

c Clear all outstanding exceptions
    i = ieee_flags('clear', 'exception', 'all', out)
    end

输出如下所示:

 Highest priority exception is underflow       
   0  0  0  1  1

虽然用户程序设置异常标志是不太常见的情况,不过可以这么做。 以下 C 示例中演示了这种情况。

#include <sunmath.h>
 
int main()
{
 int     code;
 char    *out;
 
 if (ieee_flags("clear", "exception", "all", &out) != 0)
     printf("could not clear exceptions\n");
 if (ieee_flags("set", "exception", "division", &out) != 0)
     printf("could not set exception\n");
 code = ieee_flags("get", "exception", "", &out);
 printf("out is: %s , fp exception code is: %X \n",
  out, code);
 
 return 0;
}

在 SPARC 上,上一个程序的输出为:

out is: division , fp exception code is: 2 

在 x86 上,输出为:

out is: division , fp exception code is: 4

A.3.2 ieee_handler:捕获异常


注 -  以下示例仅适用于 Oracle Solaris OS。

以下 Fortran 程序安装信号处理程序以定位异常,该程序仅适用于基于 SPARC 的系统:

示例 A-12  在下溢时自陷 (SPARC)
  program demo
c declare signal handler function
 external fp_exc_hdl
 double precision d_min_normal
 double precision x
c set up signal handler
 i = ieee_handler('set', 'common', fp_exc_hdl)
 if (i.ne.0) print *, 'ieee trapping not supported here'
c cause an underflow exception (it will not be trapped)
 x = d_min_normal() / 13.0
 print *, 'd_min_normal() / 13.0 = ', x
c cause an overflow exception
c the value printed out is unrelated to the result
 x = 1.0d300
 x = x * x
 print *, '1.0d300*1.0d300 = ', x
 end
c
c the floating-point exception handling function
c
 integer function fp_exc_hdl(sig, sip, uap)
 integer sig, code, addr
 character label*16
c
c The structure /siginfo/ is a translation of siginfo_t
c from <sys/siginfo.h>
c
 structure /fault/
 integer address
 end structure
 structure /siginfo/
 integer si_signo
 integer si_code
 integer si_errno
 record /fault/ fault
 end structure

 record /siginfo/ sip
c See <sys/machsig.h> for list of FPE codes
c Figure out the name of the SIGFPE
 code = sip.si_code
 if (code.eq.3) label = 'division'
 if (code.eq.4) label = 'overflow'
 if (code.eq.5) label = 'underflow'
 if (code.eq.6) label = 'inexact'
 if (code.eq.7) label = 'invalid'
 addr = sip.fault.address
c Print information about the signal that happened
 write (*,77) code, label, addr
 77  format ('floating-point exception code ', i2, ',',
     *  a17, ',', ' at address ', z8 )
 end

使用 –f77 编译上面的代码时,输出如下所示:

 d_min_normal() / 13.0 =     1.7115952757748-309
floating-point exception code  4, overflow        , at address    1131C
 1.0d300*1.0d300 =     1.0000000000000+300
 Note: IEEE floating-point exception flags raised: 
    Inexact;  Underflow; 
 IEEE floating-point exception traps enabled:
    overflow; division by zero; invalid operation; 
 See the Numerical Computation Guide, ieee_flags(3M),
 ieee_handler(3M) 

以下为 SPARC 系统上的更复杂的 C 示例:

示例 A-13  在无效、被零除、溢出、下溢和不精确时自陷 (SPARC)
/*
 * Generate the 5 IEEE exceptions: invalid, division,
 * overflow, underflow and inexact.
 *
 * Trap on any floating point exception, print a message,
* and continue.*
 * Note that you could also inquire about raised exceptions by
*    i = ieee("get","exception","",&out);* where out contains the name of the highest exception
 * raised, and i can be decoded to find out about all the
 * exceptions raised.
 */

#include <sunmath.h>
#include <signal.h>
#include <siginfo.h>
#include <ucontext.h>

extern void trap_all_fp_exc(int sig, siginfo_t *sip,
  ucontext_t *uap);

int main()
{
 double  x, y, z;
 char  *out;

 /*
  * Use ieee_handler to establish "trap_all_fp_exc"
  * as the signal handler to use whenever any floating
  * point exception occurs.
  */

 if (ieee_handler("set", "all", trap_all_fp_exc) != 0)
  printf(" IEEE trapping not supported here.\n");
 /* disable trapping (uninteresting) inexact exceptions */
 if (ieee_handler("set", "inexact", SIGFPE_IGNORE) != 0)
   printf("Trap handler for inexact not cleared.\n");
 /* raise invalid */
 if (ieee_flags("clear", "exception", "all", &out) != 0)
   printf(" could not clear exceptions\n");
 printf("1. Invalid: signaling_nan(0) * 2.5\n");
 x = signaling_nan(0);
 y = 2.5;
 z = x * y;

 /* raise division */
 if (ieee_flags("clear", "exception", "all", &out) != 0)
  printf(" could not clear exceptions\n");
 printf("2. Div0: 1.0 / 0.0\n");
 x = 1.0;
 y = 0.0;
 z = x / y;

 /* raise overflow */
 if (ieee_flags("clear", "exception", "all", &out) != 0)
   printf(" could not clear exceptions\n");
 printf("3. Overflow: -max_normal() - 1.0e294\n");
 x = -max_normal();
 y = -1.0e294;
 z = x + y;

 /* raise underflow */
 if (ieee_flags("clear", "exception", "all", &out) != 0)
   printf(" could not clear exceptions\n");
 printf("4. Underflow: min_normal() * min_normal()\n");
 x = min_normal();
 y = x;
 z = x * y;

 /* enable trapping on inexact exception */
 if (ieee_handler("set", "inexact", trap_all_fp_exc) != 0)
   printf("Could not set trap handler for inexact.\n");
 /* raise inexact */
 if (ieee_flags("clear", "exception", "all", &out) != 0)
   printf(" could not clear exceptions\n");
 printf("5. Inexact: 2.0 / 3.0\n");
 x = 2.0;
 y = 3.0;
 z = x / y;
 /* don't trap on inexact */
 if (ieee_handler("set", "inexact", SIGFPE_IGNORE) != 0)
   printf(" could not reset inexact trap\n");

 /* check that we're not trapping on inexact anymore */
 if (ieee_flags("clear", "exception", "all", &out) != 0)
   printf(" could not clear exceptions\n");
 printf("6. Inexact trapping disabled; 2.0 / 3.0\n");
 x = 2.0;
 y = 3.0;
 z = x / y;

 /* find out if there are any outstanding exceptions */
 ieee_retrospective_();

 /* exit gracefully */
 return 0;
}

void trap_all_fp_exc(int sig, siginfo_t *sip, ucontext_t *uap) {
 char  *label = "undefined";

/* see /usr/include/sys/machsig.h for SIGFPE codes */
 switch (sip->si_code) {
 case FPE_FLTRES:
   label = "inexact";
   break;
 case FPE_FLTDIV:
   label = "division";
   break;
 case FPE_FLTUND:
   label = "underflow";
   break;
 case FPE_FLTINV:
   label = "invalid";
   break;
 case FPE_FLTOVF:
   label = "overflow";
   break;
 }

 printf(
 " signal %d, sigfpe code %d: %s exception at address %x\n",
   sig, sip->si_code, label, sip->__data.__fault.__addr);
}

输出以下类似内容:

1. Invalid: signaling_nan(0) * 2.5
   signal 8, sigfpe code 7: invalid exception at address 10da8
2. Div0: 1.0 / 0.0
   signal 8, sigfpe code 3: division exception at address 10e44
3. Overflow: -max_normal() - 1.0e294
   signal 8, sigfpe code 4: overflow exception at address 10ee8
4. Underflow: min_normal() * min_normal()
   signal 8, sigfpe code 5: underflow exception at address 10f80
5. Inexact: 2.0 / 3.0
   signal 8, sigfpe code 6: inexact exception at address 1106c
6. Inexact trapping disabled; 2.0 / 3.0
Note: IEEE floating-point exception traps enabled:  
   underflow; overflow; division by zero; invalid operation; 
See the Numerical Computation Guide, ieee_handler(3M)

以下代码显示了如何使用 ieee_handler 和 include 文件来修改 SPARC 上特定异常情况的缺省结果:

示例 A-14  修改异常情况的缺省结果
/*
 * Cause a division by zero exception and use the
 * signal handler to substitute MAXDOUBLE (or MAXFLOAT)
 * as the result.
 *
 * compile with the flag -Xa
 */
 
#include <values.h>
#include <siginfo.h>
#include <ucontext.h>

void division_handler(int sig, siginfo_t *sip, ucontext_t *uap);

int main() {
 double     x, y, z;
 float     r, s, t;
 char     *out;

 /*
  * Use ieee_handler to establish division_handler as the
  * signal handler to use for the IEEE exception division.
  */
 if (ieee_handler("set","division",division_handler)!=0) {
  printf(" IEEE trapping not supported here.\n");
 }

 /* Cause a division-by-zero exception */
 x = 1.0;
 y = 0.0;
 z = x / y;

 /*
  * Check to see that the user-supplied value, MAXDOUBLE,
  * is indeed substituted in place of the IEEE default
  * value, infinity.
  */
 printf("double precision division: %g/%g = %g \n",x,y,z);

 /* Cause a division-by-zero exception */
 r = 1.0;
 s = 0.0;
 t = r / s;

 /*
  * Check to see that the user-supplied value, MAXFLOAT,
  * is indeed substituted in place of the IEEE default
  * value, infinity.
  */
 printf("single precision division: %g/%g = %g \n",r,s,t);

 ieee_retrospective_();

 return 0;
}

void division_handler(int sig, siginfo_t *sip, ucontext_t *uap) {
 int       inst;
 unsigned       rd, mask, single_prec=0;
 float       f_val = MAXFLOAT;
 double       d_val = MAXDOUBLE;
 long       *f_val_p = (long *) &f_val;

 /* Get instruction that caused exception. */
 inst = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;

 /*
  * Decode the destination register. Bits 29:25 encode the
  * destination register for any SPARC floating point
  * instruction.
  */
 mask = 0x1f;
 rd = (mask & (inst >> 25));

 /*
  * Is this a single precision or double precision
  * instruction?  Bits 5:6 encode the precision of the
  * opcode; if bit 5 is 1, it's sp, else, dp.
  */

 mask = 0x1;
 single_prec = (mask & (inst >> 5));

 /* put user-defined value into destination register */
 if (single_prec) {
  uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[rd] =
   f_val_p[0];
 } else {
 uap->uc_mcontext.fpregs.fpu_fr.fpu_dregs[rd/2] = d_val;
 }
}

以下为预期的输出:

double precision division: 1/0 = 1.79769e+308 
single precision division: 1/0 = 3.40282e+38 
Note: IEEE floating-point exception traps enabled: 
   division by zero; 
See the Numerical Computation Guide, ieee_handler(3M)

A.3.3 ieee_handler:出现异常时中止

可以使用 ieee_handler 来强制程序在出现特定浮点异常时中止:

#include <floatingpoint.h>
 program abort
c
 ieeer = ieee_handler('set', 'division', SIGFPE_ABORT)
 if (ieeer .ne. 0) print *, ' ieee trapping not supported'
 r = 14.2
 s = 0.0
 r = r/s
c
 print *, 'you should not see this; system should abort'
c
 end 

A.3.4 libm 异常处理功能

下面的示例显示了如何使用 libm 中提供的一些异常处理功能。第一个示例基于以下任务:提供数字 x 和系数 a0a1、...、aN,以及 b0b1、...、bN-1,对函数 f(x) 及其一阶导数 f'(x) 求值,其中 f() 是连分数:

f(x) =a0 + b0 / (x + a1 + b1 / (x + .../ (x + aN-1 + bN-1 / (x + aN))...))。

在 IEEE 运算中直接计算 f():即使其中一个中间除法溢出或者被零除,该标准指定的缺省值(带正确符号的无穷大)也会得到正确结果。另一方面,计算 f'() 可能会更困难,因为对其最简单的求值方法具有可去奇点。如果计算过程遇到这些奇点之一,则会尝试对不定式 0/0、0*无穷大或无穷大/无穷大之一求值,这些不定式会引发无效运算异常。W. Kahan 建议了一种方法,使用称为预替换的功能来处理这些异常。

预替换是对 IEEE 缺省异常响应的扩展,允许用户预先指定在异常运算中要替换的结果值。在 libm 中使用异常处理工具,程序可以通过在 FEX_CUSTOM 异常处理模式下安装处理程序,方便地实现预替换功能。使用此模式时,处理程序只需将值存储在数据结构中,传递到处理程序的信息参数指向该数据结构,这样就能为异常运算的结果提供任意值。以下样例程序使用通过 FEX_CUSTOM 处理程序实现的预替换功能来计算连分数及其导数。

示例 A-15  使用 FEX_CUSTOM 处理程序计算连分数及其导数
#include <stdio.h>
#include <sunmath.h>
#include <fenv.h>
volatile double p;
void handler(int ex, fex_info_t *info)
{
    info->res.type = fex_double;
    if (ex == FEX_INV_ZMI)
        info->res.val.d = p;
    else
        info->res.val.d = infinity();
}

/*
*  Evaluate the continued fraction given by coefficients a[j] and
*  b[j] at the point x; return the function value in *pf and the
*  derivative in *pf1
*/
void continued_fraction(int N, double *a, double *b,
    double x, double *pf, double *pf1)
{
    fex_handler_t    oldhdl; /* for saving/restoring handlers */
    volatile double  t;
    double           f, f1, d, d1, q;
    int              j;

    fex_getexcepthandler(&oldhdl, FEX_DIVBYZERO | FEX_INVALID);

    fex_set_handling(FEX_DIVBYZERO, FEX_NONSTOP, NULL);
    fex_set_handling(FEX_INV_ZDZ | FEX_INV_IDI | FEX_INV_ZMI,
        FEX_CUSTOM, handler);

    f1 = 0.0;
    f = a[N];
    for (j = N - 1; j >= 0; j--) {
        d = x + f;
        d1 = 1.0 + f1;
        q = b[j] / d;
        /* the following assignment to the volatile variable t
           is needed to maintain the correct sequencing between
           assignments to p and evaluation of f1 */
        t = f1 = (-d1 / d) * q;
        p = b[j-1] * d1 / b[j];
        f = a[j] + q;
    }

    fex_setexcepthandler(&oldhdl, FEX_DIVBYZERO | FEX_INVALID);

    *pf = f;
    *pf1 = f1;
}

/* For the following coefficients, x = -3, 1, 4, and 5 will all
   encounter intermediate exceptions */
double a[] = { -1.0, 2.0, -3.0, 4.0, -5.0 };
double b[] = { 2.0, 4.0, 6.0, 8.0 };

int main()
{
    double  x, f, f1;
    int     i;

    feraiseexcept(FE_INEXACT); /* prevent logging of inexact */
    fex_set_log(stdout);
    fex_set_handling(FEX_COMMON, FEX_ABORT, NULL);
    for (i = -5; i <= 5; i++) {
        x = i;
        continued_fraction(4, a, b, x, &f, &f1);
        printf("f(% g) = %12g, f'(% g) = %12g\n", x, f, x, f1);
    }
    return 0;
}

多个有关程序的注释是按顺序排列的。在进入时,函数 continued_fraction 保存当前针对被零除和所有无效运算异常的异常处理模式。然后,它为被零除建立连续异常处理,并且为三个不定式建立 FEX_CUSTOM 处理程序。此处理程序会将 0/0 以及无穷大/无穷大替换为无穷大,而将 0*无穷大替换为全局变量 p 的值。请注意,在每次执行函数求值循环时都必须重新计算 p,以提供正确值来替换后续的 0*无穷大这一无效运算。另请注意,p 必须声明为 volatile 以防止编译器删除它,因为它在循环中的其他位置未显式涉及。最后,对于使用 p 来提供预替换值的计算过程,为了防止编译器将 p 的赋值移动到该计算过程之上或之下(这会导致异常),该计算过程的结果还将赋值给 volatile 变量(程序中的变量名为 t)。对 fex_setexcepthandler 的最终调用将恢复被零除和无效运算的原始处理模式。

主程序通过调用 fex_set_log 函数来启用对追溯诊断的日志记录。执行操作之前,它会引发不精确标志;这具有防止记录不精确异常的效果。请记住,在 FEX_NONSTOP 模式中,如果引发了其标志,则不记录异常;如回顾诊断一节中所述。主程序还会为常见异常建立 FEX_ABORT 模式,以确保任何未由 continued_fraction 处理的不常见异常将导致程序终止。最后,程序在多个不同点对特定连分数求值。如以下样例输出中所示,计算过程并未真正遇到中间异常:

f(-5) =     -1.59649,   f'(-5) =      -0.1818
f(-4) =     -1.87302,   f'(-4) =    -0.428193
Floating point division by zero at 0x08048dbe continued_fraction, nonstop mode
  0x08048dc1  continued_fraction
  0x08048eda  main
Floating point invalid operation (inf/inf) at 0x08048dcf continued_fraction, handler: handler
  0x08048dd2  continued_fraction
  0x08048eda  main
Floating point invalid operation (0*inf) at 0x08048dd2 continued_fraction, handler: handler
  0x08048dd8  continued_fraction
  0x08048eda  main
f(-3) =           -3,   f'(-3) =     -3.16667
f(-2) = -4.44089e-16,   f'(-2) =     -3.41667
f(-1) =     -1.22222,   f'(-1) =    -0.444444
f( 0) =     -1.33333,   f'( 0) =     0.203704
f( 1) =           -1,   f'( 1) =     0.333333
f( 2) =    -0.777778,   f'( 2) =      0.12037
f( 3) =    -0.714286,   f'( 3) =    0.0272109
f( 4) =    -0.666667,   f'( 4) =     0.203704
f( 5) =    -0.777778,   f'( 5) =    0.0185185

f'(x) 计算中,出现在 x = 1、4 和 5 时的异常不会生成追溯诊断消息,因为它们在程序中出现的位置与 x = –3 时出现的异常位置相同。

以上程序可能不是处理在连分数及其导数求值过程中出现的异常的最有效方法。原因之一是每次迭代循环时都必须重新计算预替换值,不论是否需要。在这种情况下,预替换值的计算涉及到浮点除法,在现代的 SPARC 和 x86 处理器上,浮点除法是相对较慢的运算。此外,循环本身涉及到两个除法,由于大部分 SPARC 和 x86 处理器无法交叠执行两个不同的除法运算,除法很可能成为循环中的瓶颈;添加另一个除法还会加剧这一瓶颈。

可以重新编写循环,这样只需要一次除法,特别是在预替换值的计算不需要涉及到除法的情况下。要以这种方式重新编写循环,用户必须预先计算 b 数组中系数的相邻元素比率。这将消除多次除法运算的瓶颈,但是不会消除预替换值计算中涉及的所有算术运算。此外,需要对预替换值进行赋值以及对预替换为 volatile 变量的运算结果进行赋值,这会带来额外的内存操作,从而降低程序运行速度。虽然对于防止编译器重新排序特定关键运算而言,这些赋值是必要的,但是也会有效地阻止编译器重新排序其他无关运算。因此,在本例中通过预替换处理异常需要额外的内存运算,并会预先排除一些在其他情况下可能可用的优化。能否更高效地处理这些异常?

在缺少特定硬件支持来实现快速预替换的情况下,处理本例中异常的最有效方法是使用标志,如以下版本所示:

示例 A-16  使用标志处理异常(续)
#include <stdio.h>
#include <math.h>
#include <fenv.h>

/*
*  Evaluate the continued fraction given by coefficients a[j] and
*  b[j] at the point x; return the function value in *pf and the
*  derivative in *pf1
*/
void continued_fraction(int N, double *a, double *b,
    double         x, double *pf, double *pf1)
{
    fex_handler_t  oldhdl;
    fexcept_t      oldinvflag;
    double         f, f1, d, d1, pd1, q;
    int            j;

    fex_getexcepthandler(&oldhdl, FEX_DIVBYZERO | FEX_INVALID);
    fegetexceptflag(&oldinvflag, FE_INVALID);

    fex_set_handling(FEX_DIVBYZERO | FEX_INV_ZDZ | FEX_INV_IDI |
        FEX_INV_ZMI, FEX_NONSTOP, NULL);
    feclearexcept(FE_INVALID);

    f1 = 0.0;
    f = a[N];
    for (j = N - 1; j >= 0; j--) {
        d = x + f;
        d1 = 1.0 + f1;
        q = b[j] / d;
        f1 = (-d1 / d) * q;
        f = a[j] + q;
    }

    if (fetestexcept(FE_INVALID)) {
        /* recompute and test for NaN */
        f1 = pd1 = 0.0;
        f = a[N];
        for (j = N - 1; j >= 0; j--) {
            d = x + f;
            d1 = 1.0 + f1;
            q = b[j] / d;
            f1 = (-d1 / d) * q;
            if (isnan(f1))
                f1 = b[j] * pd1 / b[j+1];
            pd1 = d1;
            f = a[j] + q;
        }
    }

    fesetexceptflag(&oldinvflag, FE_INVALID);
    fex_setexcepthandler(&oldhdl, FEX_DIVBYZERO | FEX_INVALID);

    *pf = f;
    *pf1 = f1;
}

在此版本中,第一个循环尝试以缺省的连续模式计算 f(x)f'(x)。如果引发了无效标志,则第二个循环将重新计算 f(x)f'(x),显式测试是否出现 NaN。通常,不会出现无效运算异常,因此程序仅执行第一个循环。此循环不引用 volatile 变量,也没有额外的算术运算,因此运行速度将实现编译器所能达到的最大速度。实现这种效率的代价是需要编写与第一个循环几乎相同的第二个循环,用于处理出现异常的情况。这一折衷是浮点异常处理所带来的典型难题。

A.3.5 在 Fortran 程序中使用 libm 异常处理

libm 中的异常处理工具主要在 C/C++ 程序中使用,不过利用 Sun Fortran 语言的互操作性功能,您也可以从 Fortran 程序调用某些 libm 函数。


注 - 如果需要一致的行为,请不要在同一个程序中同时使用 libm 异常处理函数和 ieee_flags 以及 ieee_handler 函数。

以下示例显示了使用预替换来对连分数及其导数进行求值的 Fortran 程序版本(仅限 SPARC):

示例 A-17  使用预替换对连分数及其导数进行求值 (SPARC)
c
c Presubstitution handler
c
 subroutine handler(ex, info)
 structure /fex_numeric_t/
 integer type
 union
 map
 integer i
 end map
 map
 integer*8 l
 end map
 map
 real f
 end map
 map
 real*8 d
 end map
 map
 real*16 q
 end map
 end union
 end structure
 
 structure /fex_info_t/
 integer op, flags
 record /fex_numeric_t/ op1, op2, res
 end structure
 integer ex
 record /fex_info_t/ info
 common /presub/ p
 double precision p, d_infinity
 volatile p
c 4 = fex_double; see <fenv.h> for this and other constants
 info.res.type = 4
c x'80' = FEX_INV_ZMI
 if (loc(ex) .eq. x'80') then
 info.res.d = p
 else
 info.res.d = d_infinity()
 endif
 return
 end
c
c Evaluate the continued fraction given by coefficients a(j) and
c b(j) at the point x; return the function value in f and the
c derivative in f1
c
 subroutine continued_fraction(n, a, b, x, f, f1)
 integer n
 double precision a(*), b(*), x, f, f1
 common /presub/ p
 integer j, oldhdl
 dimension oldhdl(24)
 double precision d, d1, q, p, t
 volatile p, t
 data ixff2/x'ff2'/
 data ix2/x'2'/
 data ixb0/x'b0'/
 
 external fex_getexcepthandler, fex_setexcepthandler
 external fex_set_handling, handler
c$pragma c(fex_getexcepthandler, fex_setexcepthandler)
c$pragma c(fex_set_handling)
c x'ff2' = FEX_DIVBYZERO | FEX_INVALID
 call fex_getexcepthandler(oldhdl, %val(ixff2))
c x'2' = FEX_DIVBYZERO, 0 = FEX_NONSTOP
 call fex_set_handling(%val(ix2), %val(0), %val(0))
c x'b0' = FEX_INV_ZDZ | FEX_INV_IDI | FEX_INV_ZMI, 3 = FEX_CUSTOM
 call fex_set_handling(%val(ixb0), %val(3), handler)
 f1 = 0.0d0
 f = a(n+1)
 do j = n, 1, -1
 d = x + f
 d1 = 1.0d0 + f1
 q = b(j) / d
 f1 = (-d1 / d) * q
c
c the following assignment to the volatile variable t
c is needed to maintain the correct sequencing between
c assignments to p and evaluation of f1
 t = f1
 p = b(j-1) * d1 / b(j)
 f = a(j) + q
 end do
 call fex_setexcepthandler(oldhdl, %val(ixff2))
 return
 end
 
c Main program
c
 program cf
 integer i
 double precision a, b, x, f, f1
 dimension a(5), b(4)
 data a /-1.0d0, 2.0d0, -3.0d0, 4.0d0, -5.0d0/
 data b /2.0d0, 4.0d0, 6.0d0, 8.0d0/
 data ixffa/x'ffa'/

 external fex_set_handling
c$pragma c(fex_set_handling)
c x'ffa' = FEX_COMMON, 1 = FEX_ABORT
 call fex_set_handling(%val(ixffa), %val(1), %val(0))
 do i = -5, 5
 x = dble(i)
 call continued_fraction(4, a, b, x, f, f1)
 write (*, 1) i, f, i, f1
 end do
 1  format('f(', I2, ') = ', G12.6, ', f''(', I2, ') = ', G12.6)
 end
 

此程序使用 –f77 标志编译的输出如下所示:

f(-5) = -1.59649    , f'(-5) = -.181800
f(-4) = -1.87302    , f'(-4) = -.428193
f(-3) = -3.00000    , f'(-3) = -3.16667
f(-2) = -.444089E-15, f'(-2) = -3.41667
f(-1) = -1.22222    , f'(-1) = -.444444
f( 0) = -1.33333    , f'( 0) = 0.203704
f( 1) = -1.00000    , f'( 1) = 0.333333
f( 2) = -.777778    , f'( 2) = 0.120370
f( 3) = -.714286    , f'( 3) = 0.272109E-01
f( 4) = -.666667    , f'( 4) = 0.203704
f( 5) = -.777778    , f'( 5) = 0.185185E-01
 Note: IEEE floating-point exception flags raised:
    Inexact;  Division by Zero;  Underflow; Invalid Operation;
 IEEE floating-point exception traps enabled:
    overflow;  division by zero;  invalid operation;
 See the Numerical Computation Guide, ieee_flags(3M),
ieee_handler(3M)

A.4 其他

A.4.1 sigfpe:捕获整型异常

上节中显示了使用 ieee_handler 的示例。通常而言,可以选择使用 ieee_handlersigfpe 时,推荐使用前者。


注 -  sigfpe 仅在 Oracle Solaris OS 上可用。

在要使用的处理程序是 sigfpe 时,有诸如捕获整型运算异常等实例。在基于 SPARC 的系统上,Example A–18 在整数被零除时自陷。

示例 A-18  捕获整型异常
/* Generate the integer division by zero exception */
#include <signal.h>
#include <siginfo.h>
#include <ucontext.h>
void int_handler(int sig, siginfo_t *sip, ucontext_t *uap);
int main() {
int a, b, c;
/*
* Use sigfpe(3) to establish "int_handler" as the signal handler
* to use on integer division by zero
*/
/*
* Integer division-by-zero aborts unless a signal
* handler for integer division by zero is set up
*/
sigfpe(FPE_INTDIV, int_handler);

a = 4;
b = 0;
c = a / b;
printf("%d / %d = %d\n\n", a, b, c);
return 0;
}
void int_handler(int sig, siginfo_t *sip, ucontext_t *uap) {
printf("Signal %d, code %d, at addr %x\n",
sig, sip->si_code, sip->__data.__fault.__addr);
/*
* automatically for floating-point exceptions but not for
* integer division by zero.
*/
uap->uc_mcontext.gregs[REG_PC] =
uap->uc_mcontext.gregs[REG_nPC];
}

A.4.2 从 C 调用 Fortran

以下为 C 驱动程序调用 Fortran 子例程的简单示例。 有关使用 C 和 Fortran 的更多信息,请参阅Oracle Solaris Studio 12.4:C 用户指南 Oracle Solaris Studio 12.4:Fortran 用户指南 。以下为 C 驱动程序(将其保存在名为 driver.c 的文件中):

示例 A-19  从 C 调用 Fortran
/*
 * a demo program that shows:
 * 1. how to call f95 subroutine from C, passing an array argument
 * 2. how to call single precision f95 function from C
 * 3. how to call double precision f95 function from C
 */

extern int      demo_one_(double *);
extern float    demo_two_(float *);
extern double   demo_three_(double *);

int main()
{
 double array[3][4];
 float f, g;
 double x, y;
 int i, j;

 for (i = 0; i < 3; i++)
  for (j = 0; j < 4; j++)
   array[i][j] = i + 2*j;

 g = 1.5;
 y = g;

 /* pass an array to a fortran function (print the array) */
 demo_one_(&array[0][0]);
 printf(" from the driver\n");
 for (i = 0; i < 3; i++) {
  for (j = 0; j < 4; j++)
   printf("    array[%d][%d] = %e\n",
    i, j, array[i][j]);
  printf("\n");
 }

 /* call a single precision fortran function */
 f = demo_two_(&g);
 printf(
        " f = sin(g) from a single precision fortran function\n");
 printf("    f, g: %8.7e, %8.7e\n", f, g);
 printf("\n");

 /* call a double precision fortran function */
 x = demo_three_(&y);
 printf(
  " x = sin(y) from a double precision fortran function\n");
 printf("    x, y: %18.17e, %18.17e\n", x, y);

 ieee_retrospective_();
 return 0;
}

将 Fortran 子例程保存在名为 drivee.f 的文件中:

subroutine demo_one(array)
double precision array(4,3)
print *, 'from the fortran routine:'
do 10 i =1,4
 do 20 j = 1,3
  print *, '   array[', i, '][', j, '] = ', array(i,j)
20 continue
print *
10 continue
return
end

real function demo_two(number)
real number
demo_two = sin(number)
return
end

double precision function demo_three(number)
double precision number
demo_three = sin(number)
return 
end

执行编译和链接:

cc -c driver.c
f95 -c drivee.f
 demo_one:
 demo_two:
 demo_three:
f95 -o driver driver.o drivee.o

输出结果看起来类似于下文:

 from the fortran routine:
    array[ 1 ][ 1 ] =  0.0E+0
    array[ 1 ][ 2 ] =  1.0
    array[ 1 ][ 3 ] =  2.0

    array[ 2 ][ 1 ] =  2.0
    array[ 2 ][ 2 ] =  3.0
    array[ 2 ][ 3 ] =  4.0

    array[ 3 ][ 1 ] =  4.0
    array[ 3 ][ 2 ] =  5.0
    array[ 3 ][ 3 ] =  6.0

    array[ 4 ][ 1 ] =  6.0
    array[ 4 ][ 2 ] =  7.0
    array[ 4 ][ 3 ] =  8.0

 from the driver
    array[0][0] = 0.000000e+00
    array[0][1] = 2.000000e+00
    array[0][2] = 4.000000e+00
    array[0][3] = 6.000000e+00

    array[1][0] = 1.000000e+00
    array[1][1] = 3.000000e+00
    array[1][2] = 5.000000e+00
    array[1][3] = 7.000000e+00

    array[2][0] = 2.000000e+00
    array[2][1] = 4.000000e+00
    array[2][2] = 6.000000e+00
    array[2][3] = 8.000000e+00

 f = sin(g) from a single precision fortran function
    f, g: 9.9749500e-01, 1.5000000e+00

 x = sin(y) from a double precision fortran function
    x, y: 9.97494986604054446e-01, 1.50000000000000000e+00

A.4.3 有用的调试命令

下表显示了 SPARC 体系结构调试命令的示例。

表 A-1  一些调试命令 (SPARC)
操作
dbx
adb
在函数上设置断点
stop in myfunct
myfunct:b
在行号上设置断点
stop at 29
n/a
在绝对地址上设置断点
n/a
23a8:b
在相对地址上设置断点
n/a
main+0x40:b
运行直至遇到断点
run
:r
检查源代码
list
<pc,10?ia
检查 fp 寄存器:IEEE 单精度
print $f0
<f0=X
检查 fp 寄存器:(十六进制)等值十进制
print -fx $f0
<f0=f
检查 fp 寄存器:IEEE 双精度
print $f0f1
<f0=X; <f1=X
检查 fp 寄存器:(十六进制)等值十进制
print -flx $f0f1
<f0=F
检查所有 fp 寄存器
regs -F
$x for f0-f15
$X for f16-f31
检查所有寄存器
regs
$r; $x; $X
检查 fp 状态寄存器
print -fllx $fsr
<fsr=X
将单精度 1.0 放在 f0
assign $f0=1.0
3f800000>f0
将双精度 1.0 放在 f0/f1
assign $f0f1=1.0
3ff00000>f0; 0>f1
继续执行
cont
:c
单步
step (or next)
:s
退出调试器
quit
$q

显示浮点数时,应该注意,寄存器的大小为 32 位,单精度浮点数将占据 32 位(因此可以填入一个寄存器),而双精度浮点数占据 64 位(因此使用两个寄存器来存放双精度数)。在十六进制表示形式中,32 位对应于 8 个十六进制位。以下 FPU 寄存器快照使用 adb 显示,其排列如下:

<name of fpu register> <IEEE hex value> <single precision> <double precision>

第三列存放第二列中显示的十六进制模式的单精度十进制解释。第四列解释寄存器对。例如,f11 行的第四列将 f10f11 解释为 64 位 IEEE 双精度数。

由于 f10f11 用于存放双精度值,因此在 f10 行上,将该值的第一个 32 位 7ff00000 解释为 +NaN 是没有任何意义的。将全部 64 位 7ff00000 00000000 解释为 +Infinity 就是有意义的转换。

adb 命令 $x 用于显示前 16 个浮点数据寄存器,同时还显示 fsr(浮点状态寄存器):

$x
fsr    40020
f0 400921fb      +2.1426990e+00 
f1 54442d18      +3.3702806e+12      +3.1415926535897931e+00 
f2        2      +2.8025969e-45 
f3        0      +0.0000000e+00      +4.2439915819305446e-314 
f4 40000000      +2.0000000e+00 
f5        0      +0.0000000e+00      +2.0000000000000000e+00 
f6 3de0b460      +1.0971904e-01 
f7        0      +0.0000000e+00      +1.2154188766544394e-10 
f8 3de0b460      +1.0971904e-01 
f9        0      +0.0000000e+00      +1.2154188766544394e-10 
f10 7ff00000      +NaN 
f11        0      +0.0000000e+00      +Infinity 
f12 ffffffff      -NaN 
f13 ffffffff      -NaN                 -NaN 
f14 ffffffff      -NaN 
f15 ffffffff      -NaN                 -NaN 

下表显示了 x86 体系结构调试命令的示例:

表 A-2  一些调试命令 (x86)
操作
dbx
adb
在函数上设置断点
stop in myfunct
myfunct:b
在行号上设置断点
stop at 29
n/a
在绝对地址上设置断点
n/a
23a8:b
在相对地址上设置断点
n/a
main+0x40:b
运行直至遇到断点
run
:r
检查源代码
list
<pc,10?ia
检查 fp 寄存器
print $st0
...
print $st7
$x
检查所有寄存器
refs -F
$r
检查 fp 状态寄存器
print -fx $fstat
<fstat=X
继续执行
cont
:c
单步
step (or next)
:s
退出调试器
quit
$q

以下示例显示了两种方法,用于在与 adb 中的例程 myfunction 对应的代码开头设置断点。首先可以使用:

myfunction:b

其次,您可以确定与对应于 myfunction 的代码片段开头部分相对应的绝对地址,然后在该绝对地址上设置断点:

myfunction=X 
   23a8 
23a8:b 

Fortran 程序中使用 f95 编译的主子例程在 adb 中名为 MAIN_。要在 adb 中的 MAIN_ 处设置断点,请键入:

   MAIN_:b

检查浮点寄存器的内容时,dbx 命令 regs –F 显示的十六进制值是 base-16 表示形式,而不是数字的十进制表示形式。对于基于 SPARC 的系统,adb 命令 $x$X 同时显示十六进制表示形式和十进制值。对于基于 x86 的系统,adb 命令 $x 只显示十进制值。对于基于 SPARC 的系统,双精度值显示奇数编号寄存器旁边的十进制值。

由于操作系统在进程首次使用浮点单元之前禁用浮点单元,因此在所调试的程序访问浮点寄存器之前,您无法修改浮点寄存器。

x86 上的对应输出类似于以下内容:

$x
80387 chip is present.
cw      0x137f
sw      0x3920
cssel 0x17  ipoff 0x2d93                datasel 0x1f  dataoff 0x5740
 
 st[0]  +3.24999988079071044921875 e-1            VALID
 st[1]  +5.6539133243479549034419688 e73          EMPTY
 st[2]  +2.0000000000000008881784197              EMPTY
 st[3]  +1.8073218308070440556016047 e-1          EMPTY
 st[4]  +7.9180300235748291015625 e-1             EMPTY
 st[5]  +4.201639036693904927233234 e-13          EMPTY
 st[6]  +4.201639036693904927233234 e-13          EMPTY
 st[7]  +2.7224999213218694649185636              EMPTY

注 -  对于 x86,cw 是控制字,sw 是状态字。