IEEE 标准建议:在捕获到下溢和溢出时,系统应当为陷阱处理程序提供一种方法来替换已封装指数的结果,即,该值与下溢或溢出运算的舍入结果值相一致(指数封装在其常用范围的末端这一情况除外),从而有效地按 2 的幂缩放结果。在选择比例因子时,使下溢或溢出结果尽可能接近指数范围的中间位置处的值,这样将减少以后的计算进一步出现下溢或溢出的可能性。通过跟踪所发生的下溢或溢出的数量,程序可通过缩放最终结果来补偿封装的指数。这种下溢/溢出“计数模式”可用于在计算中产生准确结果,不然,结果将超出可用浮点格式的范围。有关更多信息,请参见 P.Sterbenz 编著的《Floating-Point Computation》。
在基于 SPARC 的系统上,当浮点指令导致捕获异常时,系统会使目标寄存器保持不变。因此,为了替换已封装指数的结果,下溢/溢出处理程序必须对指令解码、检查操作数寄存器并生成缩放结果本身。以下示例显示了一个执行这三个步骤的处理程序。
示例 4-4 对基于 SPARC 的系统替换 IEEE 捕获的下溢/溢出处理程序结果#include <stdio.h>
#include <ieeefp.h>
#include <math.h>
#include <sunmath.h>
#include <siginfo.h>
#include <ucontext.h>
#ifdef V8PLUS
/* The upper 32 floating point registers are stored in an area
pointed to by uap->uc_mcontext.xrs.xrs_prt. Note that this
pointer is valid ONLY when uap->uc_mcontext.xrs.xrs_id ==
XRS_ID (defined in sys/procfs.h). */
#include <assert.h>
#include <sys/procfs.h>
#define FPxreg(x) ((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)
->pr_un.pr_v8p.pr_xfr.pr_regs[(x)]
#endif
#define FPreg(x) uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[(x)]
/*
* Supply the IEEE 754 default result for trapped under/overflow
*/
void
ieee_trapped_default(int sig, siginfo_t *sip, ucontext_t *uap)
{
unsigned instr, opf, rs1, rs2, rd;
long double qs1, qs2, qd, qscl;
double ds1, ds2, dd, dscl;
float fs1, fs2, fd, fscl;
/* get the instruction that caused the exception */
instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;
/* extract the opcode and source and destination register
numbers */
opf = (instr >> 5) & 0x1ff;
rs1 = (instr >> 14) & 0x1f;
rs2 = instr & 0x1f;
rd = (instr >> 25) & 0x1f;
/* get the operands */
switch (opf & 3) {
case 1: /* single precision */
fs1 = *(float*)&FPreg(rs1);
fs2 = *(float*)&FPreg(rs2);
break;
case 2: /* double precision */
#ifdef V8PLUS
if (rs1 & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
ds1 = *(double*)&FPxreg(rs1 & 0x1e);
}
else
ds1 = *(double*)&FPreg(rs1);
if (rs2 & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
ds2 = *(double*)&FPxreg(rs2 & 0x1e);
}
else
ds2 = *(double*)&FPreg(rs2);
#else
ds1 = *(double*)&FPreg(rs1);
ds2 = *(double*)&FPreg(rs2);
#endif
break;
case 3: /* quad precision */
#ifdef V8PLUS
if (rs1 & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
qs1 = *(long double*)&FPxreg(rs1 & 0x1e);
}
else
qs1 = *(long double*)&FPreg(rs1);
if (rs2 & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
qs2 = *(long double*)&FPxreg(rs2 & 0x1e);
}
else
qs2 = *(long double*)&FPreg(rs2);
#else
qs1 = *(long double*)&FPreg(rs1);
qs2 = *(long double*)&FPreg(rs2);
#endif
break;
}
/* set up scale factors */
if (sip->si_code == FPE_FLTOVF) {
fscl = scalbnf(1.0f, -96);
dscl = scalbn(1.0, -768);
qscl = scalbnl(1.0, -12288);
} else {
fscl = scalbnf(1.0f, 96);
dscl = scalbn(1.0, 768);
qscl = scalbnl(1.0, 12288);
}
/* disable traps and generate the scaled result */
fpsetmask(0);
switch (opf) {
case 0x41: /* add single */
fd = fscl * (fscl * fs1 + fscl * fs2);
break;
case 0x42: /* add double */
dd = dscl * (dscl * ds1 + dscl * ds2);
break;
case 0x43: /* add quad */
qd = qscl * (qscl * qs1 + qscl * qs2);
break;
case 0x45: /* subtract single */
fd = fscl * (fscl * fs1 - fscl * fs2);
break;
case 0x46: /* subtract double */
dd = dscl * (dscl * ds1 - dscl * ds2);
break;
case 0x47: /* subtract quad */
qd = qscl * (qscl * qs1 - qscl * qs2);
break;
case 0x49: /* multiply single */
fd = (fscl * fs1) * (fscl * fs2);
break;
case 0x4a: /* multiply double */
dd = (dscl * ds1) * (dscl * ds2);
break;
case 0x4b: /* multiply quad */
qd = (qscl * qs1) * (qscl * qs2);
break;
case 0x4d: /* divide single */
fd = (fscl * fs1) / (fs2 / fscl);
break;
case 0x4e: /* divide double */
dd = (dscl * ds1) / (ds2 / dscl);
break;
case 0x4f: /* divide quad */
qd = (qscl * qs1) / (qs2 / dscl);
break;
case 0xc6: /* convert double to single */
fd = (float) (fscl * (fscl * ds1));
break;
case 0xc7: /* convert quad to single */
fd = (float) (fscl * (fscl * qs1));
break;
case 0xcb: /* convert quad to double */
dd = (double) (dscl * (dscl * qs1));
break;
}
/* store the result in the destination */
if (opf & 0x80) {
/* conversion operation */
if (opf == 0xcb) {
/* convert quad to double */
#ifdef V8PLUS
if (rd & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
*(double*)&FPxreg(rd & 0x1e) = dd;
}
else
*(double*)&FPreg(rd) = dd;
#else
*(double*)&FPreg(rd) = dd;
#endif
} else
/* convert quad/double to single */
*(float*)&FPreg(rd) = fd;
} else {
/* arithmetic operation */
switch (opf & 3) {
case 1: /* single precision */
*(float*)&FPreg(rd) = fd;
break;
case 2: /* double precision */
#ifdef V8PLUS
if (rd & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
*(double*)&FPxreg(rd & 0x1e) = dd;
}
else
*(double*)&FPreg(rd) = dd;
#else
*(double*)&FPreg(rd) = dd;
#endif
break;
case 3: /* quad precision */
#ifdef V8PLUS
if (rd & 1)
{
assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID);
*(long double*)&FPxreg(rd & 0x1e) = qd;
}
else
*(long double*)&FPreg(rd & 0x1e) = qd;
#else
*(long double*)&FPreg(rd & 0x1e) = qd;
#endif
break;
}
}
}
int
main()
{
volatile float a, b;
volatile double x, y;
ieee_handler("set", "underflow", ieee_trapped_default);
ieee_handler("set", "overflow", ieee_trapped_default);
a = b = 1.0e30f;
a *= b; /* overflow; will be wrapped to a moderate number */
printf( "%g\n", a );
a /= b;
printf( "%g\n", a );
a /= b; /* underflow; will wrap back */
printf( "%g\n", a );
x = y = 1.0e300;
x *= y; /* overflow; will be wrapped to a moderate number */
printf( "%g\n", x );
x /= y;
printf( "%g\n", x );
x /= y; /* underflow; will wrap back */
printf( "%g\n", x );
ieee_retrospective(stdout);
return 0;
}
在上例中,变量 a、b、x 和 y 都已被声明为 volatile,其目的仅在于防止编译器在编译时对 a * b 等求值。在典型的用法中,将不需要进行 volatile 声明。
上述程序的输出结果如下所示:
159.309
1.59309e-28
1
4.14884e+137
4.14884e-163
1
Note: IEEE floating-point exception traps enabled:
underflow; overflow;
See the Numerical Computation Guide, ieee_handler(3M)
在基于 x86 的系统上,当浮点指令导致捕获到下溢或溢出,而且它的目标是寄存器时,浮点硬件会提供已封装指数的结果。但是,当出现捕获到的有关浮点存储指令的下溢或溢出时,如果该存储指令属于存储并弹出指令,则该硬件将在不完成存储且不弹出栈的情况下进行捕获。因此,为了实现计数模式,当出现有关存储指令的陷阱时,下溢/溢出处理程序必须生成缩放结果并修复栈。Example 4–5 阐释了此类处理程序。
示例 4-5 对基于 x86 的系统替换 IEEE 捕获的下溢/溢出处理程序结果#include <stdio.h>
#include <ieeefp.h>
#include <math.h>
#include <sunmath.h>
#include <siginfo.h>
#include <ucontext.h>
/* offsets into the saved fp environment */
#define CW 0 /* control word */
#define SW 1 /* status word */
#define TW 2 /* tag word */
#define OP 4 /* opcode */
#define EA 5 /* operand address */
#define FPenv(x) uap->uc_mcontext.fpregs.fp_reg_set.
fpchip_state.state[(x)]
#define FPreg(x) *(long double *)(10*(x)+(char*)&uap->
uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7])/*
* Supply the IEEE 754 default result for trapped under/overflow
*/
void
ieee_trapped_default(int sig, siginfo_t *sip, ucontext_t *uap)
{
double dscl;
float fscl;
unsigned sw, op, top;
int mask, e;
/* preserve flags for untrapped exceptions */
sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status;
FPenv(SW) |= (sw & (FPenv(CW) & 0x3f));
/* if the excepting instruction is a store, scale the stack
top, store it, and pop the stack if need be */
fpsetmask(0);
op = FPenv(OP) >> 16;
switch (op & 0x7f8) {
case 0x110:
case 0x118:
case 0x150:
case 0x158:
case 0x190:
case 0x198:
fscl = scalbnf(1.0f, (sip->si_code == FPE_FLTOVF)?
-96 : 96);
*(float *)FPenv(EA) = (FPreg(0) * fscl) * fscl;
if (op & 8) {
/* pop the stack */
FPreg(0) = FPreg(1);
FPreg(1) = FPreg(2);
FPreg(2) = FPreg(3);
FPreg(3) = FPreg(4);
FPreg(4) = FPreg(5);
FPreg(5) = FPreg(6);
FPreg(6) = FPreg(7);
top = (FPenv(SW) >> 10) & 0xe;
FPenv(TW) |= (3 << top);
top = (top + 2) & 0xe;
FPenv(SW) = (FPenv(SW) & ~0x3800) | (top << 10);
}
break;
case 0x510:
case 0x518:
case 0x550:
case 0x558:
case 0x590:
case 0x598:
dscl = scalbn(1.0, (sip->si_code == FPE_FLTOVF)?
-768 : 768);
*(double *)FPenv(EA) = (FPreg(0) * dscl) * dscl;
if (op & 8) {
/* pop the stack */
FPreg(0) = FPreg(1);
FPreg(1) = FPreg(2);
FPreg(2) = FPreg(3);
FPreg(3) = FPreg(4);
FPreg(4) = FPreg(5);
FPreg(5) = FPreg(6);
FPreg(6) = FPreg(7);
top = (FPenv(SW) >> 10) & 0xe;
FPenv(TW) |= (3 << top);
top = (top + 2) & 0xe;
FPenv(SW) = (FPenv(SW) & ~0x3800) | (top << 10);
}
break;
}
}
int main()
{
volatile float a, b;
volatile double x, y;
ieee_handler("set", "underflow", ieee_trapped_default);
ieee_handler("set", "overflow", ieee_trapped_default);
a = b = 1.0e30f;
a *= b;
printf( "%g\n", a );
a /= b;
printf( "%g\n", a );
a /= b;
printf( "%g\n", a );
x = y = 1.0e300;
x *= y;
printf( "%g\n", x );
x /= y;
printf( "%g\n", x );
x /= y;
printf( "%g\n", x );
ieee_retrospective(stdout);
return 0;
}
正如在基于 SPARC 的系统上并使用 –xarch=386 进行编译一样,上述程序在 x86 上的输出结果是:
159.309
1.59309e-28
1
4.14884e+137
4.14884e-163
1
Note: IEEE floating-point exception traps enabled:
underflow; overflow;
See the Numerical Computation Guide, ieee_handler(3M)
C/C++ 程序可以使用 libm 中的 fex_set_handling 函数来为下溢和溢出安装 FEX_CUSTOM 处理程序。在基于 SPARC 的系统上,提供给类似处理程序的信息总是包括导致了异常的运算以及操作数,这些信息足以允许该处理程序计算 IEEE 已封装指数的结果,如上所示。在基于 x86 的系统上,可用信息并不总是指出导致了异常的特定运算;例如,当异常由某个超越指令引发时,info->op 参数将设置为 fex_other。(有关定义,请参见 fenv.h 文件。)而且,x86 硬件自动提供已封装指数的结果,如果异常指令的目标是浮点寄存器,这将覆盖某个操作数。
幸运的是,fex_set_handling 功能为在 FEX_CUSTOM 模式下安装的处理程序提供了一种简单的方法,用来替换下溢或溢出运算的 IEEE 已封装指数的结果。当捕获到其中的任一异常时,该处理程序会进行如下设置:
info->res.type = fex_nodata;
以指出应当提供已封装指数的结果。下面的示例显示了此类处理程序:
#include <stdio.h>
#include <fenv.h>
void handler(int ex, fex_info_t *info) {
info->res.type = fex_nodata;
}
int main()
{
volatile float a, b;
volatile double x, y;
fex_set_log(stderr);
fex_set_handling(FEX_UNDERFLOW | FEX_OVERFLOW, FEX_CUSTOM,
handler);
a = b = 1.0e30f;
a *= b; /* overflow; will be wrapped to a moderate number */
printf("%g\n", a);
a /= b;
printf("%g\n", a);
a /= b; /* underflow; will wrap back */
printf("%g\n", a);
x = y = 1.0e300;
x *= y; /* overflow; will be wrapped to a moderate number */
printf("%g\n", x);
x /= y;
printf("%g\n", x);
x /= y; /* underflow; will wrap back */
printf("%g\n", x);
return 0;
}
上述程序的输出结果类似于如下内容:
Floating point overflow at 0x00010924 main, handler: handler 0x00010928 main 159.309 1.59309e-28 Floating point underflow at 0x00010994 main, handler: handler 0x00010998 main 1 Floating point overflow at 0x000109e4 main, handler: handler 0x000109e8 main 4.14884e+137 4.14884e-163 Floating point underflow at 0x00010a4c main, handler: handler 0x00010a50 main 1
上述程序示例在 SPARC、–xarch=sse2 的 x86 以及 –xarch=386 的 x86 上有效。