IEEE 标准建议:在捕获到下溢和溢出时,系统应当为陷阱处理程序提供一种方法来替换已封装指数的结果,即,该值与下溢或溢出运算的舍入结果值相一致(指数封装在其常用范围的末端这一情况除外),从而有效地按 2 的幂缩放结果。在选择比例因子时,使下溢或溢出结果尽可能接近指数范围的中间位置处的值,这样将减少以后的计算进一步出现下溢或溢出的可能性。通过跟踪所发生的下溢或溢出的数量,程序可通过缩放最终结果来补偿封装的指数。这种下溢/溢出“计数模式”可用于在计算中产生准确结果,不然,结果将超出可用浮点格式的范围。有关更多信息,请参见 P.Sterbenz 编著的《Floating-Point Computation》。
在基于 SPARC 的系统上,当浮点指令导致捕获异常时,系统会使目标寄存器保持不变。因此,为了替换已封装指数的结果,下溢/溢出处理程序必须对指令解码、检查操作数寄存器并生成缩放结果本身。以下示例显示了一个执行这三个步骤的处理程序。
示例 4-4 对基于 SPARC 的系统替换 IEEE 捕获的下溢/溢出处理程序结果#include <stdio.h> #include <ieeefp.h> #include <math.h> #include <sunmath.h> #include <siginfo.h> #include <ucontext.h> #ifdef V8PLUS /* The upper 32 floating point registers are stored in an area pointed to by uap->uc_mcontext.xrs.xrs_prt. Note that this pointer is valid ONLY when uap->uc_mcontext.xrs.xrs_id == XRS_ID (defined in sys/procfs.h). */ #include <assert.h> #include <sys/procfs.h> #define FPxreg(x) ((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr) ->pr_un.pr_v8p.pr_xfr.pr_regs[(x)] #endif #define FPreg(x) uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[(x)] /* * Supply the IEEE 754 default result for trapped under/overflow */ void ieee_trapped_default(int sig, siginfo_t *sip, ucontext_t *uap) { unsigned instr, opf, rs1, rs2, rd; long double qs1, qs2, qd, qscl; double ds1, ds2, dd, dscl; float fs1, fs2, fd, fscl; /* get the instruction that caused the exception */ instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; /* extract the opcode and source and destination register numbers */ opf = (instr >> 5) & 0x1ff; rs1 = (instr >> 14) & 0x1f; rs2 = instr & 0x1f; rd = (instr >> 25) & 0x1f; /* get the operands */ switch (opf & 3) { case 1: /* single precision */ fs1 = *(float*)&FPreg(rs1); fs2 = *(float*)&FPreg(rs2); break; case 2: /* double precision */ #ifdef V8PLUS if (rs1 & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); ds1 = *(double*)&FPxreg(rs1 & 0x1e); } else ds1 = *(double*)&FPreg(rs1); if (rs2 & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); ds2 = *(double*)&FPxreg(rs2 & 0x1e); } else ds2 = *(double*)&FPreg(rs2); #else ds1 = *(double*)&FPreg(rs1); ds2 = *(double*)&FPreg(rs2); #endif break; case 3: /* quad precision */ #ifdef V8PLUS if (rs1 & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); qs1 = *(long double*)&FPxreg(rs1 & 0x1e); } else qs1 = *(long double*)&FPreg(rs1); if (rs2 & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); qs2 = *(long double*)&FPxreg(rs2 & 0x1e); } else qs2 = *(long double*)&FPreg(rs2); #else qs1 = *(long double*)&FPreg(rs1); qs2 = *(long double*)&FPreg(rs2); #endif break; } /* set up scale factors */ if (sip->si_code == FPE_FLTOVF) { fscl = scalbnf(1.0f, -96); dscl = scalbn(1.0, -768); qscl = scalbnl(1.0, -12288); } else { fscl = scalbnf(1.0f, 96); dscl = scalbn(1.0, 768); qscl = scalbnl(1.0, 12288); } /* disable traps and generate the scaled result */ fpsetmask(0); switch (opf) { case 0x41: /* add single */ fd = fscl * (fscl * fs1 + fscl * fs2); break; case 0x42: /* add double */ dd = dscl * (dscl * ds1 + dscl * ds2); break; case 0x43: /* add quad */ qd = qscl * (qscl * qs1 + qscl * qs2); break; case 0x45: /* subtract single */ fd = fscl * (fscl * fs1 - fscl * fs2); break; case 0x46: /* subtract double */ dd = dscl * (dscl * ds1 - dscl * ds2); break; case 0x47: /* subtract quad */ qd = qscl * (qscl * qs1 - qscl * qs2); break; case 0x49: /* multiply single */ fd = (fscl * fs1) * (fscl * fs2); break; case 0x4a: /* multiply double */ dd = (dscl * ds1) * (dscl * ds2); break; case 0x4b: /* multiply quad */ qd = (qscl * qs1) * (qscl * qs2); break; case 0x4d: /* divide single */ fd = (fscl * fs1) / (fs2 / fscl); break; case 0x4e: /* divide double */ dd = (dscl * ds1) / (ds2 / dscl); break; case 0x4f: /* divide quad */ qd = (qscl * qs1) / (qs2 / dscl); break; case 0xc6: /* convert double to single */ fd = (float) (fscl * (fscl * ds1)); break; case 0xc7: /* convert quad to single */ fd = (float) (fscl * (fscl * qs1)); break; case 0xcb: /* convert quad to double */ dd = (double) (dscl * (dscl * qs1)); break; } /* store the result in the destination */ if (opf & 0x80) { /* conversion operation */ if (opf == 0xcb) { /* convert quad to double */ #ifdef V8PLUS if (rd & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); *(double*)&FPxreg(rd & 0x1e) = dd; } else *(double*)&FPreg(rd) = dd; #else *(double*)&FPreg(rd) = dd; #endif } else /* convert quad/double to single */ *(float*)&FPreg(rd) = fd; } else { /* arithmetic operation */ switch (opf & 3) { case 1: /* single precision */ *(float*)&FPreg(rd) = fd; break; case 2: /* double precision */ #ifdef V8PLUS if (rd & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); *(double*)&FPxreg(rd & 0x1e) = dd; } else *(double*)&FPreg(rd) = dd; #else *(double*)&FPreg(rd) = dd; #endif break; case 3: /* quad precision */ #ifdef V8PLUS if (rd & 1) { assert(uap->uc_mcontext.xrs.xrs_id == XRS_ID); *(long double*)&FPxreg(rd & 0x1e) = qd; } else *(long double*)&FPreg(rd & 0x1e) = qd; #else *(long double*)&FPreg(rd & 0x1e) = qd; #endif break; } } } int main() { volatile float a, b; volatile double x, y; ieee_handler("set", "underflow", ieee_trapped_default); ieee_handler("set", "overflow", ieee_trapped_default); a = b = 1.0e30f; a *= b; /* overflow; will be wrapped to a moderate number */ printf( "%g\n", a ); a /= b; printf( "%g\n", a ); a /= b; /* underflow; will wrap back */ printf( "%g\n", a ); x = y = 1.0e300; x *= y; /* overflow; will be wrapped to a moderate number */ printf( "%g\n", x ); x /= y; printf( "%g\n", x ); x /= y; /* underflow; will wrap back */ printf( "%g\n", x ); ieee_retrospective(stdout); return 0; }
在上例中,变量 a、b、x 和 y 都已被声明为 volatile,其目的仅在于防止编译器在编译时对 a * b 等求值。在典型的用法中,将不需要进行 volatile 声明。
上述程序的输出结果如下所示:
159.309 1.59309e-28 1 4.14884e+137 4.14884e-163 1 Note: IEEE floating-point exception traps enabled: underflow; overflow; See the Numerical Computation Guide, ieee_handler(3M)
在基于 x86 的系统上,当浮点指令导致捕获到下溢或溢出,而且它的目标是寄存器时,浮点硬件会提供已封装指数的结果。但是,当出现捕获到的有关浮点存储指令的下溢或溢出时,如果该存储指令属于存储并弹出指令,则该硬件将在不完成存储且不弹出栈的情况下进行捕获。因此,为了实现计数模式,当出现有关存储指令的陷阱时,下溢/溢出处理程序必须生成缩放结果并修复栈。Example 4–5 阐释了此类处理程序。
示例 4-5 对基于 x86 的系统替换 IEEE 捕获的下溢/溢出处理程序结果#include <stdio.h> #include <ieeefp.h> #include <math.h> #include <sunmath.h> #include <siginfo.h> #include <ucontext.h> /* offsets into the saved fp environment */ #define CW 0 /* control word */ #define SW 1 /* status word */ #define TW 2 /* tag word */ #define OP 4 /* opcode */ #define EA 5 /* operand address */ #define FPenv(x) uap->uc_mcontext.fpregs.fp_reg_set. fpchip_state.state[(x)] #define FPreg(x) *(long double *)(10*(x)+(char*)&uap-> uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7])/* * Supply the IEEE 754 default result for trapped under/overflow */ void ieee_trapped_default(int sig, siginfo_t *sip, ucontext_t *uap) { double dscl; float fscl; unsigned sw, op, top; int mask, e; /* preserve flags for untrapped exceptions */ sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; FPenv(SW) |= (sw & (FPenv(CW) & 0x3f)); /* if the excepting instruction is a store, scale the stack top, store it, and pop the stack if need be */ fpsetmask(0); op = FPenv(OP) >> 16; switch (op & 0x7f8) { case 0x110: case 0x118: case 0x150: case 0x158: case 0x190: case 0x198: fscl = scalbnf(1.0f, (sip->si_code == FPE_FLTOVF)? -96 : 96); *(float *)FPenv(EA) = (FPreg(0) * fscl) * fscl; if (op & 8) { /* pop the stack */ FPreg(0) = FPreg(1); FPreg(1) = FPreg(2); FPreg(2) = FPreg(3); FPreg(3) = FPreg(4); FPreg(4) = FPreg(5); FPreg(5) = FPreg(6); FPreg(6) = FPreg(7); top = (FPenv(SW) >> 10) & 0xe; FPenv(TW) |= (3 << top); top = (top + 2) & 0xe; FPenv(SW) = (FPenv(SW) & ~0x3800) | (top << 10); } break; case 0x510: case 0x518: case 0x550: case 0x558: case 0x590: case 0x598: dscl = scalbn(1.0, (sip->si_code == FPE_FLTOVF)? -768 : 768); *(double *)FPenv(EA) = (FPreg(0) * dscl) * dscl; if (op & 8) { /* pop the stack */ FPreg(0) = FPreg(1); FPreg(1) = FPreg(2); FPreg(2) = FPreg(3); FPreg(3) = FPreg(4); FPreg(4) = FPreg(5); FPreg(5) = FPreg(6); FPreg(6) = FPreg(7); top = (FPenv(SW) >> 10) & 0xe; FPenv(TW) |= (3 << top); top = (top + 2) & 0xe; FPenv(SW) = (FPenv(SW) & ~0x3800) | (top << 10); } break; } } int main() { volatile float a, b; volatile double x, y; ieee_handler("set", "underflow", ieee_trapped_default); ieee_handler("set", "overflow", ieee_trapped_default); a = b = 1.0e30f; a *= b; printf( "%g\n", a ); a /= b; printf( "%g\n", a ); a /= b; printf( "%g\n", a ); x = y = 1.0e300; x *= y; printf( "%g\n", x ); x /= y; printf( "%g\n", x ); x /= y; printf( "%g\n", x ); ieee_retrospective(stdout); return 0; }
正如在基于 SPARC 的系统上并使用 –xarch=386 进行编译一样,上述程序在 x86 上的输出结果是:
159.309 1.59309e-28 1 4.14884e+137 4.14884e-163 1 Note: IEEE floating-point exception traps enabled: underflow; overflow; See the Numerical Computation Guide, ieee_handler(3M)
C/C++ 程序可以使用 libm 中的 fex_set_handling 函数来为下溢和溢出安装 FEX_CUSTOM 处理程序。在基于 SPARC 的系统上,提供给类似处理程序的信息总是包括导致了异常的运算以及操作数,这些信息足以允许该处理程序计算 IEEE 已封装指数的结果,如上所示。在基于 x86 的系统上,可用信息并不总是指出导致了异常的特定运算;例如,当异常由某个超越指令引发时,info->op 参数将设置为 fex_other。(有关定义,请参见 fenv.h 文件。)而且,x86 硬件自动提供已封装指数的结果,如果异常指令的目标是浮点寄存器,这将覆盖某个操作数。
幸运的是,fex_set_handling 功能为在 FEX_CUSTOM 模式下安装的处理程序提供了一种简单的方法,用来替换下溢或溢出运算的 IEEE 已封装指数的结果。当捕获到其中的任一异常时,该处理程序会进行如下设置:
info->res.type = fex_nodata;
以指出应当提供已封装指数的结果。下面的示例显示了此类处理程序:
#include <stdio.h> #include <fenv.h> void handler(int ex, fex_info_t *info) { info->res.type = fex_nodata; } int main() { volatile float a, b; volatile double x, y; fex_set_log(stderr); fex_set_handling(FEX_UNDERFLOW | FEX_OVERFLOW, FEX_CUSTOM, handler); a = b = 1.0e30f; a *= b; /* overflow; will be wrapped to a moderate number */ printf("%g\n", a); a /= b; printf("%g\n", a); a /= b; /* underflow; will wrap back */ printf("%g\n", a); x = y = 1.0e300; x *= y; /* overflow; will be wrapped to a moderate number */ printf("%g\n", x); x /= y; printf("%g\n", x); x /= y; /* underflow; will wrap back */ printf("%g\n", x); return 0; }
上述程序的输出结果类似于如下内容:
Floating point overflow at 0x00010924 main, handler: handler 0x00010928 main 159.309 1.59309e-28 Floating point underflow at 0x00010994 main, handler: handler 0x00010998 main 1 Floating point overflow at 0x000109e4 main, handler: handler 0x000109e8 main 4.14884e+137 4.14884e-163 Floating point underflow at 0x00010a4c main, handler: handler 0x00010a50 main 1
上述程序示例在 SPARC、–xarch=sse2 的 x86 以及 –xarch=386 的 x86 上有效。