diff options
author | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 12:17:53 -0700 |
---|---|---|
committer | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 15:44:42 -0700 |
commit | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch) | |
tree | 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/arch/parisc/math-emu | |
parent | 98260f3884f4a202f9ca5eabed40b1354c489b29 (diff) |
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base.
It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and
the base is:
commit 0917f823c59692d751951bf5ea699a2d1e2f26a2
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat Jul 25 12:13:34 2015 +0200
Prepare v4.1.3-rt3
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
We lose all the git history this way and it's not good. We
should apply another opnfv project repo in future.
Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/arch/parisc/math-emu')
37 files changed, 15183 insertions, 0 deletions
diff --git a/kernel/arch/parisc/math-emu/Makefile b/kernel/arch/parisc/math-emu/Makefile new file mode 100644 index 000000000..0bd63b08a --- /dev/null +++ b/kernel/arch/parisc/math-emu/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for the linux/parisc floating point code +# + +# See arch/parisc/math-emu/README +ccflags-y := -Wno-parentheses -Wno-implicit-function-declaration \ + -Wno-uninitialized -Wno-strict-prototypes -Wno-return-type \ + -Wno-implicit-int + +obj-y := frnd.o driver.o decode_exc.o fpudispatch.o denormal.o \ + dfmpy.o sfmpy.o sfsqrt.o dfsqrt.o dfadd.o fmpyfadd.o \ + sfadd.o dfsub.o sfsub.o fcnvfxt.o fcnvff.o fcnvxf.o \ + fcnvfx.o fcnvuf.o fcnvfu.o fcnvfut.o dfdiv.o sfdiv.o \ + dfrem.o sfrem.o dfcmp.o sfcmp.o + +# Math emulation code beyond the FRND is required for 712/80i and +# other very old or stripped-down PA-RISC CPUs -- not currently supported + +obj-$(CONFIG_MATH_EMULATION) += unimplemented-math-emulation.o diff --git a/kernel/arch/parisc/math-emu/README b/kernel/arch/parisc/math-emu/README new file mode 100644 index 000000000..1a0124ef0 --- /dev/null +++ b/kernel/arch/parisc/math-emu/README @@ -0,0 +1,11 @@ +All files except driver.c are snapshots from the HP-UX kernel. They've +been modified as little as possible. Even though they don't fit the +Linux coding style, please leave them in their funny format just in case +someone in the future, with access to HP-UX source code, is generous +enough to update our copies with later changes from HP-UX -- it'll +make their 'diff' job easier if our code is relatively unmodified. + +Required Disclaimer: Hewlett-Packard makes no implied or expressed +warranties about this code nor any promises to maintain or test it +in any way. This copy of this snapshot is no longer the property +of Hewlett-Packard. diff --git a/kernel/arch/parisc/math-emu/cnv_float.h b/kernel/arch/parisc/math-emu/cnv_float.h new file mode 100644 index 000000000..933423fa5 --- /dev/null +++ b/kernel/arch/parisc/math-emu/cnv_float.h @@ -0,0 +1,376 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + +/* + * Some more constants + */ +#define SGL_FX_MAX_EXP 30 +#define DBL_FX_MAX_EXP 62 +#define QUAD_FX_MAX_EXP 126 + +#define Dintp1(object) (object) +#define Dintp2(object) (object) + +#define Duintp1(object) (object) +#define Duintp2(object) (object) + +#define Qintp0(object) (object) +#define Qintp1(object) (object) +#define Qintp2(object) (object) +#define Qintp3(object) (object) + + +/* + * These macros will be used specifically by the convert instructions. + * + * + * Single format macros + */ + +#define Sgl_to_dbl_exponent(src_exponent,dest) \ + Deposit_dexponent(dest,src_exponent+(DBL_BIAS-SGL_BIAS)) + +#define Sgl_to_dbl_mantissa(src_mantissa,destA,destB) \ + Deposit_dmantissap1(destA,src_mantissa>>3); \ + Dmantissap2(destB) = src_mantissa << 29 + +#define Sgl_isinexact_to_fix(sgl_value,exponent) \ + ((exponent < (SGL_P - 1)) ? \ + (Sall(sgl_value) << (SGL_EXP_LENGTH + 1 + exponent)) : FALSE) + +#define Int_isinexact_to_sgl(int_value) (int_value << 33 - SGL_EXP_LENGTH) + +#define Sgl_roundnearest_from_int(int_value,sgl_value) \ + if (int_value & 1<<(SGL_EXP_LENGTH - 2)) /* round bit */ \ + if ((int_value << 34 - SGL_EXP_LENGTH) || Slow(sgl_value)) \ + Sall(sgl_value)++ + +#define Dint_isinexact_to_sgl(dint_valueA,dint_valueB) \ + ((Dintp1(dint_valueA) << 33 - SGL_EXP_LENGTH) || Dintp2(dint_valueB)) + +#define Sgl_roundnearest_from_dint(dint_valueA,dint_valueB,sgl_value) \ + if (Dintp1(dint_valueA) & 1<<(SGL_EXP_LENGTH - 2)) \ + if ((Dintp1(dint_valueA) << 34 - SGL_EXP_LENGTH) || \ + Dintp2(dint_valueB) || Slow(sgl_value)) Sall(sgl_value)++ + +#define Dint_isinexact_to_dbl(dint_value) \ + (Dintp2(dint_value) << 33 - DBL_EXP_LENGTH) + +#define Dbl_roundnearest_from_dint(dint_opndB,dbl_opndA,dbl_opndB) \ + if (Dintp2(dint_opndB) & 1<<(DBL_EXP_LENGTH - 2)) \ + if ((Dintp2(dint_opndB) << 34 - DBL_EXP_LENGTH) || Dlowp2(dbl_opndB)) \ + if ((++Dallp2(dbl_opndB))==0) Dallp1(dbl_opndA)++ + +#define Sgl_isone_roundbit(sgl_value,exponent) \ + ((Sall(sgl_value) << (SGL_EXP_LENGTH + 1 + exponent)) >> 31) + +#define Sgl_isone_stickybit(sgl_value,exponent) \ + (exponent < (SGL_P - 2) ? \ + Sall(sgl_value) << (SGL_EXP_LENGTH + 2 + exponent) : FALSE) + + +/* + * Double format macros + */ + +#define Dbl_to_sgl_exponent(src_exponent,dest) \ + dest = src_exponent + (SGL_BIAS - DBL_BIAS) + +#define Dbl_to_sgl_mantissa(srcA,srcB,dest,inexact,guard,sticky,odd) \ + Shiftdouble(Dmantissap1(srcA),Dmantissap2(srcB),29,dest); \ + guard = Dbit3p2(srcB); \ + sticky = Dallp2(srcB)<<4; \ + inexact = guard | sticky; \ + odd = Dbit2p2(srcB) + +#define Dbl_to_sgl_denormalized(srcA,srcB,exp,dest,inexact,guard,sticky,odd,tiny) \ + Deposit_dexponent(srcA,1); \ + tiny = TRUE; \ + if (exp >= -2) { \ + if (exp == 0) { \ + inexact = Dallp2(srcB) << 3; \ + guard = inexact >> 31; \ + sticky = inexact << 1; \ + Shiftdouble(Dmantissap1(srcA),Dmantissap2(srcB),29,dest); \ + odd = dest << 31; \ + if (inexact) { \ + switch(Rounding_mode()) { \ + case ROUNDPLUS: \ + if (Dbl_iszero_sign(srcA)) { \ + dest++; \ + if (Sgl_isone_hidden(dest)) \ + tiny = FALSE; \ + dest--; \ + } \ + break; \ + case ROUNDMINUS: \ + if (Dbl_isone_sign(srcA)) { \ + dest++; \ + if (Sgl_isone_hidden(dest)) \ + tiny = FALSE; \ + dest--; \ + } \ + break; \ + case ROUNDNEAREST: \ + if (guard && (sticky || odd)) { \ + dest++; \ + if (Sgl_isone_hidden(dest)) \ + tiny = FALSE; \ + dest--; \ + } \ + break; \ + } \ + } \ + /* shift right by one to get correct result */ \ + guard = odd; \ + sticky = inexact; \ + inexact |= guard; \ + dest >>= 1; \ + Deposit_dsign(srcA,0); \ + Shiftdouble(Dallp1(srcA),Dallp2(srcB),30,dest); \ + odd = dest << 31; \ + } \ + else { \ + inexact = Dallp2(srcB) << (2 + exp); \ + guard = inexact >> 31; \ + sticky = inexact << 1; \ + Deposit_dsign(srcA,0); \ + if (exp == -2) dest = Dallp1(srcA); \ + else Variable_shift_double(Dallp1(srcA),Dallp2(srcB),30-exp,dest); \ + odd = dest << 31; \ + } \ + } \ + else { \ + Deposit_dsign(srcA,0); \ + if (exp > (1 - SGL_P)) { \ + dest = Dallp1(srcA) >> (- 2 - exp); \ + inexact = Dallp1(srcA) << (34 + exp); \ + guard = inexact >> 31; \ + sticky = (inexact << 1) | Dallp2(srcB); \ + inexact |= Dallp2(srcB); \ + odd = dest << 31; \ + } \ + else { \ + dest = 0; \ + inexact = Dallp1(srcA) | Dallp2(srcB); \ + if (exp == (1 - SGL_P)) { \ + guard = Dhidden(srcA); \ + sticky = Dmantissap1(srcA) | Dallp2(srcB); \ + } \ + else { \ + guard = 0; \ + sticky = inexact; \ + } \ + odd = 0; \ + } \ + } \ + exp = 0 + +#define Dbl_isinexact_to_fix(dbl_valueA,dbl_valueB,exponent) \ + (exponent < (DBL_P-33) ? \ + Dallp2(dbl_valueB) || Dallp1(dbl_valueA) << (DBL_EXP_LENGTH+1+exponent) : \ + (exponent < (DBL_P-1) ? Dallp2(dbl_valueB) << (exponent + (33-DBL_P)) : \ + FALSE)) + +#define Dbl_isoverflow_to_int(exponent,dbl_valueA,dbl_valueB) \ + ((exponent > SGL_FX_MAX_EXP + 1) || Dsign(dbl_valueA)==0 || \ + Dmantissap1(dbl_valueA)!=0 || (Dallp2(dbl_valueB)>>21)!=0 ) + +#define Dbl_isone_roundbit(dbl_valueA,dbl_valueB,exponent) \ + ((exponent < (DBL_P - 33) ? \ + Dallp1(dbl_valueA) >> ((30 - DBL_EXP_LENGTH) - exponent) : \ + Dallp2(dbl_valueB) >> ((DBL_P - 2) - exponent)) & 1) + +#define Dbl_isone_stickybit(dbl_valueA,dbl_valueB,exponent) \ + (exponent < (DBL_P-34) ? \ + (Dallp2(dbl_valueB) || Dallp1(dbl_valueA)<<(DBL_EXP_LENGTH+2+exponent)) : \ + (exponent<(DBL_P-2) ? (Dallp2(dbl_valueB) << (exponent + (34-DBL_P))) : \ + FALSE)) + + +/* Int macros */ + +#define Int_from_sgl_mantissa(sgl_value,exponent) \ + Sall(sgl_value) = \ + (unsigned)(Sall(sgl_value) << SGL_EXP_LENGTH)>>(31 - exponent) + +#define Int_from_dbl_mantissa(dbl_valueA,dbl_valueB,exponent) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),22,Dallp1(dbl_valueA)); \ + if (exponent < 31) Dallp1(dbl_valueA) >>= 30 - exponent; \ + else Dallp1(dbl_valueA) <<= 1 + +#define Int_negate(int_value) int_value = -int_value + + +/* Dint macros */ + +#define Dint_from_sgl_mantissa(sgl_value,exponent,dresultA,dresultB) \ + {Sall(sgl_value) <<= SGL_EXP_LENGTH; /* left-justify */ \ + if (exponent <= 31) { \ + Dintp1(dresultA) = 0; \ + Dintp2(dresultB) = (unsigned)Sall(sgl_value) >> (31 - exponent); \ + } \ + else { \ + Dintp1(dresultA) = Sall(sgl_value) >> (63 - exponent); \ + Dintp2(dresultB) = Sall(sgl_value) << (exponent - 31); \ + }} + + +#define Dint_from_dbl_mantissa(dbl_valueA,dbl_valueB,exponent,destA,destB) \ + {if (exponent < 32) { \ + Dintp1(destA) = 0; \ + if (exponent <= 20) \ + Dintp2(destB) = Dallp1(dbl_valueA) >> 20-exponent; \ + else Variable_shift_double(Dallp1(dbl_valueA),Dallp2(dbl_valueB), \ + 52-exponent,Dintp2(destB)); \ + } \ + else { \ + if (exponent <= 52) { \ + Dintp1(destA) = Dallp1(dbl_valueA) >> 52-exponent; \ + if (exponent == 52) Dintp2(destB) = Dallp2(dbl_valueB); \ + else Variable_shift_double(Dallp1(dbl_valueA),Dallp2(dbl_valueB), \ + 52-exponent,Dintp2(destB)); \ + } \ + else { \ + Variable_shift_double(Dallp1(dbl_valueA),Dallp2(dbl_valueB), \ + 84-exponent,Dintp1(destA)); \ + Dintp2(destB) = Dallp2(dbl_valueB) << exponent-52; \ + } \ + }} + +#define Dint_setzero(dresultA,dresultB) \ + Dintp1(dresultA) = 0; \ + Dintp2(dresultB) = 0 + +#define Dint_setone_sign(dresultA,dresultB) \ + Dintp1(dresultA) = ~Dintp1(dresultA); \ + if ((Dintp2(dresultB) = -Dintp2(dresultB)) == 0) Dintp1(dresultA)++ + +#define Dint_set_minint(dresultA,dresultB) \ + Dintp1(dresultA) = (unsigned int)1<<31; \ + Dintp2(dresultB) = 0 + +#define Dint_isone_lowp2(dresultB) (Dintp2(dresultB) & 01) + +#define Dint_increment(dresultA,dresultB) \ + if ((++Dintp2(dresultB))==0) Dintp1(dresultA)++ + +#define Dint_decrement(dresultA,dresultB) \ + if ((Dintp2(dresultB)--)==0) Dintp1(dresultA)-- + +#define Dint_negate(dresultA,dresultB) \ + Dintp1(dresultA) = ~Dintp1(dresultA); \ + if ((Dintp2(dresultB) = -Dintp2(dresultB))==0) Dintp1(dresultA)++ + +#define Dint_copyfromptr(src,destA,destB) \ + Dintp1(destA) = src->wd0; \ + Dintp2(destB) = src->wd1 +#define Dint_copytoptr(srcA,srcB,dest) \ + dest->wd0 = Dintp1(srcA); \ + dest->wd1 = Dintp2(srcB) + + +/* other macros */ + +#define Find_ms_one_bit(value, position) \ + { \ + int var; \ + for (var=8; var >=1; var >>= 1) { \ + if (value >> 32 - position) \ + position -= var; \ + else position += var; \ + } \ + if ((value >> 32 - position) == 0) \ + position--; \ + else position -= 2; \ + } + + +/* + * Unsigned int macros + */ +#define Duint_copyfromptr(src,destA,destB) \ + Dint_copyfromptr(src,destA,destB) +#define Duint_copytoptr(srcA,srcB,dest) \ + Dint_copytoptr(srcA,srcB,dest) + +#define Suint_isinexact_to_sgl(int_value) \ + (int_value << 32 - SGL_EXP_LENGTH) + +#define Sgl_roundnearest_from_suint(suint_value,sgl_value) \ + if (suint_value & 1<<(SGL_EXP_LENGTH - 1)) /* round bit */ \ + if ((suint_value << 33 - SGL_EXP_LENGTH) || Slow(sgl_value)) \ + Sall(sgl_value)++ + +#define Duint_isinexact_to_sgl(duint_valueA,duint_valueB) \ + ((Duintp1(duint_valueA) << 32 - SGL_EXP_LENGTH) || Duintp2(duint_valueB)) + +#define Sgl_roundnearest_from_duint(duint_valueA,duint_valueB,sgl_value) \ + if (Duintp1(duint_valueA) & 1<<(SGL_EXP_LENGTH - 1)) \ + if ((Duintp1(duint_valueA) << 33 - SGL_EXP_LENGTH) || \ + Duintp2(duint_valueB) || Slow(sgl_value)) Sall(sgl_value)++ + +#define Duint_isinexact_to_dbl(duint_value) \ + (Duintp2(duint_value) << 32 - DBL_EXP_LENGTH) + +#define Dbl_roundnearest_from_duint(duint_opndB,dbl_opndA,dbl_opndB) \ + if (Duintp2(duint_opndB) & 1<<(DBL_EXP_LENGTH - 1)) \ + if ((Duintp2(duint_opndB) << 33 - DBL_EXP_LENGTH) || Dlowp2(dbl_opndB)) \ + if ((++Dallp2(dbl_opndB))==0) Dallp1(dbl_opndA)++ + +#define Suint_from_sgl_mantissa(src,exponent,result) \ + Sall(result) = (unsigned)(Sall(src) << SGL_EXP_LENGTH)>>(31 - exponent) + +#define Sgl_isinexact_to_unsigned(sgl_value,exponent) \ + Sgl_isinexact_to_fix(sgl_value,exponent) + +#define Duint_from_sgl_mantissa(sgl_value,exponent,dresultA,dresultB) \ + {unsigned int val = Sall(sgl_value) << SGL_EXP_LENGTH; \ + if (exponent <= 31) { \ + Dintp1(dresultA) = 0; \ + Dintp2(dresultB) = val >> (31 - exponent); \ + } \ + else { \ + Dintp1(dresultA) = val >> (63 - exponent); \ + Dintp2(dresultB) = exponent <= 62 ? val << (exponent - 31) : 0; \ + } \ + } + +#define Duint_setzero(dresultA,dresultB) \ + Dint_setzero(dresultA,dresultB) + +#define Duint_increment(dresultA,dresultB) Dint_increment(dresultA,dresultB) + +#define Duint_isone_lowp2(dresultB) Dint_isone_lowp2(dresultB) + +#define Suint_from_dbl_mantissa(srcA,srcB,exponent,dest) \ + Shiftdouble(Dallp1(srcA),Dallp2(srcB),21,dest); \ + dest = (unsigned)dest >> 31 - exponent + +#define Dbl_isinexact_to_unsigned(dbl_valueA,dbl_valueB,exponent) \ + Dbl_isinexact_to_fix(dbl_valueA,dbl_valueB,exponent) + +#define Duint_from_dbl_mantissa(dbl_valueA,dbl_valueB,exponent,destA,destB) \ + Dint_from_dbl_mantissa(dbl_valueA,dbl_valueB,exponent,destA,destB) diff --git a/kernel/arch/parisc/math-emu/dbl_float.h b/kernel/arch/parisc/math-emu/dbl_float.h new file mode 100644 index 000000000..0c2fa9a95 --- /dev/null +++ b/kernel/arch/parisc/math-emu/dbl_float.h @@ -0,0 +1,847 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + +/* 32-bit word grabbing functions */ +#define Dbl_firstword(value) Dallp1(value) +#define Dbl_secondword(value) Dallp2(value) +#define Dbl_thirdword(value) dummy_location +#define Dbl_fourthword(value) dummy_location + +#define Dbl_sign(object) Dsign(object) +#define Dbl_exponent(object) Dexponent(object) +#define Dbl_signexponent(object) Dsignexponent(object) +#define Dbl_mantissap1(object) Dmantissap1(object) +#define Dbl_mantissap2(object) Dmantissap2(object) +#define Dbl_exponentmantissap1(object) Dexponentmantissap1(object) +#define Dbl_allp1(object) Dallp1(object) +#define Dbl_allp2(object) Dallp2(object) + +/* dbl_and_signs ANDs the sign bits of each argument and puts the result + * into the first argument. dbl_or_signs ors those same sign bits */ +#define Dbl_and_signs( src1dst, src2) \ + Dallp1(src1dst) = (Dallp1(src2)|~((unsigned int)1<<31)) & Dallp1(src1dst) +#define Dbl_or_signs( src1dst, src2) \ + Dallp1(src1dst) = (Dallp1(src2)&((unsigned int)1<<31)) | Dallp1(src1dst) + +/* The hidden bit is always the low bit of the exponent */ +#define Dbl_clear_exponent_set_hidden(srcdst) Deposit_dexponent(srcdst,1) +#define Dbl_clear_signexponent_set_hidden(srcdst) \ + Deposit_dsignexponent(srcdst,1) +#define Dbl_clear_sign(srcdst) Dallp1(srcdst) &= ~((unsigned int)1<<31) +#define Dbl_clear_signexponent(srcdst) \ + Dallp1(srcdst) &= Dmantissap1((unsigned int)-1) + +/* Exponent field for doubles has already been cleared and may be + * included in the shift. Here we need to generate two double width + * variable shifts. The insignificant bits can be ignored. + * MTSAR f(varamount) + * VSHD srcdst.high,srcdst.low => srcdst.low + * VSHD 0,srcdst.high => srcdst.high + * This is very difficult to model with C expressions since the shift amount + * could exceed 32. */ +/* varamount must be less than 64 */ +#define Dbl_rightshift(srcdstA, srcdstB, varamount) \ + {if((varamount) >= 32) { \ + Dallp2(srcdstB) = Dallp1(srcdstA) >> (varamount-32); \ + Dallp1(srcdstA)=0; \ + } \ + else if(varamount > 0) { \ + Variable_shift_double(Dallp1(srcdstA), Dallp2(srcdstB), \ + (varamount), Dallp2(srcdstB)); \ + Dallp1(srcdstA) >>= varamount; \ + } } +/* varamount must be less than 64 */ +#define Dbl_rightshift_exponentmantissa(srcdstA, srcdstB, varamount) \ + {if((varamount) >= 32) { \ + Dallp2(srcdstB) = Dexponentmantissap1(srcdstA) >> (varamount-32); \ + Dallp1(srcdstA) &= ((unsigned int)1<<31); /* clear expmant field */ \ + } \ + else if(varamount > 0) { \ + Variable_shift_double(Dexponentmantissap1(srcdstA), Dallp2(srcdstB), \ + (varamount), Dallp2(srcdstB)); \ + Deposit_dexponentmantissap1(srcdstA, \ + (Dexponentmantissap1(srcdstA)>>varamount)); \ + } } +/* varamount must be less than 64 */ +#define Dbl_leftshift(srcdstA, srcdstB, varamount) \ + {if((varamount) >= 32) { \ + Dallp1(srcdstA) = Dallp2(srcdstB) << (varamount-32); \ + Dallp2(srcdstB)=0; \ + } \ + else { \ + if ((varamount) > 0) { \ + Dallp1(srcdstA) = (Dallp1(srcdstA) << (varamount)) | \ + (Dallp2(srcdstB) >> (32-(varamount))); \ + Dallp2(srcdstB) <<= varamount; \ + } \ + } } +#define Dbl_leftshiftby1_withextent(lefta,leftb,right,resulta,resultb) \ + Shiftdouble(Dallp1(lefta), Dallp2(leftb), 31, Dallp1(resulta)); \ + Shiftdouble(Dallp2(leftb), Extall(right), 31, Dallp2(resultb)) + +#define Dbl_rightshiftby1_withextent(leftb,right,dst) \ + Extall(dst) = (Dallp2(leftb) << 31) | ((unsigned int)Extall(right) >> 1) | \ + Extlow(right) + +#define Dbl_arithrightshiftby1(srcdstA,srcdstB) \ + Shiftdouble(Dallp1(srcdstA),Dallp2(srcdstB),1,Dallp2(srcdstB));\ + Dallp1(srcdstA) = (int)Dallp1(srcdstA) >> 1 + +/* Sign extend the sign bit with an integer destination */ +#define Dbl_signextendedsign(value) Dsignedsign(value) + +#define Dbl_isone_hidden(dbl_value) (Is_dhidden(dbl_value)!=0) +/* Singles and doubles may include the sign and exponent fields. The + * hidden bit and the hidden overflow must be included. */ +#define Dbl_increment(dbl_valueA,dbl_valueB) \ + if( (Dallp2(dbl_valueB) += 1) == 0 ) Dallp1(dbl_valueA) += 1 +#define Dbl_increment_mantissa(dbl_valueA,dbl_valueB) \ + if( (Dmantissap2(dbl_valueB) += 1) == 0 ) \ + Deposit_dmantissap1(dbl_valueA,dbl_valueA+1) +#define Dbl_decrement(dbl_valueA,dbl_valueB) \ + if( Dallp2(dbl_valueB) == 0 ) Dallp1(dbl_valueA) -= 1; \ + Dallp2(dbl_valueB) -= 1 + +#define Dbl_isone_sign(dbl_value) (Is_dsign(dbl_value)!=0) +#define Dbl_isone_hiddenoverflow(dbl_value) (Is_dhiddenoverflow(dbl_value)!=0) +#define Dbl_isone_lowmantissap1(dbl_valueA) (Is_dlowp1(dbl_valueA)!=0) +#define Dbl_isone_lowmantissap2(dbl_valueB) (Is_dlowp2(dbl_valueB)!=0) +#define Dbl_isone_signaling(dbl_value) (Is_dsignaling(dbl_value)!=0) +#define Dbl_is_signalingnan(dbl_value) (Dsignalingnan(dbl_value)==0xfff) +#define Dbl_isnotzero(dbl_valueA,dbl_valueB) \ + (Dallp1(dbl_valueA) || Dallp2(dbl_valueB)) +#define Dbl_isnotzero_hiddenhigh7mantissa(dbl_value) \ + (Dhiddenhigh7mantissa(dbl_value)!=0) +#define Dbl_isnotzero_exponent(dbl_value) (Dexponent(dbl_value)!=0) +#define Dbl_isnotzero_mantissa(dbl_valueA,dbl_valueB) \ + (Dmantissap1(dbl_valueA) || Dmantissap2(dbl_valueB)) +#define Dbl_isnotzero_mantissap1(dbl_valueA) (Dmantissap1(dbl_valueA)!=0) +#define Dbl_isnotzero_mantissap2(dbl_valueB) (Dmantissap2(dbl_valueB)!=0) +#define Dbl_isnotzero_exponentmantissa(dbl_valueA,dbl_valueB) \ + (Dexponentmantissap1(dbl_valueA) || Dmantissap2(dbl_valueB)) +#define Dbl_isnotzero_low4p2(dbl_value) (Dlow4p2(dbl_value)!=0) +#define Dbl_iszero(dbl_valueA,dbl_valueB) (Dallp1(dbl_valueA)==0 && \ + Dallp2(dbl_valueB)==0) +#define Dbl_iszero_allp1(dbl_value) (Dallp1(dbl_value)==0) +#define Dbl_iszero_allp2(dbl_value) (Dallp2(dbl_value)==0) +#define Dbl_iszero_hidden(dbl_value) (Is_dhidden(dbl_value)==0) +#define Dbl_iszero_hiddenoverflow(dbl_value) (Is_dhiddenoverflow(dbl_value)==0) +#define Dbl_iszero_hiddenhigh3mantissa(dbl_value) \ + (Dhiddenhigh3mantissa(dbl_value)==0) +#define Dbl_iszero_hiddenhigh7mantissa(dbl_value) \ + (Dhiddenhigh7mantissa(dbl_value)==0) +#define Dbl_iszero_sign(dbl_value) (Is_dsign(dbl_value)==0) +#define Dbl_iszero_exponent(dbl_value) (Dexponent(dbl_value)==0) +#define Dbl_iszero_mantissa(dbl_valueA,dbl_valueB) \ + (Dmantissap1(dbl_valueA)==0 && Dmantissap2(dbl_valueB)==0) +#define Dbl_iszero_exponentmantissa(dbl_valueA,dbl_valueB) \ + (Dexponentmantissap1(dbl_valueA)==0 && Dmantissap2(dbl_valueB)==0) +#define Dbl_isinfinity_exponent(dbl_value) \ + (Dexponent(dbl_value)==DBL_INFINITY_EXPONENT) +#define Dbl_isnotinfinity_exponent(dbl_value) \ + (Dexponent(dbl_value)!=DBL_INFINITY_EXPONENT) +#define Dbl_isinfinity(dbl_valueA,dbl_valueB) \ + (Dexponent(dbl_valueA)==DBL_INFINITY_EXPONENT && \ + Dmantissap1(dbl_valueA)==0 && Dmantissap2(dbl_valueB)==0) +#define Dbl_isnan(dbl_valueA,dbl_valueB) \ + (Dexponent(dbl_valueA)==DBL_INFINITY_EXPONENT && \ + (Dmantissap1(dbl_valueA)!=0 || Dmantissap2(dbl_valueB)!=0)) +#define Dbl_isnotnan(dbl_valueA,dbl_valueB) \ + (Dexponent(dbl_valueA)!=DBL_INFINITY_EXPONENT || \ + (Dmantissap1(dbl_valueA)==0 && Dmantissap2(dbl_valueB)==0)) + +#define Dbl_islessthan(dbl_op1a,dbl_op1b,dbl_op2a,dbl_op2b) \ + (Dallp1(dbl_op1a) < Dallp1(dbl_op2a) || \ + (Dallp1(dbl_op1a) == Dallp1(dbl_op2a) && \ + Dallp2(dbl_op1b) < Dallp2(dbl_op2b))) +#define Dbl_isgreaterthan(dbl_op1a,dbl_op1b,dbl_op2a,dbl_op2b) \ + (Dallp1(dbl_op1a) > Dallp1(dbl_op2a) || \ + (Dallp1(dbl_op1a) == Dallp1(dbl_op2a) && \ + Dallp2(dbl_op1b) > Dallp2(dbl_op2b))) +#define Dbl_isnotlessthan(dbl_op1a,dbl_op1b,dbl_op2a,dbl_op2b) \ + (Dallp1(dbl_op1a) > Dallp1(dbl_op2a) || \ + (Dallp1(dbl_op1a) == Dallp1(dbl_op2a) && \ + Dallp2(dbl_op1b) >= Dallp2(dbl_op2b))) +#define Dbl_isnotgreaterthan(dbl_op1a,dbl_op1b,dbl_op2a,dbl_op2b) \ + (Dallp1(dbl_op1a) < Dallp1(dbl_op2a) || \ + (Dallp1(dbl_op1a) == Dallp1(dbl_op2a) && \ + Dallp2(dbl_op1b) <= Dallp2(dbl_op2b))) +#define Dbl_isequal(dbl_op1a,dbl_op1b,dbl_op2a,dbl_op2b) \ + ((Dallp1(dbl_op1a) == Dallp1(dbl_op2a)) && \ + (Dallp2(dbl_op1b) == Dallp2(dbl_op2b))) + +#define Dbl_leftshiftby8(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),24,Dallp1(dbl_valueA)); \ + Dallp2(dbl_valueB) <<= 8 +#define Dbl_leftshiftby7(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),25,Dallp1(dbl_valueA)); \ + Dallp2(dbl_valueB) <<= 7 +#define Dbl_leftshiftby4(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),28,Dallp1(dbl_valueA)); \ + Dallp2(dbl_valueB) <<= 4 +#define Dbl_leftshiftby3(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),29,Dallp1(dbl_valueA)); \ + Dallp2(dbl_valueB) <<= 3 +#define Dbl_leftshiftby2(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),30,Dallp1(dbl_valueA)); \ + Dallp2(dbl_valueB) <<= 2 +#define Dbl_leftshiftby1(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),31,Dallp1(dbl_valueA)); \ + Dallp2(dbl_valueB) <<= 1 + +#define Dbl_rightshiftby8(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),8,Dallp2(dbl_valueB)); \ + Dallp1(dbl_valueA) >>= 8 +#define Dbl_rightshiftby4(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),4,Dallp2(dbl_valueB)); \ + Dallp1(dbl_valueA) >>= 4 +#define Dbl_rightshiftby2(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),2,Dallp2(dbl_valueB)); \ + Dallp1(dbl_valueA) >>= 2 +#define Dbl_rightshiftby1(dbl_valueA,dbl_valueB) \ + Shiftdouble(Dallp1(dbl_valueA),Dallp2(dbl_valueB),1,Dallp2(dbl_valueB)); \ + Dallp1(dbl_valueA) >>= 1 + +/* This magnitude comparison uses the signless first words and + * the regular part2 words. The comparison is graphically: + * + * 1st greater? ------------- + * | + * 1st less?-----------------+--------- + * | | + * 2nd greater or equal----->| | + * False True + */ +#define Dbl_ismagnitudeless(leftB,rightB,signlessleft,signlessright) \ + ((signlessleft <= signlessright) && \ + ( (signlessleft < signlessright) || (Dallp2(leftB)<Dallp2(rightB)) )) + +#define Dbl_copytoint_exponentmantissap1(src,dest) \ + dest = Dexponentmantissap1(src) + +/* A quiet NaN has the high mantissa bit clear and at least on other (in this + * case the adjacent bit) bit set. */ +#define Dbl_set_quiet(dbl_value) Deposit_dhigh2mantissa(dbl_value,1) +#define Dbl_set_exponent(dbl_value, exp) Deposit_dexponent(dbl_value,exp) + +#define Dbl_set_mantissa(desta,destb,valuea,valueb) \ + Deposit_dmantissap1(desta,valuea); \ + Dmantissap2(destb) = Dmantissap2(valueb) +#define Dbl_set_mantissap1(desta,valuea) \ + Deposit_dmantissap1(desta,valuea) +#define Dbl_set_mantissap2(destb,valueb) \ + Dmantissap2(destb) = Dmantissap2(valueb) + +#define Dbl_set_exponentmantissa(desta,destb,valuea,valueb) \ + Deposit_dexponentmantissap1(desta,valuea); \ + Dmantissap2(destb) = Dmantissap2(valueb) +#define Dbl_set_exponentmantissap1(dest,value) \ + Deposit_dexponentmantissap1(dest,value) + +#define Dbl_copyfromptr(src,desta,destb) \ + Dallp1(desta) = src->wd0; \ + Dallp2(destb) = src->wd1 +#define Dbl_copytoptr(srca,srcb,dest) \ + dest->wd0 = Dallp1(srca); \ + dest->wd1 = Dallp2(srcb) + +/* An infinity is represented with the max exponent and a zero mantissa */ +#define Dbl_setinfinity_exponent(dbl_value) \ + Deposit_dexponent(dbl_value,DBL_INFINITY_EXPONENT) +#define Dbl_setinfinity_exponentmantissa(dbl_valueA,dbl_valueB) \ + Deposit_dexponentmantissap1(dbl_valueA, \ + (DBL_INFINITY_EXPONENT << (32-(1+DBL_EXP_LENGTH)))); \ + Dmantissap2(dbl_valueB) = 0 +#define Dbl_setinfinitypositive(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) \ + = (DBL_INFINITY_EXPONENT << (32-(1+DBL_EXP_LENGTH))); \ + Dmantissap2(dbl_valueB) = 0 +#define Dbl_setinfinitynegative(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) = ((unsigned int)1<<31) | \ + (DBL_INFINITY_EXPONENT << (32-(1+DBL_EXP_LENGTH))); \ + Dmantissap2(dbl_valueB) = 0 +#define Dbl_setinfinity(dbl_valueA,dbl_valueB,sign) \ + Dallp1(dbl_valueA) = ((unsigned int)sign << 31) | \ + (DBL_INFINITY_EXPONENT << (32-(1+DBL_EXP_LENGTH))); \ + Dmantissap2(dbl_valueB) = 0 + +#define Dbl_sethigh4bits(dbl_value, extsign) Deposit_dhigh4p1(dbl_value,extsign) +#define Dbl_set_sign(dbl_value,sign) Deposit_dsign(dbl_value,sign) +#define Dbl_invert_sign(dbl_value) Deposit_dsign(dbl_value,~Dsign(dbl_value)) +#define Dbl_setone_sign(dbl_value) Deposit_dsign(dbl_value,1) +#define Dbl_setone_lowmantissap2(dbl_value) Deposit_dlowp2(dbl_value,1) +#define Dbl_setzero_sign(dbl_value) Dallp1(dbl_value) &= 0x7fffffff +#define Dbl_setzero_exponent(dbl_value) \ + Dallp1(dbl_value) &= 0x800fffff +#define Dbl_setzero_mantissa(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) &= 0xfff00000; \ + Dallp2(dbl_valueB) = 0 +#define Dbl_setzero_mantissap1(dbl_value) Dallp1(dbl_value) &= 0xfff00000 +#define Dbl_setzero_mantissap2(dbl_value) Dallp2(dbl_value) = 0 +#define Dbl_setzero_exponentmantissa(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) &= 0x80000000; \ + Dallp2(dbl_valueB) = 0 +#define Dbl_setzero_exponentmantissap1(dbl_valueA) \ + Dallp1(dbl_valueA) &= 0x80000000 +#define Dbl_setzero(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) = 0; Dallp2(dbl_valueB) = 0 +#define Dbl_setzerop1(dbl_value) Dallp1(dbl_value) = 0 +#define Dbl_setzerop2(dbl_value) Dallp2(dbl_value) = 0 +#define Dbl_setnegativezero(dbl_value) \ + Dallp1(dbl_value) = (unsigned int)1 << 31; Dallp2(dbl_value) = 0 +#define Dbl_setnegativezerop1(dbl_value) Dallp1(dbl_value) = (unsigned int)1<<31 + +/* Use the following macro for both overflow & underflow conditions */ +#define ovfl - +#define unfl + +#define Dbl_setwrapped_exponent(dbl_value,exponent,op) \ + Deposit_dexponent(dbl_value,(exponent op DBL_WRAP)) + +#define Dbl_setlargestpositive(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) = ((DBL_EMAX+DBL_BIAS) << (32-(1+DBL_EXP_LENGTH))) \ + | ((1<<(32-(1+DBL_EXP_LENGTH))) - 1 ); \ + Dallp2(dbl_valueB) = 0xFFFFFFFF +#define Dbl_setlargestnegative(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) = ((DBL_EMAX+DBL_BIAS) << (32-(1+DBL_EXP_LENGTH))) \ + | ((1<<(32-(1+DBL_EXP_LENGTH))) - 1 ) \ + | ((unsigned int)1<<31); \ + Dallp2(dbl_valueB) = 0xFFFFFFFF +#define Dbl_setlargest_exponentmantissa(dbl_valueA,dbl_valueB) \ + Deposit_dexponentmantissap1(dbl_valueA, \ + (((DBL_EMAX+DBL_BIAS) << (32-(1+DBL_EXP_LENGTH))) \ + | ((1<<(32-(1+DBL_EXP_LENGTH))) - 1 ))); \ + Dallp2(dbl_valueB) = 0xFFFFFFFF + +#define Dbl_setnegativeinfinity(dbl_valueA,dbl_valueB) \ + Dallp1(dbl_valueA) = ((1<<DBL_EXP_LENGTH) | DBL_INFINITY_EXPONENT) \ + << (32-(1+DBL_EXP_LENGTH)) ; \ + Dallp2(dbl_valueB) = 0 +#define Dbl_setlargest(dbl_valueA,dbl_valueB,sign) \ + Dallp1(dbl_valueA) = ((unsigned int)sign << 31) | \ + ((DBL_EMAX+DBL_BIAS) << (32-(1+DBL_EXP_LENGTH))) | \ + ((1 << (32-(1+DBL_EXP_LENGTH))) - 1 ); \ + Dallp2(dbl_valueB) = 0xFFFFFFFF + + +/* The high bit is always zero so arithmetic or logical shifts will work. */ +#define Dbl_right_align(srcdstA,srcdstB,shift,extent) \ + if( shift >= 32 ) \ + { \ + /* Big shift requires examining the portion shift off \ + the end to properly set inexact. */ \ + if(shift < 64) \ + { \ + if(shift > 32) \ + { \ + Variable_shift_double(Dallp1(srcdstA),Dallp2(srcdstB), \ + shift-32, Extall(extent)); \ + if(Dallp2(srcdstB) << 64 - (shift)) Ext_setone_low(extent); \ + } \ + else Extall(extent) = Dallp2(srcdstB); \ + Dallp2(srcdstB) = Dallp1(srcdstA) >> (shift - 32); \ + } \ + else \ + { \ + Extall(extent) = Dallp1(srcdstA); \ + if(Dallp2(srcdstB)) Ext_setone_low(extent); \ + Dallp2(srcdstB) = 0; \ + } \ + Dallp1(srcdstA) = 0; \ + } \ + else \ + { \ + /* Small alignment is simpler. Extension is easily set. */ \ + if (shift > 0) \ + { \ + Extall(extent) = Dallp2(srcdstB) << 32 - (shift); \ + Variable_shift_double(Dallp1(srcdstA),Dallp2(srcdstB),shift, \ + Dallp2(srcdstB)); \ + Dallp1(srcdstA) >>= shift; \ + } \ + else Extall(extent) = 0; \ + } + +/* + * Here we need to shift the result right to correct for an overshift + * (due to the exponent becoming negative) during normalization. + */ +#define Dbl_fix_overshift(srcdstA,srcdstB,shift,extent) \ + Extall(extent) = Dallp2(srcdstB) << 32 - (shift); \ + Dallp2(srcdstB) = (Dallp1(srcdstA) << 32 - (shift)) | \ + (Dallp2(srcdstB) >> (shift)); \ + Dallp1(srcdstA) = Dallp1(srcdstA) >> shift + +#define Dbl_hiddenhigh3mantissa(dbl_value) Dhiddenhigh3mantissa(dbl_value) +#define Dbl_hidden(dbl_value) Dhidden(dbl_value) +#define Dbl_lowmantissap2(dbl_value) Dlowp2(dbl_value) + +/* The left argument is never smaller than the right argument */ +#define Dbl_subtract(lefta,leftb,righta,rightb,resulta,resultb) \ + if( Dallp2(rightb) > Dallp2(leftb) ) Dallp1(lefta)--; \ + Dallp2(resultb) = Dallp2(leftb) - Dallp2(rightb); \ + Dallp1(resulta) = Dallp1(lefta) - Dallp1(righta) + +/* Subtract right augmented with extension from left augmented with zeros and + * store into result and extension. */ +#define Dbl_subtract_withextension(lefta,leftb,righta,rightb,extent,resulta,resultb) \ + Dbl_subtract(lefta,leftb,righta,rightb,resulta,resultb); \ + if( (Extall(extent) = 0-Extall(extent)) ) \ + { \ + if((Dallp2(resultb)--) == 0) Dallp1(resulta)--; \ + } + +#define Dbl_addition(lefta,leftb,righta,rightb,resulta,resultb) \ + /* If the sum of the low words is less than either source, then \ + * an overflow into the next word occurred. */ \ + Dallp1(resulta) = Dallp1(lefta) + Dallp1(righta); \ + if((Dallp2(resultb) = Dallp2(leftb) + Dallp2(rightb)) < Dallp2(rightb)) \ + Dallp1(resulta)++ + +#define Dbl_xortointp1(left,right,result) \ + result = Dallp1(left) XOR Dallp1(right) + +#define Dbl_xorfromintp1(left,right,result) \ + Dallp1(result) = left XOR Dallp1(right) + +#define Dbl_swap_lower(left,right) \ + Dallp2(left) = Dallp2(left) XOR Dallp2(right); \ + Dallp2(right) = Dallp2(left) XOR Dallp2(right); \ + Dallp2(left) = Dallp2(left) XOR Dallp2(right) + +/* Need to Initialize */ +#define Dbl_makequietnan(desta,destb) \ + Dallp1(desta) = ((DBL_EMAX+DBL_BIAS)+1)<< (32-(1+DBL_EXP_LENGTH)) \ + | (1<<(32-(1+DBL_EXP_LENGTH+2))); \ + Dallp2(destb) = 0 +#define Dbl_makesignalingnan(desta,destb) \ + Dallp1(desta) = ((DBL_EMAX+DBL_BIAS)+1)<< (32-(1+DBL_EXP_LENGTH)) \ + | (1<<(32-(1+DBL_EXP_LENGTH+1))); \ + Dallp2(destb) = 0 + +#define Dbl_normalize(dbl_opndA,dbl_opndB,exponent) \ + while(Dbl_iszero_hiddenhigh7mantissa(dbl_opndA)) { \ + Dbl_leftshiftby8(dbl_opndA,dbl_opndB); \ + exponent -= 8; \ + } \ + if(Dbl_iszero_hiddenhigh3mantissa(dbl_opndA)) { \ + Dbl_leftshiftby4(dbl_opndA,dbl_opndB); \ + exponent -= 4; \ + } \ + while(Dbl_iszero_hidden(dbl_opndA)) { \ + Dbl_leftshiftby1(dbl_opndA,dbl_opndB); \ + exponent -= 1; \ + } + +#define Twoword_add(src1dstA,src1dstB,src2A,src2B) \ + /* \ + * want this macro to generate: \ + * ADD src1dstB,src2B,src1dstB; \ + * ADDC src1dstA,src2A,src1dstA; \ + */ \ + if ((src1dstB) + (src2B) < (src1dstB)) Dallp1(src1dstA)++; \ + Dallp1(src1dstA) += (src2A); \ + Dallp2(src1dstB) += (src2B) + +#define Twoword_subtract(src1dstA,src1dstB,src2A,src2B) \ + /* \ + * want this macro to generate: \ + * SUB src1dstB,src2B,src1dstB; \ + * SUBB src1dstA,src2A,src1dstA; \ + */ \ + if ((src1dstB) < (src2B)) Dallp1(src1dstA)--; \ + Dallp1(src1dstA) -= (src2A); \ + Dallp2(src1dstB) -= (src2B) + +#define Dbl_setoverflow(resultA,resultB) \ + /* set result to infinity or largest number */ \ + switch (Rounding_mode()) { \ + case ROUNDPLUS: \ + if (Dbl_isone_sign(resultA)) { \ + Dbl_setlargestnegative(resultA,resultB); \ + } \ + else { \ + Dbl_setinfinitypositive(resultA,resultB); \ + } \ + break; \ + case ROUNDMINUS: \ + if (Dbl_iszero_sign(resultA)) { \ + Dbl_setlargestpositive(resultA,resultB); \ + } \ + else { \ + Dbl_setinfinitynegative(resultA,resultB); \ + } \ + break; \ + case ROUNDNEAREST: \ + Dbl_setinfinity_exponentmantissa(resultA,resultB); \ + break; \ + case ROUNDZERO: \ + Dbl_setlargest_exponentmantissa(resultA,resultB); \ + } + +#define Dbl_denormalize(opndp1,opndp2,exponent,guard,sticky,inexact) \ + Dbl_clear_signexponent_set_hidden(opndp1); \ + if (exponent >= (1-DBL_P)) { \ + if (exponent >= -31) { \ + guard = (Dallp2(opndp2) >> -exponent) & 1; \ + if (exponent < 0) sticky |= Dallp2(opndp2) << (32+exponent); \ + if (exponent > -31) { \ + Variable_shift_double(opndp1,opndp2,1-exponent,opndp2); \ + Dallp1(opndp1) >>= 1-exponent; \ + } \ + else { \ + Dallp2(opndp2) = Dallp1(opndp1); \ + Dbl_setzerop1(opndp1); \ + } \ + } \ + else { \ + guard = (Dallp1(opndp1) >> -32-exponent) & 1; \ + if (exponent == -32) sticky |= Dallp2(opndp2); \ + else sticky |= (Dallp2(opndp2) | Dallp1(opndp1) << 64+exponent); \ + Dallp2(opndp2) = Dallp1(opndp1) >> -31-exponent; \ + Dbl_setzerop1(opndp1); \ + } \ + inexact = guard | sticky; \ + } \ + else { \ + guard = 0; \ + sticky |= (Dallp1(opndp1) | Dallp2(opndp2)); \ + Dbl_setzero(opndp1,opndp2); \ + inexact = sticky; \ + } + +/* + * The fused multiply add instructions requires a double extended format, + * with 106 bits of mantissa. + */ +#define DBLEXT_THRESHOLD 106 + +#define Dblext_setzero(valA,valB,valC,valD) \ + Dextallp1(valA) = 0; Dextallp2(valB) = 0; \ + Dextallp3(valC) = 0; Dextallp4(valD) = 0 + + +#define Dblext_isnotzero_mantissap3(valC) (Dextallp3(valC)!=0) +#define Dblext_isnotzero_mantissap4(valD) (Dextallp3(valD)!=0) +#define Dblext_isone_lowp2(val) (Dextlowp2(val)!=0) +#define Dblext_isone_highp3(val) (Dexthighp3(val)!=0) +#define Dblext_isnotzero_low31p3(val) (Dextlow31p3(val)!=0) +#define Dblext_iszero(valA,valB,valC,valD) (Dextallp1(valA)==0 && \ + Dextallp2(valB)==0 && Dextallp3(valC)==0 && Dextallp4(valD)==0) + +#define Dblext_copy(srca,srcb,srcc,srcd,desta,destb,destc,destd) \ + Dextallp1(desta) = Dextallp4(srca); \ + Dextallp2(destb) = Dextallp4(srcb); \ + Dextallp3(destc) = Dextallp4(srcc); \ + Dextallp4(destd) = Dextallp4(srcd) + +#define Dblext_swap_lower(leftp2,leftp3,leftp4,rightp2,rightp3,rightp4) \ + Dextallp2(leftp2) = Dextallp2(leftp2) XOR Dextallp2(rightp2); \ + Dextallp2(rightp2) = Dextallp2(leftp2) XOR Dextallp2(rightp2); \ + Dextallp2(leftp2) = Dextallp2(leftp2) XOR Dextallp2(rightp2); \ + Dextallp3(leftp3) = Dextallp3(leftp3) XOR Dextallp3(rightp3); \ + Dextallp3(rightp3) = Dextallp3(leftp3) XOR Dextallp3(rightp3); \ + Dextallp3(leftp3) = Dextallp3(leftp3) XOR Dextallp3(rightp3); \ + Dextallp4(leftp4) = Dextallp4(leftp4) XOR Dextallp4(rightp4); \ + Dextallp4(rightp4) = Dextallp4(leftp4) XOR Dextallp4(rightp4); \ + Dextallp4(leftp4) = Dextallp4(leftp4) XOR Dextallp4(rightp4) + +#define Dblext_setone_lowmantissap4(dbl_value) Deposit_dextlowp4(dbl_value,1) + +/* The high bit is always zero so arithmetic or logical shifts will work. */ +#define Dblext_right_align(srcdstA,srcdstB,srcdstC,srcdstD,shift) \ + {int shiftamt, sticky; \ + shiftamt = shift % 32; \ + sticky = 0; \ + switch (shift/32) { \ + case 0: if (shiftamt > 0) { \ + sticky = Dextallp4(srcdstD) << 32 - (shiftamt); \ + Variable_shift_double(Dextallp3(srcdstC), \ + Dextallp4(srcdstD),shiftamt,Dextallp4(srcdstD)); \ + Variable_shift_double(Dextallp2(srcdstB), \ + Dextallp3(srcdstC),shiftamt,Dextallp3(srcdstC)); \ + Variable_shift_double(Dextallp1(srcdstA), \ + Dextallp2(srcdstB),shiftamt,Dextallp2(srcdstB)); \ + Dextallp1(srcdstA) >>= shiftamt; \ + } \ + break; \ + case 1: if (shiftamt > 0) { \ + sticky = (Dextallp3(srcdstC) << 31 - shiftamt) | \ + Dextallp4(srcdstD); \ + Variable_shift_double(Dextallp2(srcdstB), \ + Dextallp3(srcdstC),shiftamt,Dextallp4(srcdstD)); \ + Variable_shift_double(Dextallp1(srcdstA), \ + Dextallp2(srcdstB),shiftamt,Dextallp3(srcdstC)); \ + } \ + else { \ + sticky = Dextallp4(srcdstD); \ + Dextallp4(srcdstD) = Dextallp3(srcdstC); \ + Dextallp3(srcdstC) = Dextallp2(srcdstB); \ + } \ + Dextallp2(srcdstB) = Dextallp1(srcdstA) >> shiftamt; \ + Dextallp1(srcdstA) = 0; \ + break; \ + case 2: if (shiftamt > 0) { \ + sticky = (Dextallp2(srcdstB) << 31 - shiftamt) | \ + Dextallp3(srcdstC) | Dextallp4(srcdstD); \ + Variable_shift_double(Dextallp1(srcdstA), \ + Dextallp2(srcdstB),shiftamt,Dextallp4(srcdstD)); \ + } \ + else { \ + sticky = Dextallp3(srcdstC) | Dextallp4(srcdstD); \ + Dextallp4(srcdstD) = Dextallp2(srcdstB); \ + } \ + Dextallp3(srcdstC) = Dextallp1(srcdstA) >> shiftamt; \ + Dextallp1(srcdstA) = Dextallp2(srcdstB) = 0; \ + break; \ + case 3: if (shiftamt > 0) { \ + sticky = (Dextallp1(srcdstA) << 31 - shiftamt) | \ + Dextallp2(srcdstB) | Dextallp3(srcdstC) | \ + Dextallp4(srcdstD); \ + } \ + else { \ + sticky = Dextallp2(srcdstB) | Dextallp3(srcdstC) | \ + Dextallp4(srcdstD); \ + } \ + Dextallp4(srcdstD) = Dextallp1(srcdstA) >> shiftamt; \ + Dextallp1(srcdstA) = Dextallp2(srcdstB) = 0; \ + Dextallp3(srcdstC) = 0; \ + break; \ + } \ + if (sticky) Dblext_setone_lowmantissap4(srcdstD); \ + } + +/* The left argument is never smaller than the right argument */ +#define Dblext_subtract(lefta,leftb,leftc,leftd,righta,rightb,rightc,rightd,resulta,resultb,resultc,resultd) \ + if( Dextallp4(rightd) > Dextallp4(leftd) ) \ + if( (Dextallp3(leftc)--) == 0) \ + if( (Dextallp2(leftb)--) == 0) Dextallp1(lefta)--; \ + Dextallp4(resultd) = Dextallp4(leftd) - Dextallp4(rightd); \ + if( Dextallp3(rightc) > Dextallp3(leftc) ) \ + if( (Dextallp2(leftb)--) == 0) Dextallp1(lefta)--; \ + Dextallp3(resultc) = Dextallp3(leftc) - Dextallp3(rightc); \ + if( Dextallp2(rightb) > Dextallp2(leftb) ) Dextallp1(lefta)--; \ + Dextallp2(resultb) = Dextallp2(leftb) - Dextallp2(rightb); \ + Dextallp1(resulta) = Dextallp1(lefta) - Dextallp1(righta) + +#define Dblext_addition(lefta,leftb,leftc,leftd,righta,rightb,rightc,rightd,resulta,resultb,resultc,resultd) \ + /* If the sum of the low words is less than either source, then \ + * an overflow into the next word occurred. */ \ + if ((Dextallp4(resultd) = Dextallp4(leftd)+Dextallp4(rightd)) < \ + Dextallp4(rightd)) \ + if((Dextallp3(resultc) = Dextallp3(leftc)+Dextallp3(rightc)+1) <= \ + Dextallp3(rightc)) \ + if((Dextallp2(resultb) = Dextallp2(leftb)+Dextallp2(rightb)+1) \ + <= Dextallp2(rightb)) \ + Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta)+1; \ + else Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta); \ + else \ + if ((Dextallp2(resultb) = Dextallp2(leftb)+Dextallp2(rightb)) < \ + Dextallp2(rightb)) \ + Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta)+1; \ + else Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta); \ + else \ + if ((Dextallp3(resultc) = Dextallp3(leftc)+Dextallp3(rightc)) < \ + Dextallp3(rightc)) \ + if ((Dextallp2(resultb) = Dextallp2(leftb)+Dextallp2(rightb)+1) \ + <= Dextallp2(rightb)) \ + Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta)+1; \ + else Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta); \ + else \ + if ((Dextallp2(resultb) = Dextallp2(leftb)+Dextallp2(rightb)) < \ + Dextallp2(rightb)) \ + Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta)+1; \ + else Dextallp1(resulta) = Dextallp1(lefta)+Dextallp1(righta) + + +#define Dblext_arithrightshiftby1(srcdstA,srcdstB,srcdstC,srcdstD) \ + Shiftdouble(Dextallp3(srcdstC),Dextallp4(srcdstD),1,Dextallp4(srcdstD)); \ + Shiftdouble(Dextallp2(srcdstB),Dextallp3(srcdstC),1,Dextallp3(srcdstC)); \ + Shiftdouble(Dextallp1(srcdstA),Dextallp2(srcdstB),1,Dextallp2(srcdstB)); \ + Dextallp1(srcdstA) = (int)Dextallp1(srcdstA) >> 1 + +#define Dblext_leftshiftby8(valA,valB,valC,valD) \ + Shiftdouble(Dextallp1(valA),Dextallp2(valB),24,Dextallp1(valA)); \ + Shiftdouble(Dextallp2(valB),Dextallp3(valC),24,Dextallp2(valB)); \ + Shiftdouble(Dextallp3(valC),Dextallp4(valD),24,Dextallp3(valC)); \ + Dextallp4(valD) <<= 8 +#define Dblext_leftshiftby4(valA,valB,valC,valD) \ + Shiftdouble(Dextallp1(valA),Dextallp2(valB),28,Dextallp1(valA)); \ + Shiftdouble(Dextallp2(valB),Dextallp3(valC),28,Dextallp2(valB)); \ + Shiftdouble(Dextallp3(valC),Dextallp4(valD),28,Dextallp3(valC)); \ + Dextallp4(valD) <<= 4 +#define Dblext_leftshiftby3(valA,valB,valC,valD) \ + Shiftdouble(Dextallp1(valA),Dextallp2(valB),29,Dextallp1(valA)); \ + Shiftdouble(Dextallp2(valB),Dextallp3(valC),29,Dextallp2(valB)); \ + Shiftdouble(Dextallp3(valC),Dextallp4(valD),29,Dextallp3(valC)); \ + Dextallp4(valD) <<= 3 +#define Dblext_leftshiftby2(valA,valB,valC,valD) \ + Shiftdouble(Dextallp1(valA),Dextallp2(valB),30,Dextallp1(valA)); \ + Shiftdouble(Dextallp2(valB),Dextallp3(valC),30,Dextallp2(valB)); \ + Shiftdouble(Dextallp3(valC),Dextallp4(valD),30,Dextallp3(valC)); \ + Dextallp4(valD) <<= 2 +#define Dblext_leftshiftby1(valA,valB,valC,valD) \ + Shiftdouble(Dextallp1(valA),Dextallp2(valB),31,Dextallp1(valA)); \ + Shiftdouble(Dextallp2(valB),Dextallp3(valC),31,Dextallp2(valB)); \ + Shiftdouble(Dextallp3(valC),Dextallp4(valD),31,Dextallp3(valC)); \ + Dextallp4(valD) <<= 1 + +#define Dblext_rightshiftby4(valueA,valueB,valueC,valueD) \ + Shiftdouble(Dextallp3(valueC),Dextallp4(valueD),4,Dextallp4(valueD)); \ + Shiftdouble(Dextallp2(valueB),Dextallp3(valueC),4,Dextallp3(valueC)); \ + Shiftdouble(Dextallp1(valueA),Dextallp2(valueB),4,Dextallp2(valueB)); \ + Dextallp1(valueA) >>= 4 +#define Dblext_rightshiftby1(valueA,valueB,valueC,valueD) \ + Shiftdouble(Dextallp3(valueC),Dextallp4(valueD),1,Dextallp4(valueD)); \ + Shiftdouble(Dextallp2(valueB),Dextallp3(valueC),1,Dextallp3(valueC)); \ + Shiftdouble(Dextallp1(valueA),Dextallp2(valueB),1,Dextallp2(valueB)); \ + Dextallp1(valueA) >>= 1 + +#define Dblext_xortointp1(left,right,result) Dbl_xortointp1(left,right,result) + +#define Dblext_xorfromintp1(left,right,result) \ + Dbl_xorfromintp1(left,right,result) + +#define Dblext_copytoint_exponentmantissap1(src,dest) \ + Dbl_copytoint_exponentmantissap1(src,dest) + +#define Dblext_ismagnitudeless(leftB,rightB,signlessleft,signlessright) \ + Dbl_ismagnitudeless(leftB,rightB,signlessleft,signlessright) + +#define Dbl_copyto_dblext(src1,src2,dest1,dest2,dest3,dest4) \ + Dextallp1(dest1) = Dallp1(src1); Dextallp2(dest2) = Dallp2(src2); \ + Dextallp3(dest3) = 0; Dextallp4(dest4) = 0 + +#define Dblext_set_sign(dbl_value,sign) Dbl_set_sign(dbl_value,sign) +#define Dblext_clear_signexponent_set_hidden(srcdst) \ + Dbl_clear_signexponent_set_hidden(srcdst) +#define Dblext_clear_signexponent(srcdst) Dbl_clear_signexponent(srcdst) +#define Dblext_clear_sign(srcdst) Dbl_clear_sign(srcdst) +#define Dblext_isone_hidden(dbl_value) Dbl_isone_hidden(dbl_value) + +/* + * The Fourword_add() macro assumes that integers are 4 bytes in size. + * It will break if this is not the case. + */ + +#define Fourword_add(src1dstA,src1dstB,src1dstC,src1dstD,src2A,src2B,src2C,src2D) \ + /* \ + * want this macro to generate: \ + * ADD src1dstD,src2D,src1dstD; \ + * ADDC src1dstC,src2C,src1dstC; \ + * ADDC src1dstB,src2B,src1dstB; \ + * ADDC src1dstA,src2A,src1dstA; \ + */ \ + if ((unsigned int)(src1dstD += (src2D)) < (unsigned int)(src2D)) { \ + if ((unsigned int)(src1dstC += (src2C) + 1) <= \ + (unsigned int)(src2C)) { \ + if ((unsigned int)(src1dstB += (src2B) + 1) <= \ + (unsigned int)(src2B)) src1dstA++; \ + } \ + else if ((unsigned int)(src1dstB += (src2B)) < \ + (unsigned int)(src2B)) src1dstA++; \ + } \ + else { \ + if ((unsigned int)(src1dstC += (src2C)) < \ + (unsigned int)(src2C)) { \ + if ((unsigned int)(src1dstB += (src2B) + 1) <= \ + (unsigned int)(src2B)) src1dstA++; \ + } \ + else if ((unsigned int)(src1dstB += (src2B)) < \ + (unsigned int)(src2B)) src1dstA++; \ + } \ + src1dstA += (src2A) + +#define Dblext_denormalize(opndp1,opndp2,opndp3,opndp4,exponent,is_tiny) \ + {int shiftamt, sticky; \ + is_tiny = TRUE; \ + if (exponent == 0 && (Dextallp3(opndp3) || Dextallp4(opndp4))) { \ + switch (Rounding_mode()) { \ + case ROUNDPLUS: \ + if (Dbl_iszero_sign(opndp1)) { \ + Dbl_increment(opndp1,opndp2); \ + if (Dbl_isone_hiddenoverflow(opndp1)) \ + is_tiny = FALSE; \ + Dbl_decrement(opndp1,opndp2); \ + } \ + break; \ + case ROUNDMINUS: \ + if (Dbl_isone_sign(opndp1)) { \ + Dbl_increment(opndp1,opndp2); \ + if (Dbl_isone_hiddenoverflow(opndp1)) \ + is_tiny = FALSE; \ + Dbl_decrement(opndp1,opndp2); \ + } \ + break; \ + case ROUNDNEAREST: \ + if (Dblext_isone_highp3(opndp3) && \ + (Dblext_isone_lowp2(opndp2) || \ + Dblext_isnotzero_low31p3(opndp3))) { \ + Dbl_increment(opndp1,opndp2); \ + if (Dbl_isone_hiddenoverflow(opndp1)) \ + is_tiny = FALSE; \ + Dbl_decrement(opndp1,opndp2); \ + } \ + break; \ + } \ + } \ + Dblext_clear_signexponent_set_hidden(opndp1); \ + if (exponent >= (1-QUAD_P)) { \ + shiftamt = (1-exponent) % 32; \ + switch((1-exponent)/32) { \ + case 0: sticky = Dextallp4(opndp4) << 32-(shiftamt); \ + Variableshiftdouble(opndp3,opndp4,shiftamt,opndp4); \ + Variableshiftdouble(opndp2,opndp3,shiftamt,opndp3); \ + Variableshiftdouble(opndp1,opndp2,shiftamt,opndp2); \ + Dextallp1(opndp1) >>= shiftamt; \ + break; \ + case 1: sticky = (Dextallp3(opndp3) << 32-(shiftamt)) | \ + Dextallp4(opndp4); \ + Variableshiftdouble(opndp2,opndp3,shiftamt,opndp4); \ + Variableshiftdouble(opndp1,opndp2,shiftamt,opndp3); \ + Dextallp2(opndp2) = Dextallp1(opndp1) >> shiftamt; \ + Dextallp1(opndp1) = 0; \ + break; \ + case 2: sticky = (Dextallp2(opndp2) << 32-(shiftamt)) | \ + Dextallp3(opndp3) | Dextallp4(opndp4); \ + Variableshiftdouble(opndp1,opndp2,shiftamt,opndp4); \ + Dextallp3(opndp3) = Dextallp1(opndp1) >> shiftamt; \ + Dextallp1(opndp1) = Dextallp2(opndp2) = 0; \ + break; \ + case 3: sticky = (Dextallp1(opndp1) << 32-(shiftamt)) | \ + Dextallp2(opndp2) | Dextallp3(opndp3) | \ + Dextallp4(opndp4); \ + Dextallp4(opndp4) = Dextallp1(opndp1) >> shiftamt; \ + Dextallp1(opndp1) = Dextallp2(opndp2) = 0; \ + Dextallp3(opndp3) = 0; \ + break; \ + } \ + } \ + else { \ + sticky = Dextallp1(opndp1) | Dextallp2(opndp2) | \ + Dextallp3(opndp3) | Dextallp4(opndp4); \ + Dblext_setzero(opndp1,opndp2,opndp3,opndp4); \ + } \ + if (sticky) Dblext_setone_lowmantissap4(opndp4); \ + exponent = 0; \ + } diff --git a/kernel/arch/parisc/math-emu/decode_exc.c b/kernel/arch/parisc/math-emu/decode_exc.c new file mode 100644 index 000000000..04e550e76 --- /dev/null +++ b/kernel/arch/parisc/math-emu/decode_exc.c @@ -0,0 +1,370 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/fp/decode_exc.c $ Revision: $ + * + * Purpose: + * <<please update with a synopsis of the functionality provided by this file>> + * + * External Interfaces: + * <<the following list was autogenerated, please review>> + * decode_fpu(Fpu_register, trap_counts) + * + * Internal Interfaces: + * <<please update>> + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + +#include <linux/kernel.h> +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" +/* #include "types.h" */ +#include <asm/signal.h> +#include <asm/siginfo.h> +/* #include <machine/sys/mdep_private.h> */ + +#undef Fpustatus_register +#define Fpustatus_register Fpu_register[0] + +/* General definitions */ +#define DOESTRAP 1 +#define NOTRAP 0 +#define SIGNALCODE(signal, code) ((signal) << 24 | (code)) +#define copropbit 1<<31-2 /* bit position 2 */ +#define opclass 9 /* bits 21 & 22 */ +#define fmt 11 /* bits 19 & 20 */ +#define df 13 /* bits 17 & 18 */ +#define twobits 3 /* mask low-order 2 bits */ +#define fivebits 31 /* mask low-order 5 bits */ +#define MAX_EXCP_REG 7 /* number of excpeption registers to check */ + +/* Exception register definitions */ +#define Excp_type(index) Exceptiontype(Fpu_register[index]) +#define Excp_instr(index) Instructionfield(Fpu_register[index]) +#define Clear_excp_register(index) Allexception(Fpu_register[index]) = 0 +#define Excp_format() \ + (current_ir >> ((current_ir>>opclass & twobits)==1 ? df : fmt) & twobits) + +/* Miscellaneous definitions */ +#define Fpu_sgl(index) Fpu_register[index*2] + +#define Fpu_dblp1(index) Fpu_register[index*2] +#define Fpu_dblp2(index) Fpu_register[(index*2)+1] + +#define Fpu_quadp1(index) Fpu_register[index*2] +#define Fpu_quadp2(index) Fpu_register[(index*2)+1] +#define Fpu_quadp3(index) Fpu_register[(index*2)+2] +#define Fpu_quadp4(index) Fpu_register[(index*2)+3] + +/* Single precision floating-point definitions */ +#ifndef Sgl_decrement +# define Sgl_decrement(sgl_value) Sall(sgl_value)-- +#endif + +/* Double precision floating-point definitions */ +#ifndef Dbl_decrement +# define Dbl_decrement(dbl_valuep1,dbl_valuep2) \ + if ((Dallp2(dbl_valuep2)--) == 0) Dallp1(dbl_valuep1)-- +#endif + + +#define update_trap_counts(Fpu_register, aflags, bflags, trap_counts) { \ + aflags=(Fpu_register[0])>>27; /* assumes zero fill. 32 bit */ \ + Fpu_register[0] |= bflags; \ +} + +u_int +decode_fpu(unsigned int Fpu_register[], unsigned int trap_counts[]) +{ + unsigned int current_ir, excp; + int target, exception_index = 1; + boolean inexact; + unsigned int aflags; + unsigned int bflags; + unsigned int excptype; + + + /* Keep stats on how many floating point exceptions (based on type) + * that happen. Want to keep this overhead low, but still provide + * some information to the customer. All exits from this routine + * need to restore Fpu_register[0] + */ + + bflags=(Fpu_register[0] & 0xf8000000); + Fpu_register[0] &= 0x07ffffff; + + /* exception_index is used to index the exception register queue. It + * always points at the last register that contains a valid exception. A + * zero value implies no exceptions (also the initialized value). Setting + * the T-bit resets the exception_index to zero. + */ + + /* + * Check for reserved-op exception. A reserved-op exception does not + * set any exception registers nor does it set the T-bit. If the T-bit + * is not set then a reserved-op exception occurred. + * + * At some point, we may want to report reserved op exceptions as + * illegal instructions. + */ + + if (!Is_tbit_set()) { + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); + return SIGNALCODE(SIGILL, ILL_COPROC); + } + + /* + * Is a coprocessor op. + * + * Now we need to determine what type of exception occurred. + */ + for (exception_index=1; exception_index<=MAX_EXCP_REG; exception_index++) { + current_ir = Excp_instr(exception_index); + /* + * On PA89: there are 5 different unimplemented exception + * codes: 0x1, 0x9, 0xb, 0x3, and 0x23. PA-RISC 2.0 adds + * another, 0x2b. Only these have the low order bit set. + */ + excptype = Excp_type(exception_index); + if (excptype & UNIMPLEMENTEDEXCEPTION) { + /* + * Clear T-bit and exception register so that + * we can tell if a trap really occurs while + * emulating the instruction. + */ + Clear_tbit(); + Clear_excp_register(exception_index); + /* + * Now emulate this instruction. If a trap occurs, + * fpudispatch will return a non-zero number + */ + excp = fpudispatch(current_ir,excptype,0,Fpu_register); + /* accumulate the status flags, don't lose them as in hpux */ + if (excp) { + /* + * We now need to make sure that the T-bit and the + * exception register contain the correct values + * before continuing. + */ + /* + * Set t-bit since it might still be needed for a + * subsequent real trap (I don't understand fully -PB) + */ + Set_tbit(); + /* some of the following code uses + * Excp_type(exception_index) so fix that up */ + Set_exceptiontype_and_instr_field(excp,current_ir, + Fpu_register[exception_index]); + if (excp == UNIMPLEMENTEDEXCEPTION) { + /* + * it is really unimplemented, so restore the + * TIMEX extended unimplemented exception code + */ + excp = excptype; + update_trap_counts(Fpu_register, aflags, bflags, + trap_counts); + return SIGNALCODE(SIGILL, ILL_COPROC); + } + /* some of the following code uses excptype, so + * fix that up too */ + excptype = excp; + } + /* handle exceptions other than the real UNIMPLIMENTED the + * same way as if the hardware had caused them */ + if (excp == NOEXCEPTION) + /* For now use 'break', should technically be 'continue' */ + break; + } + + /* + * In PA89, the underflow exception has been extended to encode + * additional information. The exception looks like pp01x0, + * where x is 1 if inexact and pp represent the inexact bit (I) + * and the round away bit (RA) + */ + if (excptype & UNDERFLOWEXCEPTION) { + /* check for underflow trap enabled */ + if (Is_underflowtrap_enabled()) { + update_trap_counts(Fpu_register, aflags, bflags, + trap_counts); + return SIGNALCODE(SIGFPE, FPE_FLTUND); + } else { + /* + * Isn't a real trap; we need to + * return the default value. + */ + target = current_ir & fivebits; +#ifndef lint + if (Ibit(Fpu_register[exception_index])) inexact = TRUE; + else inexact = FALSE; +#endif + switch (Excp_format()) { + case SGL: + /* + * If ra (round-away) is set, will + * want to undo the rounding done + * by the hardware. + */ + if (Rabit(Fpu_register[exception_index])) + Sgl_decrement(Fpu_sgl(target)); + + /* now denormalize */ + sgl_denormalize(&Fpu_sgl(target),&inexact,Rounding_mode()); + break; + case DBL: + /* + * If ra (round-away) is set, will + * want to undo the rounding done + * by the hardware. + */ + if (Rabit(Fpu_register[exception_index])) + Dbl_decrement(Fpu_dblp1(target),Fpu_dblp2(target)); + + /* now denormalize */ + dbl_denormalize(&Fpu_dblp1(target),&Fpu_dblp2(target), + &inexact,Rounding_mode()); + break; + } + if (inexact) Set_underflowflag(); + /* + * Underflow can generate an inexact + * exception. If inexact trap is enabled, + * want to do an inexact trap, otherwise + * set inexact flag. + */ + if (inexact && Is_inexacttrap_enabled()) { + /* + * Set exception field of exception register + * to inexact, parm field to zero. + * Underflow bit should be cleared. + */ + Set_exceptiontype(Fpu_register[exception_index], + INEXACTEXCEPTION); + Set_parmfield(Fpu_register[exception_index],0); + update_trap_counts(Fpu_register, aflags, bflags, + trap_counts); + return SIGNALCODE(SIGFPE, FPE_FLTRES); + } + else { + /* + * Exception register needs to be cleared. + * Inexact flag needs to be set if inexact. + */ + Clear_excp_register(exception_index); + if (inexact) Set_inexactflag(); + } + } + continue; + } + switch(Excp_type(exception_index)) { + case OVERFLOWEXCEPTION: + case OVERFLOWEXCEPTION | INEXACTEXCEPTION: + /* check for overflow trap enabled */ + update_trap_counts(Fpu_register, aflags, bflags, + trap_counts); + if (Is_overflowtrap_enabled()) { + update_trap_counts(Fpu_register, aflags, bflags, + trap_counts); + return SIGNALCODE(SIGFPE, FPE_FLTOVF); + } else { + /* + * Isn't a real trap; we need to + * return the default value. + */ + target = current_ir & fivebits; + switch (Excp_format()) { + case SGL: + Sgl_setoverflow(Fpu_sgl(target)); + break; + case DBL: + Dbl_setoverflow(Fpu_dblp1(target),Fpu_dblp2(target)); + break; + } + Set_overflowflag(); + /* + * Overflow always generates an inexact + * exception. If inexact trap is enabled, + * want to do an inexact trap, otherwise + * set inexact flag. + */ + if (Is_inexacttrap_enabled()) { + /* + * Set exception field of exception + * register to inexact. Overflow + * bit should be cleared. + */ + Set_exceptiontype(Fpu_register[exception_index], + INEXACTEXCEPTION); + update_trap_counts(Fpu_register, aflags, bflags, + trap_counts); + return SIGNALCODE(SIGFPE, FPE_FLTRES); + } + else { + /* + * Exception register needs to be cleared. + * Inexact flag needs to be set. + */ + Clear_excp_register(exception_index); + Set_inexactflag(); + } + } + break; + case INVALIDEXCEPTION: + case OPC_2E_INVALIDEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); + return SIGNALCODE(SIGFPE, FPE_FLTINV); + case DIVISIONBYZEROEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); + Clear_excp_register(exception_index); + return SIGNALCODE(SIGFPE, FPE_FLTDIV); + case INEXACTEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); + return SIGNALCODE(SIGFPE, FPE_FLTRES); + default: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); + printk("%s(%d) Unknown FPU exception 0x%x\n", __FILE__, + __LINE__, Excp_type(exception_index)); + return SIGNALCODE(SIGILL, ILL_COPROC); + case NOEXCEPTION: /* no exception */ + /* + * Clear exception register in case + * other fields are non-zero. + */ + Clear_excp_register(exception_index); + break; + } + } + /* + * No real exceptions occurred. + */ + Clear_tbit(); + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); + return(NOTRAP); +} diff --git a/kernel/arch/parisc/math-emu/denormal.c b/kernel/arch/parisc/math-emu/denormal.c new file mode 100644 index 000000000..60687e13c --- /dev/null +++ b/kernel/arch/parisc/math-emu/denormal.c @@ -0,0 +1,135 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/fp/denormal.c $ Revision: $ + * + * Purpose: + * <<please update with a synopsis of the functionality provided by this file>> + * + * External Interfaces: + * <<the following list was autogenerated, please review>> + * dbl_denormalize(dbl_opndp1,dbl_opndp2,inexactflag,rmode) + * sgl_denormalize(sgl_opnd,inexactflag,rmode) + * + * Internal Interfaces: + * <<please update>> + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "hppa.h" +#include <linux/kernel.h> +/* #include <machine/sys/mdep_private.h> */ + +#undef Fpustatus_register +#define Fpustatus_register Fpu_register[0] + +void +sgl_denormalize(unsigned int *sgl_opnd, boolean *inexactflag, int rmode) +{ + unsigned int opnd; + int sign, exponent; + boolean guardbit = FALSE, stickybit, inexact; + + opnd = *sgl_opnd; + stickybit = *inexactflag; + exponent = Sgl_exponent(opnd) - SGL_WRAP; + sign = Sgl_sign(opnd); + Sgl_denormalize(opnd,exponent,guardbit,stickybit,inexact); + if (inexact) { + switch (rmode) { + case ROUNDPLUS: + if (sign == 0) { + Sgl_increment(opnd); + } + break; + case ROUNDMINUS: + if (sign != 0) { + Sgl_increment(opnd); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Sgl_isone_lowmantissa(opnd))) { + Sgl_increment(opnd); + } + break; + } + } + Sgl_set_sign(opnd,sign); + *sgl_opnd = opnd; + *inexactflag = inexact; + return; +} + +void +dbl_denormalize(unsigned int *dbl_opndp1, + unsigned int * dbl_opndp2, + boolean *inexactflag, + int rmode) +{ + unsigned int opndp1, opndp2; + int sign, exponent; + boolean guardbit = FALSE, stickybit, inexact; + + opndp1 = *dbl_opndp1; + opndp2 = *dbl_opndp2; + stickybit = *inexactflag; + exponent = Dbl_exponent(opndp1) - DBL_WRAP; + sign = Dbl_sign(opndp1); + Dbl_denormalize(opndp1,opndp2,exponent,guardbit,stickybit,inexact); + if (inexact) { + switch (rmode) { + case ROUNDPLUS: + if (sign == 0) { + Dbl_increment(opndp1,opndp2); + } + break; + case ROUNDMINUS: + if (sign != 0) { + Dbl_increment(opndp1,opndp2); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Dbl_isone_lowmantissap2(opndp2))) { + Dbl_increment(opndp1,opndp2); + } + break; + } + } + Dbl_set_sign(opndp1,sign); + *dbl_opndp1 = opndp1; + *dbl_opndp2 = opndp2; + *inexactflag = inexact; + return; +} diff --git a/kernel/arch/parisc/math-emu/dfadd.c b/kernel/arch/parisc/math-emu/dfadd.c new file mode 100644 index 000000000..d37e2d2cb --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfadd.c @@ -0,0 +1,524 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfadd.c $Revision: 1.1 $ + * + * Purpose: + * Double_add: add two double precision values. + * + * External Interfaces: + * dbl_fadd(leftptr, rightptr, dstptr, status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "dbl_float.h" + +/* + * Double_add: add two double precision values. + */ +dbl_fadd( + dbl_floating_point *leftptr, + dbl_floating_point *rightptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int signless_upper_left, signless_upper_right, save; + register unsigned int leftp1, leftp2, rightp1, rightp2, extent; + register unsigned int resultp1 = 0, resultp2 = 0; + + register int result_exponent, right_exponent, diff_exponent; + register int sign_save, jumpsize; + register boolean inexact = FALSE; + register boolean underflowtrap; + + /* Create local copies of the numbers */ + Dbl_copyfromptr(leftptr,leftp1,leftp2); + Dbl_copyfromptr(rightptr,rightp1,rightp2); + + /* A zero "save" helps discover equal operands (for later), * + * and is used in swapping operands (if needed). */ + Dbl_xortointp1(leftp1,rightp1,/*to*/save); + + /* + * check first operand for NaN's or infinity + */ + if ((result_exponent = Dbl_exponent(leftp1)) == DBL_INFINITY_EXPONENT) + { + if (Dbl_iszero_mantissa(leftp1,leftp2)) + { + if (Dbl_isnotnan(rightp1,rightp2)) + { + if (Dbl_isinfinity(rightp1,rightp2) && save!=0) + { + /* + * invalid since operands are opposite signed infinity's + */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * return infinity + */ + Dbl_copytoptr(leftp1,leftp2,dstptr); + return(NOEXCEPTION); + } + } + else + { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(leftp1)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(leftp1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(rightp1)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(rightp1); + Dbl_copytoptr(rightp1,rightp2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(leftp1,leftp2,dstptr); + return(NOEXCEPTION); + } + } /* End left NaN or Infinity processing */ + /* + * check second operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(rightp1)) + { + if (Dbl_iszero_mantissa(rightp1,rightp2)) + { + /* return infinity */ + Dbl_copytoptr(rightp1,rightp2,dstptr); + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(rightp1)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(rightp1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(rightp1,rightp2,dstptr); + return(NOEXCEPTION); + } /* End right NaN or Infinity processing */ + + /* Invariant: Must be dealing with finite numbers */ + + /* Compare operands by removing the sign */ + Dbl_copytoint_exponentmantissap1(leftp1,signless_upper_left); + Dbl_copytoint_exponentmantissap1(rightp1,signless_upper_right); + + /* sign difference selects add or sub operation. */ + if(Dbl_ismagnitudeless(leftp2,rightp2,signless_upper_left,signless_upper_right)) + { + /* Set the left operand to the larger one by XOR swap * + * First finish the first word using "save" */ + Dbl_xorfromintp1(save,rightp1,/*to*/rightp1); + Dbl_xorfromintp1(save,leftp1,/*to*/leftp1); + Dbl_swap_lower(leftp2,rightp2); + result_exponent = Dbl_exponent(leftp1); + } + /* Invariant: left is not smaller than right. */ + + if((right_exponent = Dbl_exponent(rightp1)) == 0) + { + /* Denormalized operands. First look for zeroes */ + if(Dbl_iszero_mantissa(rightp1,rightp2)) + { + /* right is zero */ + if(Dbl_iszero_exponentmantissa(leftp1,leftp2)) + { + /* Both operands are zeros */ + if(Is_rounding_mode(ROUNDMINUS)) + { + Dbl_or_signs(leftp1,/*with*/rightp1); + } + else + { + Dbl_and_signs(leftp1,/*with*/rightp1); + } + } + else + { + /* Left is not a zero and must be the result. Trapped + * underflows are signaled if left is denormalized. Result + * is always exact. */ + if( (result_exponent == 0) && Is_underflowtrap_enabled() ) + { + /* need to normalize results mantissa */ + sign_save = Dbl_signextendedsign(leftp1); + Dbl_leftshiftby1(leftp1,leftp2); + Dbl_normalize(leftp1,leftp2,result_exponent); + Dbl_set_sign(leftp1,/*using*/sign_save); + Dbl_setwrapped_exponent(leftp1,result_exponent,unfl); + Dbl_copytoptr(leftp1,leftp2,dstptr); + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + } + Dbl_copytoptr(leftp1,leftp2,dstptr); + return(NOEXCEPTION); + } + + /* Neither are zeroes */ + Dbl_clear_sign(rightp1); /* Exponent is already cleared */ + if(result_exponent == 0 ) + { + /* Both operands are denormalized. The result must be exact + * and is simply calculated. A sum could become normalized and a + * difference could cancel to a true zero. */ + if( (/*signed*/int) save < 0 ) + { + Dbl_subtract(leftp1,leftp2,/*minus*/rightp1,rightp2, + /*into*/resultp1,resultp2); + if(Dbl_iszero_mantissa(resultp1,resultp2)) + { + if(Is_rounding_mode(ROUNDMINUS)) + { + Dbl_setone_sign(resultp1); + } + else + { + Dbl_setzero_sign(resultp1); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else + { + Dbl_addition(leftp1,leftp2,rightp1,rightp2, + /*into*/resultp1,resultp2); + if(Dbl_isone_hidden(resultp1)) + { + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + if(Is_underflowtrap_enabled()) + { + /* need to normalize result */ + sign_save = Dbl_signextendedsign(resultp1); + Dbl_leftshiftby1(resultp1,resultp2); + Dbl_normalize(resultp1,resultp2,result_exponent); + Dbl_set_sign(resultp1,/*using*/sign_save); + Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + right_exponent = 1; /* Set exponent to reflect different bias + * with denomalized numbers. */ + } + else + { + Dbl_clear_signexponent_set_hidden(rightp1); + } + Dbl_clear_exponent_set_hidden(leftp1); + diff_exponent = result_exponent - right_exponent; + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for this + * infrequent case. + */ + if(diff_exponent > DBL_THRESHOLD) + { + diff_exponent = DBL_THRESHOLD; + } + + /* Align right operand by shifting to right */ + Dbl_right_align(/*operand*/rightp1,rightp2,/*shifted by*/diff_exponent, + /*and lower to*/extent); + + /* Treat sum and difference of the operands separately. */ + if( (/*signed*/int) save < 0 ) + { + /* + * Difference of the two operands. Their can be no overflow. A + * borrow can occur out of the hidden bit and force a post + * normalization phase. + */ + Dbl_subtract_withextension(leftp1,leftp2,/*minus*/rightp1,rightp2, + /*with*/extent,/*into*/resultp1,resultp2); + if(Dbl_iszero_hidden(resultp1)) + { + /* Handle normalization */ + /* A straight forward algorithm would now shift the result + * and extension left until the hidden bit becomes one. Not + * all of the extension bits need participate in the shift. + * Only the two most significant bits (round and guard) are + * needed. If only a single shift is needed then the guard + * bit becomes a significant low order bit and the extension + * must participate in the rounding. If more than a single + * shift is needed, then all bits to the right of the guard + * bit are zeros, and the guard bit may or may not be zero. */ + sign_save = Dbl_signextendedsign(resultp1); + Dbl_leftshiftby1_withextent(resultp1,resultp2,extent,resultp1,resultp2); + + /* Need to check for a zero result. The sign and exponent + * fields have already been zeroed. The more efficient test + * of the full object can be used. + */ + if(Dbl_iszero(resultp1,resultp2)) + /* Must have been "x-x" or "x+(-x)". */ + { + if(Is_rounding_mode(ROUNDMINUS)) Dbl_setone_sign(resultp1); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + result_exponent--; + /* Look to see if normalization is finished. */ + if(Dbl_isone_hidden(resultp1)) + { + if(result_exponent==0) + { + /* Denormalized, exponent should be zero. Left operand * + * was normalized, so extent (guard, round) was zero */ + goto underflow; + } + else + { + /* No further normalization is needed. */ + Dbl_set_sign(resultp1,/*using*/sign_save); + Ext_leftshiftby1(extent); + goto round; + } + } + + /* Check for denormalized, exponent should be zero. Left * + * operand was normalized, so extent (guard, round) was zero */ + if(!(underflowtrap = Is_underflowtrap_enabled()) && + result_exponent==0) goto underflow; + + /* Shift extension to complete one bit of normalization and + * update exponent. */ + Ext_leftshiftby1(extent); + + /* Discover first one bit to determine shift amount. Use a + * modified binary search. We have already shifted the result + * one position right and still not found a one so the remainder + * of the extension must be zero and simplifies rounding. */ + /* Scan bytes */ + while(Dbl_iszero_hiddenhigh7mantissa(resultp1)) + { + Dbl_leftshiftby8(resultp1,resultp2); + if((result_exponent -= 8) <= 0 && !underflowtrap) + goto underflow; + } + /* Now narrow it down to the nibble */ + if(Dbl_iszero_hiddenhigh3mantissa(resultp1)) + { + /* The lower nibble contains the normalizing one */ + Dbl_leftshiftby4(resultp1,resultp2); + if((result_exponent -= 4) <= 0 && !underflowtrap) + goto underflow; + } + /* Select case were first bit is set (already normalized) + * otherwise select the proper shift. */ + if((jumpsize = Dbl_hiddenhigh3mantissa(resultp1)) > 7) + { + /* Already normalized */ + if(result_exponent <= 0) goto underflow; + Dbl_set_sign(resultp1,/*using*/sign_save); + Dbl_set_exponent(resultp1,/*using*/result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Dbl_sethigh4bits(resultp1,/*using*/sign_save); + switch(jumpsize) + { + case 1: + { + Dbl_leftshiftby3(resultp1,resultp2); + result_exponent -= 3; + break; + } + case 2: + case 3: + { + Dbl_leftshiftby2(resultp1,resultp2); + result_exponent -= 2; + break; + } + case 4: + case 5: + case 6: + case 7: + { + Dbl_leftshiftby1(resultp1,resultp2); + result_exponent -= 1; + break; + } + } + if(result_exponent > 0) + { + Dbl_set_exponent(resultp1,/*using*/result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); /* Sign bit is already set */ + } + /* Fixup potential underflows */ + underflow: + if(Is_underflowtrap_enabled()) + { + Dbl_set_sign(resultp1,sign_save); + Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + /* + * Since we cannot get an inexact denormalized result, + * we can now return. + */ + Dbl_fix_overshift(resultp1,resultp2,(1-result_exponent),extent); + Dbl_clear_signexponent(resultp1); + Dbl_set_sign(resultp1,sign_save); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } /* end if(hidden...)... */ + /* Fall through and round */ + } /* end if(save < 0)... */ + else + { + /* Add magnitudes */ + Dbl_addition(leftp1,leftp2,rightp1,rightp2,/*to*/resultp1,resultp2); + if(Dbl_isone_hiddenoverflow(resultp1)) + { + /* Prenormalization required. */ + Dbl_rightshiftby1_withextent(resultp2,extent,extent); + Dbl_arithrightshiftby1(resultp1,resultp2); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...add magnitudes... */ + + /* Round the result. If the extension is all zeros,then the result is + * exact. Otherwise round in the correct direction. No underflow is + * possible. If a postnormalization is necessary, then the mantissa is + * all zeros so no shift is needed. */ + round: + if(Ext_isnotzero(extent)) + { + inexact = TRUE; + switch(Rounding_mode()) + { + case ROUNDNEAREST: /* The default. */ + if(Ext_isone_sign(extent)) + { + /* at least 1/2 ulp */ + if(Ext_isnotzero_lower(extent) || + Dbl_isone_lowmantissap2(resultp2)) + { + /* either exactly half way and odd or more than 1/2ulp */ + Dbl_increment(resultp1,resultp2); + } + } + break; + + case ROUNDPLUS: + if(Dbl_iszero_sign(resultp1)) + { + /* Round up positive results */ + Dbl_increment(resultp1,resultp2); + } + break; + + case ROUNDMINUS: + if(Dbl_isone_sign(resultp1)) + { + /* Round down negative results */ + Dbl_increment(resultp1,resultp2); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if(Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; + } + if(result_exponent == DBL_INFINITY_EXPONENT) + { + /* Overflow */ + if(Is_overflowtrap_enabled()) + { + Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + else + { + inexact = TRUE; + Set_overflowflag(); + Dbl_setoverflow(resultp1,resultp2); + } + } + else Dbl_set_exponent(resultp1,result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if(inexact) + if(Is_inexacttrap_enabled()) + return(INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/dfcmp.c b/kernel/arch/parisc/math-emu/dfcmp.c new file mode 100644 index 000000000..59521267f --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfcmp.c @@ -0,0 +1,181 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfcmp.c $Revision: 1.1 $ + * + * Purpose: + * dbl_cmp: compare two values + * + * External Interfaces: + * dbl_fcmp(leftptr, rightptr, cond, status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + + +#include "float.h" +#include "dbl_float.h" + +/* + * dbl_cmp: compare two values + */ +int +dbl_fcmp (dbl_floating_point * leftptr, dbl_floating_point * rightptr, + unsigned int cond, unsigned int *status) + + /* The predicate to be tested */ + + { + register unsigned int leftp1, leftp2, rightp1, rightp2; + register int xorresult; + + /* Create local copies of the numbers */ + Dbl_copyfromptr(leftptr,leftp1,leftp2); + Dbl_copyfromptr(rightptr,rightp1,rightp2); + /* + * Test for NaN + */ + if( (Dbl_exponent(leftp1) == DBL_INFINITY_EXPONENT) + || (Dbl_exponent(rightp1) == DBL_INFINITY_EXPONENT) ) + { + /* Check if a NaN is involved. Signal an invalid exception when + * comparing a signaling NaN or when comparing quiet NaNs and the + * low bit of the condition is set */ + if( ((Dbl_exponent(leftp1) == DBL_INFINITY_EXPONENT) + && Dbl_isnotzero_mantissa(leftp1,leftp2) + && (Exception(cond) || Dbl_isone_signaling(leftp1))) + || + ((Dbl_exponent(rightp1) == DBL_INFINITY_EXPONENT) + && Dbl_isnotzero_mantissa(rightp1,rightp2) + && (Exception(cond) || Dbl_isone_signaling(rightp1))) ) + { + if( Is_invalidtrap_enabled() ) { + Set_status_cbit(Unordered(cond)); + return(INVALIDEXCEPTION); + } + else Set_invalidflag(); + Set_status_cbit(Unordered(cond)); + return(NOEXCEPTION); + } + /* All the exceptional conditions are handled, now special case + NaN compares */ + else if( ((Dbl_exponent(leftp1) == DBL_INFINITY_EXPONENT) + && Dbl_isnotzero_mantissa(leftp1,leftp2)) + || + ((Dbl_exponent(rightp1) == DBL_INFINITY_EXPONENT) + && Dbl_isnotzero_mantissa(rightp1,rightp2)) ) + { + /* NaNs always compare unordered. */ + Set_status_cbit(Unordered(cond)); + return(NOEXCEPTION); + } + /* infinities will drop down to the normal compare mechanisms */ + } + /* First compare for unequal signs => less or greater or + * special equal case */ + Dbl_xortointp1(leftp1,rightp1,xorresult); + if( xorresult < 0 ) + { + /* left negative => less, left positive => greater. + * equal is possible if both operands are zeros. */ + if( Dbl_iszero_exponentmantissa(leftp1,leftp2) + && Dbl_iszero_exponentmantissa(rightp1,rightp2) ) + { + Set_status_cbit(Equal(cond)); + } + else if( Dbl_isone_sign(leftp1) ) + { + Set_status_cbit(Lessthan(cond)); + } + else + { + Set_status_cbit(Greaterthan(cond)); + } + } + /* Signs are the same. Treat negative numbers separately + * from the positives because of the reversed sense. */ + else if(Dbl_isequal(leftp1,leftp2,rightp1,rightp2)) + { + Set_status_cbit(Equal(cond)); + } + else if( Dbl_iszero_sign(leftp1) ) + { + /* Positive compare */ + if( Dbl_allp1(leftp1) < Dbl_allp1(rightp1) ) + { + Set_status_cbit(Lessthan(cond)); + } + else if( Dbl_allp1(leftp1) > Dbl_allp1(rightp1) ) + { + Set_status_cbit(Greaterthan(cond)); + } + else + { + /* Equal first parts. Now we must use unsigned compares to + * resolve the two possibilities. */ + if( Dbl_allp2(leftp2) < Dbl_allp2(rightp2) ) + { + Set_status_cbit(Lessthan(cond)); + } + else + { + Set_status_cbit(Greaterthan(cond)); + } + } + } + else + { + /* Negative compare. Signed or unsigned compares + * both work the same. That distinction is only + * important when the sign bits differ. */ + if( Dbl_allp1(leftp1) > Dbl_allp1(rightp1) ) + { + Set_status_cbit(Lessthan(cond)); + } + else if( Dbl_allp1(leftp1) < Dbl_allp1(rightp1) ) + { + Set_status_cbit(Greaterthan(cond)); + } + else + { + /* Equal first parts. Now we must use unsigned compares to + * resolve the two possibilities. */ + if( Dbl_allp2(leftp2) > Dbl_allp2(rightp2) ) + { + Set_status_cbit(Lessthan(cond)); + } + else + { + Set_status_cbit(Greaterthan(cond)); + } + } + } + return(NOEXCEPTION); + } diff --git a/kernel/arch/parisc/math-emu/dfdiv.c b/kernel/arch/parisc/math-emu/dfdiv.c new file mode 100644 index 000000000..d7d4bec0e --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfdiv.c @@ -0,0 +1,400 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfdiv.c $Revision: 1.1 $ + * + * Purpose: + * Double Precision Floating-point Divide + * + * External Interfaces: + * dbl_fdiv(srcptr1,srcptr2,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "dbl_float.h" + +/* + * Double Precision Floating-point Divide + */ + +int +dbl_fdiv (dbl_floating_point * srcptr1, dbl_floating_point * srcptr2, + dbl_floating_point * dstptr, unsigned int *status) +{ + register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2; + register unsigned int opnd3p1, opnd3p2, resultp1, resultp2; + register int dest_exponent, count; + register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE; + boolean is_tiny; + + Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2); + Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2); + /* + * set sign bit of result + */ + if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) + Dbl_setnegativezerop1(resultp1); + else Dbl_setzerop1(resultp1); + /* + * check first operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd1p1)) { + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + if (Dbl_isnotnan(opnd2p1,opnd2p2)) { + if (Dbl_isinfinity(opnd2p1,opnd2p2)) { + /* + * invalid since both operands + * are infinity + */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd1p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd1p1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + } + /* + * check second operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd2p1)) { + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + /* + * return zero + */ + Dbl_setzero_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * check for division by zero + */ + if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { + if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { + /* invalid since both operands are zero */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + if (Is_divisionbyzerotrap_enabled()) + return(DIVISIONBYZEROEXCEPTION); + Set_divisionbyzeroflag(); + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate exponent + */ + dest_exponent = Dbl_exponent(opnd1p1) - Dbl_exponent(opnd2p1) + DBL_BIAS; + + /* + * Generate mantissa + */ + if (Dbl_isnotzero_exponent(opnd1p1)) { + /* set hidden bit */ + Dbl_clear_signexponent_set_hidden(opnd1p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + Dbl_setzero_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized, want to normalize */ + Dbl_clear_signexponent(opnd1p1); + Dbl_leftshiftby1(opnd1p1,opnd1p2); + Dbl_normalize(opnd1p1,opnd1p2,dest_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Dbl_isnotzero_exponent(opnd2p1)) { + Dbl_clear_signexponent_set_hidden(opnd2p1); + } + else { + /* is denormalized; want to normalize */ + Dbl_clear_signexponent(opnd2p1); + Dbl_leftshiftby1(opnd2p1,opnd2p2); + while (Dbl_iszero_hiddenhigh7mantissa(opnd2p1)) { + dest_exponent+=8; + Dbl_leftshiftby8(opnd2p1,opnd2p2); + } + if (Dbl_iszero_hiddenhigh3mantissa(opnd2p1)) { + dest_exponent+=4; + Dbl_leftshiftby4(opnd2p1,opnd2p2); + } + while (Dbl_iszero_hidden(opnd2p1)) { + dest_exponent++; + Dbl_leftshiftby1(opnd2p1,opnd2p2); + } + } + + /* Divide the source mantissas */ + + /* + * A non-restoring divide algorithm is used. + */ + Twoword_subtract(opnd1p1,opnd1p2,opnd2p1,opnd2p2); + Dbl_setzero(opnd3p1,opnd3p2); + for (count=1; count <= DBL_P && (opnd1p1 || opnd1p2); count++) { + Dbl_leftshiftby1(opnd1p1,opnd1p2); + Dbl_leftshiftby1(opnd3p1,opnd3p2); + if (Dbl_iszero_sign(opnd1p1)) { + Dbl_setone_lowmantissap2(opnd3p2); + Twoword_subtract(opnd1p1,opnd1p2,opnd2p1,opnd2p2); + } + else { + Twoword_add(opnd1p1, opnd1p2, opnd2p1, opnd2p2); + } + } + if (count <= DBL_P) { + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_setone_lowmantissap2(opnd3p2); + Dbl_leftshift(opnd3p1,opnd3p2,(DBL_P-count)); + if (Dbl_iszero_hidden(opnd3p1)) { + Dbl_leftshiftby1(opnd3p1,opnd3p2); + dest_exponent--; + } + } + else { + if (Dbl_iszero_hidden(opnd3p1)) { + /* need to get one more bit of result */ + Dbl_leftshiftby1(opnd1p1,opnd1p2); + Dbl_leftshiftby1(opnd3p1,opnd3p2); + if (Dbl_iszero_sign(opnd1p1)) { + Dbl_setone_lowmantissap2(opnd3p2); + Twoword_subtract(opnd1p1,opnd1p2,opnd2p1,opnd2p2); + } + else { + Twoword_add(opnd1p1,opnd1p2,opnd2p1,opnd2p2); + } + dest_exponent--; + } + if (Dbl_iszero_sign(opnd1p1)) guardbit = TRUE; + stickybit = Dbl_allp1(opnd1p1) || Dbl_allp2(opnd1p2); + } + inexact = guardbit | stickybit; + + /* + * round result + */ + if (inexact && (dest_exponent > 0 || Is_underflowtrap_enabled())) { + Dbl_clear_signexponent(opnd3p1); + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) + Dbl_increment(opnd3p1,opnd3p2); + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) + Dbl_increment(opnd3p1,opnd3p2); + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Dbl_isone_lowmantissap2(opnd3p2))) { + Dbl_increment(opnd3p1,opnd3p2); + } + } + if (Dbl_isone_hidden(opnd3p1)) dest_exponent++; + } + Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2); + + /* + * Test for overflow + */ + if (dest_exponent >= DBL_INFINITY_EXPONENT) { + /* trap if OVERFLOWTRAP enabled */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + Set_overflowflag(); + /* set result to infinity or largest number */ + Dbl_setoverflow(resultp1,resultp2); + inexact = TRUE; + } + /* + * Test for underflow + */ + else if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return(UNDERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(UNDERFLOWEXCEPTION); + } + + /* Determine if should set underflow flag */ + is_tiny = TRUE; + if (dest_exponent == 0 && inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + if (Dbl_isone_hiddenoverflow(opnd3p1)) + is_tiny = FALSE; + Dbl_decrement(opnd3p1,opnd3p2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + if (Dbl_isone_hiddenoverflow(opnd3p1)) + is_tiny = FALSE; + Dbl_decrement(opnd3p1,opnd3p2); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Dbl_isone_lowmantissap2(opnd3p2))) { + Dbl_increment(opnd3p1,opnd3p2); + if (Dbl_isone_hiddenoverflow(opnd3p1)) + is_tiny = FALSE; + Dbl_decrement(opnd3p1,opnd3p2); + } + break; + } + } + + /* + * denormalize result or set to signed zero + */ + stickybit = inexact; + Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit, + stickybit,inexact); + + /* return rounded number */ + if (inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Dbl_isone_lowmantissap2(opnd3p2))) { + Dbl_increment(opnd3p1,opnd3p2); + } + break; + } + if (is_tiny) Set_underflowflag(); + } + Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2); + } + else Dbl_set_exponent(resultp1,dest_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/dfmpy.c b/kernel/arch/parisc/math-emu/dfmpy.c new file mode 100644 index 000000000..4380f5a62 --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfmpy.c @@ -0,0 +1,394 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfmpy.c $Revision: 1.1 $ + * + * Purpose: + * Double Precision Floating-point Multiply + * + * External Interfaces: + * dbl_fmpy(srcptr1,srcptr2,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "dbl_float.h" + +/* + * Double Precision Floating-point Multiply + */ + +int +dbl_fmpy( + dbl_floating_point *srcptr1, + dbl_floating_point *srcptr2, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2; + register unsigned int opnd3p1, opnd3p2, resultp1, resultp2; + register int dest_exponent, count; + register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE; + boolean is_tiny; + + Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2); + Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2); + + /* + * set sign bit of result + */ + if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) + Dbl_setnegativezerop1(resultp1); + else Dbl_setzerop1(resultp1); + /* + * check first operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd1p1)) { + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + if (Dbl_isnotnan(opnd2p1,opnd2p2)) { + if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { + /* + * invalid since operands are infinity + * and zero + */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd1p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd1p1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + } + /* + * check second operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd2p1)) { + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { + /* invalid since operands are zero & infinity */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(opnd2p1,opnd2p2); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate exponent + */ + dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS; + + /* + * Generate mantissa + */ + if (Dbl_isnotzero_exponent(opnd1p1)) { + /* set hidden bit */ + Dbl_clear_signexponent_set_hidden(opnd1p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + Dbl_setzero_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized, adjust exponent */ + Dbl_clear_signexponent(opnd1p1); + Dbl_leftshiftby1(opnd1p1,opnd1p2); + Dbl_normalize(opnd1p1,opnd1p2,dest_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Dbl_isnotzero_exponent(opnd2p1)) { + Dbl_clear_signexponent_set_hidden(opnd2p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + Dbl_setzero_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Dbl_clear_signexponent(opnd2p1); + Dbl_leftshiftby1(opnd2p1,opnd2p2); + Dbl_normalize(opnd2p1,opnd2p2,dest_exponent); + } + + /* Multiply two source mantissas together */ + + /* make room for guard bits */ + Dbl_leftshiftby7(opnd2p1,opnd2p2); + Dbl_setzero(opnd3p1,opnd3p2); + /* + * Four bits at a time are inspected in each loop, and a + * simple shift and add multiply algorithm is used. + */ + for (count=1;count<=DBL_P;count+=4) { + stickybit |= Dlow4p2(opnd3p2); + Dbl_rightshiftby4(opnd3p1,opnd3p2); + if (Dbit28p2(opnd1p2)) { + /* Twoword_add should be an ADDC followed by an ADD. */ + Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29, + opnd2p2<<3); + } + if (Dbit29p2(opnd1p2)) { + Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30, + opnd2p2<<2); + } + if (Dbit30p2(opnd1p2)) { + Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31, + opnd2p2<<1); + } + if (Dbit31p2(opnd1p2)) { + Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2); + } + Dbl_rightshiftby4(opnd1p1,opnd1p2); + } + if (Dbit3p1(opnd3p1)==0) { + Dbl_leftshiftby1(opnd3p1,opnd3p2); + } + else { + /* result mantissa >= 2. */ + dest_exponent++; + } + /* check for denormalized result */ + while (Dbit3p1(opnd3p1)==0) { + Dbl_leftshiftby1(opnd3p1,opnd3p2); + dest_exponent--; + } + /* + * check for guard, sticky and inexact bits + */ + stickybit |= Dallp2(opnd3p2) << 25; + guardbit = (Dallp2(opnd3p2) << 24) >> 31; + inexact = guardbit | stickybit; + + /* align result mantissa */ + Dbl_rightshiftby8(opnd3p1,opnd3p2); + + /* + * round result + */ + if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) { + Dbl_clear_signexponent(opnd3p1); + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) + Dbl_increment(opnd3p1,opnd3p2); + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) + Dbl_increment(opnd3p1,opnd3p2); + break; + case ROUNDNEAREST: + if (guardbit) { + if (stickybit || Dbl_isone_lowmantissap2(opnd3p2)) + Dbl_increment(opnd3p1,opnd3p2); + } + } + if (Dbl_isone_hidden(opnd3p1)) dest_exponent++; + } + Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2); + + /* + * Test for overflow + */ + if (dest_exponent >= DBL_INFINITY_EXPONENT) { + /* trap if OVERFLOWTRAP enabled */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return (OVERFLOWEXCEPTION); + } + inexact = TRUE; + Set_overflowflag(); + /* set result to infinity or largest number */ + Dbl_setoverflow(resultp1,resultp2); + } + /* + * Test for underflow + */ + else if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return (UNDERFLOWEXCEPTION); + } + + /* Determine if should set underflow flag */ + is_tiny = TRUE; + if (dest_exponent == 0 && inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + if (Dbl_isone_hiddenoverflow(opnd3p1)) + is_tiny = FALSE; + Dbl_decrement(opnd3p1,opnd3p2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + if (Dbl_isone_hiddenoverflow(opnd3p1)) + is_tiny = FALSE; + Dbl_decrement(opnd3p1,opnd3p2); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Dbl_isone_lowmantissap2(opnd3p2))) { + Dbl_increment(opnd3p1,opnd3p2); + if (Dbl_isone_hiddenoverflow(opnd3p1)) + is_tiny = FALSE; + Dbl_decrement(opnd3p1,opnd3p2); + } + break; + } + } + + /* + * denormalize result or set to signed zero + */ + stickybit = inexact; + Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit, + stickybit,inexact); + + /* return zero or smallest number */ + if (inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + Dbl_increment(opnd3p1,opnd3p2); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Dbl_isone_lowmantissap2(opnd3p2))) { + Dbl_increment(opnd3p1,opnd3p2); + } + break; + } + if (is_tiny) Set_underflowflag(); + } + Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2); + } + else Dbl_set_exponent(resultp1,dest_exponent); + /* check for inexact */ + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/dfrem.c b/kernel/arch/parisc/math-emu/dfrem.c new file mode 100644 index 000000000..b98378534 --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfrem.c @@ -0,0 +1,297 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfrem.c $Revision: 1.1 $ + * + * Purpose: + * Double Precision Floating-point Remainder + * + * External Interfaces: + * dbl_frem(srcptr1,srcptr2,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + + +#include "float.h" +#include "dbl_float.h" + +/* + * Double Precision Floating-point Remainder + */ + +int +dbl_frem (dbl_floating_point * srcptr1, dbl_floating_point * srcptr2, + dbl_floating_point * dstptr, unsigned int *status) +{ + register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2; + register unsigned int resultp1, resultp2; + register int opnd1_exponent, opnd2_exponent, dest_exponent, stepcount; + register boolean roundup = FALSE; + + Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2); + Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2); + /* + * check first operand for NaN's or infinity + */ + if ((opnd1_exponent = Dbl_exponent(opnd1p1)) == DBL_INFINITY_EXPONENT) { + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + if (Dbl_isnotnan(opnd2p1,opnd2p2)) { + /* invalid since first operand is infinity */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd1p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd1p1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + } + /* + * check second operand for NaN's or infinity + */ + if ((opnd2_exponent = Dbl_exponent(opnd2p1)) == DBL_INFINITY_EXPONENT) { + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + /* + * return first operand + */ + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * check second operand for zero + */ + if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { + /* invalid since second operand is zero */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * get sign of result + */ + resultp1 = opnd1p1; + + /* + * check for denormalized operands + */ + if (opnd1_exponent == 0) { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + /* normalize, then continue */ + opnd1_exponent = 1; + Dbl_normalize(opnd1p1,opnd1p2,opnd1_exponent); + } + else { + Dbl_clear_signexponent_set_hidden(opnd1p1); + } + if (opnd2_exponent == 0) { + /* normalize, then continue */ + opnd2_exponent = 1; + Dbl_normalize(opnd2p1,opnd2p2,opnd2_exponent); + } + else { + Dbl_clear_signexponent_set_hidden(opnd2p1); + } + + /* find result exponent and divide step loop count */ + dest_exponent = opnd2_exponent - 1; + stepcount = opnd1_exponent - opnd2_exponent; + + /* + * check for opnd1/opnd2 < 1 + */ + if (stepcount < 0) { + /* + * check for opnd1/opnd2 > 1/2 + * + * In this case n will round to 1, so + * r = opnd1 - opnd2 + */ + if (stepcount == -1 && + Dbl_isgreaterthan(opnd1p1,opnd1p2,opnd2p1,opnd2p2)) { + /* set sign */ + Dbl_allp1(resultp1) = ~Dbl_allp1(resultp1); + /* align opnd2 with opnd1 */ + Dbl_leftshiftby1(opnd2p1,opnd2p2); + Dbl_subtract(opnd2p1,opnd2p2,opnd1p1,opnd1p2, + opnd2p1,opnd2p2); + /* now normalize */ + while (Dbl_iszero_hidden(opnd2p1)) { + Dbl_leftshiftby1(opnd2p1,opnd2p2); + dest_exponent--; + } + Dbl_set_exponentmantissa(resultp1,resultp2,opnd2p1,opnd2p2); + goto testforunderflow; + } + /* + * opnd1/opnd2 <= 1/2 + * + * In this case n will round to zero, so + * r = opnd1 + */ + Dbl_set_exponentmantissa(resultp1,resultp2,opnd1p1,opnd1p2); + dest_exponent = opnd1_exponent; + goto testforunderflow; + } + + /* + * Generate result + * + * Do iterative subtract until remainder is less than operand 2. + */ + while (stepcount-- > 0 && (Dbl_allp1(opnd1p1) || Dbl_allp2(opnd1p2))) { + if (Dbl_isnotlessthan(opnd1p1,opnd1p2,opnd2p1,opnd2p2)) { + Dbl_subtract(opnd1p1,opnd1p2,opnd2p1,opnd2p2,opnd1p1,opnd1p2); + } + Dbl_leftshiftby1(opnd1p1,opnd1p2); + } + /* + * Do last subtract, then determine which way to round if remainder + * is exactly 1/2 of opnd2 + */ + if (Dbl_isnotlessthan(opnd1p1,opnd1p2,opnd2p1,opnd2p2)) { + Dbl_subtract(opnd1p1,opnd1p2,opnd2p1,opnd2p2,opnd1p1,opnd1p2); + roundup = TRUE; + } + if (stepcount > 0 || Dbl_iszero(opnd1p1,opnd1p2)) { + /* division is exact, remainder is zero */ + Dbl_setzero_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * Check for cases where opnd1/opnd2 < n + * + * In this case the result's sign will be opposite that of + * opnd1. The mantissa also needs some correction. + */ + Dbl_leftshiftby1(opnd1p1,opnd1p2); + if (Dbl_isgreaterthan(opnd1p1,opnd1p2,opnd2p1,opnd2p2)) { + Dbl_invert_sign(resultp1); + Dbl_leftshiftby1(opnd2p1,opnd2p2); + Dbl_subtract(opnd2p1,opnd2p2,opnd1p1,opnd1p2,opnd1p1,opnd1p2); + } + /* check for remainder being exactly 1/2 of opnd2 */ + else if (Dbl_isequal(opnd1p1,opnd1p2,opnd2p1,opnd2p2) && roundup) { + Dbl_invert_sign(resultp1); + } + + /* normalize result's mantissa */ + while (Dbl_iszero_hidden(opnd1p1)) { + dest_exponent--; + Dbl_leftshiftby1(opnd1p1,opnd1p2); + } + Dbl_set_exponentmantissa(resultp1,resultp2,opnd1p1,opnd1p2); + + /* + * Test for underflow + */ + testforunderflow: + if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl); + /* frem is always exact */ + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(UNDERFLOWEXCEPTION); + } + /* + * denormalize result or set to signed zero + */ + if (dest_exponent >= (1 - DBL_P)) { + Dbl_rightshift_exponentmantissa(resultp1,resultp2, + 1-dest_exponent); + } + else { + Dbl_setzero_exponentmantissa(resultp1,resultp2); + } + } + else Dbl_set_exponent(resultp1,dest_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/dfsqrt.c b/kernel/arch/parisc/math-emu/dfsqrt.c new file mode 100644 index 000000000..9542c6d28 --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfsqrt.c @@ -0,0 +1,195 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfsqrt.c $Revision: 1.1 $ + * + * Purpose: + * Double Floating-point Square Root + * + * External Interfaces: + * dbl_fsqrt(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "dbl_float.h" + +/* + * Double Floating-point Square Root + */ + +/*ARGSUSED*/ +unsigned int +dbl_fsqrt( + dbl_floating_point *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int srcp1, srcp2, resultp1, resultp2; + register unsigned int newbitp1, newbitp2, sump1, sump2; + register int src_exponent; + register boolean guardbit = FALSE, even_exponent; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + /* + * check source operand for NaN or infinity + */ + if ((src_exponent = Dbl_exponent(srcp1)) == DBL_INFINITY_EXPONENT) { + /* + * is signaling NaN? + */ + if (Dbl_isone_signaling(srcp1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(srcp1); + } + /* + * Return quiet NaN or positive infinity. + * Fall through to negative test if negative infinity. + */ + if (Dbl_iszero_sign(srcp1) || + Dbl_isnotzero_mantissa(srcp1,srcp2)) { + Dbl_copytoptr(srcp1,srcp2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check for zero source operand + */ + if (Dbl_iszero_exponentmantissa(srcp1,srcp2)) { + Dbl_copytoptr(srcp1,srcp2,dstptr); + return(NOEXCEPTION); + } + + /* + * check for negative source operand + */ + if (Dbl_isone_sign(srcp1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_makequietnan(srcp1,srcp2); + Dbl_copytoptr(srcp1,srcp2,dstptr); + return(NOEXCEPTION); + } + + /* + * Generate result + */ + if (src_exponent > 0) { + even_exponent = Dbl_hidden(srcp1); + Dbl_clear_signexponent_set_hidden(srcp1); + } + else { + /* normalize operand */ + Dbl_clear_signexponent(srcp1); + src_exponent++; + Dbl_normalize(srcp1,srcp2,src_exponent); + even_exponent = src_exponent & 1; + } + if (even_exponent) { + /* exponent is even */ + /* Add comment here. Explain why odd exponent needs correction */ + Dbl_leftshiftby1(srcp1,srcp2); + } + /* + * Add comment here. Explain following algorithm. + * + * Trust me, it works. + * + */ + Dbl_setzero(resultp1,resultp2); + Dbl_allp1(newbitp1) = 1 << (DBL_P - 32); + Dbl_setzero_mantissap2(newbitp2); + while (Dbl_isnotzero(newbitp1,newbitp2) && Dbl_isnotzero(srcp1,srcp2)) { + Dbl_addition(resultp1,resultp2,newbitp1,newbitp2,sump1,sump2); + if(Dbl_isnotgreaterthan(sump1,sump2,srcp1,srcp2)) { + Dbl_leftshiftby1(newbitp1,newbitp2); + /* update result */ + Dbl_addition(resultp1,resultp2,newbitp1,newbitp2, + resultp1,resultp2); + Dbl_subtract(srcp1,srcp2,sump1,sump2,srcp1,srcp2); + Dbl_rightshiftby2(newbitp1,newbitp2); + } + else { + Dbl_rightshiftby1(newbitp1,newbitp2); + } + Dbl_leftshiftby1(srcp1,srcp2); + } + /* correct exponent for pre-shift */ + if (even_exponent) { + Dbl_rightshiftby1(resultp1,resultp2); + } + + /* check for inexact */ + if (Dbl_isnotzero(srcp1,srcp2)) { + if (!even_exponent && Dbl_islessthan(resultp1,resultp2,srcp1,srcp2)) { + Dbl_increment(resultp1,resultp2); + } + guardbit = Dbl_lowmantissap2(resultp2); + Dbl_rightshiftby1(resultp1,resultp2); + + /* now round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + Dbl_increment(resultp1,resultp2); + break; + case ROUNDNEAREST: + /* stickybit is always true, so guardbit + * is enough to determine rounding */ + if (guardbit) { + Dbl_increment(resultp1,resultp2); + } + break; + } + /* increment result exponent by 1 if mantissa overflowed */ + if (Dbl_isone_hiddenoverflow(resultp1)) src_exponent+=2; + + if (Is_inexacttrap_enabled()) { + Dbl_set_exponent(resultp1, + ((src_exponent-DBL_BIAS)>>1)+DBL_BIAS); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + else { + Dbl_rightshiftby1(resultp1,resultp2); + } + Dbl_set_exponent(resultp1,((src_exponent-DBL_BIAS)>>1)+DBL_BIAS); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/dfsub.c b/kernel/arch/parisc/math-emu/dfsub.c new file mode 100644 index 000000000..2e8b5a79b --- /dev/null +++ b/kernel/arch/parisc/math-emu/dfsub.c @@ -0,0 +1,526 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/dfsub.c $Revision: 1.1 $ + * + * Purpose: + * Double_subtract: subtract two double precision values. + * + * External Interfaces: + * dbl_fsub(leftptr, rightptr, dstptr, status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "dbl_float.h" + +/* + * Double_subtract: subtract two double precision values. + */ +int +dbl_fsub( + dbl_floating_point *leftptr, + dbl_floating_point *rightptr, + dbl_floating_point *dstptr, + unsigned int *status) + { + register unsigned int signless_upper_left, signless_upper_right, save; + register unsigned int leftp1, leftp2, rightp1, rightp2, extent; + register unsigned int resultp1 = 0, resultp2 = 0; + + register int result_exponent, right_exponent, diff_exponent; + register int sign_save, jumpsize; + register boolean inexact = FALSE, underflowtrap; + + /* Create local copies of the numbers */ + Dbl_copyfromptr(leftptr,leftp1,leftp2); + Dbl_copyfromptr(rightptr,rightp1,rightp2); + + /* A zero "save" helps discover equal operands (for later), * + * and is used in swapping operands (if needed). */ + Dbl_xortointp1(leftp1,rightp1,/*to*/save); + + /* + * check first operand for NaN's or infinity + */ + if ((result_exponent = Dbl_exponent(leftp1)) == DBL_INFINITY_EXPONENT) + { + if (Dbl_iszero_mantissa(leftp1,leftp2)) + { + if (Dbl_isnotnan(rightp1,rightp2)) + { + if (Dbl_isinfinity(rightp1,rightp2) && save==0) + { + /* + * invalid since operands are same signed infinity's + */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * return infinity + */ + Dbl_copytoptr(leftp1,leftp2,dstptr); + return(NOEXCEPTION); + } + } + else + { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(leftp1)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(leftp1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(rightp1)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(rightp1); + Dbl_copytoptr(rightp1,rightp2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(leftp1,leftp2,dstptr); + return(NOEXCEPTION); + } + } /* End left NaN or Infinity processing */ + /* + * check second operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(rightp1)) + { + if (Dbl_iszero_mantissa(rightp1,rightp2)) + { + /* return infinity */ + Dbl_invert_sign(rightp1); + Dbl_copytoptr(rightp1,rightp2,dstptr); + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(rightp1)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(rightp1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(rightp1,rightp2,dstptr); + return(NOEXCEPTION); + } /* End right NaN or Infinity processing */ + + /* Invariant: Must be dealing with finite numbers */ + + /* Compare operands by removing the sign */ + Dbl_copytoint_exponentmantissap1(leftp1,signless_upper_left); + Dbl_copytoint_exponentmantissap1(rightp1,signless_upper_right); + + /* sign difference selects add or sub operation. */ + if(Dbl_ismagnitudeless(leftp2,rightp2,signless_upper_left,signless_upper_right)) + { + /* Set the left operand to the larger one by XOR swap * + * First finish the first word using "save" */ + Dbl_xorfromintp1(save,rightp1,/*to*/rightp1); + Dbl_xorfromintp1(save,leftp1,/*to*/leftp1); + Dbl_swap_lower(leftp2,rightp2); + result_exponent = Dbl_exponent(leftp1); + Dbl_invert_sign(leftp1); + } + /* Invariant: left is not smaller than right. */ + + if((right_exponent = Dbl_exponent(rightp1)) == 0) + { + /* Denormalized operands. First look for zeroes */ + if(Dbl_iszero_mantissa(rightp1,rightp2)) + { + /* right is zero */ + if(Dbl_iszero_exponentmantissa(leftp1,leftp2)) + { + /* Both operands are zeros */ + Dbl_invert_sign(rightp1); + if(Is_rounding_mode(ROUNDMINUS)) + { + Dbl_or_signs(leftp1,/*with*/rightp1); + } + else + { + Dbl_and_signs(leftp1,/*with*/rightp1); + } + } + else + { + /* Left is not a zero and must be the result. Trapped + * underflows are signaled if left is denormalized. Result + * is always exact. */ + if( (result_exponent == 0) && Is_underflowtrap_enabled() ) + { + /* need to normalize results mantissa */ + sign_save = Dbl_signextendedsign(leftp1); + Dbl_leftshiftby1(leftp1,leftp2); + Dbl_normalize(leftp1,leftp2,result_exponent); + Dbl_set_sign(leftp1,/*using*/sign_save); + Dbl_setwrapped_exponent(leftp1,result_exponent,unfl); + Dbl_copytoptr(leftp1,leftp2,dstptr); + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + } + Dbl_copytoptr(leftp1,leftp2,dstptr); + return(NOEXCEPTION); + } + + /* Neither are zeroes */ + Dbl_clear_sign(rightp1); /* Exponent is already cleared */ + if(result_exponent == 0 ) + { + /* Both operands are denormalized. The result must be exact + * and is simply calculated. A sum could become normalized and a + * difference could cancel to a true zero. */ + if( (/*signed*/int) save >= 0 ) + { + Dbl_subtract(leftp1,leftp2,/*minus*/rightp1,rightp2, + /*into*/resultp1,resultp2); + if(Dbl_iszero_mantissa(resultp1,resultp2)) + { + if(Is_rounding_mode(ROUNDMINUS)) + { + Dbl_setone_sign(resultp1); + } + else + { + Dbl_setzero_sign(resultp1); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else + { + Dbl_addition(leftp1,leftp2,rightp1,rightp2, + /*into*/resultp1,resultp2); + if(Dbl_isone_hidden(resultp1)) + { + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + if(Is_underflowtrap_enabled()) + { + /* need to normalize result */ + sign_save = Dbl_signextendedsign(resultp1); + Dbl_leftshiftby1(resultp1,resultp2); + Dbl_normalize(resultp1,resultp2,result_exponent); + Dbl_set_sign(resultp1,/*using*/sign_save); + Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + right_exponent = 1; /* Set exponent to reflect different bias + * with denomalized numbers. */ + } + else + { + Dbl_clear_signexponent_set_hidden(rightp1); + } + Dbl_clear_exponent_set_hidden(leftp1); + diff_exponent = result_exponent - right_exponent; + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for this + * infrequent case. + */ + if(diff_exponent > DBL_THRESHOLD) + { + diff_exponent = DBL_THRESHOLD; + } + + /* Align right operand by shifting to right */ + Dbl_right_align(/*operand*/rightp1,rightp2,/*shifted by*/diff_exponent, + /*and lower to*/extent); + + /* Treat sum and difference of the operands separately. */ + if( (/*signed*/int) save >= 0 ) + { + /* + * Difference of the two operands. Their can be no overflow. A + * borrow can occur out of the hidden bit and force a post + * normalization phase. + */ + Dbl_subtract_withextension(leftp1,leftp2,/*minus*/rightp1,rightp2, + /*with*/extent,/*into*/resultp1,resultp2); + if(Dbl_iszero_hidden(resultp1)) + { + /* Handle normalization */ + /* A straight forward algorithm would now shift the result + * and extension left until the hidden bit becomes one. Not + * all of the extension bits need participate in the shift. + * Only the two most significant bits (round and guard) are + * needed. If only a single shift is needed then the guard + * bit becomes a significant low order bit and the extension + * must participate in the rounding. If more than a single + * shift is needed, then all bits to the right of the guard + * bit are zeros, and the guard bit may or may not be zero. */ + sign_save = Dbl_signextendedsign(resultp1); + Dbl_leftshiftby1_withextent(resultp1,resultp2,extent,resultp1,resultp2); + + /* Need to check for a zero result. The sign and exponent + * fields have already been zeroed. The more efficient test + * of the full object can be used. + */ + if(Dbl_iszero(resultp1,resultp2)) + /* Must have been "x-x" or "x+(-x)". */ + { + if(Is_rounding_mode(ROUNDMINUS)) Dbl_setone_sign(resultp1); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + result_exponent--; + /* Look to see if normalization is finished. */ + if(Dbl_isone_hidden(resultp1)) + { + if(result_exponent==0) + { + /* Denormalized, exponent should be zero. Left operand * + * was normalized, so extent (guard, round) was zero */ + goto underflow; + } + else + { + /* No further normalization is needed. */ + Dbl_set_sign(resultp1,/*using*/sign_save); + Ext_leftshiftby1(extent); + goto round; + } + } + + /* Check for denormalized, exponent should be zero. Left * + * operand was normalized, so extent (guard, round) was zero */ + if(!(underflowtrap = Is_underflowtrap_enabled()) && + result_exponent==0) goto underflow; + + /* Shift extension to complete one bit of normalization and + * update exponent. */ + Ext_leftshiftby1(extent); + + /* Discover first one bit to determine shift amount. Use a + * modified binary search. We have already shifted the result + * one position right and still not found a one so the remainder + * of the extension must be zero and simplifies rounding. */ + /* Scan bytes */ + while(Dbl_iszero_hiddenhigh7mantissa(resultp1)) + { + Dbl_leftshiftby8(resultp1,resultp2); + if((result_exponent -= 8) <= 0 && !underflowtrap) + goto underflow; + } + /* Now narrow it down to the nibble */ + if(Dbl_iszero_hiddenhigh3mantissa(resultp1)) + { + /* The lower nibble contains the normalizing one */ + Dbl_leftshiftby4(resultp1,resultp2); + if((result_exponent -= 4) <= 0 && !underflowtrap) + goto underflow; + } + /* Select case were first bit is set (already normalized) + * otherwise select the proper shift. */ + if((jumpsize = Dbl_hiddenhigh3mantissa(resultp1)) > 7) + { + /* Already normalized */ + if(result_exponent <= 0) goto underflow; + Dbl_set_sign(resultp1,/*using*/sign_save); + Dbl_set_exponent(resultp1,/*using*/result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Dbl_sethigh4bits(resultp1,/*using*/sign_save); + switch(jumpsize) + { + case 1: + { + Dbl_leftshiftby3(resultp1,resultp2); + result_exponent -= 3; + break; + } + case 2: + case 3: + { + Dbl_leftshiftby2(resultp1,resultp2); + result_exponent -= 2; + break; + } + case 4: + case 5: + case 6: + case 7: + { + Dbl_leftshiftby1(resultp1,resultp2); + result_exponent -= 1; + break; + } + } + if(result_exponent > 0) + { + Dbl_set_exponent(resultp1,/*using*/result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); /* Sign bit is already set */ + } + /* Fixup potential underflows */ + underflow: + if(Is_underflowtrap_enabled()) + { + Dbl_set_sign(resultp1,sign_save); + Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + /* + * Since we cannot get an inexact denormalized result, + * we can now return. + */ + Dbl_fix_overshift(resultp1,resultp2,(1-result_exponent),extent); + Dbl_clear_signexponent(resultp1); + Dbl_set_sign(resultp1,sign_save); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } /* end if(hidden...)... */ + /* Fall through and round */ + } /* end if(save >= 0)... */ + else + { + /* Subtract magnitudes */ + Dbl_addition(leftp1,leftp2,rightp1,rightp2,/*to*/resultp1,resultp2); + if(Dbl_isone_hiddenoverflow(resultp1)) + { + /* Prenormalization required. */ + Dbl_rightshiftby1_withextent(resultp2,extent,extent); + Dbl_arithrightshiftby1(resultp1,resultp2); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...subtract magnitudes... */ + + /* Round the result. If the extension is all zeros,then the result is + * exact. Otherwise round in the correct direction. No underflow is + * possible. If a postnormalization is necessary, then the mantissa is + * all zeros so no shift is needed. */ + round: + if(Ext_isnotzero(extent)) + { + inexact = TRUE; + switch(Rounding_mode()) + { + case ROUNDNEAREST: /* The default. */ + if(Ext_isone_sign(extent)) + { + /* at least 1/2 ulp */ + if(Ext_isnotzero_lower(extent) || + Dbl_isone_lowmantissap2(resultp2)) + { + /* either exactly half way and odd or more than 1/2ulp */ + Dbl_increment(resultp1,resultp2); + } + } + break; + + case ROUNDPLUS: + if(Dbl_iszero_sign(resultp1)) + { + /* Round up positive results */ + Dbl_increment(resultp1,resultp2); + } + break; + + case ROUNDMINUS: + if(Dbl_isone_sign(resultp1)) + { + /* Round down negative results */ + Dbl_increment(resultp1,resultp2); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if(Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; + } + if(result_exponent == DBL_INFINITY_EXPONENT) + { + /* Overflow */ + if(Is_overflowtrap_enabled()) + { + Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + else + { + inexact = TRUE; + Set_overflowflag(); + Dbl_setoverflow(resultp1,resultp2); + } + } + else Dbl_set_exponent(resultp1,result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if(inexact) + if(Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); + } diff --git a/kernel/arch/parisc/math-emu/driver.c b/kernel/arch/parisc/math-emu/driver.c new file mode 100644 index 000000000..09ef4136c --- /dev/null +++ b/kernel/arch/parisc/math-emu/driver.c @@ -0,0 +1,128 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * linux/arch/math-emu/driver.c.c + * + * decodes and dispatches unimplemented FPU instructions + * + * Copyright (C) 1999, 2000 Philipp Rumpf <prumpf@tux.org> + * Copyright (C) 2001 Hewlett-Packard <bame@debian.org> + */ + +#include <linux/sched.h> +#include "float.h" +#include "math-emu.h" + + +#define fptpos 31 +#define fpr1pos 10 +#define extru(r,pos,len) (((r) >> (31-(pos))) & (( 1 << (len)) - 1)) + +#define FPUDEBUG 0 + +/* Format of the floating-point exception registers. */ +struct exc_reg { + unsigned int exception : 6; + unsigned int ei : 26; +}; + +/* Macros for grabbing bits of the instruction format from the 'ei' + field above. */ +/* Major opcode 0c and 0e */ +#define FP0CE_UID(i) (((i) >> 6) & 3) +#define FP0CE_CLASS(i) (((i) >> 9) & 3) +#define FP0CE_SUBOP(i) (((i) >> 13) & 7) +#define FP0CE_SUBOP1(i) (((i) >> 15) & 7) /* Class 1 subopcode */ +#define FP0C_FORMAT(i) (((i) >> 11) & 3) +#define FP0E_FORMAT(i) (((i) >> 11) & 1) + +/* Major opcode 0c, uid 2 (performance monitoring) */ +#define FPPM_SUBOP(i) (((i) >> 9) & 0x1f) + +/* Major opcode 2e (fused operations). */ +#define FP2E_SUBOP(i) (((i) >> 5) & 1) +#define FP2E_FORMAT(i) (((i) >> 11) & 1) + +/* Major opcode 26 (FMPYSUB) */ +/* Major opcode 06 (FMPYADD) */ +#define FPx6_FORMAT(i) ((i) & 0x1f) + +/* Flags and enable bits of the status word. */ +#define FPSW_FLAGS(w) ((w) >> 27) +#define FPSW_ENABLE(w) ((w) & 0x1f) +#define FPSW_V (1<<4) +#define FPSW_Z (1<<3) +#define FPSW_O (1<<2) +#define FPSW_U (1<<1) +#define FPSW_I (1<<0) + +/* Handle a floating point exception. Return zero if the faulting + instruction can be completed successfully. */ +int +handle_fpe(struct pt_regs *regs) +{ + extern void printbinary(unsigned long x, int nbits); + struct siginfo si; + unsigned int orig_sw, sw; + int signalcode; + /* need an intermediate copy of float regs because FPU emulation + * code expects an artificial last entry which contains zero + * + * also, the passed in fr registers contain one word that defines + * the fpu type. the fpu type information is constructed + * inside the emulation code + */ + __u64 frcopy[36]; + + memcpy(frcopy, regs->fr, sizeof regs->fr); + frcopy[32] = 0; + + memcpy(&orig_sw, frcopy, sizeof(orig_sw)); + + if (FPUDEBUG) { + printk(KERN_DEBUG "FP VZOUICxxxxCQCQCQCQCQCRMxxTDVZOUI ->\n "); + printbinary(orig_sw, 32); + printk(KERN_DEBUG "\n"); + } + + signalcode = decode_fpu(frcopy, 0x666); + + /* Status word = FR0L. */ + memcpy(&sw, frcopy, sizeof(sw)); + if (FPUDEBUG) { + printk(KERN_DEBUG "VZOUICxxxxCQCQCQCQCQCRMxxTDVZOUI decode_fpu returns %d|0x%x\n", + signalcode >> 24, signalcode & 0xffffff); + printbinary(sw, 32); + printk(KERN_DEBUG "\n"); + } + + memcpy(regs->fr, frcopy, sizeof regs->fr); + if (signalcode != 0) { + si.si_signo = signalcode >> 24; + si.si_errno = 0; + si.si_code = signalcode & 0xffffff; + si.si_addr = (void __user *) regs->iaoq[0]; + force_sig_info(si.si_signo, &si, current); + return -1; + } + + return signalcode ? -1 : 0; +} diff --git a/kernel/arch/parisc/math-emu/fcnvff.c b/kernel/arch/parisc/math-emu/fcnvff.c new file mode 100644 index 000000000..76c063f7d --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvff.c @@ -0,0 +1,309 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvff.c $Revision: 1.1 $ + * + * Purpose: + * Single Floating-point to Double Floating-point + * Double Floating-point to Single Floating-point + * + * External Interfaces: + * dbl_to_sgl_fcnvff(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvff(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/* + * Single Floating-point to Double Floating-point + */ +/*ARGSUSED*/ +int +sgl_to_dbl_fcnvff( + sgl_floating_point *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int src, resultp1, resultp2; + register int src_exponent; + + src = *srcptr; + src_exponent = Sgl_exponent(src); + Dbl_allp1(resultp1) = Sgl_all(src); /* set sign of result */ + /* + * Test for NaN or infinity + */ + if (src_exponent == SGL_INFINITY_EXPONENT) { + /* + * determine if NaN or infinity + */ + if (Sgl_iszero_mantissa(src)) { + /* + * is infinity; want to return double infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(src)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + else { + Set_invalidflag(); + Sgl_set_quiet(src); + } + } + /* + * NaN is quiet, return as double NaN + */ + Dbl_setinfinity_exponent(resultp1); + Sgl_to_dbl_mantissa(src,resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + /* + * Test for zero or denormalized + */ + if (src_exponent == 0) { + /* + * determine if zero or denormalized + */ + if (Sgl_isnotzero_mantissa(src)) { + /* + * is denormalized; want to normalize + */ + Sgl_clear_signexponent(src); + Sgl_leftshiftby1(src); + Sgl_normalize(src,src_exponent); + Sgl_to_dbl_exponent(src_exponent,resultp1); + Sgl_to_dbl_mantissa(src,resultp1,resultp2); + } + else { + Dbl_setzero_exponentmantissa(resultp1,resultp2); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * No special cases, just complete the conversion + */ + Sgl_to_dbl_exponent(src_exponent, resultp1); + Sgl_to_dbl_mantissa(Sgl_mantissa(src), resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Single Floating-point + */ +/*ARGSUSED*/ +int +dbl_to_sgl_fcnvff( + dbl_floating_point *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int srcp1, srcp2, result; + register int src_exponent, dest_exponent, dest_mantissa; + register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE; + register boolean lsb_odd = FALSE; + boolean is_tiny; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1); + Sgl_all(result) = Dbl_allp1(srcp1); /* set sign of result */ + /* + * Test for NaN or infinity + */ + if (src_exponent == DBL_INFINITY_EXPONENT) { + /* + * determine if NaN or infinity + */ + if (Dbl_iszero_mantissa(srcp1,srcp2)) { + /* + * is infinity; want to return single infinity + */ + Sgl_setinfinity_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(srcp1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + else { + Set_invalidflag(); + /* make NaN quiet */ + Dbl_set_quiet(srcp1); + } + } + /* + * NaN is quiet, return as single NaN + */ + Sgl_setinfinity_exponent(result); + Sgl_set_mantissa(result,Dallp1(srcp1)<<3 | Dallp2(srcp2)>>29); + if (Sgl_iszero_mantissa(result)) Sgl_set_quiet(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate result + */ + Dbl_to_sgl_exponent(src_exponent,dest_exponent); + if (dest_exponent > 0) { + Dbl_to_sgl_mantissa(srcp1,srcp2,dest_mantissa,inexact,guardbit, + stickybit,lsb_odd); + } + else { + if (Dbl_iszero_exponentmantissa(srcp1,srcp2)){ + Sgl_setzero_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + if (Is_underflowtrap_enabled()) { + Dbl_to_sgl_mantissa(srcp1,srcp2,dest_mantissa,inexact, + guardbit,stickybit,lsb_odd); + } + else { + /* compute result, determine inexact info, + * and set Underflowflag if appropriate + */ + Dbl_to_sgl_denormalized(srcp1,srcp2,dest_exponent, + dest_mantissa,inexact,guardbit,stickybit,lsb_odd, + is_tiny); + } + } + /* + * Now round result if not exact + */ + if (inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) dest_mantissa++; + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) dest_mantissa++; + break; + case ROUNDNEAREST: + if (guardbit) { + if (stickybit || lsb_odd) dest_mantissa++; + } + } + } + Sgl_set_exponentmantissa(result,dest_mantissa); + + /* + * check for mantissa overflow after rounding + */ + if ((dest_exponent>0 || Is_underflowtrap_enabled()) && + Sgl_isone_hidden(result)) dest_exponent++; + + /* + * Test for overflow + */ + if (dest_exponent >= SGL_INFINITY_EXPONENT) { + /* trap if OVERFLOWTRAP enabled */ + if (Is_overflowtrap_enabled()) { + /* + * Check for gross overflow + */ + if (dest_exponent >= SGL_INFINITY_EXPONENT+SGL_WRAP) + return(UNIMPLEMENTEDEXCEPTION); + + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,ovfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION|INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + Set_overflowflag(); + inexact = TRUE; + /* set result to infinity or largest number */ + Sgl_setoverflow(result); + } + /* + * Test for underflow + */ + else if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Check for gross underflow + */ + if (dest_exponent <= -(SGL_WRAP)) + return(UNIMPLEMENTEDEXCEPTION); + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,unfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(UNDERFLOWEXCEPTION|INEXACTEXCEPTION); + else Set_inexactflag(); + return(UNDERFLOWEXCEPTION); + } + /* + * result is denormalized or signed zero + */ + if (inexact && is_tiny) Set_underflowflag(); + + } + else Sgl_set_exponent(result,dest_exponent); + *dstptr = result; + /* + * Trap if inexact trap is enabled + */ + if (inexact) + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/fcnvfu.c b/kernel/arch/parisc/math-emu/fcnvfu.c new file mode 100644 index 000000000..7e8565537 --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvfu.c @@ -0,0 +1,536 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvfu.c $Revision: 1.1 $ + * + * Purpose: + * Floating-point to Unsigned Fixed-point Converts + * + * External Interfaces: + * dbl_to_dbl_fcnvfu(srcptr,nullptr,dstptr,status) + * dbl_to_sgl_fcnvfu(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvfu(srcptr,nullptr,dstptr,status) + * sgl_to_sgl_fcnvfu(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/************************************************************************ + * Floating-point to Unsigned Fixed-point Converts * + ************************************************************************/ + +/* + * Single Floating-point to Single Unsigned Fixed + */ +/*ARGSUSED*/ +int +sgl_to_sgl_fcnvfu( + sgl_floating_point *srcptr, + unsigned int *nullptr, + unsigned int *dstptr, + unsigned int *status) +{ + register unsigned int src, result; + register int src_exponent; + register boolean inexact = FALSE; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP + 1) { + if (Sgl_isone_sign(src)) { + result = 0; + } else { + result = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Sgl_isone_sign(src)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + Sgl_clear_signexponent_set_hidden(src); + Suint_from_sgl_mantissa(src,src_exponent,result); + + /* check for inexact */ + if (Sgl_isinexact_to_unsigned(src,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + result++; + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + if (Sgl_isone_roundbit(src,src_exponent) && + (Sgl_isone_stickybit(src,src_exponent) || + (result & 1))) { + result++; + } + break; + } + } + } else { + result = 0; + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) { + result++; + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + break; + case ROUNDNEAREST: + if (src_exponent == -1 && + Sgl_isnotzero_mantissa(src)) { + if (Sgl_isone_sign(src)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + else result++; + } + break; + } + } + } + *dstptr = result; + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Single Floating-point to Double Unsigned Fixed + */ +/*ARGSUSED*/ +int +sgl_to_dbl_fcnvfu( + sgl_floating_point *srcptr, + unsigned int *nullptr, + dbl_unsigned *dstptr, + unsigned int *status) +{ + register int src_exponent; + register unsigned int src, resultp1, resultp2; + register boolean inexact = FALSE; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP + 1) { + if (Sgl_isone_sign(src)) { + resultp1 = resultp2 = 0; + } else { + resultp1 = resultp2 = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Sgl_isone_sign(src)) { + resultp1 = resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Sgl_clear_signexponent_set_hidden(src); + Duint_from_sgl_mantissa(src,src_exponent,resultp1,resultp2); + + /* check for inexact */ + if (Sgl_isinexact_to_unsigned(src,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + Duint_increment(resultp1,resultp2); + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + if (Sgl_isone_roundbit(src,src_exponent) && + (Sgl_isone_stickybit(src,src_exponent) || + Duint_isone_lowp2(resultp2))) { + Duint_increment(resultp1,resultp2); + } + break; + } + } + } else { + Duint_setzero(resultp1,resultp2); + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) { + Duint_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) { + resultp1 = resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + break; + case ROUNDNEAREST: + if (src_exponent == -1 && + Sgl_isnotzero_mantissa(src)) { + if (Sgl_isone_sign(src)) { + resultp1 = 0; + resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + else Duint_increment(resultp1,resultp2); + } + } + } + } + Duint_copytoptr(resultp1,resultp2,dstptr); + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Single Unsigned Fixed + */ +/*ARGSUSED*/ +int +dbl_to_sgl_fcnvfu (dbl_floating_point * srcptr, unsigned int *nullptr, + unsigned int *dstptr, unsigned int *status) +{ + register unsigned int srcp1, srcp2, result; + register int src_exponent; + register boolean inexact = FALSE; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP + 1) { + if (Dbl_isone_sign(srcp1)) { + result = 0; + } else { + result = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Dbl_isone_sign(srcp1)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + Dbl_clear_signexponent_set_hidden(srcp1); + Suint_from_dbl_mantissa(srcp1,srcp2,src_exponent,result); + + /* check for inexact */ + if (Dbl_isinexact_to_unsigned(srcp1,srcp2,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + result++; + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + if(Dbl_isone_roundbit(srcp1,srcp2,src_exponent) && + (Dbl_isone_stickybit(srcp1,srcp2,src_exponent)|| + result&1)) + result++; + break; + } + /* check for overflow */ + if (result == 0) { + result = 0xffffffff; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + } + } else { + result = 0; + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) result++; + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + break; + case ROUNDNEAREST: + if (src_exponent == -1 && + Dbl_isnotzero_mantissa(srcp1,srcp2)) + if (Dbl_isone_sign(srcp1)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + else result++; + } + } + } + *dstptr = result; + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Double Unsigned Fixed + */ +/*ARGSUSED*/ +int +dbl_to_dbl_fcnvfu (dbl_floating_point * srcptr, unsigned int *nullptr, + dbl_unsigned * dstptr, unsigned int *status) +{ + register int src_exponent; + register unsigned int srcp1, srcp2, resultp1, resultp2; + register boolean inexact = FALSE; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP + 1) { + if (Dbl_isone_sign(srcp1)) { + resultp1 = resultp2 = 0; + } else { + resultp1 = resultp2 = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Dbl_isone_sign(srcp1)) { + resultp1 = resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Dbl_clear_signexponent_set_hidden(srcp1); + Duint_from_dbl_mantissa(srcp1,srcp2,src_exponent,resultp1, + resultp2); + + /* check for inexact */ + if (Dbl_isinexact_to_unsigned(srcp1,srcp2,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + Duint_increment(resultp1,resultp2); + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + if(Dbl_isone_roundbit(srcp1,srcp2,src_exponent)) + if(Dbl_isone_stickybit(srcp1,srcp2,src_exponent) || + Duint_isone_lowp2(resultp2)) + Duint_increment(resultp1,resultp2); + } + } + } else { + Duint_setzero(resultp1,resultp2); + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) { + Duint_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) { + resultp1 = resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + break; + case ROUNDNEAREST: + if (src_exponent == -1 && + Dbl_isnotzero_mantissa(srcp1,srcp2)) + if (Dbl_iszero_sign(srcp1)) { + Duint_increment(resultp1,resultp2); + } else { + resultp1 = 0; + resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + inexact = FALSE; + } + } + } + } + Duint_copytoptr(resultp1,resultp2,dstptr); + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + diff --git a/kernel/arch/parisc/math-emu/fcnvfut.c b/kernel/arch/parisc/math-emu/fcnvfut.c new file mode 100644 index 000000000..4176a44ed --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvfut.c @@ -0,0 +1,332 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvfut.c $Revision: 1.1 $ + * + * Purpose: + * Floating-point to Unsigned Fixed-point Converts with Truncation + * + * External Interfaces: + * dbl_to_dbl_fcnvfut(srcptr,nullptr,dstptr,status) + * dbl_to_sgl_fcnvfut(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvfut(srcptr,nullptr,dstptr,status) + * sgl_to_sgl_fcnvfut(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/************************************************************************ + * Floating-point to Unsigned Fixed-point Converts with Truncation * + ************************************************************************/ + +/* + * Convert single floating-point to single fixed-point format + * with truncated result + */ +/*ARGSUSED*/ +int +sgl_to_sgl_fcnvfut (sgl_floating_point * srcptr, unsigned int *nullptr, + unsigned int *dstptr, unsigned int *status) +{ + register unsigned int src, result; + register int src_exponent; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP + 1) { + if (Sgl_isone_sign(src)) { + result = 0; + } else { + result = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Sgl_isone_sign(src)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + Sgl_clear_signexponent_set_hidden(src); + Suint_from_sgl_mantissa(src,src_exponent,result); + *dstptr = result; + + /* check for inexact */ + if (Sgl_isinexact_to_unsigned(src,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + *dstptr = 0; + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} + +/* + * Single Floating-point to Double Unsigned Fixed + */ +/*ARGSUSED*/ +int +sgl_to_dbl_fcnvfut (sgl_floating_point * srcptr, unsigned int *nullptr, + dbl_unsigned * dstptr, unsigned int *status) +{ + register int src_exponent; + register unsigned int src, resultp1, resultp2; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP + 1) { + if (Sgl_isone_sign(src)) { + resultp1 = resultp2 = 0; + } else { + resultp1 = resultp2 = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Sgl_isone_sign(src)) { + resultp1 = resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Sgl_clear_signexponent_set_hidden(src); + Duint_from_sgl_mantissa(src,src_exponent,resultp1,resultp2); + Duint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Sgl_isinexact_to_unsigned(src,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + Duint_setzero(resultp1,resultp2); + Duint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Single Unsigned Fixed + */ +/*ARGSUSED*/ +int +dbl_to_sgl_fcnvfut (dbl_floating_point * srcptr, unsigned int *nullptr, + unsigned int *dstptr, unsigned int *status) +{ + register unsigned int srcp1, srcp2, result; + register int src_exponent; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP + 1) { + if (Dbl_isone_sign(srcp1)) { + result = 0; + } else { + result = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Dbl_isone_sign(srcp1)) { + result = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + Dbl_clear_signexponent_set_hidden(srcp1); + Suint_from_dbl_mantissa(srcp1,srcp2,src_exponent,result); + *dstptr = result; + + /* check for inexact */ + if (Dbl_isinexact_to_unsigned(srcp1,srcp2,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + *dstptr = 0; + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Double Unsigned Fixed + */ +/*ARGSUSED*/ +int +dbl_to_dbl_fcnvfut (dbl_floating_point * srcptr, unsigned int *nullptr, + dbl_unsigned * dstptr, unsigned int *status) +{ + register int src_exponent; + register unsigned int srcp1, srcp2, resultp1, resultp2; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP + 1) { + if (Dbl_isone_sign(srcp1)) { + resultp1 = resultp2 = 0; + } else { + resultp1 = resultp2 = 0xffffffff; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + /* + * Check sign. + * If negative, trap unimplemented. + */ + if (Dbl_isone_sign(srcp1)) { + resultp1 = resultp2 = 0; + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Duint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Dbl_clear_signexponent_set_hidden(srcp1); + Duint_from_dbl_mantissa(srcp1,srcp2,src_exponent, + resultp1,resultp2); + Duint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Dbl_isinexact_to_unsigned(srcp1,srcp2,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + Duint_setzero(resultp1,resultp2); + Duint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/fcnvfx.c b/kernel/arch/parisc/math-emu/fcnvfx.c new file mode 100644 index 000000000..d6475bddb --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvfx.c @@ -0,0 +1,501 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvfx.c $Revision: 1.1 $ + * + * Purpose: + * Single Floating-point to Single Fixed-point + * Single Floating-point to Double Fixed-point + * Double Floating-point to Single Fixed-point + * Double Floating-point to Double Fixed-point + * + * External Interfaces: + * dbl_to_dbl_fcnvfx(srcptr,nullptr,dstptr,status) + * dbl_to_sgl_fcnvfx(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvfx(srcptr,nullptr,dstptr,status) + * sgl_to_sgl_fcnvfx(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/* + * Single Floating-point to Single Fixed-point + */ +/*ARGSUSED*/ +int +sgl_to_sgl_fcnvfx( + sgl_floating_point *srcptr, + sgl_floating_point *nullptr, + int *dstptr, + sgl_floating_point *status) +{ + register unsigned int src, temp; + register int src_exponent, result; + register boolean inexact = FALSE; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP) { + /* check for MININT */ + if ((src_exponent > SGL_FX_MAX_EXP + 1) || + Sgl_isnotzero_mantissa(src) || Sgl_iszero_sign(src)) { + if (Sgl_iszero_sign(src)) result = 0x7fffffff; + else result = 0x80000000; + + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + } + /* + * Generate result + */ + if (src_exponent >= 0) { + temp = src; + Sgl_clear_signexponent_set_hidden(temp); + Int_from_sgl_mantissa(temp,src_exponent); + if (Sgl_isone_sign(src)) result = -Sgl_all(temp); + else result = Sgl_all(temp); + + /* check for inexact */ + if (Sgl_isinexact_to_fix(src,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) result++; + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) result--; + break; + case ROUNDNEAREST: + if (Sgl_isone_roundbit(src,src_exponent)) { + if (Sgl_isone_stickybit(src,src_exponent) + || (Sgl_isone_lowmantissa(temp))) + if (Sgl_iszero_sign(src)) result++; + else result--; + } + } + } + } + else { + result = 0; + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) result++; + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) result--; + break; + case ROUNDNEAREST: + if (src_exponent == -1) + if (Sgl_isnotzero_mantissa(src)) + if (Sgl_iszero_sign(src)) result++; + else result--; + } + } + } + *dstptr = result; + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Single Floating-point to Double Fixed-point + */ +/*ARGSUSED*/ +int +sgl_to_dbl_fcnvfx( + sgl_floating_point *srcptr, + unsigned int *nullptr, + dbl_integer *dstptr, + unsigned int *status) +{ + register int src_exponent, resultp1; + register unsigned int src, temp, resultp2; + register boolean inexact = FALSE; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP) { + /* check for MININT */ + if ((src_exponent > DBL_FX_MAX_EXP + 1) || + Sgl_isnotzero_mantissa(src) || Sgl_iszero_sign(src)) { + if (Sgl_iszero_sign(src)) { + resultp1 = 0x7fffffff; + resultp2 = 0xffffffff; + } + else { + resultp1 = 0x80000000; + resultp2 = 0; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Dint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Dint_set_minint(resultp1,resultp2); + Dint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + temp = src; + Sgl_clear_signexponent_set_hidden(temp); + Dint_from_sgl_mantissa(temp,src_exponent,resultp1,resultp2); + if (Sgl_isone_sign(src)) { + Dint_setone_sign(resultp1,resultp2); + } + + /* check for inexact */ + if (Sgl_isinexact_to_fix(src,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) { + Dint_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) { + Dint_decrement(resultp1,resultp2); + } + break; + case ROUNDNEAREST: + if (Sgl_isone_roundbit(src,src_exponent)) + if (Sgl_isone_stickybit(src,src_exponent) || + (Dint_isone_lowp2(resultp2))) + if (Sgl_iszero_sign(src)) { + Dint_increment(resultp1,resultp2); + } + else { + Dint_decrement(resultp1,resultp2); + } + } + } + } + else { + Dint_setzero(resultp1,resultp2); + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) { + Dint_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) { + Dint_decrement(resultp1,resultp2); + } + break; + case ROUNDNEAREST: + if (src_exponent == -1) + if (Sgl_isnotzero_mantissa(src)) + if (Sgl_iszero_sign(src)) { + Dint_increment(resultp1,resultp2); + } + else { + Dint_decrement(resultp1,resultp2); + } + } + } + } + Dint_copytoptr(resultp1,resultp2,dstptr); + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Single Fixed-point + */ +/*ARGSUSED*/ +int +dbl_to_sgl_fcnvfx( + dbl_floating_point *srcptr, + unsigned int *nullptr, + int *dstptr, + unsigned int *status) +{ + register unsigned int srcp1,srcp2, tempp1,tempp2; + register int src_exponent, result; + register boolean inexact = FALSE; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP) { + /* check for MININT */ + if (Dbl_isoverflow_to_int(src_exponent,srcp1,srcp2)) { + if (Dbl_iszero_sign(srcp1)) result = 0x7fffffff; + else result = 0x80000000; + + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + } + /* + * Generate result + */ + if (src_exponent >= 0) { + tempp1 = srcp1; + tempp2 = srcp2; + Dbl_clear_signexponent_set_hidden(tempp1); + Int_from_dbl_mantissa(tempp1,tempp2,src_exponent); + if (Dbl_isone_sign(srcp1) && (src_exponent <= SGL_FX_MAX_EXP)) + result = -Dbl_allp1(tempp1); + else result = Dbl_allp1(tempp1); + + /* check for inexact */ + if (Dbl_isinexact_to_fix(srcp1,srcp2,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) result++; + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) result--; + break; + case ROUNDNEAREST: + if (Dbl_isone_roundbit(srcp1,srcp2,src_exponent)) + if (Dbl_isone_stickybit(srcp1,srcp2,src_exponent) || + (Dbl_isone_lowmantissap1(tempp1))) + if (Dbl_iszero_sign(srcp1)) result++; + else result--; + } + /* check for overflow */ + if ((Dbl_iszero_sign(srcp1) && result < 0) || + (Dbl_isone_sign(srcp1) && result > 0)) { + + if (Dbl_iszero_sign(srcp1)) result = 0x7fffffff; + else result = 0x80000000; + + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + } + } + else { + result = 0; + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) result++; + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) result--; + break; + case ROUNDNEAREST: + if (src_exponent == -1) + if (Dbl_isnotzero_mantissa(srcp1,srcp2)) + if (Dbl_iszero_sign(srcp1)) result++; + else result--; + } + } + } + *dstptr = result; + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Double Fixed-point + */ +/*ARGSUSED*/ +int +dbl_to_dbl_fcnvfx( + dbl_floating_point *srcptr, + unsigned int *nullptr, + dbl_integer *dstptr, + unsigned int *status) +{ + register int src_exponent, resultp1; + register unsigned int srcp1, srcp2, tempp1, tempp2, resultp2; + register boolean inexact = FALSE; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP) { + /* check for MININT */ + if ((src_exponent > DBL_FX_MAX_EXP + 1) || + Dbl_isnotzero_mantissa(srcp1,srcp2) || Dbl_iszero_sign(srcp1)) { + if (Dbl_iszero_sign(srcp1)) { + resultp1 = 0x7fffffff; + resultp2 = 0xffffffff; + } + else { + resultp1 = 0x80000000; + resultp2 = 0; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Dint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * Generate result + */ + if (src_exponent >= 0) { + tempp1 = srcp1; + tempp2 = srcp2; + Dbl_clear_signexponent_set_hidden(tempp1); + Dint_from_dbl_mantissa(tempp1,tempp2,src_exponent,resultp1, + resultp2); + if (Dbl_isone_sign(srcp1)) { + Dint_setone_sign(resultp1,resultp2); + } + + /* check for inexact */ + if (Dbl_isinexact_to_fix(srcp1,srcp2,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) { + Dint_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) { + Dint_decrement(resultp1,resultp2); + } + break; + case ROUNDNEAREST: + if (Dbl_isone_roundbit(srcp1,srcp2,src_exponent)) + if (Dbl_isone_stickybit(srcp1,srcp2,src_exponent) || + (Dint_isone_lowp2(resultp2))) + if (Dbl_iszero_sign(srcp1)) { + Dint_increment(resultp1,resultp2); + } + else { + Dint_decrement(resultp1,resultp2); + } + } + } + } + else { + Dint_setzero(resultp1,resultp2); + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) { + Dint_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) { + Dint_decrement(resultp1,resultp2); + } + break; + case ROUNDNEAREST: + if (src_exponent == -1) + if (Dbl_isnotzero_mantissa(srcp1,srcp2)) + if (Dbl_iszero_sign(srcp1)) { + Dint_increment(resultp1,resultp2); + } + else { + Dint_decrement(resultp1,resultp2); + } + } + } + } + Dint_copytoptr(resultp1,resultp2,dstptr); + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/fcnvfxt.c b/kernel/arch/parisc/math-emu/fcnvfxt.c new file mode 100644 index 000000000..8b9010c46 --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvfxt.c @@ -0,0 +1,328 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvfxt.c $Revision: 1.1 $ + * + * Purpose: + * Single Floating-point to Single Fixed-point /w truncated result + * Single Floating-point to Double Fixed-point /w truncated result + * Double Floating-point to Single Fixed-point /w truncated result + * Double Floating-point to Double Fixed-point /w truncated result + * + * External Interfaces: + * dbl_to_dbl_fcnvfxt(srcptr,nullptr,dstptr,status) + * dbl_to_sgl_fcnvfxt(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvfxt(srcptr,nullptr,dstptr,status) + * sgl_to_sgl_fcnvfxt(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/* + * Convert single floating-point to single fixed-point format + * with truncated result + */ +/*ARGSUSED*/ +int +sgl_to_sgl_fcnvfxt( + sgl_floating_point *srcptr, + unsigned int *nullptr, + int *dstptr, + unsigned int *status) +{ + register unsigned int src, temp; + register int src_exponent, result; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP) { + /* check for MININT */ + if ((src_exponent > SGL_FX_MAX_EXP + 1) || + Sgl_isnotzero_mantissa(src) || Sgl_iszero_sign(src)) { + if (Sgl_iszero_sign(src)) result = 0x7fffffff; + else result = 0x80000000; + + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + } + /* + * Generate result + */ + if (src_exponent >= 0) { + temp = src; + Sgl_clear_signexponent_set_hidden(temp); + Int_from_sgl_mantissa(temp,src_exponent); + if (Sgl_isone_sign(src)) result = -Sgl_all(temp); + else result = Sgl_all(temp); + *dstptr = result; + + /* check for inexact */ + if (Sgl_isinexact_to_fix(src,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + *dstptr = 0; + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} + +/* + * Single Floating-point to Double Fixed-point + */ +/*ARGSUSED*/ +int +sgl_to_dbl_fcnvfxt( + sgl_floating_point *srcptr, + unsigned int *nullptr, + dbl_integer *dstptr, + unsigned int *status) +{ + register int src_exponent, resultp1; + register unsigned int src, temp, resultp2; + + src = *srcptr; + src_exponent = Sgl_exponent(src) - SGL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP) { + /* check for MININT */ + if ((src_exponent > DBL_FX_MAX_EXP + 1) || + Sgl_isnotzero_mantissa(src) || Sgl_iszero_sign(src)) { + if (Sgl_iszero_sign(src)) { + resultp1 = 0x7fffffff; + resultp2 = 0xffffffff; + } + else { + resultp1 = 0x80000000; + resultp2 = 0; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Dint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + Dint_set_minint(resultp1,resultp2); + Dint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + temp = src; + Sgl_clear_signexponent_set_hidden(temp); + Dint_from_sgl_mantissa(temp,src_exponent,resultp1,resultp2); + if (Sgl_isone_sign(src)) { + Dint_setone_sign(resultp1,resultp2); + } + Dint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Sgl_isinexact_to_fix(src,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + Dint_setzero(resultp1,resultp2); + Dint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Single Fixed-point + */ +/*ARGSUSED*/ +int +dbl_to_sgl_fcnvfxt( + dbl_floating_point *srcptr, + unsigned int *nullptr, + int *dstptr, + unsigned int *status) +{ + register unsigned int srcp1, srcp2, tempp1, tempp2; + register int src_exponent, result; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > SGL_FX_MAX_EXP) { + /* check for MININT */ + if (Dbl_isoverflow_to_int(src_exponent,srcp1,srcp2)) { + if (Dbl_iszero_sign(srcp1)) result = 0x7fffffff; + else result = 0x80000000; + + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + *dstptr = result; + return(NOEXCEPTION); + } + } + /* + * Generate result + */ + if (src_exponent >= 0) { + tempp1 = srcp1; + tempp2 = srcp2; + Dbl_clear_signexponent_set_hidden(tempp1); + Int_from_dbl_mantissa(tempp1,tempp2,src_exponent); + if (Dbl_isone_sign(srcp1) && (src_exponent <= SGL_FX_MAX_EXP)) + result = -Dbl_allp1(tempp1); + else result = Dbl_allp1(tempp1); + *dstptr = result; + + /* check for inexact */ + if (Dbl_isinexact_to_fix(srcp1,srcp2,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + *dstptr = 0; + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point to Double Fixed-point + */ +/*ARGSUSED*/ +int +dbl_to_dbl_fcnvfxt( + dbl_floating_point *srcptr, + unsigned int *nullptr, + dbl_integer *dstptr, + unsigned int *status) +{ + register int src_exponent, resultp1; + register unsigned int srcp1, srcp2, tempp1, tempp2, resultp2; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + src_exponent = Dbl_exponent(srcp1) - DBL_BIAS; + + /* + * Test for overflow + */ + if (src_exponent > DBL_FX_MAX_EXP) { + /* check for MININT */ + if ((src_exponent > DBL_FX_MAX_EXP + 1) || + Dbl_isnotzero_mantissa(srcp1,srcp2) || Dbl_iszero_sign(srcp1)) { + if (Dbl_iszero_sign(srcp1)) { + resultp1 = 0x7fffffff; + resultp2 = 0xffffffff; + } + else { + resultp1 = 0x80000000; + resultp2 = 0; + } + if (Is_invalidtrap_enabled()) { + return(INVALIDEXCEPTION); + } + Set_invalidflag(); + Dint_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + /* + * Generate result + */ + if (src_exponent >= 0) { + tempp1 = srcp1; + tempp2 = srcp2; + Dbl_clear_signexponent_set_hidden(tempp1); + Dint_from_dbl_mantissa(tempp1,tempp2,src_exponent, + resultp1,resultp2); + if (Dbl_isone_sign(srcp1)) { + Dint_setone_sign(resultp1,resultp2); + } + Dint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Dbl_isinexact_to_fix(srcp1,srcp2,src_exponent)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + else { + Dint_setzero(resultp1,resultp2); + Dint_copytoptr(resultp1,resultp2,dstptr); + + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/fcnvuf.c b/kernel/arch/parisc/math-emu/fcnvuf.c new file mode 100644 index 000000000..5e68189fe --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvuf.c @@ -0,0 +1,318 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvuf.c $Revision: 1.1 $ + * + * Purpose: + * Fixed point to Floating-point Converts + * + * External Interfaces: + * dbl_to_dbl_fcnvuf(srcptr,nullptr,dstptr,status) + * dbl_to_sgl_fcnvuf(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvuf(srcptr,nullptr,dstptr,status) + * sgl_to_sgl_fcnvuf(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/************************************************************************ + * Fixed point to Floating-point Converts * + ************************************************************************/ + +/* + * Convert Single Unsigned Fixed to Single Floating-point format + */ + +int +sgl_to_sgl_fcnvuf( + unsigned int *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int src, result = 0; + register int dst_exponent; + + src = *srcptr; + + /* Check for zero */ + if (src == 0) { + Sgl_setzero(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(src,dst_exponent); + /* left justify source, with msb at bit position 0 */ + src <<= dst_exponent+1; + Sgl_set_mantissa(result, src >> SGL_EXP_LENGTH); + Sgl_set_exponent(result, 30+SGL_BIAS - dst_exponent); + + /* check for inexact */ + if (Suint_isinexact_to_sgl(src)) { + switch (Rounding_mode()) { + case ROUNDPLUS: + Sgl_increment(result); + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + Sgl_roundnearest_from_suint(src,result); + break; + } + if (Is_inexacttrap_enabled()) { + *dstptr = result; + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + *dstptr = result; + return(NOEXCEPTION); +} + +/* + * Single Unsigned Fixed to Double Floating-point + */ + +int +sgl_to_dbl_fcnvuf( + unsigned int *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register int dst_exponent; + register unsigned int src, resultp1 = 0, resultp2 = 0; + + src = *srcptr; + + /* Check for zero */ + if (src == 0) { + Dbl_setzero(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(src,dst_exponent); + /* left justify source, with msb at bit position 0 */ + src <<= dst_exponent+1; + Dbl_set_mantissap1(resultp1, src >> DBL_EXP_LENGTH); + Dbl_set_mantissap2(resultp2, src << (32-DBL_EXP_LENGTH)); + Dbl_set_exponent(resultp1, (30+DBL_BIAS) - dst_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} + +/* + * Double Unsigned Fixed to Single Floating-point + */ + +int +dbl_to_sgl_fcnvuf( + dbl_unsigned *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + int dst_exponent; + unsigned int srcp1, srcp2, result = 0; + + Duint_copyfromptr(srcptr,srcp1,srcp2); + + /* Check for zero */ + if (srcp1 == 0 && srcp2 == 0) { + Sgl_setzero(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + if (srcp1 == 0) { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(srcp2,dst_exponent); + /* left justify source, with msb at bit position 0 */ + srcp1 = srcp2 << dst_exponent+1; + srcp2 = 0; + /* + * since msb set is in second word, need to + * adjust bit position count + */ + dst_exponent += 32; + } + else { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + * + */ + Find_ms_one_bit(srcp1,dst_exponent); + /* left justify source, with msb at bit position 0 */ + if (dst_exponent >= 0) { + Variable_shift_double(srcp1,srcp2,(31-dst_exponent), + srcp1); + srcp2 <<= dst_exponent+1; + } + } + Sgl_set_mantissa(result, srcp1 >> SGL_EXP_LENGTH); + Sgl_set_exponent(result, (62+SGL_BIAS) - dst_exponent); + + /* check for inexact */ + if (Duint_isinexact_to_sgl(srcp1,srcp2)) { + switch (Rounding_mode()) { + case ROUNDPLUS: + Sgl_increment(result); + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + Sgl_roundnearest_from_duint(srcp1,srcp2,result); + break; + } + if (Is_inexacttrap_enabled()) { + *dstptr = result; + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + *dstptr = result; + return(NOEXCEPTION); +} + +/* + * Double Unsigned Fixed to Double Floating-point + */ + +int +dbl_to_dbl_fcnvuf( + dbl_unsigned *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register int dst_exponent; + register unsigned int srcp1, srcp2, resultp1 = 0, resultp2 = 0; + + Duint_copyfromptr(srcptr,srcp1,srcp2); + + /* Check for zero */ + if (srcp1 == 0 && srcp2 ==0) { + Dbl_setzero(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + if (srcp1 == 0) { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(srcp2,dst_exponent); + /* left justify source, with msb at bit position 0 */ + srcp1 = srcp2 << dst_exponent+1; + srcp2 = 0; + /* + * since msb set is in second word, need to + * adjust bit position count + */ + dst_exponent += 32; + } + else { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(srcp1,dst_exponent); + /* left justify source, with msb at bit position 0 */ + if (dst_exponent >= 0) { + Variable_shift_double(srcp1,srcp2,(31-dst_exponent), + srcp1); + srcp2 <<= dst_exponent+1; + } + } + Dbl_set_mantissap1(resultp1, srcp1 >> DBL_EXP_LENGTH); + Shiftdouble(srcp1,srcp2,DBL_EXP_LENGTH,resultp2); + Dbl_set_exponent(resultp1, (62+DBL_BIAS) - dst_exponent); + + /* check for inexact */ + if (Duint_isinexact_to_dbl(srcp2)) { + switch (Rounding_mode()) { + case ROUNDPLUS: + Dbl_increment(resultp1,resultp2); + break; + case ROUNDMINUS: /* never negative */ + break; + case ROUNDNEAREST: + Dbl_roundnearest_from_duint(srcp2,resultp1, + resultp2); + break; + } + if (Is_inexacttrap_enabled()) { + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} + diff --git a/kernel/arch/parisc/math-emu/fcnvxf.c b/kernel/arch/parisc/math-emu/fcnvxf.c new file mode 100644 index 000000000..05c7fadb3 --- /dev/null +++ b/kernel/arch/parisc/math-emu/fcnvxf.c @@ -0,0 +1,386 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fcnvxf.c $Revision: 1.1 $ + * + * Purpose: + * Single Fixed-point to Single Floating-point + * Single Fixed-point to Double Floating-point + * Double Fixed-point to Single Floating-point + * Double Fixed-point to Double Floating-point + * + * External Interfaces: + * dbl_to_dbl_fcnvxf(srcptr,nullptr,dstptr,status) + * dbl_to_sgl_fcnvxf(srcptr,nullptr,dstptr,status) + * sgl_to_dbl_fcnvxf(srcptr,nullptr,dstptr,status) + * sgl_to_sgl_fcnvxf(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/* + * Convert single fixed-point to single floating-point format + */ + +int +sgl_to_sgl_fcnvxf( + int *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + register int src, dst_exponent; + register unsigned int result = 0; + + src = *srcptr; + /* + * set sign bit of result and get magnitude of source + */ + if (src < 0) { + Sgl_setone_sign(result); + Int_negate(src); + } + else { + Sgl_setzero_sign(result); + /* Check for zero */ + if (src == 0) { + Sgl_setzero(result); + *dstptr = result; + return(NOEXCEPTION); + } + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(src,dst_exponent); + /* left justify source, with msb at bit position 1 */ + if (dst_exponent >= 0) src <<= dst_exponent; + else src = 1 << 30; + Sgl_set_mantissa(result, src >> (SGL_EXP_LENGTH-1)); + Sgl_set_exponent(result, 30+SGL_BIAS - dst_exponent); + + /* check for inexact */ + if (Int_isinexact_to_sgl(src)) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) + Sgl_increment(result); + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) + Sgl_increment(result); + break; + case ROUNDNEAREST: + Sgl_roundnearest_from_int(src,result); + } + if (Is_inexacttrap_enabled()) { + *dstptr = result; + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + *dstptr = result; + return(NOEXCEPTION); +} + +/* + * Single Fixed-point to Double Floating-point + */ + +int +sgl_to_dbl_fcnvxf( + int *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register int src, dst_exponent; + register unsigned int resultp1 = 0, resultp2 = 0; + + src = *srcptr; + /* + * set sign bit of result and get magnitude of source + */ + if (src < 0) { + Dbl_setone_sign(resultp1); + Int_negate(src); + } + else { + Dbl_setzero_sign(resultp1); + /* Check for zero */ + if (src == 0) { + Dbl_setzero(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(src,dst_exponent); + /* left justify source, with msb at bit position 1 */ + if (dst_exponent >= 0) src <<= dst_exponent; + else src = 1 << 30; + Dbl_set_mantissap1(resultp1, src >> DBL_EXP_LENGTH - 1); + Dbl_set_mantissap2(resultp2, src << (33-DBL_EXP_LENGTH)); + Dbl_set_exponent(resultp1, (30+DBL_BIAS) - dst_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} + +/* + * Double Fixed-point to Single Floating-point + */ + +int +dbl_to_sgl_fcnvxf( + dbl_integer *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + int dst_exponent, srcp1; + unsigned int result = 0, srcp2; + + Dint_copyfromptr(srcptr,srcp1,srcp2); + /* + * set sign bit of result and get magnitude of source + */ + if (srcp1 < 0) { + Sgl_setone_sign(result); + Dint_negate(srcp1,srcp2); + } + else { + Sgl_setzero_sign(result); + /* Check for zero */ + if (srcp1 == 0 && srcp2 == 0) { + Sgl_setzero(result); + *dstptr = result; + return(NOEXCEPTION); + } + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + if (srcp1 == 0) { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(srcp2,dst_exponent); + /* left justify source, with msb at bit position 1 */ + if (dst_exponent >= 0) { + srcp1 = srcp2 << dst_exponent; + srcp2 = 0; + } + else { + srcp1 = srcp2 >> 1; + srcp2 <<= 31; + } + /* + * since msb set is in second word, need to + * adjust bit position count + */ + dst_exponent += 32; + } + else { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + * + */ + Find_ms_one_bit(srcp1,dst_exponent); + /* left justify source, with msb at bit position 1 */ + if (dst_exponent > 0) { + Variable_shift_double(srcp1,srcp2,(32-dst_exponent), + srcp1); + srcp2 <<= dst_exponent; + } + /* + * If dst_exponent = 0, we don't need to shift anything. + * If dst_exponent = -1, src = - 2**63 so we won't need to + * shift srcp2. + */ + else srcp1 >>= -(dst_exponent); + } + Sgl_set_mantissa(result, srcp1 >> SGL_EXP_LENGTH - 1); + Sgl_set_exponent(result, (62+SGL_BIAS) - dst_exponent); + + /* check for inexact */ + if (Dint_isinexact_to_sgl(srcp1,srcp2)) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) + Sgl_increment(result); + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) + Sgl_increment(result); + break; + case ROUNDNEAREST: + Sgl_roundnearest_from_dint(srcp1,srcp2,result); + } + if (Is_inexacttrap_enabled()) { + *dstptr = result; + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + *dstptr = result; + return(NOEXCEPTION); +} + +/* + * Double Fixed-point to Double Floating-point + */ + +int +dbl_to_dbl_fcnvxf( + dbl_integer *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register int srcp1, dst_exponent; + register unsigned int srcp2, resultp1 = 0, resultp2 = 0; + + Dint_copyfromptr(srcptr,srcp1,srcp2); + /* + * set sign bit of result and get magnitude of source + */ + if (srcp1 < 0) { + Dbl_setone_sign(resultp1); + Dint_negate(srcp1,srcp2); + } + else { + Dbl_setzero_sign(resultp1); + /* Check for zero */ + if (srcp1 == 0 && srcp2 ==0) { + Dbl_setzero(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + /* + * Generate exponent and normalized mantissa + */ + dst_exponent = 16; /* initialize for normalization */ + if (srcp1 == 0) { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(srcp2,dst_exponent); + /* left justify source, with msb at bit position 1 */ + if (dst_exponent >= 0) { + srcp1 = srcp2 << dst_exponent; + srcp2 = 0; + } + else { + srcp1 = srcp2 >> 1; + srcp2 <<= 31; + } + /* + * since msb set is in second word, need to + * adjust bit position count + */ + dst_exponent += 32; + } + else { + /* + * Check word for most significant bit set. Returns + * a value in dst_exponent indicating the bit position, + * between -1 and 30. + */ + Find_ms_one_bit(srcp1,dst_exponent); + /* left justify source, with msb at bit position 1 */ + if (dst_exponent > 0) { + Variable_shift_double(srcp1,srcp2,(32-dst_exponent), + srcp1); + srcp2 <<= dst_exponent; + } + /* + * If dst_exponent = 0, we don't need to shift anything. + * If dst_exponent = -1, src = - 2**63 so we won't need to + * shift srcp2. + */ + else srcp1 >>= -(dst_exponent); + } + Dbl_set_mantissap1(resultp1, srcp1 >> (DBL_EXP_LENGTH-1)); + Shiftdouble(srcp1,srcp2,DBL_EXP_LENGTH-1,resultp2); + Dbl_set_exponent(resultp1, (62+DBL_BIAS) - dst_exponent); + + /* check for inexact */ + if (Dint_isinexact_to_dbl(srcp2)) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + Dbl_increment(resultp1,resultp2); + } + break; + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + Dbl_increment(resultp1,resultp2); + } + break; + case ROUNDNEAREST: + Dbl_roundnearest_from_dint(srcp2,resultp1, + resultp2); + } + if (Is_inexacttrap_enabled()) { + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/float.h b/kernel/arch/parisc/math-emu/float.h new file mode 100644 index 000000000..7a51f97e7 --- /dev/null +++ b/kernel/arch/parisc/math-emu/float.h @@ -0,0 +1,581 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/float.h $Revision: 1.1 $ + * + * Purpose: + * <<please update with a synopis of the functionality provided by this file>> + * + * BE header: no + * + * Shipped: yes + * /usr/conf/pa/spmath/float.h + * + * END_DESC +*/ + +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + +#include "fpbits.h" +#include "hppa.h" +/* + * Want to pick up the FPU capability flags, not the PDC structures. + * 'LOCORE' isn't really true in this case, but we don't want the C structures + * so it suits our purposes + */ +#define LOCORE +#include "fpu.h" + +/* + * Declare the basic structures for the 3 different + * floating-point precisions. + * + * Single number + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |s| exp | mantissa | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +#define Sall(object) (object) +#define Ssign(object) Bitfield_extract( 0, 1,object) +#define Ssignedsign(object) Bitfield_signed_extract( 0, 1,object) +#define Sexponent(object) Bitfield_extract( 1, 8,object) +#define Smantissa(object) Bitfield_mask( 9, 23,object) +#define Ssignaling(object) Bitfield_extract( 9, 1,object) +#define Ssignalingnan(object) Bitfield_extract( 1, 9,object) +#define Shigh2mantissa(object) Bitfield_extract( 9, 2,object) +#define Sexponentmantissa(object) Bitfield_mask( 1, 31,object) +#define Ssignexponent(object) Bitfield_extract( 0, 9,object) +#define Shidden(object) Bitfield_extract( 8, 1,object) +#define Shiddenoverflow(object) Bitfield_extract( 7, 1,object) +#define Shiddenhigh7mantissa(object) Bitfield_extract( 8, 8,object) +#define Shiddenhigh3mantissa(object) Bitfield_extract( 8, 4,object) +#define Slow(object) Bitfield_mask( 31, 1,object) +#define Slow4(object) Bitfield_mask( 28, 4,object) +#define Slow31(object) Bitfield_mask( 1, 31,object) +#define Shigh31(object) Bitfield_extract( 0, 31,object) +#define Ssignedhigh31(object) Bitfield_signed_extract( 0, 31,object) +#define Shigh4(object) Bitfield_extract( 0, 4,object) +#define Sbit24(object) Bitfield_extract( 24, 1,object) +#define Sbit28(object) Bitfield_extract( 28, 1,object) +#define Sbit29(object) Bitfield_extract( 29, 1,object) +#define Sbit30(object) Bitfield_extract( 30, 1,object) +#define Sbit31(object) Bitfield_mask( 31, 1,object) + +#define Deposit_ssign(object,value) Bitfield_deposit(value,0,1,object) +#define Deposit_sexponent(object,value) Bitfield_deposit(value,1,8,object) +#define Deposit_smantissa(object,value) Bitfield_deposit(value,9,23,object) +#define Deposit_shigh2mantissa(object,value) Bitfield_deposit(value,9,2,object) +#define Deposit_sexponentmantissa(object,value) \ + Bitfield_deposit(value,1,31,object) +#define Deposit_ssignexponent(object,value) Bitfield_deposit(value,0,9,object) +#define Deposit_slow(object,value) Bitfield_deposit(value,31,1,object) +#define Deposit_shigh4(object,value) Bitfield_deposit(value,0,4,object) + +#define Is_ssign(object) Bitfield_mask( 0, 1,object) +#define Is_ssignaling(object) Bitfield_mask( 9, 1,object) +#define Is_shidden(object) Bitfield_mask( 8, 1,object) +#define Is_shiddenoverflow(object) Bitfield_mask( 7, 1,object) +#define Is_slow(object) Bitfield_mask( 31, 1,object) +#define Is_sbit24(object) Bitfield_mask( 24, 1,object) +#define Is_sbit28(object) Bitfield_mask( 28, 1,object) +#define Is_sbit29(object) Bitfield_mask( 29, 1,object) +#define Is_sbit30(object) Bitfield_mask( 30, 1,object) +#define Is_sbit31(object) Bitfield_mask( 31, 1,object) + +/* + * Double number. + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |s| exponent | mantissa part 1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | mantissa part 2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +#define Dallp1(object) (object) +#define Dsign(object) Bitfield_extract( 0, 1,object) +#define Dsignedsign(object) Bitfield_signed_extract( 0, 1,object) +#define Dexponent(object) Bitfield_extract( 1, 11,object) +#define Dmantissap1(object) Bitfield_mask( 12, 20,object) +#define Dsignaling(object) Bitfield_extract( 12, 1,object) +#define Dsignalingnan(object) Bitfield_extract( 1, 12,object) +#define Dhigh2mantissa(object) Bitfield_extract( 12, 2,object) +#define Dexponentmantissap1(object) Bitfield_mask( 1, 31,object) +#define Dsignexponent(object) Bitfield_extract( 0, 12,object) +#define Dhidden(object) Bitfield_extract( 11, 1,object) +#define Dhiddenoverflow(object) Bitfield_extract( 10, 1,object) +#define Dhiddenhigh7mantissa(object) Bitfield_extract( 11, 8,object) +#define Dhiddenhigh3mantissa(object) Bitfield_extract( 11, 4,object) +#define Dlowp1(object) Bitfield_mask( 31, 1,object) +#define Dlow31p1(object) Bitfield_mask( 1, 31,object) +#define Dhighp1(object) Bitfield_extract( 0, 1,object) +#define Dhigh4p1(object) Bitfield_extract( 0, 4,object) +#define Dhigh31p1(object) Bitfield_extract( 0, 31,object) +#define Dsignedhigh31p1(object) Bitfield_signed_extract( 0, 31,object) +#define Dbit3p1(object) Bitfield_extract( 3, 1,object) + +#define Deposit_dsign(object,value) Bitfield_deposit(value,0,1,object) +#define Deposit_dexponent(object,value) Bitfield_deposit(value,1,11,object) +#define Deposit_dmantissap1(object,value) Bitfield_deposit(value,12,20,object) +#define Deposit_dhigh2mantissa(object,value) Bitfield_deposit(value,12,2,object) +#define Deposit_dexponentmantissap1(object,value) \ + Bitfield_deposit(value,1,31,object) +#define Deposit_dsignexponent(object,value) Bitfield_deposit(value,0,12,object) +#define Deposit_dlowp1(object,value) Bitfield_deposit(value,31,1,object) +#define Deposit_dhigh4p1(object,value) Bitfield_deposit(value,0,4,object) + +#define Is_dsign(object) Bitfield_mask( 0, 1,object) +#define Is_dsignaling(object) Bitfield_mask( 12, 1,object) +#define Is_dhidden(object) Bitfield_mask( 11, 1,object) +#define Is_dhiddenoverflow(object) Bitfield_mask( 10, 1,object) +#define Is_dlowp1(object) Bitfield_mask( 31, 1,object) +#define Is_dhighp1(object) Bitfield_mask( 0, 1,object) +#define Is_dbit3p1(object) Bitfield_mask( 3, 1,object) + +#define Dallp2(object) (object) +#define Dmantissap2(object) (object) +#define Dlowp2(object) Bitfield_mask( 31, 1,object) +#define Dlow4p2(object) Bitfield_mask( 28, 4,object) +#define Dlow31p2(object) Bitfield_mask( 1, 31,object) +#define Dhighp2(object) Bitfield_extract( 0, 1,object) +#define Dhigh31p2(object) Bitfield_extract( 0, 31,object) +#define Dbit2p2(object) Bitfield_extract( 2, 1,object) +#define Dbit3p2(object) Bitfield_extract( 3, 1,object) +#define Dbit21p2(object) Bitfield_extract( 21, 1,object) +#define Dbit28p2(object) Bitfield_extract( 28, 1,object) +#define Dbit29p2(object) Bitfield_extract( 29, 1,object) +#define Dbit30p2(object) Bitfield_extract( 30, 1,object) +#define Dbit31p2(object) Bitfield_mask( 31, 1,object) + +#define Deposit_dlowp2(object,value) Bitfield_deposit(value,31,1,object) + +#define Is_dlowp2(object) Bitfield_mask( 31, 1,object) +#define Is_dhighp2(object) Bitfield_mask( 0, 1,object) +#define Is_dbit2p2(object) Bitfield_mask( 2, 1,object) +#define Is_dbit3p2(object) Bitfield_mask( 3, 1,object) +#define Is_dbit21p2(object) Bitfield_mask( 21, 1,object) +#define Is_dbit28p2(object) Bitfield_mask( 28, 1,object) +#define Is_dbit29p2(object) Bitfield_mask( 29, 1,object) +#define Is_dbit30p2(object) Bitfield_mask( 30, 1,object) +#define Is_dbit31p2(object) Bitfield_mask( 31, 1,object) + +/* + * Quad number. + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |s| exponent | mantissa part 1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | mantissa part 2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | mantissa part 3 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | mantissa part 4 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +typedef struct + { + union + { + struct { unsigned qallp1; } u_qallp1; +/* Not needed for now... + Bitfield_extract( 0, 1,u_qsign,qsign) + Bitfield_signed_extract( 0, 1,u_qsignedsign,qsignedsign) + Bitfield_extract( 1, 15,u_qexponent,qexponent) + Bitfield_extract(16, 16,u_qmantissap1,qmantissap1) + Bitfield_extract(16, 1,u_qsignaling,qsignaling) + Bitfield_extract(1, 16,u_qsignalingnan,qsignalingnan) + Bitfield_extract(16, 2,u_qhigh2mantissa,qhigh2mantissa) + Bitfield_extract( 1, 31,u_qexponentmantissap1,qexponentmantissap1) + Bitfield_extract( 0, 16,u_qsignexponent,qsignexponent) + Bitfield_extract(15, 1,u_qhidden,qhidden) + Bitfield_extract(14, 1,u_qhiddenoverflow,qhiddenoverflow) + Bitfield_extract(15, 8,u_qhiddenhigh7mantissa,qhiddenhigh7mantissa) + Bitfield_extract(15, 4,u_qhiddenhigh3mantissa,qhiddenhigh3mantissa) + Bitfield_extract(31, 1,u_qlowp1,qlowp1) + Bitfield_extract( 1, 31,u_qlow31p1,qlow31p1) + Bitfield_extract( 0, 1,u_qhighp1,qhighp1) + Bitfield_extract( 0, 4,u_qhigh4p1,qhigh4p1) + Bitfield_extract( 0, 31,u_qhigh31p1,qhigh31p1) + */ + } quad_u1; + union + { + struct { unsigned qallp2; } u_qallp2; + /* Not needed for now... + Bitfield_extract(31, 1,u_qlowp2,qlowp2) + Bitfield_extract( 1, 31,u_qlow31p2,qlow31p2) + Bitfield_extract( 0, 1,u_qhighp2,qhighp2) + Bitfield_extract( 0, 31,u_qhigh31p2,qhigh31p2) + */ + } quad_u2; + union + { + struct { unsigned qallp3; } u_qallp3; + /* Not needed for now... + Bitfield_extract(31, 1,u_qlowp3,qlowp3) + Bitfield_extract( 1, 31,u_qlow31p3,qlow31p3) + Bitfield_extract( 0, 1,u_qhighp3,qhighp3) + Bitfield_extract( 0, 31,u_qhigh31p3,qhigh31p3) + */ + } quad_u3; + union + { + struct { unsigned qallp4; } u_qallp4; + /* Not need for now... + Bitfield_extract(31, 1,u_qlowp4,qlowp4) + Bitfield_extract( 1, 31,u_qlow31p4,qlow31p4) + Bitfield_extract( 0, 1,u_qhighp4,qhighp4) + Bitfield_extract( 0, 31,u_qhigh31p4,qhigh31p4) + */ + } quad_u4; + } quad_floating_point; + +/* Extension - An additional structure to hold the guard, round and + * sticky bits during computations. + */ +#define Extall(object) (object) +#define Extsign(object) Bitfield_extract( 0, 1,object) +#define Exthigh31(object) Bitfield_extract( 0, 31,object) +#define Extlow31(object) Bitfield_extract( 1, 31,object) +#define Extlow(object) Bitfield_extract( 31, 1,object) + +/* + * Single extended - The upper word is just like single precision, + * but one additional word of mantissa is needed. + */ +#define Sextallp1(object) (object) +#define Sextallp2(object) (object) +#define Sextlowp1(object) Bitfield_extract( 31, 1,object) +#define Sexthighp2(object) Bitfield_extract( 0, 1,object) +#define Sextlow31p2(object) Bitfield_extract( 1, 31,object) +#define Sexthiddenoverflow(object) Bitfield_extract( 4, 1,object) +#define Is_sexthiddenoverflow(object) Bitfield_mask( 4, 1,object) + +/* + * Double extended - The upper two words are just like double precision, + * but two additional words of mantissa are needed. + */ +#define Dextallp1(object) (object) +#define Dextallp2(object) (object) +#define Dextallp3(object) (object) +#define Dextallp4(object) (object) +#define Dextlowp2(object) Bitfield_extract( 31, 1,object) +#define Dexthighp3(object) Bitfield_extract( 0, 1,object) +#define Dextlow31p3(object) Bitfield_extract( 1, 31,object) +#define Dexthiddenoverflow(object) Bitfield_extract( 10, 1,object) +#define Is_dexthiddenoverflow(object) Bitfield_mask( 10, 1,object) +#define Deposit_dextlowp4(object,value) Bitfield_deposit(value,31,1,object) + +/* + * Declare the basic structures for the 3 different + * fixed-point precisions. + * + * Single number + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |s| integer | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +typedef int sgl_integer; + +/* + * Double number. + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |s| high integer | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | low integer | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +struct dint { + int wd0; + unsigned int wd1; +}; + +struct dblwd { + unsigned int wd0; + unsigned int wd1; +}; + +/* + * Quad number. + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |s| integer part1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | integer part 2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | integer part 3 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | integer part 4 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ + +struct quadwd { + int wd0; + unsigned int wd1; + unsigned int wd2; + unsigned int wd3; +}; + +typedef struct quadwd quad_integer; + + +/* useful typedefs */ +typedef unsigned int sgl_floating_point; +typedef struct dblwd dbl_floating_point; +typedef struct dint dbl_integer; +typedef struct dblwd dbl_unsigned; + +/* + * Define the different precisions' parameters. + */ +#define SGL_BITLENGTH 32 +#define SGL_EMAX 127 +#define SGL_EMIN (-126) +#define SGL_BIAS 127 +#define SGL_WRAP 192 +#define SGL_INFINITY_EXPONENT (SGL_EMAX+SGL_BIAS+1) +#define SGL_THRESHOLD 32 +#define SGL_EXP_LENGTH 8 +#define SGL_P 24 + +#define DBL_BITLENGTH 64 +#define DBL_EMAX 1023 +#define DBL_EMIN (-1022) +#define DBL_BIAS 1023 +#define DBL_WRAP 1536 +#define DBL_INFINITY_EXPONENT (DBL_EMAX+DBL_BIAS+1) +#define DBL_THRESHOLD 64 +#define DBL_EXP_LENGTH 11 +#define DBL_P 53 + +#define QUAD_BITLENGTH 128 +#define QUAD_EMAX 16383 +#define QUAD_EMIN (-16382) +#define QUAD_BIAS 16383 +#define QUAD_WRAP 24576 +#define QUAD_INFINITY_EXPONENT (QUAD_EMAX+QUAD_BIAS+1) +#define QUAD_P 113 + +/* Boolean Values etc. */ +#define FALSE 0 +#define TRUE (!FALSE) +#define NOT ! +#define XOR ^ + +/* other constants */ +#undef NULL +#define NULL 0 +#define NIL 0 +#define SGL 0 +#define DBL 1 +#define BADFMT 2 +#define QUAD 3 + + +/* Types */ +typedef int boolean; +typedef int FORMAT; +typedef int VOID; + + +/* Declare status register equivalent to FPUs architecture. + * + * 0 1 2 3 4 5 6 7 8 910 1 2 3 4 5 6 7 8 920 1 2 3 4 5 6 7 8 930 1 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |V|Z|O|U|I|C| rsv | model | version |RM |rsv|T|r|V|Z|O|U|I| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +#define Cbit(object) Bitfield_extract( 5, 1,object) +#define Tbit(object) Bitfield_extract( 25, 1,object) +#define Roundingmode(object) Bitfield_extract( 21, 2,object) +#define Invalidtrap(object) Bitfield_extract( 27, 1,object) +#define Divisionbyzerotrap(object) Bitfield_extract( 28, 1,object) +#define Overflowtrap(object) Bitfield_extract( 29, 1,object) +#define Underflowtrap(object) Bitfield_extract( 30, 1,object) +#define Inexacttrap(object) Bitfield_extract( 31, 1,object) +#define Invalidflag(object) Bitfield_extract( 0, 1,object) +#define Divisionbyzeroflag(object) Bitfield_extract( 1, 1,object) +#define Overflowflag(object) Bitfield_extract( 2, 1,object) +#define Underflowflag(object) Bitfield_extract( 3, 1,object) +#define Inexactflag(object) Bitfield_extract( 4, 1,object) +#define Allflags(object) Bitfield_extract( 0, 5,object) + +/* Definitions relevant to the status register */ + +/* Rounding Modes */ +#define ROUNDNEAREST 0 +#define ROUNDZERO 1 +#define ROUNDPLUS 2 +#define ROUNDMINUS 3 + +/* Exceptions */ +#define NOEXCEPTION 0x0 +#define INVALIDEXCEPTION 0x20 +#define DIVISIONBYZEROEXCEPTION 0x10 +#define OVERFLOWEXCEPTION 0x08 +#define UNDERFLOWEXCEPTION 0x04 +#define INEXACTEXCEPTION 0x02 +#define UNIMPLEMENTEDEXCEPTION 0x01 + +/* New exceptions for the 2E Opcode */ +#define OPC_2E_INVALIDEXCEPTION 0x30 +#define OPC_2E_OVERFLOWEXCEPTION 0x18 +#define OPC_2E_UNDERFLOWEXCEPTION 0x0c +#define OPC_2E_INEXACTEXCEPTION 0x12 + +/* Declare exception registers equivalent to FPUs architecture + * + * 0 1 2 3 4 5 6 7 8 910 1 2 3 4 5 6 7 8 920 1 2 3 4 5 6 7 8 930 1 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * |excepttype | r1 | r2/ext | operation |parm |n| t/cond | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +#define Allexception(object) (object) +#define Exceptiontype(object) Bitfield_extract( 0, 6,object) +#define Instructionfield(object) Bitfield_mask( 6,26,object) +#define Parmfield(object) Bitfield_extract( 23, 3,object) +#define Rabit(object) Bitfield_extract( 24, 1,object) +#define Ibit(object) Bitfield_extract( 25, 1,object) + +#define Set_exceptiontype(object,value) Bitfield_deposit(value, 0, 6,object) +#define Set_parmfield(object,value) Bitfield_deposit(value, 23, 3,object) +#define Set_exceptiontype_and_instr_field(exception,instruction,object) \ + object = exception << 26 | instruction + +/* Declare the condition field + * + * 0 1 2 3 4 5 6 7 8 910 1 2 3 4 5 6 7 8 920 1 2 3 4 5 6 7 8 930 1 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | |G|L|E|U|X| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + */ +#define Greaterthanbit(object) Bitfield_extract( 27, 1,object) +#define Lessthanbit(object) Bitfield_extract( 28, 1,object) +#define Equalbit(object) Bitfield_extract( 29, 1,object) +#define Unorderedbit(object) Bitfield_extract( 30, 1,object) +#define Exceptionbit(object) Bitfield_extract( 31, 1,object) + +/* An alias name for the status register */ +#define Fpustatus_register (*status) + +/************************************************** + * Status register referencing and manipulation. * + **************************************************/ + +/* Rounding mode */ +#define Rounding_mode() Roundingmode(Fpustatus_register) +#define Is_rounding_mode(rmode) \ + (Roundingmode(Fpustatus_register) == rmode) +#define Set_rounding_mode(value) \ + Bitfield_deposit(value,21,2,Fpustatus_register) + +/* Boolean testing of the trap enable bits */ +#define Is_invalidtrap_enabled() Invalidtrap(Fpustatus_register) +#define Is_divisionbyzerotrap_enabled() Divisionbyzerotrap(Fpustatus_register) +#define Is_overflowtrap_enabled() Overflowtrap(Fpustatus_register) +#define Is_underflowtrap_enabled() Underflowtrap(Fpustatus_register) +#define Is_inexacttrap_enabled() Inexacttrap(Fpustatus_register) + +/* Set the indicated flags in the status register */ +#define Set_invalidflag() Bitfield_deposit(1,0,1,Fpustatus_register) +#define Set_divisionbyzeroflag() Bitfield_deposit(1,1,1,Fpustatus_register) +#define Set_overflowflag() Bitfield_deposit(1,2,1,Fpustatus_register) +#define Set_underflowflag() Bitfield_deposit(1,3,1,Fpustatus_register) +#define Set_inexactflag() Bitfield_deposit(1,4,1,Fpustatus_register) + +#define Clear_all_flags() Bitfield_deposit(0,0,5,Fpustatus_register) + +/* Manipulate the trap and condition code bits (tbit and cbit) */ +#define Set_tbit() Bitfield_deposit(1,25,1,Fpustatus_register) +#define Clear_tbit() Bitfield_deposit(0,25,1,Fpustatus_register) +#define Is_tbit_set() Tbit(Fpustatus_register) +#define Is_cbit_set() Cbit(Fpustatus_register) + +#define Set_status_cbit(value) \ + Bitfield_deposit(value,5,1,Fpustatus_register) + +/******************************* + * Condition field referencing * + *******************************/ +#define Unordered(cond) Unorderedbit(cond) +#define Equal(cond) Equalbit(cond) +#define Lessthan(cond) Lessthanbit(cond) +#define Greaterthan(cond) Greaterthanbit(cond) +#define Exception(cond) Exceptionbit(cond) + + +/* Defines for the extension */ +#define Ext_isone_sign(extent) (Extsign(extent)) +#define Ext_isnotzero(extent) \ + (Extall(extent)) +#define Ext_isnotzero_lower(extent) \ + (Extlow31(extent)) +#define Ext_leftshiftby1(extent) \ + Extall(extent) <<= 1 +#define Ext_negate(extent) \ + (int )Extall(extent) = 0 - (int )Extall(extent) +#define Ext_setone_low(extent) Bitfield_deposit(1,31,1,extent) +#define Ext_setzero(extent) Extall(extent) = 0 + +typedef int operation; + +/* error messages */ + +#define NONE 0 +#define UNDEFFPINST 1 + +/* Function definitions: opcode, opclass */ +#define FTEST (1<<2) | 0 +#define FCPY (2<<2) | 0 +#define FABS (3<<2) | 0 +#define FSQRT (4<<2) | 0 +#define FRND (5<<2) | 0 + +#define FCNVFF (0<<2) | 1 +#define FCNVXF (1<<2) | 1 +#define FCNVFX (2<<2) | 1 +#define FCNVFXT (3<<2) | 1 + +#define FCMP (0<<2) | 2 + +#define FADD (0<<2) | 3 +#define FSUB (1<<2) | 3 +#define FMPY (2<<2) | 3 +#define FDIV (3<<2) | 3 +#define FREM (4<<2) | 3 + diff --git a/kernel/arch/parisc/math-emu/fmpyfadd.c b/kernel/arch/parisc/math-emu/fmpyfadd.c new file mode 100644 index 000000000..b067c45c8 --- /dev/null +++ b/kernel/arch/parisc/math-emu/fmpyfadd.c @@ -0,0 +1,2655 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $ + * + * Purpose: + * Double Floating-point Multiply Fused Add + * Double Floating-point Multiply Negate Fused Add + * Single Floating-point Multiply Fused Add + * Single Floating-point Multiply Negate Fused Add + * + * External Interfaces: + * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" + + +/* + * Double Floating-point Multiply Fused Add + */ + +int +dbl_fmpyfadd( + dbl_floating_point *src1ptr, + dbl_floating_point *src2ptr, + dbl_floating_point *src3ptr, + unsigned int *status, + dbl_floating_point *dstptr) +{ + unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; + register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; + unsigned int rightp1, rightp2, rightp3, rightp4; + unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; + register int mpy_exponent, add_exponent, count; + boolean inexact = FALSE, is_tiny = FALSE; + + unsigned int signlessleft1, signlessright1, save; + register int result_exponent, diff_exponent; + int sign_save, jumpsize; + + Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); + Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); + Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); + + /* + * set sign bit of result of multiply + */ + if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) + Dbl_setnegativezerop1(resultp1); + else Dbl_setzerop1(resultp1); + + /* + * Generate multiply exponent + */ + mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; + + /* + * check first operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd1p1)) { + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + if (Dbl_isnotnan(opnd2p1,opnd2p2) && + Dbl_isnotnan(opnd3p1,opnd3p2)) { + if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { + /* + * invalid since operands are infinity + * and zero + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Dbl_isinfinity(opnd3p1,opnd3p2) && + (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd1p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd1p1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * is third operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd3p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd3p1); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check second operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd2p1)) { + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + if (Dbl_isnotnan(opnd3p1,opnd3p2)) { + if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { + /* + * invalid since multiply operands are + * zero & infinity + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(opnd2p1,opnd2p2); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Dbl_isinfinity(opnd3p1,opnd3p2) && + (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + } + /* + * is third operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd3p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd3p1); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check third operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd3p1)) { + if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { + /* return infinity */ + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd3p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd3p1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * Generate multiply mantissa + */ + if (Dbl_isnotzero_exponent(opnd1p1)) { + /* set hidden bit */ + Dbl_clear_signexponent_set_hidden(opnd1p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Dbl_or_signs(opnd3p1,resultp1); + } else { + Dbl_and_signs(opnd3p1,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Dbl_iszero_exponent(opnd3p1) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Dbl_signextendedsign(opnd3p1); + result_exponent = 0; + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_normalize(opnd3p1,opnd3p2,result_exponent); + Dbl_set_sign(opnd3p1,/*using*/sign_save); + Dbl_setwrapped_exponent(opnd3p1,result_exponent, + unfl); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized, adjust exponent */ + Dbl_clear_signexponent(opnd1p1); + Dbl_leftshiftby1(opnd1p1,opnd1p2); + Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Dbl_isnotzero_exponent(opnd2p1)) { + Dbl_clear_signexponent_set_hidden(opnd2p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Dbl_or_signs(opnd3p1,resultp1); + } else { + Dbl_and_signs(opnd3p1,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Dbl_iszero_exponent(opnd3p1) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Dbl_signextendedsign(opnd3p1); + result_exponent = 0; + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_normalize(opnd3p1,opnd3p2,result_exponent); + Dbl_set_sign(opnd3p1,/*using*/sign_save); + Dbl_setwrapped_exponent(opnd3p1,result_exponent, + unfl); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Dbl_clear_signexponent(opnd2p1); + Dbl_leftshiftby1(opnd2p1,opnd2p2); + Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); + } + + /* Multiply the first two source mantissas together */ + + /* + * The intermediate result will be kept in tmpres, + * which needs enough room for 106 bits of mantissa, + * so lets call it a Double extended. + */ + Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); + + /* + * Four bits at a time are inspected in each loop, and a + * simple shift and add multiply algorithm is used. + */ + for (count = DBL_P-1; count >= 0; count -= 4) { + Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); + if (Dbit28p2(opnd1p2)) { + /* Fourword_add should be an ADD followed by 3 ADDC's */ + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); + } + if (Dbit29p2(opnd1p2)) { + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); + } + if (Dbit30p2(opnd1p2)) { + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); + } + if (Dbit31p2(opnd1p2)) { + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1, opnd2p2, 0, 0); + } + Dbl_rightshiftby4(opnd1p1,opnd1p2); + } + if (Is_dexthiddenoverflow(tmpresp1)) { + /* result mantissa >= 2 (mantissa overflow) */ + mpy_exponent++; + Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); + } + + /* + * Restore the sign of the mpy result which was saved in resultp1. + * The exponent will continue to be kept in mpy_exponent. + */ + Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); + + /* + * No rounding is required, since the result of the multiply + * is exact in the extended format. + */ + + /* + * Now we are ready to perform the add portion of the operation. + * + * The exponents need to be kept as integers for now, since the + * multiply result might not fit into the exponent field. We + * can't overflow or underflow because of this yet, since the + * add could bring the final result back into range. + */ + add_exponent = Dbl_exponent(opnd3p1); + + /* + * Check for denormalized or zero add operand. + */ + if (add_exponent == 0) { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { + /* right is zero */ + /* Left can't be zero and must be result. + * + * The final result is now in tmpres and mpy_exponent, + * and needs to be rounded and squeezed back into + * double precision format from double extended. + */ + result_exponent = mpy_exponent; + Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, + resultp1,resultp2,resultp3,resultp4); + sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ + goto round; + } + + /* + * Neither are zeroes. + * Adjust exponent and normalize add operand. + */ + sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ + Dbl_clear_signexponent(opnd3p1); + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_normalize(opnd3p1,opnd3p2,add_exponent); + Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ + } else { + Dbl_clear_exponent_set_hidden(opnd3p1); + } + /* + * Copy opnd3 to the double extended variable called right. + */ + Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); + + /* + * A zero "save" helps discover equal operands (for later), + * and is used in swapping operands (if needed). + */ + Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); + + /* + * Compare magnitude of operands. + */ + Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); + Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); + if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && + Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ + /* + * Set the left operand to the larger one by XOR swap. + * First finish the first word "save". + */ + Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); + Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); + Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, + rightp2,rightp3,rightp4); + /* also setup exponents used in rest of routine */ + diff_exponent = add_exponent - mpy_exponent; + result_exponent = add_exponent; + } else { + /* also setup exponents used in rest of routine */ + diff_exponent = mpy_exponent - add_exponent; + result_exponent = mpy_exponent; + } + /* Invariant: left is not smaller than right. */ + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for + * this infrequent case. + */ + if (diff_exponent > DBLEXT_THRESHOLD) { + diff_exponent = DBLEXT_THRESHOLD; + } + + /* Align right operand by shifting it to the right */ + Dblext_clear_sign(rightp1); + Dblext_right_align(rightp1,rightp2,rightp3,rightp4, + /*shifted by*/diff_exponent); + + /* Treat sum and difference of the operands separately. */ + if ((int)save < 0) { + /* + * Difference of the two operands. Overflow can occur if the + * multiply overflowed. A borrow can occur out of the hidden + * bit and force a post normalization phase. + */ + Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, + rightp1,rightp2,rightp3,rightp4, + resultp1,resultp2,resultp3,resultp4); + sign_save = Dbl_signextendedsign(resultp1); + if (Dbl_iszero_hidden(resultp1)) { + /* Handle normalization */ + /* A straightforward algorithm would now shift the + * result and extension left until the hidden bit + * becomes one. Not all of the extension bits need + * participate in the shift. Only the two most + * significant bits (round and guard) are needed. + * If only a single shift is needed then the guard + * bit becomes a significant low order bit and the + * extension must participate in the rounding. + * If more than a single shift is needed, then all + * bits to the right of the guard bit are zeros, + * and the guard bit may or may not be zero. */ + Dblext_leftshiftby1(resultp1,resultp2,resultp3, + resultp4); + + /* Need to check for a zero result. The sign and + * exponent fields have already been zeroed. The more + * efficient test of the full object can be used. + */ + if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){ + /* Must have been "x-x" or "x+(-x)". */ + if (Is_rounding_mode(ROUNDMINUS)) + Dbl_setone_sign(resultp1); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + result_exponent--; + + /* Look to see if normalization is finished. */ + if (Dbl_isone_hidden(resultp1)) { + /* No further normalization is needed */ + goto round; + } + + /* Discover first one bit to determine shift amount. + * Use a modified binary search. We have already + * shifted the result one position right and still + * not found a one so the remainder of the extension + * must be zero and simplifies rounding. */ + /* Scan bytes */ + while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { + Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); + result_exponent -= 8; + } + /* Now narrow it down to the nibble */ + if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { + /* The lower nibble contains the + * normalizing one */ + Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); + result_exponent -= 4; + } + /* Select case where first bit is set (already + * normalized) otherwise select the proper shift. */ + jumpsize = Dbl_hiddenhigh3mantissa(resultp1); + if (jumpsize <= 7) switch(jumpsize) { + case 1: + Dblext_leftshiftby3(resultp1,resultp2,resultp3, + resultp4); + result_exponent -= 3; + break; + case 2: + case 3: + Dblext_leftshiftby2(resultp1,resultp2,resultp3, + resultp4); + result_exponent -= 2; + break; + case 4: + case 5: + case 6: + case 7: + Dblext_leftshiftby1(resultp1,resultp2,resultp3, + resultp4); + result_exponent -= 1; + break; + } + } /* end if (hidden...)... */ + /* Fall through and round */ + } /* end if (save < 0)... */ + else { + /* Add magnitudes */ + Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, + rightp1,rightp2,rightp3,rightp4, + /*to*/resultp1,resultp2,resultp3,resultp4); + sign_save = Dbl_signextendedsign(resultp1); + if (Dbl_isone_hiddenoverflow(resultp1)) { + /* Prenormalization required. */ + Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, + resultp4); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...add magnitudes... */ + + /* Round the result. If the extension and lower two words are + * all zeros, then the result is exact. Otherwise round in the + * correct direction. Underflow is possible. If a postnormalization + * is necessary, then the mantissa is all zeros so no shift is needed. + */ + round: + if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { + Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, + result_exponent,is_tiny); + } + Dbl_set_sign(resultp1,/*using*/sign_save); + if (Dblext_isnotzero_mantissap3(resultp3) || + Dblext_isnotzero_mantissap4(resultp4)) { + inexact = TRUE; + switch(Rounding_mode()) { + case ROUNDNEAREST: /* The default. */ + if (Dblext_isone_highp3(resultp3)) { + /* at least 1/2 ulp */ + if (Dblext_isnotzero_low31p3(resultp3) || + Dblext_isnotzero_mantissap4(resultp4) || + Dblext_isone_lowp2(resultp2)) { + /* either exactly half way and odd or + * more than 1/2ulp */ + Dbl_increment(resultp1,resultp2); + } + } + break; + + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + /* Round up positive results */ + Dbl_increment(resultp1,resultp2); + } + break; + + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + /* Round down negative results */ + Dbl_increment(resultp1,resultp2); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; + } + if (result_exponent >= DBL_INFINITY_EXPONENT) { + /* trap if OVERFLOWTRAP enabled */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_OVERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return (OPC_2E_OVERFLOWEXCEPTION); + } + inexact = TRUE; + Set_overflowflag(); + /* set result to infinity or largest number */ + Dbl_setoverflow(resultp1,resultp2); + + } else if (result_exponent <= 0) { /* underflow case */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_UNDERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(OPC_2E_UNDERFLOWEXCEPTION); + } + else if (inexact && is_tiny) Set_underflowflag(); + } + else Dbl_set_exponent(resultp1,result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); +} + +/* + * Double Floating-point Multiply Negate Fused Add + */ + +dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + +dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; +unsigned int *status; +{ + unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; + register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; + unsigned int rightp1, rightp2, rightp3, rightp4; + unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; + register int mpy_exponent, add_exponent, count; + boolean inexact = FALSE, is_tiny = FALSE; + + unsigned int signlessleft1, signlessright1, save; + register int result_exponent, diff_exponent; + int sign_save, jumpsize; + + Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); + Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); + Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); + + /* + * set sign bit of result of multiply + */ + if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) + Dbl_setzerop1(resultp1); + else + Dbl_setnegativezerop1(resultp1); + + /* + * Generate multiply exponent + */ + mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; + + /* + * check first operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd1p1)) { + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + if (Dbl_isnotnan(opnd2p1,opnd2p2) && + Dbl_isnotnan(opnd3p1,opnd3p2)) { + if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { + /* + * invalid since operands are infinity + * and zero + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Dbl_isinfinity(opnd3p1,opnd3p2) && + (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd1p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd1p1); + } + /* + * is second operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + /* + * is third operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd3p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd3p1); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check second operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd2p1)) { + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + if (Dbl_isnotnan(opnd3p1,opnd3p2)) { + if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { + /* + * invalid since multiply operands are + * zero & infinity + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(opnd2p1,opnd2p2); + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Dbl_isinfinity(opnd3p1,opnd3p2) && + (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Dbl_makequietnan(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Dbl_setinfinity_exponentmantissa(resultp1,resultp2); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd2p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd2p1); + } + /* + * is third operand a signaling NaN? + */ + else if (Dbl_is_signalingnan(opnd3p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd3p1); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check third operand for NaN's or infinity + */ + if (Dbl_isinfinity_exponent(opnd3p1)) { + if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { + /* return infinity */ + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } else { + /* + * is NaN; signaling or quiet? + */ + if (Dbl_isone_signaling(opnd3p1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(opnd3p1); + } + /* + * return quiet NaN + */ + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * Generate multiply mantissa + */ + if (Dbl_isnotzero_exponent(opnd1p1)) { + /* set hidden bit */ + Dbl_clear_signexponent_set_hidden(opnd1p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Dbl_or_signs(opnd3p1,resultp1); + } else { + Dbl_and_signs(opnd3p1,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Dbl_iszero_exponent(opnd3p1) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Dbl_signextendedsign(opnd3p1); + result_exponent = 0; + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_normalize(opnd3p1,opnd3p2,result_exponent); + Dbl_set_sign(opnd3p1,/*using*/sign_save); + Dbl_setwrapped_exponent(opnd3p1,result_exponent, + unfl); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized, adjust exponent */ + Dbl_clear_signexponent(opnd1p1); + Dbl_leftshiftby1(opnd1p1,opnd1p2); + Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Dbl_isnotzero_exponent(opnd2p1)) { + Dbl_clear_signexponent_set_hidden(opnd2p1); + } + else { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Dbl_or_signs(opnd3p1,resultp1); + } else { + Dbl_and_signs(opnd3p1,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Dbl_iszero_exponent(opnd3p1) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Dbl_signextendedsign(opnd3p1); + result_exponent = 0; + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_normalize(opnd3p1,opnd3p2,result_exponent); + Dbl_set_sign(opnd3p1,/*using*/sign_save); + Dbl_setwrapped_exponent(opnd3p1,result_exponent, + unfl); + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Dbl_clear_signexponent(opnd2p1); + Dbl_leftshiftby1(opnd2p1,opnd2p2); + Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); + } + + /* Multiply the first two source mantissas together */ + + /* + * The intermediate result will be kept in tmpres, + * which needs enough room for 106 bits of mantissa, + * so lets call it a Double extended. + */ + Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); + + /* + * Four bits at a time are inspected in each loop, and a + * simple shift and add multiply algorithm is used. + */ + for (count = DBL_P-1; count >= 0; count -= 4) { + Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); + if (Dbit28p2(opnd1p2)) { + /* Fourword_add should be an ADD followed by 3 ADDC's */ + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); + } + if (Dbit29p2(opnd1p2)) { + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); + } + if (Dbit30p2(opnd1p2)) { + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); + } + if (Dbit31p2(opnd1p2)) { + Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, + opnd2p1, opnd2p2, 0, 0); + } + Dbl_rightshiftby4(opnd1p1,opnd1p2); + } + if (Is_dexthiddenoverflow(tmpresp1)) { + /* result mantissa >= 2 (mantissa overflow) */ + mpy_exponent++; + Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); + } + + /* + * Restore the sign of the mpy result which was saved in resultp1. + * The exponent will continue to be kept in mpy_exponent. + */ + Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); + + /* + * No rounding is required, since the result of the multiply + * is exact in the extended format. + */ + + /* + * Now we are ready to perform the add portion of the operation. + * + * The exponents need to be kept as integers for now, since the + * multiply result might not fit into the exponent field. We + * can't overflow or underflow because of this yet, since the + * add could bring the final result back into range. + */ + add_exponent = Dbl_exponent(opnd3p1); + + /* + * Check for denormalized or zero add operand. + */ + if (add_exponent == 0) { + /* check for zero */ + if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { + /* right is zero */ + /* Left can't be zero and must be result. + * + * The final result is now in tmpres and mpy_exponent, + * and needs to be rounded and squeezed back into + * double precision format from double extended. + */ + result_exponent = mpy_exponent; + Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, + resultp1,resultp2,resultp3,resultp4); + sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ + goto round; + } + + /* + * Neither are zeroes. + * Adjust exponent and normalize add operand. + */ + sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ + Dbl_clear_signexponent(opnd3p1); + Dbl_leftshiftby1(opnd3p1,opnd3p2); + Dbl_normalize(opnd3p1,opnd3p2,add_exponent); + Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ + } else { + Dbl_clear_exponent_set_hidden(opnd3p1); + } + /* + * Copy opnd3 to the double extended variable called right. + */ + Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); + + /* + * A zero "save" helps discover equal operands (for later), + * and is used in swapping operands (if needed). + */ + Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); + + /* + * Compare magnitude of operands. + */ + Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); + Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); + if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && + Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ + /* + * Set the left operand to the larger one by XOR swap. + * First finish the first word "save". + */ + Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); + Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); + Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, + rightp2,rightp3,rightp4); + /* also setup exponents used in rest of routine */ + diff_exponent = add_exponent - mpy_exponent; + result_exponent = add_exponent; + } else { + /* also setup exponents used in rest of routine */ + diff_exponent = mpy_exponent - add_exponent; + result_exponent = mpy_exponent; + } + /* Invariant: left is not smaller than right. */ + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for + * this infrequent case. + */ + if (diff_exponent > DBLEXT_THRESHOLD) { + diff_exponent = DBLEXT_THRESHOLD; + } + + /* Align right operand by shifting it to the right */ + Dblext_clear_sign(rightp1); + Dblext_right_align(rightp1,rightp2,rightp3,rightp4, + /*shifted by*/diff_exponent); + + /* Treat sum and difference of the operands separately. */ + if ((int)save < 0) { + /* + * Difference of the two operands. Overflow can occur if the + * multiply overflowed. A borrow can occur out of the hidden + * bit and force a post normalization phase. + */ + Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, + rightp1,rightp2,rightp3,rightp4, + resultp1,resultp2,resultp3,resultp4); + sign_save = Dbl_signextendedsign(resultp1); + if (Dbl_iszero_hidden(resultp1)) { + /* Handle normalization */ + /* A straightforward algorithm would now shift the + * result and extension left until the hidden bit + * becomes one. Not all of the extension bits need + * participate in the shift. Only the two most + * significant bits (round and guard) are needed. + * If only a single shift is needed then the guard + * bit becomes a significant low order bit and the + * extension must participate in the rounding. + * If more than a single shift is needed, then all + * bits to the right of the guard bit are zeros, + * and the guard bit may or may not be zero. */ + Dblext_leftshiftby1(resultp1,resultp2,resultp3, + resultp4); + + /* Need to check for a zero result. The sign and + * exponent fields have already been zeroed. The more + * efficient test of the full object can be used. + */ + if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) { + /* Must have been "x-x" or "x+(-x)". */ + if (Is_rounding_mode(ROUNDMINUS)) + Dbl_setone_sign(resultp1); + Dbl_copytoptr(resultp1,resultp2,dstptr); + return(NOEXCEPTION); + } + result_exponent--; + + /* Look to see if normalization is finished. */ + if (Dbl_isone_hidden(resultp1)) { + /* No further normalization is needed */ + goto round; + } + + /* Discover first one bit to determine shift amount. + * Use a modified binary search. We have already + * shifted the result one position right and still + * not found a one so the remainder of the extension + * must be zero and simplifies rounding. */ + /* Scan bytes */ + while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { + Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); + result_exponent -= 8; + } + /* Now narrow it down to the nibble */ + if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { + /* The lower nibble contains the + * normalizing one */ + Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); + result_exponent -= 4; + } + /* Select case where first bit is set (already + * normalized) otherwise select the proper shift. */ + jumpsize = Dbl_hiddenhigh3mantissa(resultp1); + if (jumpsize <= 7) switch(jumpsize) { + case 1: + Dblext_leftshiftby3(resultp1,resultp2,resultp3, + resultp4); + result_exponent -= 3; + break; + case 2: + case 3: + Dblext_leftshiftby2(resultp1,resultp2,resultp3, + resultp4); + result_exponent -= 2; + break; + case 4: + case 5: + case 6: + case 7: + Dblext_leftshiftby1(resultp1,resultp2,resultp3, + resultp4); + result_exponent -= 1; + break; + } + } /* end if (hidden...)... */ + /* Fall through and round */ + } /* end if (save < 0)... */ + else { + /* Add magnitudes */ + Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, + rightp1,rightp2,rightp3,rightp4, + /*to*/resultp1,resultp2,resultp3,resultp4); + sign_save = Dbl_signextendedsign(resultp1); + if (Dbl_isone_hiddenoverflow(resultp1)) { + /* Prenormalization required. */ + Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, + resultp4); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...add magnitudes... */ + + /* Round the result. If the extension and lower two words are + * all zeros, then the result is exact. Otherwise round in the + * correct direction. Underflow is possible. If a postnormalization + * is necessary, then the mantissa is all zeros so no shift is needed. + */ + round: + if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { + Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, + result_exponent,is_tiny); + } + Dbl_set_sign(resultp1,/*using*/sign_save); + if (Dblext_isnotzero_mantissap3(resultp3) || + Dblext_isnotzero_mantissap4(resultp4)) { + inexact = TRUE; + switch(Rounding_mode()) { + case ROUNDNEAREST: /* The default. */ + if (Dblext_isone_highp3(resultp3)) { + /* at least 1/2 ulp */ + if (Dblext_isnotzero_low31p3(resultp3) || + Dblext_isnotzero_mantissap4(resultp4) || + Dblext_isone_lowp2(resultp2)) { + /* either exactly half way and odd or + * more than 1/2ulp */ + Dbl_increment(resultp1,resultp2); + } + } + break; + + case ROUNDPLUS: + if (Dbl_iszero_sign(resultp1)) { + /* Round up positive results */ + Dbl_increment(resultp1,resultp2); + } + break; + + case ROUNDMINUS: + if (Dbl_isone_sign(resultp1)) { + /* Round down negative results */ + Dbl_increment(resultp1,resultp2); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; + } + if (result_exponent >= DBL_INFINITY_EXPONENT) { + /* Overflow */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_OVERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return (OPC_2E_OVERFLOWEXCEPTION); + } + inexact = TRUE; + Set_overflowflag(); + Dbl_setoverflow(resultp1,resultp2); + } else if (result_exponent <= 0) { /* underflow case */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_UNDERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(OPC_2E_UNDERFLOWEXCEPTION); + } + else if (inexact && is_tiny) Set_underflowflag(); + } + else Dbl_set_exponent(resultp1,result_exponent); + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); +} + +/* + * Single Floating-point Multiply Fused Add + */ + +sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + +sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; +unsigned int *status; +{ + unsigned int opnd1, opnd2, opnd3; + register unsigned int tmpresp1, tmpresp2; + unsigned int rightp1, rightp2; + unsigned int resultp1, resultp2 = 0; + register int mpy_exponent, add_exponent, count; + boolean inexact = FALSE, is_tiny = FALSE; + + unsigned int signlessleft1, signlessright1, save; + register int result_exponent, diff_exponent; + int sign_save, jumpsize; + + Sgl_copyfromptr(src1ptr,opnd1); + Sgl_copyfromptr(src2ptr,opnd2); + Sgl_copyfromptr(src3ptr,opnd3); + + /* + * set sign bit of result of multiply + */ + if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) + Sgl_setnegativezero(resultp1); + else Sgl_setzero(resultp1); + + /* + * Generate multiply exponent + */ + mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; + + /* + * check first operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd1)) { + if (Sgl_iszero_mantissa(opnd1)) { + if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { + if (Sgl_iszero_exponentmantissa(opnd2)) { + /* + * invalid since operands are infinity + * and zero + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Sgl_isinfinity(opnd3) && + (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd1); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + Sgl_copytoptr(opnd2,dstptr); + return(NOEXCEPTION); + } + /* + * is third operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd3)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd3); + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Sgl_copytoptr(opnd1,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check second operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd2)) { + if (Sgl_iszero_mantissa(opnd2)) { + if (Sgl_isnotnan(opnd3)) { + if (Sgl_iszero_exponentmantissa(opnd1)) { + /* + * invalid since multiply operands are + * zero & infinity + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(opnd2); + Sgl_copytoptr(opnd2,dstptr); + return(NOEXCEPTION); + } + + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Sgl_isinfinity(opnd3) && + (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + } + /* + * is third operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd3)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd3); + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Sgl_copytoptr(opnd2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check third operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd3)) { + if (Sgl_iszero_mantissa(opnd3)) { + /* return infinity */ + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd3)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd3); + } + /* + * return quiet NaN + */ + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + } + + /* + * Generate multiply mantissa + */ + if (Sgl_isnotzero_exponent(opnd1)) { + /* set hidden bit */ + Sgl_clear_signexponent_set_hidden(opnd1); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd1)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Sgl_iszero_exponentmantissa(opnd3)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Sgl_or_signs(opnd3,resultp1); + } else { + Sgl_and_signs(opnd3,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Sgl_iszero_exponent(opnd3) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Sgl_signextendedsign(opnd3); + result_exponent = 0; + Sgl_leftshiftby1(opnd3); + Sgl_normalize(opnd3,result_exponent); + Sgl_set_sign(opnd3,/*using*/sign_save); + Sgl_setwrapped_exponent(opnd3,result_exponent, + unfl); + Sgl_copytoptr(opnd3,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* is denormalized, adjust exponent */ + Sgl_clear_signexponent(opnd1); + Sgl_leftshiftby1(opnd1); + Sgl_normalize(opnd1,mpy_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Sgl_isnotzero_exponent(opnd2)) { + Sgl_clear_signexponent_set_hidden(opnd2); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd2)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Sgl_iszero_exponentmantissa(opnd3)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Sgl_or_signs(opnd3,resultp1); + } else { + Sgl_and_signs(opnd3,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Sgl_iszero_exponent(opnd3) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Sgl_signextendedsign(opnd3); + result_exponent = 0; + Sgl_leftshiftby1(opnd3); + Sgl_normalize(opnd3,result_exponent); + Sgl_set_sign(opnd3,/*using*/sign_save); + Sgl_setwrapped_exponent(opnd3,result_exponent, + unfl); + Sgl_copytoptr(opnd3,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Sgl_clear_signexponent(opnd2); + Sgl_leftshiftby1(opnd2); + Sgl_normalize(opnd2,mpy_exponent); + } + + /* Multiply the first two source mantissas together */ + + /* + * The intermediate result will be kept in tmpres, + * which needs enough room for 106 bits of mantissa, + * so lets call it a Double extended. + */ + Sglext_setzero(tmpresp1,tmpresp2); + + /* + * Four bits at a time are inspected in each loop, and a + * simple shift and add multiply algorithm is used. + */ + for (count = SGL_P-1; count >= 0; count -= 4) { + Sglext_rightshiftby4(tmpresp1,tmpresp2); + if (Sbit28(opnd1)) { + /* Twoword_add should be an ADD followed by 2 ADDC's */ + Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); + } + if (Sbit29(opnd1)) { + Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); + } + if (Sbit30(opnd1)) { + Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); + } + if (Sbit31(opnd1)) { + Twoword_add(tmpresp1, tmpresp2, opnd2, 0); + } + Sgl_rightshiftby4(opnd1); + } + if (Is_sexthiddenoverflow(tmpresp1)) { + /* result mantissa >= 2 (mantissa overflow) */ + mpy_exponent++; + Sglext_rightshiftby4(tmpresp1,tmpresp2); + } else { + Sglext_rightshiftby3(tmpresp1,tmpresp2); + } + + /* + * Restore the sign of the mpy result which was saved in resultp1. + * The exponent will continue to be kept in mpy_exponent. + */ + Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); + + /* + * No rounding is required, since the result of the multiply + * is exact in the extended format. + */ + + /* + * Now we are ready to perform the add portion of the operation. + * + * The exponents need to be kept as integers for now, since the + * multiply result might not fit into the exponent field. We + * can't overflow or underflow because of this yet, since the + * add could bring the final result back into range. + */ + add_exponent = Sgl_exponent(opnd3); + + /* + * Check for denormalized or zero add operand. + */ + if (add_exponent == 0) { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd3)) { + /* right is zero */ + /* Left can't be zero and must be result. + * + * The final result is now in tmpres and mpy_exponent, + * and needs to be rounded and squeezed back into + * double precision format from double extended. + */ + result_exponent = mpy_exponent; + Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); + sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ + goto round; + } + + /* + * Neither are zeroes. + * Adjust exponent and normalize add operand. + */ + sign_save = Sgl_signextendedsign(opnd3); /* save sign */ + Sgl_clear_signexponent(opnd3); + Sgl_leftshiftby1(opnd3); + Sgl_normalize(opnd3,add_exponent); + Sgl_set_sign(opnd3,sign_save); /* restore sign */ + } else { + Sgl_clear_exponent_set_hidden(opnd3); + } + /* + * Copy opnd3 to the double extended variable called right. + */ + Sgl_copyto_sglext(opnd3,rightp1,rightp2); + + /* + * A zero "save" helps discover equal operands (for later), + * and is used in swapping operands (if needed). + */ + Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); + + /* + * Compare magnitude of operands. + */ + Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); + Sglext_copytoint_exponentmantissa(rightp1,signlessright1); + if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && + Sglext_ismagnitudeless(signlessleft1,signlessright1)) { + /* + * Set the left operand to the larger one by XOR swap. + * First finish the first word "save". + */ + Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); + Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); + Sglext_swap_lower(tmpresp2,rightp2); + /* also setup exponents used in rest of routine */ + diff_exponent = add_exponent - mpy_exponent; + result_exponent = add_exponent; + } else { + /* also setup exponents used in rest of routine */ + diff_exponent = mpy_exponent - add_exponent; + result_exponent = mpy_exponent; + } + /* Invariant: left is not smaller than right. */ + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for + * this infrequent case. + */ + if (diff_exponent > SGLEXT_THRESHOLD) { + diff_exponent = SGLEXT_THRESHOLD; + } + + /* Align right operand by shifting it to the right */ + Sglext_clear_sign(rightp1); + Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); + + /* Treat sum and difference of the operands separately. */ + if ((int)save < 0) { + /* + * Difference of the two operands. Overflow can occur if the + * multiply overflowed. A borrow can occur out of the hidden + * bit and force a post normalization phase. + */ + Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, + resultp1,resultp2); + sign_save = Sgl_signextendedsign(resultp1); + if (Sgl_iszero_hidden(resultp1)) { + /* Handle normalization */ + /* A straightforward algorithm would now shift the + * result and extension left until the hidden bit + * becomes one. Not all of the extension bits need + * participate in the shift. Only the two most + * significant bits (round and guard) are needed. + * If only a single shift is needed then the guard + * bit becomes a significant low order bit and the + * extension must participate in the rounding. + * If more than a single shift is needed, then all + * bits to the right of the guard bit are zeros, + * and the guard bit may or may not be zero. */ + Sglext_leftshiftby1(resultp1,resultp2); + + /* Need to check for a zero result. The sign and + * exponent fields have already been zeroed. The more + * efficient test of the full object can be used. + */ + if (Sglext_iszero(resultp1,resultp2)) { + /* Must have been "x-x" or "x+(-x)". */ + if (Is_rounding_mode(ROUNDMINUS)) + Sgl_setone_sign(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + result_exponent--; + + /* Look to see if normalization is finished. */ + if (Sgl_isone_hidden(resultp1)) { + /* No further normalization is needed */ + goto round; + } + + /* Discover first one bit to determine shift amount. + * Use a modified binary search. We have already + * shifted the result one position right and still + * not found a one so the remainder of the extension + * must be zero and simplifies rounding. */ + /* Scan bytes */ + while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { + Sglext_leftshiftby8(resultp1,resultp2); + result_exponent -= 8; + } + /* Now narrow it down to the nibble */ + if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { + /* The lower nibble contains the + * normalizing one */ + Sglext_leftshiftby4(resultp1,resultp2); + result_exponent -= 4; + } + /* Select case where first bit is set (already + * normalized) otherwise select the proper shift. */ + jumpsize = Sgl_hiddenhigh3mantissa(resultp1); + if (jumpsize <= 7) switch(jumpsize) { + case 1: + Sglext_leftshiftby3(resultp1,resultp2); + result_exponent -= 3; + break; + case 2: + case 3: + Sglext_leftshiftby2(resultp1,resultp2); + result_exponent -= 2; + break; + case 4: + case 5: + case 6: + case 7: + Sglext_leftshiftby1(resultp1,resultp2); + result_exponent -= 1; + break; + } + } /* end if (hidden...)... */ + /* Fall through and round */ + } /* end if (save < 0)... */ + else { + /* Add magnitudes */ + Sglext_addition(tmpresp1,tmpresp2, + rightp1,rightp2, /*to*/resultp1,resultp2); + sign_save = Sgl_signextendedsign(resultp1); + if (Sgl_isone_hiddenoverflow(resultp1)) { + /* Prenormalization required. */ + Sglext_arithrightshiftby1(resultp1,resultp2); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...add magnitudes... */ + + /* Round the result. If the extension and lower two words are + * all zeros, then the result is exact. Otherwise round in the + * correct direction. Underflow is possible. If a postnormalization + * is necessary, then the mantissa is all zeros so no shift is needed. + */ + round: + if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { + Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); + } + Sgl_set_sign(resultp1,/*using*/sign_save); + if (Sglext_isnotzero_mantissap2(resultp2)) { + inexact = TRUE; + switch(Rounding_mode()) { + case ROUNDNEAREST: /* The default. */ + if (Sglext_isone_highp2(resultp2)) { + /* at least 1/2 ulp */ + if (Sglext_isnotzero_low31p2(resultp2) || + Sglext_isone_lowp1(resultp1)) { + /* either exactly half way and odd or + * more than 1/2ulp */ + Sgl_increment(resultp1); + } + } + break; + + case ROUNDPLUS: + if (Sgl_iszero_sign(resultp1)) { + /* Round up positive results */ + Sgl_increment(resultp1); + } + break; + + case ROUNDMINUS: + if (Sgl_isone_sign(resultp1)) { + /* Round down negative results */ + Sgl_increment(resultp1); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; + } + if (result_exponent >= SGL_INFINITY_EXPONENT) { + /* Overflow */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); + Sgl_copytoptr(resultp1,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_OVERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return (OPC_2E_OVERFLOWEXCEPTION); + } + inexact = TRUE; + Set_overflowflag(); + Sgl_setoverflow(resultp1); + } else if (result_exponent <= 0) { /* underflow case */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); + Sgl_copytoptr(resultp1,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_UNDERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(OPC_2E_UNDERFLOWEXCEPTION); + } + else if (inexact && is_tiny) Set_underflowflag(); + } + else Sgl_set_exponent(resultp1,result_exponent); + Sgl_copytoptr(resultp1,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); +} + +/* + * Single Floating-point Multiply Negate Fused Add + */ + +sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) + +sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; +unsigned int *status; +{ + unsigned int opnd1, opnd2, opnd3; + register unsigned int tmpresp1, tmpresp2; + unsigned int rightp1, rightp2; + unsigned int resultp1, resultp2 = 0; + register int mpy_exponent, add_exponent, count; + boolean inexact = FALSE, is_tiny = FALSE; + + unsigned int signlessleft1, signlessright1, save; + register int result_exponent, diff_exponent; + int sign_save, jumpsize; + + Sgl_copyfromptr(src1ptr,opnd1); + Sgl_copyfromptr(src2ptr,opnd2); + Sgl_copyfromptr(src3ptr,opnd3); + + /* + * set sign bit of result of multiply + */ + if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) + Sgl_setzero(resultp1); + else + Sgl_setnegativezero(resultp1); + + /* + * Generate multiply exponent + */ + mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; + + /* + * check first operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd1)) { + if (Sgl_iszero_mantissa(opnd1)) { + if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { + if (Sgl_iszero_exponentmantissa(opnd2)) { + /* + * invalid since operands are infinity + * and zero + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Sgl_isinfinity(opnd3) && + (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd1); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + Sgl_copytoptr(opnd2,dstptr); + return(NOEXCEPTION); + } + /* + * is third operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd3)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd3); + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Sgl_copytoptr(opnd1,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check second operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd2)) { + if (Sgl_iszero_mantissa(opnd2)) { + if (Sgl_isnotnan(opnd3)) { + if (Sgl_iszero_exponentmantissa(opnd1)) { + /* + * invalid since multiply operands are + * zero & infinity + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(opnd2); + Sgl_copytoptr(opnd2,dstptr); + return(NOEXCEPTION); + } + + /* + * Check third operand for infinity with a + * sign opposite of the multiply result + */ + if (Sgl_isinfinity(opnd3) && + (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { + /* + * invalid since attempting a magnitude + * subtraction of infinities + */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + } + /* + * is third operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd3)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd3); + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + Sgl_copytoptr(opnd2,dstptr); + return(NOEXCEPTION); + } + } + + /* + * check third operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd3)) { + if (Sgl_iszero_mantissa(opnd3)) { + /* return infinity */ + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd3)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(OPC_2E_INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd3); + } + /* + * return quiet NaN + */ + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + } + + /* + * Generate multiply mantissa + */ + if (Sgl_isnotzero_exponent(opnd1)) { + /* set hidden bit */ + Sgl_clear_signexponent_set_hidden(opnd1); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd1)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Sgl_iszero_exponentmantissa(opnd3)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Sgl_or_signs(opnd3,resultp1); + } else { + Sgl_and_signs(opnd3,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Sgl_iszero_exponent(opnd3) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Sgl_signextendedsign(opnd3); + result_exponent = 0; + Sgl_leftshiftby1(opnd3); + Sgl_normalize(opnd3,result_exponent); + Sgl_set_sign(opnd3,/*using*/sign_save); + Sgl_setwrapped_exponent(opnd3,result_exponent, + unfl); + Sgl_copytoptr(opnd3,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* is denormalized, adjust exponent */ + Sgl_clear_signexponent(opnd1); + Sgl_leftshiftby1(opnd1); + Sgl_normalize(opnd1,mpy_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Sgl_isnotzero_exponent(opnd2)) { + Sgl_clear_signexponent_set_hidden(opnd2); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd2)) { + /* + * Perform the add opnd3 with zero here. + */ + if (Sgl_iszero_exponentmantissa(opnd3)) { + if (Is_rounding_mode(ROUNDMINUS)) { + Sgl_or_signs(opnd3,resultp1); + } else { + Sgl_and_signs(opnd3,resultp1); + } + } + /* + * Now let's check for trapped underflow case. + */ + else if (Sgl_iszero_exponent(opnd3) && + Is_underflowtrap_enabled()) { + /* need to normalize results mantissa */ + sign_save = Sgl_signextendedsign(opnd3); + result_exponent = 0; + Sgl_leftshiftby1(opnd3); + Sgl_normalize(opnd3,result_exponent); + Sgl_set_sign(opnd3,/*using*/sign_save); + Sgl_setwrapped_exponent(opnd3,result_exponent, + unfl); + Sgl_copytoptr(opnd3,dstptr); + /* inexact = FALSE */ + return(OPC_2E_UNDERFLOWEXCEPTION); + } + Sgl_copytoptr(opnd3,dstptr); + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Sgl_clear_signexponent(opnd2); + Sgl_leftshiftby1(opnd2); + Sgl_normalize(opnd2,mpy_exponent); + } + + /* Multiply the first two source mantissas together */ + + /* + * The intermediate result will be kept in tmpres, + * which needs enough room for 106 bits of mantissa, + * so lets call it a Double extended. + */ + Sglext_setzero(tmpresp1,tmpresp2); + + /* + * Four bits at a time are inspected in each loop, and a + * simple shift and add multiply algorithm is used. + */ + for (count = SGL_P-1; count >= 0; count -= 4) { + Sglext_rightshiftby4(tmpresp1,tmpresp2); + if (Sbit28(opnd1)) { + /* Twoword_add should be an ADD followed by 2 ADDC's */ + Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); + } + if (Sbit29(opnd1)) { + Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); + } + if (Sbit30(opnd1)) { + Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); + } + if (Sbit31(opnd1)) { + Twoword_add(tmpresp1, tmpresp2, opnd2, 0); + } + Sgl_rightshiftby4(opnd1); + } + if (Is_sexthiddenoverflow(tmpresp1)) { + /* result mantissa >= 2 (mantissa overflow) */ + mpy_exponent++; + Sglext_rightshiftby4(tmpresp1,tmpresp2); + } else { + Sglext_rightshiftby3(tmpresp1,tmpresp2); + } + + /* + * Restore the sign of the mpy result which was saved in resultp1. + * The exponent will continue to be kept in mpy_exponent. + */ + Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); + + /* + * No rounding is required, since the result of the multiply + * is exact in the extended format. + */ + + /* + * Now we are ready to perform the add portion of the operation. + * + * The exponents need to be kept as integers for now, since the + * multiply result might not fit into the exponent field. We + * can't overflow or underflow because of this yet, since the + * add could bring the final result back into range. + */ + add_exponent = Sgl_exponent(opnd3); + + /* + * Check for denormalized or zero add operand. + */ + if (add_exponent == 0) { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd3)) { + /* right is zero */ + /* Left can't be zero and must be result. + * + * The final result is now in tmpres and mpy_exponent, + * and needs to be rounded and squeezed back into + * double precision format from double extended. + */ + result_exponent = mpy_exponent; + Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); + sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ + goto round; + } + + /* + * Neither are zeroes. + * Adjust exponent and normalize add operand. + */ + sign_save = Sgl_signextendedsign(opnd3); /* save sign */ + Sgl_clear_signexponent(opnd3); + Sgl_leftshiftby1(opnd3); + Sgl_normalize(opnd3,add_exponent); + Sgl_set_sign(opnd3,sign_save); /* restore sign */ + } else { + Sgl_clear_exponent_set_hidden(opnd3); + } + /* + * Copy opnd3 to the double extended variable called right. + */ + Sgl_copyto_sglext(opnd3,rightp1,rightp2); + + /* + * A zero "save" helps discover equal operands (for later), + * and is used in swapping operands (if needed). + */ + Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); + + /* + * Compare magnitude of operands. + */ + Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); + Sglext_copytoint_exponentmantissa(rightp1,signlessright1); + if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && + Sglext_ismagnitudeless(signlessleft1,signlessright1)) { + /* + * Set the left operand to the larger one by XOR swap. + * First finish the first word "save". + */ + Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); + Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); + Sglext_swap_lower(tmpresp2,rightp2); + /* also setup exponents used in rest of routine */ + diff_exponent = add_exponent - mpy_exponent; + result_exponent = add_exponent; + } else { + /* also setup exponents used in rest of routine */ + diff_exponent = mpy_exponent - add_exponent; + result_exponent = mpy_exponent; + } + /* Invariant: left is not smaller than right. */ + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for + * this infrequent case. + */ + if (diff_exponent > SGLEXT_THRESHOLD) { + diff_exponent = SGLEXT_THRESHOLD; + } + + /* Align right operand by shifting it to the right */ + Sglext_clear_sign(rightp1); + Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); + + /* Treat sum and difference of the operands separately. */ + if ((int)save < 0) { + /* + * Difference of the two operands. Overflow can occur if the + * multiply overflowed. A borrow can occur out of the hidden + * bit and force a post normalization phase. + */ + Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, + resultp1,resultp2); + sign_save = Sgl_signextendedsign(resultp1); + if (Sgl_iszero_hidden(resultp1)) { + /* Handle normalization */ + /* A straightforward algorithm would now shift the + * result and extension left until the hidden bit + * becomes one. Not all of the extension bits need + * participate in the shift. Only the two most + * significant bits (round and guard) are needed. + * If only a single shift is needed then the guard + * bit becomes a significant low order bit and the + * extension must participate in the rounding. + * If more than a single shift is needed, then all + * bits to the right of the guard bit are zeros, + * and the guard bit may or may not be zero. */ + Sglext_leftshiftby1(resultp1,resultp2); + + /* Need to check for a zero result. The sign and + * exponent fields have already been zeroed. The more + * efficient test of the full object can be used. + */ + if (Sglext_iszero(resultp1,resultp2)) { + /* Must have been "x-x" or "x+(-x)". */ + if (Is_rounding_mode(ROUNDMINUS)) + Sgl_setone_sign(resultp1); + Sgl_copytoptr(resultp1,dstptr); + return(NOEXCEPTION); + } + result_exponent--; + + /* Look to see if normalization is finished. */ + if (Sgl_isone_hidden(resultp1)) { + /* No further normalization is needed */ + goto round; + } + + /* Discover first one bit to determine shift amount. + * Use a modified binary search. We have already + * shifted the result one position right and still + * not found a one so the remainder of the extension + * must be zero and simplifies rounding. */ + /* Scan bytes */ + while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { + Sglext_leftshiftby8(resultp1,resultp2); + result_exponent -= 8; + } + /* Now narrow it down to the nibble */ + if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { + /* The lower nibble contains the + * normalizing one */ + Sglext_leftshiftby4(resultp1,resultp2); + result_exponent -= 4; + } + /* Select case where first bit is set (already + * normalized) otherwise select the proper shift. */ + jumpsize = Sgl_hiddenhigh3mantissa(resultp1); + if (jumpsize <= 7) switch(jumpsize) { + case 1: + Sglext_leftshiftby3(resultp1,resultp2); + result_exponent -= 3; + break; + case 2: + case 3: + Sglext_leftshiftby2(resultp1,resultp2); + result_exponent -= 2; + break; + case 4: + case 5: + case 6: + case 7: + Sglext_leftshiftby1(resultp1,resultp2); + result_exponent -= 1; + break; + } + } /* end if (hidden...)... */ + /* Fall through and round */ + } /* end if (save < 0)... */ + else { + /* Add magnitudes */ + Sglext_addition(tmpresp1,tmpresp2, + rightp1,rightp2, /*to*/resultp1,resultp2); + sign_save = Sgl_signextendedsign(resultp1); + if (Sgl_isone_hiddenoverflow(resultp1)) { + /* Prenormalization required. */ + Sglext_arithrightshiftby1(resultp1,resultp2); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...add magnitudes... */ + + /* Round the result. If the extension and lower two words are + * all zeros, then the result is exact. Otherwise round in the + * correct direction. Underflow is possible. If a postnormalization + * is necessary, then the mantissa is all zeros so no shift is needed. + */ + round: + if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { + Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); + } + Sgl_set_sign(resultp1,/*using*/sign_save); + if (Sglext_isnotzero_mantissap2(resultp2)) { + inexact = TRUE; + switch(Rounding_mode()) { + case ROUNDNEAREST: /* The default. */ + if (Sglext_isone_highp2(resultp2)) { + /* at least 1/2 ulp */ + if (Sglext_isnotzero_low31p2(resultp2) || + Sglext_isone_lowp1(resultp1)) { + /* either exactly half way and odd or + * more than 1/2ulp */ + Sgl_increment(resultp1); + } + } + break; + + case ROUNDPLUS: + if (Sgl_iszero_sign(resultp1)) { + /* Round up positive results */ + Sgl_increment(resultp1); + } + break; + + case ROUNDMINUS: + if (Sgl_isone_sign(resultp1)) { + /* Round down negative results */ + Sgl_increment(resultp1); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; + } + if (result_exponent >= SGL_INFINITY_EXPONENT) { + /* Overflow */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); + Sgl_copytoptr(resultp1,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_OVERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return (OPC_2E_OVERFLOWEXCEPTION); + } + inexact = TRUE; + Set_overflowflag(); + Sgl_setoverflow(resultp1); + } else if (result_exponent <= 0) { /* underflow case */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); + Sgl_copytoptr(resultp1,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) + return (OPC_2E_UNDERFLOWEXCEPTION | + OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(OPC_2E_UNDERFLOWEXCEPTION); + } + else if (inexact && is_tiny) Set_underflowflag(); + } + else Sgl_set_exponent(resultp1,result_exponent); + Sgl_copytoptr(resultp1,dstptr); + if (inexact) + if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); +} + diff --git a/kernel/arch/parisc/math-emu/fpbits.h b/kernel/arch/parisc/math-emu/fpbits.h new file mode 100644 index 000000000..cefad064d --- /dev/null +++ b/kernel/arch/parisc/math-emu/fpbits.h @@ -0,0 +1,65 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + + +/* + * These macros are designed to be portable to all machines that have + * a wordsize greater than or equal to 32 bits that support the portable + * C compiler and the standard C preprocessor. Wordsize (default 32) + * and bitfield assignment (default left-to-right, unlike VAX, PDP-11) + * should be predefined using the constants HOSTWDSZ and BITFRL and + * the C compiler "-D" flag (e.g., -DHOSTWDSZ=36 -DBITFLR for the DEC-20). + * Note that the macro arguments assume that the integer being referenced + * is a 32-bit integer (right-justified on the 20) and that bit 0 is the + * most significant bit. + */ + +#ifndef HOSTWDSZ +#define HOSTWDSZ 32 +#endif + + +/*########################### Macros ######################################*/ + +/*------------------------------------------------------------------------- + * NewDeclareBitField_Reference - Declare a structure similar to the simulator + * function "DeclBitfR" except its use is restricted to occur within a larger + * enclosing structure or union definition. This declaration is an unnamed + * structure with the argument, name, as the member name and the argument, + * uname, as the element name. + *----------------------------------------------------------------------- */ +#define Bitfield_extract(start, length, object) \ + ((object) >> (HOSTWDSZ - (start) - (length)) & \ + ((unsigned)-1 >> (HOSTWDSZ - (length)))) + +#define Bitfield_signed_extract(start, length, object) \ + ((int)((object) << start) >> (HOSTWDSZ - (length))) + +#define Bitfield_mask(start, len, object) \ + ((object) & (((unsigned)-1 >> (HOSTWDSZ-len)) << (HOSTWDSZ-start-len))) + +#define Bitfield_deposit(value,start,len,object) object = \ + ((object) & ~(((unsigned)-1 >> (HOSTWDSZ-len)) << (HOSTWDSZ-start-len))) | \ + (((value) & ((unsigned)-1 >> (HOSTWDSZ-len))) << (HOSTWDSZ-start-len)) diff --git a/kernel/arch/parisc/math-emu/fpu.h b/kernel/arch/parisc/math-emu/fpu.h new file mode 100644 index 000000000..0af5c3c88 --- /dev/null +++ b/kernel/arch/parisc/math-emu/fpu.h @@ -0,0 +1,76 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/fp/fpu.h $Revision: 1.1 $ + * + * Purpose: + * <<please update with a synopis of the functionality provided by this file>> + * + * + * END_DESC +*/ + +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + + +#ifndef _MACHINE_FPU_INCLUDED /* allows multiple inclusion */ +#define _MACHINE_FPU_INCLUDED + +#if 0 +#ifndef _SYS_STDSYMS_INCLUDED +# include <sys/stdsyms.h> +#endif /* _SYS_STDSYMS_INCLUDED */ +#include <machine/pdc/pdc_rqsts.h> +#endif + +#define PA83_FPU_FLAG 0x00000001 +#define PA89_FPU_FLAG 0x00000002 +#define PA2_0_FPU_FLAG 0x00000010 + +#define TIMEX_EXTEN_FLAG 0x00000004 + +#define ROLEX_EXTEN_FLAG 0x00000008 +#define COPR_FP 0x00000080 /* Floating point -- Coprocessor 0 */ +#define SFU_MPY_DIVIDE 0x00008000 /* Multiply/Divide __ SFU 0 */ + + +#define EM_FPU_TYPE_OFFSET 272 + +/* version of EMULATION software for COPR,0,0 instruction */ +#define EMULATION_VERSION 4 + +/* + * The only was to differeniate between TIMEX and ROLEX (or PCX-S and PCX-T) + * is thorough the potential type field from the PDC_MODEL call. The + * following flags are used at assist this differeniation. + */ + +#define ROLEX_POTENTIAL_KEY_FLAGS PDC_MODEL_CPU_KEY_WORD_TO_IO +#define TIMEX_POTENTIAL_KEY_FLAGS (PDC_MODEL_CPU_KEY_QUAD_STORE | \ + PDC_MODEL_CPU_KEY_RECIP_SQRT) + + +#endif /* ! _MACHINE_FPU_INCLUDED */ diff --git a/kernel/arch/parisc/math-emu/fpudispatch.c b/kernel/arch/parisc/math-emu/fpudispatch.c new file mode 100644 index 000000000..673b73e84 --- /dev/null +++ b/kernel/arch/parisc/math-emu/fpudispatch.c @@ -0,0 +1,1443 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/fp/fpudispatch.c $Revision: 1.1 $ + * + * Purpose: + * <<please update with a synopsis of the functionality provided by this file>> + * + * External Interfaces: + * <<the following list was autogenerated, please review>> + * emfpudispatch(ir, dummy1, dummy2, fpregs) + * fpudispatch(ir, excp_code, holder, fpregs) + * + * Internal Interfaces: + * <<the following list was autogenerated, please review>> + * static u_int decode_06(u_int, u_int *) + * static u_int decode_0c(u_int, u_int, u_int, u_int *) + * static u_int decode_0e(u_int, u_int, u_int, u_int *) + * static u_int decode_26(u_int, u_int *) + * static u_int decode_2e(u_int, u_int *) + * static void update_status_cbit(u_int *, u_int, u_int, u_int) + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + +#define FPUDEBUG 0 + +#include "float.h" +#include <linux/bug.h> +#include <linux/kernel.h> +#include <asm/processor.h> +/* #include <sys/debug.h> */ +/* #include <machine/sys/mdep_private.h> */ + +#define COPR_INST 0x30000000 + +/* + * definition of extru macro. If pos and len are constants, the compiler + * will generate an extru instruction when optimized + */ +#define extru(r,pos,len) (((r) >> (31-(pos))) & (( 1 << (len)) - 1)) +/* definitions of bit field locations in the instruction */ +#define fpmajorpos 5 +#define fpr1pos 10 +#define fpr2pos 15 +#define fptpos 31 +#define fpsubpos 18 +#define fpclass1subpos 16 +#define fpclasspos 22 +#define fpfmtpos 20 +#define fpdfpos 18 +#define fpnulpos 26 +/* + * the following are the extra bits for the 0E major op + */ +#define fpxr1pos 24 +#define fpxr2pos 19 +#define fpxtpos 25 +#define fpxpos 23 +#define fp0efmtpos 20 +/* + * the following are for the multi-ops + */ +#define fprm1pos 10 +#define fprm2pos 15 +#define fptmpos 31 +#define fprapos 25 +#define fptapos 20 +#define fpmultifmt 26 +/* + * the following are for the fused FP instructions + */ + /* fprm1pos 10 */ + /* fprm2pos 15 */ +#define fpraupos 18 +#define fpxrm2pos 19 + /* fpfmtpos 20 */ +#define fpralpos 23 +#define fpxrm1pos 24 + /* fpxtpos 25 */ +#define fpfusedsubop 26 + /* fptpos 31 */ + +/* + * offset to constant zero in the FP emulation registers + */ +#define fpzeroreg (32*sizeof(double)/sizeof(u_int)) + +/* + * extract the major opcode from the instruction + */ +#define get_major(op) extru(op,fpmajorpos,6) +/* + * extract the two bit class field from the FP instruction. The class is at bit + * positions 21-22 + */ +#define get_class(op) extru(op,fpclasspos,2) +/* + * extract the 3 bit subop field. For all but class 1 instructions, it is + * located at bit positions 16-18 + */ +#define get_subop(op) extru(op,fpsubpos,3) +/* + * extract the 2 or 3 bit subop field from class 1 instructions. It is located + * at bit positions 15-16 (PA1.1) or 14-16 (PA2.0) + */ +#define get_subop1_PA1_1(op) extru(op,fpclass1subpos,2) /* PA89 (1.1) fmt */ +#define get_subop1_PA2_0(op) extru(op,fpclass1subpos,3) /* PA 2.0 fmt */ + +/* definitions of unimplemented exceptions */ +#define MAJOR_0C_EXCP 0x09 +#define MAJOR_0E_EXCP 0x0b +#define MAJOR_06_EXCP 0x03 +#define MAJOR_26_EXCP 0x23 +#define MAJOR_2E_EXCP 0x2b +#define PA83_UNIMP_EXCP 0x01 + +/* + * Special Defines for TIMEX specific code + */ + +#define FPU_TYPE_FLAG_POS (EM_FPU_TYPE_OFFSET>>2) +#define TIMEX_ROLEX_FPU_MASK (TIMEX_EXTEN_FLAG|ROLEX_EXTEN_FLAG) + +/* + * Static function definitions + */ +#define _PROTOTYPES +#if defined(_PROTOTYPES) || defined(_lint) +static u_int decode_0c(u_int, u_int, u_int, u_int *); +static u_int decode_0e(u_int, u_int, u_int, u_int *); +static u_int decode_06(u_int, u_int *); +static u_int decode_26(u_int, u_int *); +static u_int decode_2e(u_int, u_int *); +static void update_status_cbit(u_int *, u_int, u_int, u_int); +#else /* !_PROTOTYPES&&!_lint */ +static u_int decode_0c(); +static u_int decode_0e(); +static u_int decode_06(); +static u_int decode_26(); +static u_int decode_2e(); +static void update_status_cbit(); +#endif /* _PROTOTYPES&&!_lint */ + +#define VASSERT(x) + +static void parisc_linux_get_fpu_type(u_int fpregs[]) +{ + /* on pa-linux the fpu type is not filled in by the + * caller; it is constructed here + */ + if (boot_cpu_data.cpu_type == pcxs) + fpregs[FPU_TYPE_FLAG_POS] = TIMEX_EXTEN_FLAG; + else if (boot_cpu_data.cpu_type == pcxt || + boot_cpu_data.cpu_type == pcxt_) + fpregs[FPU_TYPE_FLAG_POS] = ROLEX_EXTEN_FLAG; + else if (boot_cpu_data.cpu_type >= pcxu) + fpregs[FPU_TYPE_FLAG_POS] = PA2_0_FPU_FLAG; +} + +/* + * this routine will decode the excepting floating point instruction and + * call the approiate emulation routine. + * It is called by decode_fpu with the following parameters: + * fpudispatch(current_ir, unimplemented_code, 0, &Fpu_register) + * where current_ir is the instruction to be emulated, + * unimplemented_code is the exception_code that the hardware generated + * and &Fpu_register is the address of emulated FP reg 0. + */ +u_int +fpudispatch(u_int ir, u_int excp_code, u_int holder, u_int fpregs[]) +{ + u_int class, subop; + u_int fpu_type_flags; + + /* All FP emulation code assumes that ints are 4-bytes in length */ + VASSERT(sizeof(int) == 4); + + parisc_linux_get_fpu_type(fpregs); + + fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; /* get fpu type flags */ + + class = get_class(ir); + if (class == 1) { + if (fpu_type_flags & PA2_0_FPU_FLAG) + subop = get_subop1_PA2_0(ir); + else + subop = get_subop1_PA1_1(ir); + } + else + subop = get_subop(ir); + + if (FPUDEBUG) printk("class %d subop %d\n", class, subop); + + switch (excp_code) { + case MAJOR_0C_EXCP: + case PA83_UNIMP_EXCP: + return(decode_0c(ir,class,subop,fpregs)); + case MAJOR_0E_EXCP: + return(decode_0e(ir,class,subop,fpregs)); + case MAJOR_06_EXCP: + return(decode_06(ir,fpregs)); + case MAJOR_26_EXCP: + return(decode_26(ir,fpregs)); + case MAJOR_2E_EXCP: + return(decode_2e(ir,fpregs)); + default: + /* "crashme Night Gallery painting nr 2. (asm_crash.s). + * This was fixed for multi-user kernels, but + * workstation kernels had a panic here. This allowed + * any arbitrary user to panic the kernel by executing + * setting the FP exception registers to strange values + * and generating an emulation trap. The emulation and + * exception code must never be able to panic the + * kernel. + */ + return(UNIMPLEMENTEDEXCEPTION); + } +} + +/* + * this routine is called by $emulation_trap to emulate a coprocessor + * instruction if one doesn't exist + */ +u_int +emfpudispatch(u_int ir, u_int dummy1, u_int dummy2, u_int fpregs[]) +{ + u_int class, subop, major; + u_int fpu_type_flags; + + /* All FP emulation code assumes that ints are 4-bytes in length */ + VASSERT(sizeof(int) == 4); + + fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; /* get fpu type flags */ + + major = get_major(ir); + class = get_class(ir); + if (class == 1) { + if (fpu_type_flags & PA2_0_FPU_FLAG) + subop = get_subop1_PA2_0(ir); + else + subop = get_subop1_PA1_1(ir); + } + else + subop = get_subop(ir); + switch (major) { + case 0x0C: + return(decode_0c(ir,class,subop,fpregs)); + case 0x0E: + return(decode_0e(ir,class,subop,fpregs)); + case 0x06: + return(decode_06(ir,fpregs)); + case 0x26: + return(decode_26(ir,fpregs)); + case 0x2E: + return(decode_2e(ir,fpregs)); + default: + return(PA83_UNIMP_EXCP); + } +} + + +static u_int +decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) +{ + u_int r1,r2,t; /* operand register offsets */ + u_int fmt; /* also sf for class 1 conversions */ + u_int df; /* for class 1 conversions */ + u_int *status; + u_int retval, local_status; + u_int fpu_type_flags; + + if (ir == COPR_INST) { + fpregs[0] = EMULATION_VERSION << 11; + return(NOEXCEPTION); + } + status = &fpregs[0]; /* fp status register */ + local_status = fpregs[0]; /* and local copy */ + r1 = extru(ir,fpr1pos,5) * sizeof(double)/sizeof(u_int); + if (r1 == 0) /* map fr0 source to constant zero */ + r1 = fpzeroreg; + t = extru(ir,fptpos,5) * sizeof(double)/sizeof(u_int); + if (t == 0 && class != 2) /* don't allow fr0 as a dest */ + return(MAJOR_0C_EXCP); + fmt = extru(ir,fpfmtpos,2); /* get fmt completer */ + + switch (class) { + case 0: + switch (subop) { + case 0: /* COPR 0,0 emulated above*/ + case 1: + return(MAJOR_0C_EXCP); + case 2: /* FCPY */ + switch (fmt) { + case 2: /* illegal */ + return(MAJOR_0C_EXCP); + case 3: /* quad */ + t &= ~3; /* force to even reg #s */ + r1 &= ~3; + fpregs[t+3] = fpregs[r1+3]; + fpregs[t+2] = fpregs[r1+2]; + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + fpregs[t] = fpregs[r1]; + return(NOEXCEPTION); + } + case 3: /* FABS */ + switch (fmt) { + case 2: /* illegal */ + return(MAJOR_0C_EXCP); + case 3: /* quad */ + t &= ~3; /* force to even reg #s */ + r1 &= ~3; + fpregs[t+3] = fpregs[r1+3]; + fpregs[t+2] = fpregs[r1+2]; + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + /* copy and clear sign bit */ + fpregs[t] = fpregs[r1] & 0x7fffffff; + return(NOEXCEPTION); + } + case 6: /* FNEG */ + switch (fmt) { + case 2: /* illegal */ + return(MAJOR_0C_EXCP); + case 3: /* quad */ + t &= ~3; /* force to even reg #s */ + r1 &= ~3; + fpregs[t+3] = fpregs[r1+3]; + fpregs[t+2] = fpregs[r1+2]; + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + /* copy and invert sign bit */ + fpregs[t] = fpregs[r1] ^ 0x80000000; + return(NOEXCEPTION); + } + case 7: /* FNEGABS */ + switch (fmt) { + case 2: /* illegal */ + return(MAJOR_0C_EXCP); + case 3: /* quad */ + t &= ~3; /* force to even reg #s */ + r1 &= ~3; + fpregs[t+3] = fpregs[r1+3]; + fpregs[t+2] = fpregs[r1+2]; + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + /* copy and set sign bit */ + fpregs[t] = fpregs[r1] | 0x80000000; + return(NOEXCEPTION); + } + case 4: /* FSQRT */ + switch (fmt) { + case 0: + return(sgl_fsqrt(&fpregs[r1],0, + &fpregs[t],status)); + case 1: + return(dbl_fsqrt(&fpregs[r1],0, + &fpregs[t],status)); + case 2: + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + case 5: /* FRND */ + switch (fmt) { + case 0: + return(sgl_frnd(&fpregs[r1],0, + &fpregs[t],status)); + case 1: + return(dbl_frnd(&fpregs[r1],0, + &fpregs[t],status)); + case 2: + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + } /* end of switch (subop) */ + + case 1: /* class 1 */ + df = extru(ir,fpdfpos,2); /* get dest format */ + if ((df & 2) || (fmt & 2)) { + /* + * fmt's 2 and 3 are illegal of not implemented + * quad conversions + */ + return(MAJOR_0C_EXCP); + } + /* + * encode source and dest formats into 2 bits. + * high bit is source, low bit is dest. + * bit = 1 --> double precision + */ + fmt = (fmt << 1) | df; + switch (subop) { + case 0: /* FCNVFF */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(MAJOR_0C_EXCP); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvff(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvff(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(MAJOR_0C_EXCP); + } + case 1: /* FCNVXF */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + } + case 2: /* FCNVFX */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + } + case 3: /* FCNVFXT */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + } + case 5: /* FCNVUF (PA2.0 only) */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + } + case 6: /* FCNVFU (PA2.0 only) */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + } + case 7: /* FCNVFUT (PA2.0 only) */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + } + case 4: /* undefined */ + return(MAJOR_0C_EXCP); + } /* end of switch subop */ + + case 2: /* class 2 */ + fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; + r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int); + if (r2 == 0) + r2 = fpzeroreg; + if (fpu_type_flags & PA2_0_FPU_FLAG) { + /* FTEST if nullify bit set, otherwise FCMP */ + if (extru(ir, fpnulpos, 1)) { /* FTEST */ + switch (fmt) { + case 0: + /* + * arg0 is not used + * second param is the t field used for + * ftest,acc and ftest,rej + * third param is the subop (y-field) + */ + BUG(); + /* Unsupported + * return(ftest(0L,extru(ir,fptpos,5), + * &fpregs[0],subop)); + */ + case 1: + case 2: + case 3: + return(MAJOR_0C_EXCP); + } + } else { /* FCMP */ + switch (fmt) { + case 0: + retval = sgl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + case 1: + retval = dbl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + } + } /* end of if for PA2.0 */ + else { /* PA1.0 & PA1.1 */ + switch (subop) { + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + return(MAJOR_0C_EXCP); + case 0: /* FCMP */ + switch (fmt) { + case 0: + retval = sgl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + case 1: + retval = dbl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + case 1: /* FTEST */ + switch (fmt) { + case 0: + /* + * arg0 is not used + * second param is the t field used for + * ftest,acc and ftest,rej + * third param is the subop (y-field) + */ + BUG(); + /* unsupported + * return(ftest(0L,extru(ir,fptpos,5), + * &fpregs[0],subop)); + */ + case 1: + case 2: + case 3: + return(MAJOR_0C_EXCP); + } + } /* end of switch subop */ + } /* end of else for PA1.0 & PA1.1 */ + case 3: /* class 3 */ + r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int); + if (r2 == 0) + r2 = fpzeroreg; + switch (subop) { + case 5: + case 6: + case 7: + return(MAJOR_0C_EXCP); + + case 0: /* FADD */ + switch (fmt) { + case 0: + return(sgl_fadd(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fadd(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + case 1: /* FSUB */ + switch (fmt) { + case 0: + return(sgl_fsub(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fsub(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + case 2: /* FMPY */ + switch (fmt) { + case 0: + return(sgl_fmpy(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fmpy(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + case 3: /* FDIV */ + switch (fmt) { + case 0: + return(sgl_fdiv(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fdiv(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + case 4: /* FREM */ + switch (fmt) { + case 0: + return(sgl_frem(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_frem(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 2: /* illegal */ + case 3: /* quad not implemented */ + return(MAJOR_0C_EXCP); + } + } /* end of class 3 switch */ + } /* end of switch(class) */ + + /* If we get here, something is really wrong! */ + return(MAJOR_0C_EXCP); +} + +static u_int +decode_0e(ir,class,subop,fpregs) +u_int ir,class,subop; +u_int fpregs[]; +{ + u_int r1,r2,t; /* operand register offsets */ + u_int fmt; /* also sf for class 1 conversions */ + u_int df; /* dest format for class 1 conversions */ + u_int *status; + u_int retval, local_status; + u_int fpu_type_flags; + + status = &fpregs[0]; + local_status = fpregs[0]; + r1 = ((extru(ir,fpr1pos,5)<<1)|(extru(ir,fpxr1pos,1))); + if (r1 == 0) + r1 = fpzeroreg; + t = ((extru(ir,fptpos,5)<<1)|(extru(ir,fpxtpos,1))); + if (t == 0 && class != 2) + return(MAJOR_0E_EXCP); + if (class < 2) /* class 0 or 1 has 2 bit fmt */ + fmt = extru(ir,fpfmtpos,2); + else /* class 2 and 3 have 1 bit fmt */ + fmt = extru(ir,fp0efmtpos,1); + /* + * An undefined combination, double precision accessing the + * right half of a FPR, can get us into trouble. + * Let's just force proper alignment on it. + */ + if (fmt == DBL) { + r1 &= ~1; + if (class != 1) + t &= ~1; + } + + switch (class) { + case 0: + switch (subop) { + case 0: /* unimplemented */ + case 1: + return(MAJOR_0E_EXCP); + case 2: /* FCPY */ + switch (fmt) { + case 2: + case 3: + return(MAJOR_0E_EXCP); + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + fpregs[t] = fpregs[r1]; + return(NOEXCEPTION); + } + case 3: /* FABS */ + switch (fmt) { + case 2: + case 3: + return(MAJOR_0E_EXCP); + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + fpregs[t] = fpregs[r1] & 0x7fffffff; + return(NOEXCEPTION); + } + case 6: /* FNEG */ + switch (fmt) { + case 2: + case 3: + return(MAJOR_0E_EXCP); + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + fpregs[t] = fpregs[r1] ^ 0x80000000; + return(NOEXCEPTION); + } + case 7: /* FNEGABS */ + switch (fmt) { + case 2: + case 3: + return(MAJOR_0E_EXCP); + case 1: /* double */ + fpregs[t+1] = fpregs[r1+1]; + case 0: /* single */ + fpregs[t] = fpregs[r1] | 0x80000000; + return(NOEXCEPTION); + } + case 4: /* FSQRT */ + switch (fmt) { + case 0: + return(sgl_fsqrt(&fpregs[r1],0, + &fpregs[t], status)); + case 1: + return(dbl_fsqrt(&fpregs[r1],0, + &fpregs[t], status)); + case 2: + case 3: + return(MAJOR_0E_EXCP); + } + case 5: /* FRMD */ + switch (fmt) { + case 0: + return(sgl_frnd(&fpregs[r1],0, + &fpregs[t], status)); + case 1: + return(dbl_frnd(&fpregs[r1],0, + &fpregs[t], status)); + case 2: + case 3: + return(MAJOR_0E_EXCP); + } + } /* end of switch (subop */ + + case 1: /* class 1 */ + df = extru(ir,fpdfpos,2); /* get dest format */ + /* + * Fix Crashme problem (writing to 31R in double precision) + * here too. + */ + if (df == DBL) { + t &= ~1; + } + if ((df & 2) || (fmt & 2)) + return(MAJOR_0E_EXCP); + + fmt = (fmt << 1) | df; + switch (subop) { + case 0: /* FCNVFF */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(MAJOR_0E_EXCP); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvff(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvff(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(MAJOR_0E_EXCP); + } + case 1: /* FCNVXF */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, + &fpregs[t],status)); + } + case 2: /* FCNVFX */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, + &fpregs[t],status)); + } + case 3: /* FCNVFXT */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, + &fpregs[t],status)); + } + case 5: /* FCNVUF (PA2.0 only) */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, + &fpregs[t],status)); + } + case 6: /* FCNVFU (PA2.0 only) */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, + &fpregs[t],status)); + } + case 7: /* FCNVFUT (PA2.0 only) */ + switch(fmt) { + case 0: /* sgl/sgl */ + return(sgl_to_sgl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + case 1: /* sgl/dbl */ + return(sgl_to_dbl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + case 2: /* dbl/sgl */ + return(dbl_to_sgl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + case 3: /* dbl/dbl */ + return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, + &fpregs[t],status)); + } + case 4: /* undefined */ + return(MAJOR_0C_EXCP); + } /* end of switch subop */ + case 2: /* class 2 */ + /* + * Be careful out there. + * Crashme can generate cases where FR31R is specified + * as the source or target of a double precision operation. + * Since we just pass the address of the floating-point + * register to the emulation routines, this can cause + * corruption of fpzeroreg. + */ + if (fmt == DBL) + r2 = (extru(ir,fpr2pos,5)<<1); + else + r2 = ((extru(ir,fpr2pos,5)<<1)|(extru(ir,fpxr2pos,1))); + fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; + if (r2 == 0) + r2 = fpzeroreg; + if (fpu_type_flags & PA2_0_FPU_FLAG) { + /* FTEST if nullify bit set, otherwise FCMP */ + if (extru(ir, fpnulpos, 1)) { /* FTEST */ + /* not legal */ + return(MAJOR_0E_EXCP); + } else { /* FCMP */ + switch (fmt) { + /* + * fmt is only 1 bit long + */ + case 0: + retval = sgl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + case 1: + retval = dbl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + } + } + } /* end of if for PA2.0 */ + else { /* PA1.0 & PA1.1 */ + switch (subop) { + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + return(MAJOR_0E_EXCP); + case 0: /* FCMP */ + switch (fmt) { + /* + * fmt is only 1 bit long + */ + case 0: + retval = sgl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + case 1: + retval = dbl_fcmp(&fpregs[r1], + &fpregs[r2],extru(ir,fptpos,5), + &local_status); + update_status_cbit(status,local_status, + fpu_type_flags, subop); + return(retval); + } + } /* end of switch subop */ + } /* end of else for PA1.0 & PA1.1 */ + case 3: /* class 3 */ + /* + * Be careful out there. + * Crashme can generate cases where FR31R is specified + * as the source or target of a double precision operation. + * Since we just pass the address of the floating-point + * register to the emulation routines, this can cause + * corruption of fpzeroreg. + */ + if (fmt == DBL) + r2 = (extru(ir,fpr2pos,5)<<1); + else + r2 = ((extru(ir,fpr2pos,5)<<1)|(extru(ir,fpxr2pos,1))); + if (r2 == 0) + r2 = fpzeroreg; + switch (subop) { + case 5: + case 6: + case 7: + return(MAJOR_0E_EXCP); + + /* + * Note that fmt is only 1 bit for class 3 */ + case 0: /* FADD */ + switch (fmt) { + case 0: + return(sgl_fadd(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fadd(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + } + case 1: /* FSUB */ + switch (fmt) { + case 0: + return(sgl_fsub(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fsub(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + } + case 2: /* FMPY or XMPYU */ + /* + * check for integer multiply (x bit set) + */ + if (extru(ir,fpxpos,1)) { + /* + * emulate XMPYU + */ + switch (fmt) { + case 0: + /* + * bad instruction if t specifies + * the right half of a register + */ + if (t & 1) + return(MAJOR_0E_EXCP); + BUG(); + /* unsupported + * impyu(&fpregs[r1],&fpregs[r2], + * &fpregs[t]); + */ + return(NOEXCEPTION); + case 1: + return(MAJOR_0E_EXCP); + } + } + else { /* FMPY */ + switch (fmt) { + case 0: + return(sgl_fmpy(&fpregs[r1], + &fpregs[r2],&fpregs[t],status)); + case 1: + return(dbl_fmpy(&fpregs[r1], + &fpregs[r2],&fpregs[t],status)); + } + } + case 3: /* FDIV */ + switch (fmt) { + case 0: + return(sgl_fdiv(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_fdiv(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + } + case 4: /* FREM */ + switch (fmt) { + case 0: + return(sgl_frem(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + case 1: + return(dbl_frem(&fpregs[r1],&fpregs[r2], + &fpregs[t],status)); + } + } /* end of class 3 switch */ + } /* end of switch(class) */ + + /* If we get here, something is really wrong! */ + return(MAJOR_0E_EXCP); +} + + +/* + * routine to decode the 06 (FMPYADD and FMPYCFXT) instruction + */ +static u_int +decode_06(ir,fpregs) +u_int ir; +u_int fpregs[]; +{ + u_int rm1, rm2, tm, ra, ta; /* operands */ + u_int fmt; + u_int error = 0; + u_int status; + u_int fpu_type_flags; + union { + double dbl; + float flt; + struct { u_int i1; u_int i2; } ints; + } mtmp, atmp; + + + status = fpregs[0]; /* use a local copy of status reg */ + fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; /* get fpu type flags */ + fmt = extru(ir, fpmultifmt, 1); /* get sgl/dbl flag */ + if (fmt == 0) { /* DBL */ + rm1 = extru(ir, fprm1pos, 5) * sizeof(double)/sizeof(u_int); + if (rm1 == 0) + rm1 = fpzeroreg; + rm2 = extru(ir, fprm2pos, 5) * sizeof(double)/sizeof(u_int); + if (rm2 == 0) + rm2 = fpzeroreg; + tm = extru(ir, fptmpos, 5) * sizeof(double)/sizeof(u_int); + if (tm == 0) + return(MAJOR_06_EXCP); + ra = extru(ir, fprapos, 5) * sizeof(double)/sizeof(u_int); + ta = extru(ir, fptapos, 5) * sizeof(double)/sizeof(u_int); + if (ta == 0) + return(MAJOR_06_EXCP); + + if (fpu_type_flags & TIMEX_ROLEX_FPU_MASK) { + + if (ra == 0) { + /* special case FMPYCFXT, see sgl case below */ + if (dbl_fmpy(&fpregs[rm1],&fpregs[rm2], + &mtmp.ints.i1,&status)) + error = 1; + if (dbl_to_sgl_fcnvfxt(&fpregs[ta], + &atmp.ints.i1,&atmp.ints.i1,&status)) + error = 1; + } + else { + + if (dbl_fmpy(&fpregs[rm1],&fpregs[rm2],&mtmp.ints.i1, + &status)) + error = 1; + if (dbl_fadd(&fpregs[ta], &fpregs[ra], &atmp.ints.i1, + &status)) + error = 1; + } + } + + else + + { + if (ra == 0) + ra = fpzeroreg; + + if (dbl_fmpy(&fpregs[rm1],&fpregs[rm2],&mtmp.ints.i1, + &status)) + error = 1; + if (dbl_fadd(&fpregs[ta], &fpregs[ra], &atmp.ints.i1, + &status)) + error = 1; + + } + + if (error) + return(MAJOR_06_EXCP); + else { + /* copy results */ + fpregs[tm] = mtmp.ints.i1; + fpregs[tm+1] = mtmp.ints.i2; + fpregs[ta] = atmp.ints.i1; + fpregs[ta+1] = atmp.ints.i2; + fpregs[0] = status; + return(NOEXCEPTION); + } + } + else { /* SGL */ + /* + * calculate offsets for single precision numbers + * See table 6-14 in PA-89 architecture for mapping + */ + rm1 = (extru(ir,fprm1pos,4) | 0x10 ) << 1; /* get offset */ + rm1 |= extru(ir,fprm1pos-4,1); /* add right word offset */ + + rm2 = (extru(ir,fprm2pos,4) | 0x10 ) << 1; /* get offset */ + rm2 |= extru(ir,fprm2pos-4,1); /* add right word offset */ + + tm = (extru(ir,fptmpos,4) | 0x10 ) << 1; /* get offset */ + tm |= extru(ir,fptmpos-4,1); /* add right word offset */ + + ra = (extru(ir,fprapos,4) | 0x10 ) << 1; /* get offset */ + ra |= extru(ir,fprapos-4,1); /* add right word offset */ + + ta = (extru(ir,fptapos,4) | 0x10 ) << 1; /* get offset */ + ta |= extru(ir,fptapos-4,1); /* add right word offset */ + + if (ra == 0x20 &&(fpu_type_flags & TIMEX_ROLEX_FPU_MASK)) { + /* special case FMPYCFXT (really 0) + * This instruction is only present on the Timex and + * Rolex fpu's in so if it is the special case and + * one of these fpu's we run the FMPYCFXT instruction + */ + if (sgl_fmpy(&fpregs[rm1],&fpregs[rm2],&mtmp.ints.i1, + &status)) + error = 1; + if (sgl_to_sgl_fcnvfxt(&fpregs[ta],&atmp.ints.i1, + &atmp.ints.i1,&status)) + error = 1; + } + else { + if (sgl_fmpy(&fpregs[rm1],&fpregs[rm2],&mtmp.ints.i1, + &status)) + error = 1; + if (sgl_fadd(&fpregs[ta], &fpregs[ra], &atmp.ints.i1, + &status)) + error = 1; + } + if (error) + return(MAJOR_06_EXCP); + else { + /* copy results */ + fpregs[tm] = mtmp.ints.i1; + fpregs[ta] = atmp.ints.i1; + fpregs[0] = status; + return(NOEXCEPTION); + } + } +} + +/* + * routine to decode the 26 (FMPYSUB) instruction + */ +static u_int +decode_26(ir,fpregs) +u_int ir; +u_int fpregs[]; +{ + u_int rm1, rm2, tm, ra, ta; /* operands */ + u_int fmt; + u_int error = 0; + u_int status; + union { + double dbl; + float flt; + struct { u_int i1; u_int i2; } ints; + } mtmp, atmp; + + + status = fpregs[0]; + fmt = extru(ir, fpmultifmt, 1); /* get sgl/dbl flag */ + if (fmt == 0) { /* DBL */ + rm1 = extru(ir, fprm1pos, 5) * sizeof(double)/sizeof(u_int); + if (rm1 == 0) + rm1 = fpzeroreg; + rm2 = extru(ir, fprm2pos, 5) * sizeof(double)/sizeof(u_int); + if (rm2 == 0) + rm2 = fpzeroreg; + tm = extru(ir, fptmpos, 5) * sizeof(double)/sizeof(u_int); + if (tm == 0) + return(MAJOR_26_EXCP); + ra = extru(ir, fprapos, 5) * sizeof(double)/sizeof(u_int); + if (ra == 0) + return(MAJOR_26_EXCP); + ta = extru(ir, fptapos, 5) * sizeof(double)/sizeof(u_int); + if (ta == 0) + return(MAJOR_26_EXCP); + + if (dbl_fmpy(&fpregs[rm1],&fpregs[rm2],&mtmp.ints.i1,&status)) + error = 1; + if (dbl_fsub(&fpregs[ta], &fpregs[ra], &atmp.ints.i1,&status)) + error = 1; + if (error) + return(MAJOR_26_EXCP); + else { + /* copy results */ + fpregs[tm] = mtmp.ints.i1; + fpregs[tm+1] = mtmp.ints.i2; + fpregs[ta] = atmp.ints.i1; + fpregs[ta+1] = atmp.ints.i2; + fpregs[0] = status; + return(NOEXCEPTION); + } + } + else { /* SGL */ + /* + * calculate offsets for single precision numbers + * See table 6-14 in PA-89 architecture for mapping + */ + rm1 = (extru(ir,fprm1pos,4) | 0x10 ) << 1; /* get offset */ + rm1 |= extru(ir,fprm1pos-4,1); /* add right word offset */ + + rm2 = (extru(ir,fprm2pos,4) | 0x10 ) << 1; /* get offset */ + rm2 |= extru(ir,fprm2pos-4,1); /* add right word offset */ + + tm = (extru(ir,fptmpos,4) | 0x10 ) << 1; /* get offset */ + tm |= extru(ir,fptmpos-4,1); /* add right word offset */ + + ra = (extru(ir,fprapos,4) | 0x10 ) << 1; /* get offset */ + ra |= extru(ir,fprapos-4,1); /* add right word offset */ + + ta = (extru(ir,fptapos,4) | 0x10 ) << 1; /* get offset */ + ta |= extru(ir,fptapos-4,1); /* add right word offset */ + + if (sgl_fmpy(&fpregs[rm1],&fpregs[rm2],&mtmp.ints.i1,&status)) + error = 1; + if (sgl_fsub(&fpregs[ta], &fpregs[ra], &atmp.ints.i1,&status)) + error = 1; + if (error) + return(MAJOR_26_EXCP); + else { + /* copy results */ + fpregs[tm] = mtmp.ints.i1; + fpregs[ta] = atmp.ints.i1; + fpregs[0] = status; + return(NOEXCEPTION); + } + } + +} + +/* + * routine to decode the 2E (FMPYFADD,FMPYNFADD) instructions + */ +static u_int +decode_2e(ir,fpregs) +u_int ir; +u_int fpregs[]; +{ + u_int rm1, rm2, ra, t; /* operands */ + u_int fmt; + + fmt = extru(ir,fpfmtpos,1); /* get fmt completer */ + if (fmt == DBL) { /* DBL */ + rm1 = extru(ir,fprm1pos,5) * sizeof(double)/sizeof(u_int); + if (rm1 == 0) + rm1 = fpzeroreg; + rm2 = extru(ir,fprm2pos,5) * sizeof(double)/sizeof(u_int); + if (rm2 == 0) + rm2 = fpzeroreg; + ra = ((extru(ir,fpraupos,3)<<2)|(extru(ir,fpralpos,3)>>1)) * + sizeof(double)/sizeof(u_int); + if (ra == 0) + ra = fpzeroreg; + t = extru(ir,fptpos,5) * sizeof(double)/sizeof(u_int); + if (t == 0) + return(MAJOR_2E_EXCP); + + if (extru(ir,fpfusedsubop,1)) { /* fmpyfadd or fmpynfadd? */ + return(dbl_fmpynfadd(&fpregs[rm1], &fpregs[rm2], + &fpregs[ra], &fpregs[0], &fpregs[t])); + } else { + return(dbl_fmpyfadd(&fpregs[rm1], &fpregs[rm2], + &fpregs[ra], &fpregs[0], &fpregs[t])); + } + } /* end DBL */ + else { /* SGL */ + rm1 = (extru(ir,fprm1pos,5)<<1)|(extru(ir,fpxrm1pos,1)); + if (rm1 == 0) + rm1 = fpzeroreg; + rm2 = (extru(ir,fprm2pos,5)<<1)|(extru(ir,fpxrm2pos,1)); + if (rm2 == 0) + rm2 = fpzeroreg; + ra = (extru(ir,fpraupos,3)<<3)|extru(ir,fpralpos,3); + if (ra == 0) + ra = fpzeroreg; + t = ((extru(ir,fptpos,5)<<1)|(extru(ir,fpxtpos,1))); + if (t == 0) + return(MAJOR_2E_EXCP); + + if (extru(ir,fpfusedsubop,1)) { /* fmpyfadd or fmpynfadd? */ + return(sgl_fmpynfadd(&fpregs[rm1], &fpregs[rm2], + &fpregs[ra], &fpregs[0], &fpregs[t])); + } else { + return(sgl_fmpyfadd(&fpregs[rm1], &fpregs[rm2], + &fpregs[ra], &fpregs[0], &fpregs[t])); + } + } /* end SGL */ +} + +/* + * update_status_cbit + * + * This routine returns the correct FP status register value in + * *status, based on the C-bit & V-bit returned by the FCMP + * emulation routine in new_status. The architecture type + * (PA83, PA89 or PA2.0) is available in fpu_type. The y_field + * and the architecture type are used to determine what flavor + * of FCMP is being emulated. + */ +static void +update_status_cbit(status, new_status, fpu_type, y_field) +u_int *status, new_status; +u_int fpu_type; +u_int y_field; +{ + /* + * For PA89 FPU's which implement the Compare Queue and + * for PA2.0 FPU's, update the Compare Queue if the y-field = 0, + * otherwise update the specified bit in the Compare Array. + * Note that the y-field will always be 0 for non-PA2.0 FPU's. + */ + if ((fpu_type & TIMEX_EXTEN_FLAG) || + (fpu_type & ROLEX_EXTEN_FLAG) || + (fpu_type & PA2_0_FPU_FLAG)) { + if (y_field == 0) { + *status = ((*status & 0x04000000) >> 5) | /* old Cbit */ + ((*status & 0x003ff000) >> 1) | /* old CQ */ + (new_status & 0xffc007ff); /* all other bits*/ + } else { + *status = (*status & 0x04000000) | /* old Cbit */ + ((new_status & 0x04000000) >> (y_field+4)) | + (new_status & ~0x04000000 & /* other bits */ + ~(0x04000000 >> (y_field+4))); + } + } + /* if PA83, just update the C-bit */ + else { + *status = new_status; + } +} diff --git a/kernel/arch/parisc/math-emu/frnd.c b/kernel/arch/parisc/math-emu/frnd.c new file mode 100644 index 000000000..904b3844b --- /dev/null +++ b/kernel/arch/parisc/math-emu/frnd.c @@ -0,0 +1,252 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * Purpose: + * Single Floating-point Round to Integer + * Double Floating-point Round to Integer + * Quad Floating-point Round to Integer (returns unimplemented) + * + * External Interfaces: + * dbl_frnd(srcptr,nullptr,dstptr,status) + * sgl_frnd(srcptr,nullptr,dstptr,status) + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" +#include "dbl_float.h" +#include "cnv_float.h" + +/* + * Single Floating-point Round to Integer + */ + +/*ARGSUSED*/ +int +sgl_frnd(sgl_floating_point *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int src, result; + register int src_exponent; + register boolean inexact = FALSE; + + src = *srcptr; + /* + * check source operand for NaN or infinity + */ + if ((src_exponent = Sgl_exponent(src)) == SGL_INFINITY_EXPONENT) { + /* + * is signaling NaN? + */ + if (Sgl_isone_signaling(src)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(src); + } + /* + * return quiet NaN or infinity + */ + *dstptr = src; + return(NOEXCEPTION); + } + /* + * Need to round? + */ + if ((src_exponent -= SGL_BIAS) >= SGL_P - 1) { + *dstptr = src; + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + Sgl_clear_exponent_set_hidden(src); + result = src; + Sgl_rightshift(result,(SGL_P-1) - (src_exponent)); + /* check for inexact */ + if (Sgl_isinexact_to_fix(src,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) Sgl_increment(result); + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) Sgl_increment(result); + break; + case ROUNDNEAREST: + if (Sgl_isone_roundbit(src,src_exponent)) + if (Sgl_isone_stickybit(src,src_exponent) + || (Sgl_isone_lowmantissa(result))) + Sgl_increment(result); + } + } + Sgl_leftshift(result,(SGL_P-1) - (src_exponent)); + if (Sgl_isone_hiddenoverflow(result)) + Sgl_set_exponent(result,src_exponent + (SGL_BIAS+1)); + else Sgl_set_exponent(result,src_exponent + SGL_BIAS); + } + else { + result = src; /* set sign */ + Sgl_setzero_exponentmantissa(result); + /* check for inexact */ + if (Sgl_isnotzero_exponentmantissa(src)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(src)) + Sgl_set_exponent(result,SGL_BIAS); + break; + case ROUNDMINUS: + if (Sgl_isone_sign(src)) + Sgl_set_exponent(result,SGL_BIAS); + break; + case ROUNDNEAREST: + if (src_exponent == -1) + if (Sgl_isnotzero_mantissa(src)) + Sgl_set_exponent(result,SGL_BIAS); + } + } + } + *dstptr = result; + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} + +/* + * Double Floating-point Round to Integer + */ + +/*ARGSUSED*/ +int +dbl_frnd( + dbl_floating_point *srcptr, + unsigned int *nullptr, + dbl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int srcp1, srcp2, resultp1, resultp2; + register int src_exponent; + register boolean inexact = FALSE; + + Dbl_copyfromptr(srcptr,srcp1,srcp2); + /* + * check source operand for NaN or infinity + */ + if ((src_exponent = Dbl_exponent(srcp1)) == DBL_INFINITY_EXPONENT) { + /* + * is signaling NaN? + */ + if (Dbl_isone_signaling(srcp1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Dbl_set_quiet(srcp1); + } + /* + * return quiet NaN or infinity + */ + Dbl_copytoptr(srcp1,srcp2,dstptr); + return(NOEXCEPTION); + } + /* + * Need to round? + */ + if ((src_exponent -= DBL_BIAS) >= DBL_P - 1) { + Dbl_copytoptr(srcp1,srcp2,dstptr); + return(NOEXCEPTION); + } + /* + * Generate result + */ + if (src_exponent >= 0) { + Dbl_clear_exponent_set_hidden(srcp1); + resultp1 = srcp1; + resultp2 = srcp2; + Dbl_rightshift(resultp1,resultp2,(DBL_P-1) - (src_exponent)); + /* check for inexact */ + if (Dbl_isinexact_to_fix(srcp1,srcp2,src_exponent)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) + Dbl_increment(resultp1,resultp2); + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) + Dbl_increment(resultp1,resultp2); + break; + case ROUNDNEAREST: + if (Dbl_isone_roundbit(srcp1,srcp2,src_exponent)) + if (Dbl_isone_stickybit(srcp1,srcp2,src_exponent) + || (Dbl_isone_lowmantissap2(resultp2))) + Dbl_increment(resultp1,resultp2); + } + } + Dbl_leftshift(resultp1,resultp2,(DBL_P-1) - (src_exponent)); + if (Dbl_isone_hiddenoverflow(resultp1)) + Dbl_set_exponent(resultp1,src_exponent + (DBL_BIAS+1)); + else Dbl_set_exponent(resultp1,src_exponent + DBL_BIAS); + } + else { + resultp1 = srcp1; /* set sign */ + Dbl_setzero_exponentmantissa(resultp1,resultp2); + /* check for inexact */ + if (Dbl_isnotzero_exponentmantissa(srcp1,srcp2)) { + inexact = TRUE; + /* round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Dbl_iszero_sign(srcp1)) + Dbl_set_exponent(resultp1,DBL_BIAS); + break; + case ROUNDMINUS: + if (Dbl_isone_sign(srcp1)) + Dbl_set_exponent(resultp1,DBL_BIAS); + break; + case ROUNDNEAREST: + if (src_exponent == -1) + if (Dbl_isnotzero_mantissa(srcp1,srcp2)) + Dbl_set_exponent(resultp1,DBL_BIAS); + } + } + } + Dbl_copytoptr(resultp1,resultp2,dstptr); + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/hppa.h b/kernel/arch/parisc/math-emu/hppa.h new file mode 100644 index 000000000..5d3d52f73 --- /dev/null +++ b/kernel/arch/parisc/math-emu/hppa.h @@ -0,0 +1,42 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + + +/* amount is assumed to be a constant between 0 and 32 (non-inclusive) */ +#define Shiftdouble(left,right,amount,dest) \ + /* int left, right, amount, dest; */ \ + dest = ((left) << (32-(amount))) | ((unsigned int)(right) >> (amount)) + +/* amount must be less than 32 */ +#define Variableshiftdouble(left,right,amount,dest) \ + /* unsigned int left, right; int amount, dest; */ \ + if (amount == 0) dest = right; \ + else dest = ((((unsigned) left)&0x7fffffff) << (32-(amount))) | \ + ((unsigned) right >> (amount)) + +/* amount must be between 0 and 32 (non-inclusive) */ +#define Variable_shift_double(left,right,amount,dest) \ + /* unsigned int left, right; int amount, dest; */ \ + dest = (left << (32-(amount))) | ((unsigned) right >> (amount)) diff --git a/kernel/arch/parisc/math-emu/math-emu.h b/kernel/arch/parisc/math-emu/math-emu.h new file mode 100644 index 000000000..3a99f5929 --- /dev/null +++ b/kernel/arch/parisc/math-emu/math-emu.h @@ -0,0 +1,27 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _PARISC_MATH_EMU_H +#define _PARISC_MATH_EMU_H + +#include <asm/ptrace.h> +extern int handle_fpe(struct pt_regs *regs); + +#endif diff --git a/kernel/arch/parisc/math-emu/sfadd.c b/kernel/arch/parisc/math-emu/sfadd.c new file mode 100644 index 000000000..f802cd6c7 --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfadd.c @@ -0,0 +1,518 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfadd.c $Revision: 1.1 $ + * + * Purpose: + * Single_add: add two single precision values. + * + * External Interfaces: + * sgl_fadd(leftptr, rightptr, dstptr, status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" + +/* + * Single_add: add two single precision values. + */ +int +sgl_fadd( + sgl_floating_point *leftptr, + sgl_floating_point *rightptr, + sgl_floating_point *dstptr, + unsigned int *status) + { + register unsigned int left, right, result, extent; + register unsigned int signless_upper_left, signless_upper_right, save; + + + register int result_exponent, right_exponent, diff_exponent; + register int sign_save, jumpsize; + register boolean inexact = FALSE; + register boolean underflowtrap; + + /* Create local copies of the numbers */ + left = *leftptr; + right = *rightptr; + + /* A zero "save" helps discover equal operands (for later), * + * and is used in swapping operands (if needed). */ + Sgl_xortointp1(left,right,/*to*/save); + + /* + * check first operand for NaN's or infinity + */ + if ((result_exponent = Sgl_exponent(left)) == SGL_INFINITY_EXPONENT) + { + if (Sgl_iszero_mantissa(left)) + { + if (Sgl_isnotnan(right)) + { + if (Sgl_isinfinity(right) && save!=0) + { + /* + * invalid since operands are opposite signed infinity's + */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * return infinity + */ + *dstptr = left; + return(NOEXCEPTION); + } + } + else + { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(left)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(left); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(right)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(right); + *dstptr = right; + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + *dstptr = left; + return(NOEXCEPTION); + } + } /* End left NaN or Infinity processing */ + /* + * check second operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(right)) + { + if (Sgl_iszero_mantissa(right)) + { + /* return infinity */ + *dstptr = right; + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(right)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(right); + } + /* + * return quiet NaN + */ + *dstptr = right; + return(NOEXCEPTION); + } /* End right NaN or Infinity processing */ + + /* Invariant: Must be dealing with finite numbers */ + + /* Compare operands by removing the sign */ + Sgl_copytoint_exponentmantissa(left,signless_upper_left); + Sgl_copytoint_exponentmantissa(right,signless_upper_right); + + /* sign difference selects add or sub operation. */ + if(Sgl_ismagnitudeless(signless_upper_left,signless_upper_right)) + { + /* Set the left operand to the larger one by XOR swap * + * First finish the first word using "save" */ + Sgl_xorfromintp1(save,right,/*to*/right); + Sgl_xorfromintp1(save,left,/*to*/left); + result_exponent = Sgl_exponent(left); + } + /* Invariant: left is not smaller than right. */ + + if((right_exponent = Sgl_exponent(right)) == 0) + { + /* Denormalized operands. First look for zeroes */ + if(Sgl_iszero_mantissa(right)) + { + /* right is zero */ + if(Sgl_iszero_exponentmantissa(left)) + { + /* Both operands are zeros */ + if(Is_rounding_mode(ROUNDMINUS)) + { + Sgl_or_signs(left,/*with*/right); + } + else + { + Sgl_and_signs(left,/*with*/right); + } + } + else + { + /* Left is not a zero and must be the result. Trapped + * underflows are signaled if left is denormalized. Result + * is always exact. */ + if( (result_exponent == 0) && Is_underflowtrap_enabled() ) + { + /* need to normalize results mantissa */ + sign_save = Sgl_signextendedsign(left); + Sgl_leftshiftby1(left); + Sgl_normalize(left,result_exponent); + Sgl_set_sign(left,/*using*/sign_save); + Sgl_setwrapped_exponent(left,result_exponent,unfl); + *dstptr = left; + return(UNDERFLOWEXCEPTION); + } + } + *dstptr = left; + return(NOEXCEPTION); + } + + /* Neither are zeroes */ + Sgl_clear_sign(right); /* Exponent is already cleared */ + if(result_exponent == 0 ) + { + /* Both operands are denormalized. The result must be exact + * and is simply calculated. A sum could become normalized and a + * difference could cancel to a true zero. */ + if( (/*signed*/int) save < 0 ) + { + Sgl_subtract(left,/*minus*/right,/*into*/result); + if(Sgl_iszero_mantissa(result)) + { + if(Is_rounding_mode(ROUNDMINUS)) + { + Sgl_setone_sign(result); + } + else + { + Sgl_setzero_sign(result); + } + *dstptr = result; + return(NOEXCEPTION); + } + } + else + { + Sgl_addition(left,right,/*into*/result); + if(Sgl_isone_hidden(result)) + { + *dstptr = result; + return(NOEXCEPTION); + } + } + if(Is_underflowtrap_enabled()) + { + /* need to normalize result */ + sign_save = Sgl_signextendedsign(result); + Sgl_leftshiftby1(result); + Sgl_normalize(result,result_exponent); + Sgl_set_sign(result,/*using*/sign_save); + Sgl_setwrapped_exponent(result,result_exponent,unfl); + *dstptr = result; + return(UNDERFLOWEXCEPTION); + } + *dstptr = result; + return(NOEXCEPTION); + } + right_exponent = 1; /* Set exponent to reflect different bias + * with denomalized numbers. */ + } + else + { + Sgl_clear_signexponent_set_hidden(right); + } + Sgl_clear_exponent_set_hidden(left); + diff_exponent = result_exponent - right_exponent; + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for this + * infrequent case. + */ + if(diff_exponent > SGL_THRESHOLD) + { + diff_exponent = SGL_THRESHOLD; + } + + /* Align right operand by shifting to right */ + Sgl_right_align(/*operand*/right,/*shifted by*/diff_exponent, + /*and lower to*/extent); + + /* Treat sum and difference of the operands separately. */ + if( (/*signed*/int) save < 0 ) + { + /* + * Difference of the two operands. Their can be no overflow. A + * borrow can occur out of the hidden bit and force a post + * normalization phase. + */ + Sgl_subtract_withextension(left,/*minus*/right,/*with*/extent,/*into*/result); + if(Sgl_iszero_hidden(result)) + { + /* Handle normalization */ + /* A straightforward algorithm would now shift the result + * and extension left until the hidden bit becomes one. Not + * all of the extension bits need participate in the shift. + * Only the two most significant bits (round and guard) are + * needed. If only a single shift is needed then the guard + * bit becomes a significant low order bit and the extension + * must participate in the rounding. If more than a single + * shift is needed, then all bits to the right of the guard + * bit are zeros, and the guard bit may or may not be zero. */ + sign_save = Sgl_signextendedsign(result); + Sgl_leftshiftby1_withextent(result,extent,result); + + /* Need to check for a zero result. The sign and exponent + * fields have already been zeroed. The more efficient test + * of the full object can be used. + */ + if(Sgl_iszero(result)) + /* Must have been "x-x" or "x+(-x)". */ + { + if(Is_rounding_mode(ROUNDMINUS)) Sgl_setone_sign(result); + *dstptr = result; + return(NOEXCEPTION); + } + result_exponent--; + /* Look to see if normalization is finished. */ + if(Sgl_isone_hidden(result)) + { + if(result_exponent==0) + { + /* Denormalized, exponent should be zero. Left operand * + * was normalized, so extent (guard, round) was zero */ + goto underflow; + } + else + { + /* No further normalization is needed. */ + Sgl_set_sign(result,/*using*/sign_save); + Ext_leftshiftby1(extent); + goto round; + } + } + + /* Check for denormalized, exponent should be zero. Left * + * operand was normalized, so extent (guard, round) was zero */ + if(!(underflowtrap = Is_underflowtrap_enabled()) && + result_exponent==0) goto underflow; + + /* Shift extension to complete one bit of normalization and + * update exponent. */ + Ext_leftshiftby1(extent); + + /* Discover first one bit to determine shift amount. Use a + * modified binary search. We have already shifted the result + * one position right and still not found a one so the remainder + * of the extension must be zero and simplifies rounding. */ + /* Scan bytes */ + while(Sgl_iszero_hiddenhigh7mantissa(result)) + { + Sgl_leftshiftby8(result); + if((result_exponent -= 8) <= 0 && !underflowtrap) + goto underflow; + } + /* Now narrow it down to the nibble */ + if(Sgl_iszero_hiddenhigh3mantissa(result)) + { + /* The lower nibble contains the normalizing one */ + Sgl_leftshiftby4(result); + if((result_exponent -= 4) <= 0 && !underflowtrap) + goto underflow; + } + /* Select case were first bit is set (already normalized) + * otherwise select the proper shift. */ + if((jumpsize = Sgl_hiddenhigh3mantissa(result)) > 7) + { + /* Already normalized */ + if(result_exponent <= 0) goto underflow; + Sgl_set_sign(result,/*using*/sign_save); + Sgl_set_exponent(result,/*using*/result_exponent); + *dstptr = result; + return(NOEXCEPTION); + } + Sgl_sethigh4bits(result,/*using*/sign_save); + switch(jumpsize) + { + case 1: + { + Sgl_leftshiftby3(result); + result_exponent -= 3; + break; + } + case 2: + case 3: + { + Sgl_leftshiftby2(result); + result_exponent -= 2; + break; + } + case 4: + case 5: + case 6: + case 7: + { + Sgl_leftshiftby1(result); + result_exponent -= 1; + break; + } + } + if(result_exponent > 0) + { + Sgl_set_exponent(result,/*using*/result_exponent); + *dstptr = result; + return(NOEXCEPTION); /* Sign bit is already set */ + } + /* Fixup potential underflows */ + underflow: + if(Is_underflowtrap_enabled()) + { + Sgl_set_sign(result,sign_save); + Sgl_setwrapped_exponent(result,result_exponent,unfl); + *dstptr = result; + /* inexact = FALSE; */ + return(UNDERFLOWEXCEPTION); + } + /* + * Since we cannot get an inexact denormalized result, + * we can now return. + */ + Sgl_right_align(result,/*by*/(1-result_exponent),extent); + Sgl_clear_signexponent(result); + Sgl_set_sign(result,sign_save); + *dstptr = result; + return(NOEXCEPTION); + } /* end if(hidden...)... */ + /* Fall through and round */ + } /* end if(save < 0)... */ + else + { + /* Add magnitudes */ + Sgl_addition(left,right,/*to*/result); + if(Sgl_isone_hiddenoverflow(result)) + { + /* Prenormalization required. */ + Sgl_rightshiftby1_withextent(result,extent,extent); + Sgl_arithrightshiftby1(result); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...add magnitudes... */ + + /* Round the result. If the extension is all zeros,then the result is + * exact. Otherwise round in the correct direction. No underflow is + * possible. If a postnormalization is necessary, then the mantissa is + * all zeros so no shift is needed. */ + round: + if(Ext_isnotzero(extent)) + { + inexact = TRUE; + switch(Rounding_mode()) + { + case ROUNDNEAREST: /* The default. */ + if(Ext_isone_sign(extent)) + { + /* at least 1/2 ulp */ + if(Ext_isnotzero_lower(extent) || + Sgl_isone_lowmantissa(result)) + { + /* either exactly half way and odd or more than 1/2ulp */ + Sgl_increment(result); + } + } + break; + + case ROUNDPLUS: + if(Sgl_iszero_sign(result)) + { + /* Round up positive results */ + Sgl_increment(result); + } + break; + + case ROUNDMINUS: + if(Sgl_isone_sign(result)) + { + /* Round down negative results */ + Sgl_increment(result); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if(Sgl_isone_hiddenoverflow(result)) result_exponent++; + } + if(result_exponent == SGL_INFINITY_EXPONENT) + { + /* Overflow */ + if(Is_overflowtrap_enabled()) + { + Sgl_setwrapped_exponent(result,result_exponent,ovfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + else + { + Set_overflowflag(); + inexact = TRUE; + Sgl_setoverflow(result); + } + } + else Sgl_set_exponent(result,result_exponent); + *dstptr = result; + if(inexact) + if(Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); + } diff --git a/kernel/arch/parisc/math-emu/sfcmp.c b/kernel/arch/parisc/math-emu/sfcmp.c new file mode 100644 index 000000000..1466fb46e --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfcmp.c @@ -0,0 +1,155 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfcmp.c $Revision: 1.1 $ + * + * Purpose: + * sgl_cmp: compare two values + * + * External Interfaces: + * sgl_fcmp(leftptr, rightptr, cond, status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" + +/* + * sgl_cmp: compare two values + */ +int +sgl_fcmp (sgl_floating_point * leftptr, sgl_floating_point * rightptr, + unsigned int cond, unsigned int *status) + + /* The predicate to be tested */ + + { + register unsigned int left, right; + register int xorresult; + + /* Create local copies of the numbers */ + left = *leftptr; + right = *rightptr; + + /* + * Test for NaN + */ + if( (Sgl_exponent(left) == SGL_INFINITY_EXPONENT) + || (Sgl_exponent(right) == SGL_INFINITY_EXPONENT) ) + { + /* Check if a NaN is involved. Signal an invalid exception when + * comparing a signaling NaN or when comparing quiet NaNs and the + * low bit of the condition is set */ + if( ( (Sgl_exponent(left) == SGL_INFINITY_EXPONENT) + && Sgl_isnotzero_mantissa(left) + && (Exception(cond) || Sgl_isone_signaling(left))) + || + ( (Sgl_exponent(right) == SGL_INFINITY_EXPONENT) + && Sgl_isnotzero_mantissa(right) + && (Exception(cond) || Sgl_isone_signaling(right)) ) ) + { + if( Is_invalidtrap_enabled() ) { + Set_status_cbit(Unordered(cond)); + return(INVALIDEXCEPTION); + } + else Set_invalidflag(); + Set_status_cbit(Unordered(cond)); + return(NOEXCEPTION); + } + /* All the exceptional conditions are handled, now special case + NaN compares */ + else if( ((Sgl_exponent(left) == SGL_INFINITY_EXPONENT) + && Sgl_isnotzero_mantissa(left)) + || + ((Sgl_exponent(right) == SGL_INFINITY_EXPONENT) + && Sgl_isnotzero_mantissa(right)) ) + { + /* NaNs always compare unordered. */ + Set_status_cbit(Unordered(cond)); + return(NOEXCEPTION); + } + /* infinities will drop down to the normal compare mechanisms */ + } + /* First compare for unequal signs => less or greater or + * special equal case */ + Sgl_xortointp1(left,right,xorresult); + if( xorresult < 0 ) + { + /* left negative => less, left positive => greater. + * equal is possible if both operands are zeros. */ + if( Sgl_iszero_exponentmantissa(left) + && Sgl_iszero_exponentmantissa(right) ) + { + Set_status_cbit(Equal(cond)); + } + else if( Sgl_isone_sign(left) ) + { + Set_status_cbit(Lessthan(cond)); + } + else + { + Set_status_cbit(Greaterthan(cond)); + } + } + /* Signs are the same. Treat negative numbers separately + * from the positives because of the reversed sense. */ + else if( Sgl_all(left) == Sgl_all(right) ) + { + Set_status_cbit(Equal(cond)); + } + else if( Sgl_iszero_sign(left) ) + { + /* Positive compare */ + if( Sgl_all(left) < Sgl_all(right) ) + { + Set_status_cbit(Lessthan(cond)); + } + else + { + Set_status_cbit(Greaterthan(cond)); + } + } + else + { + /* Negative compare. Signed or unsigned compares + * both work the same. That distinction is only + * important when the sign bits differ. */ + if( Sgl_all(left) > Sgl_all(right) ) + { + Set_status_cbit(Lessthan(cond)); + } + else + { + Set_status_cbit(Greaterthan(cond)); + } + } + return(NOEXCEPTION); + } diff --git a/kernel/arch/parisc/math-emu/sfdiv.c b/kernel/arch/parisc/math-emu/sfdiv.c new file mode 100644 index 000000000..3e2a4d6da --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfdiv.c @@ -0,0 +1,392 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfdiv.c $Revision: 1.1 $ + * + * Purpose: + * Single Precision Floating-point Divide + * + * External Interfaces: + * sgl_fdiv(srcptr1,srcptr2,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" + +/* + * Single Precision Floating-point Divide + */ + +int +sgl_fdiv (sgl_floating_point * srcptr1, sgl_floating_point * srcptr2, + sgl_floating_point * dstptr, unsigned int *status) +{ + register unsigned int opnd1, opnd2, opnd3, result; + register int dest_exponent, count; + register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE; + boolean is_tiny; + + opnd1 = *srcptr1; + opnd2 = *srcptr2; + /* + * set sign bit of result + */ + if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) Sgl_setnegativezero(result); + else Sgl_setzero(result); + /* + * check first operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd1)) { + if (Sgl_iszero_mantissa(opnd1)) { + if (Sgl_isnotnan(opnd2)) { + if (Sgl_isinfinity(opnd2)) { + /* + * invalid since both operands + * are infinity + */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd1); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + *dstptr = opnd1; + return(NOEXCEPTION); + } + } + /* + * check second operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd2)) { + if (Sgl_iszero_mantissa(opnd2)) { + /* + * return zero + */ + Sgl_setzero_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + } + /* + * return quiet NaN + */ + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * check for division by zero + */ + if (Sgl_iszero_exponentmantissa(opnd2)) { + if (Sgl_iszero_exponentmantissa(opnd1)) { + /* invalid since both operands are zero */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + if (Is_divisionbyzerotrap_enabled()) + return(DIVISIONBYZEROEXCEPTION); + Set_divisionbyzeroflag(); + Sgl_setinfinity_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * Generate exponent + */ + dest_exponent = Sgl_exponent(opnd1) - Sgl_exponent(opnd2) + SGL_BIAS; + + /* + * Generate mantissa + */ + if (Sgl_isnotzero_exponent(opnd1)) { + /* set hidden bit */ + Sgl_clear_signexponent_set_hidden(opnd1); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd1)) { + Sgl_setzero_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Sgl_clear_signexponent(opnd1); + Sgl_leftshiftby1(opnd1); + Sgl_normalize(opnd1,dest_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Sgl_isnotzero_exponent(opnd2)) { + Sgl_clear_signexponent_set_hidden(opnd2); + } + else { + /* is denormalized; want to normalize */ + Sgl_clear_signexponent(opnd2); + Sgl_leftshiftby1(opnd2); + while(Sgl_iszero_hiddenhigh7mantissa(opnd2)) { + Sgl_leftshiftby8(opnd2); + dest_exponent += 8; + } + if(Sgl_iszero_hiddenhigh3mantissa(opnd2)) { + Sgl_leftshiftby4(opnd2); + dest_exponent += 4; + } + while(Sgl_iszero_hidden(opnd2)) { + Sgl_leftshiftby1(opnd2); + dest_exponent += 1; + } + } + + /* Divide the source mantissas */ + + /* + * A non_restoring divide algorithm is used. + */ + Sgl_subtract(opnd1,opnd2,opnd1); + Sgl_setzero(opnd3); + for (count=1;count<=SGL_P && Sgl_all(opnd1);count++) { + Sgl_leftshiftby1(opnd1); + Sgl_leftshiftby1(opnd3); + if (Sgl_iszero_sign(opnd1)) { + Sgl_setone_lowmantissa(opnd3); + Sgl_subtract(opnd1,opnd2,opnd1); + } + else Sgl_addition(opnd1,opnd2,opnd1); + } + if (count <= SGL_P) { + Sgl_leftshiftby1(opnd3); + Sgl_setone_lowmantissa(opnd3); + Sgl_leftshift(opnd3,SGL_P-count); + if (Sgl_iszero_hidden(opnd3)) { + Sgl_leftshiftby1(opnd3); + dest_exponent--; + } + } + else { + if (Sgl_iszero_hidden(opnd3)) { + /* need to get one more bit of result */ + Sgl_leftshiftby1(opnd1); + Sgl_leftshiftby1(opnd3); + if (Sgl_iszero_sign(opnd1)) { + Sgl_setone_lowmantissa(opnd3); + Sgl_subtract(opnd1,opnd2,opnd1); + } + else Sgl_addition(opnd1,opnd2,opnd1); + dest_exponent--; + } + if (Sgl_iszero_sign(opnd1)) guardbit = TRUE; + stickybit = Sgl_all(opnd1); + } + inexact = guardbit | stickybit; + + /* + * round result + */ + if (inexact && (dest_exponent > 0 || Is_underflowtrap_enabled())) { + Sgl_clear_signexponent(opnd3); + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) + Sgl_increment_mantissa(opnd3); + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) + Sgl_increment_mantissa(opnd3); + break; + case ROUNDNEAREST: + if (guardbit) { + if (stickybit || Sgl_isone_lowmantissa(opnd3)) + Sgl_increment_mantissa(opnd3); + } + } + if (Sgl_isone_hidden(opnd3)) dest_exponent++; + } + Sgl_set_mantissa(result,opnd3); + + /* + * Test for overflow + */ + if (dest_exponent >= SGL_INFINITY_EXPONENT) { + /* trap if OVERFLOWTRAP enabled */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,ovfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + Set_overflowflag(); + /* set result to infinity or largest number */ + Sgl_setoverflow(result); + inexact = TRUE; + } + /* + * Test for underflow + */ + else if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,unfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(UNDERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(UNDERFLOWEXCEPTION); + } + + /* Determine if should set underflow flag */ + is_tiny = TRUE; + if (dest_exponent == 0 && inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) { + Sgl_increment(opnd3); + if (Sgl_isone_hiddenoverflow(opnd3)) + is_tiny = FALSE; + Sgl_decrement(opnd3); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) { + Sgl_increment(opnd3); + if (Sgl_isone_hiddenoverflow(opnd3)) + is_tiny = FALSE; + Sgl_decrement(opnd3); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Sgl_isone_lowmantissa(opnd3))) { + Sgl_increment(opnd3); + if (Sgl_isone_hiddenoverflow(opnd3)) + is_tiny = FALSE; + Sgl_decrement(opnd3); + } + break; + } + } + + /* + * denormalize result or set to signed zero + */ + stickybit = inexact; + Sgl_denormalize(opnd3,dest_exponent,guardbit,stickybit,inexact); + + /* return rounded number */ + if (inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) { + Sgl_increment(opnd3); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) { + Sgl_increment(opnd3); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Sgl_isone_lowmantissa(opnd3))) { + Sgl_increment(opnd3); + } + break; + } + if (is_tiny) Set_underflowflag(); + } + Sgl_set_exponentmantissa(result,opnd3); + } + else Sgl_set_exponent(result,dest_exponent); + *dstptr = result; + /* check for inexact */ + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/sfmpy.c b/kernel/arch/parisc/math-emu/sfmpy.c new file mode 100644 index 000000000..afa406983 --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfmpy.c @@ -0,0 +1,380 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfmpy.c $Revision: 1.1 $ + * + * Purpose: + * Single Precision Floating-point Multiply + * + * External Interfaces: + * sgl_fmpy(srcptr1,srcptr2,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" + +/* + * Single Precision Floating-point Multiply + */ + +int +sgl_fmpy( + sgl_floating_point *srcptr1, + sgl_floating_point *srcptr2, + sgl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int opnd1, opnd2, opnd3, result; + register int dest_exponent, count; + register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE; + boolean is_tiny; + + opnd1 = *srcptr1; + opnd2 = *srcptr2; + /* + * set sign bit of result + */ + if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) Sgl_setnegativezero(result); + else Sgl_setzero(result); + /* + * check first operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd1)) { + if (Sgl_iszero_mantissa(opnd1)) { + if (Sgl_isnotnan(opnd2)) { + if (Sgl_iszero_exponentmantissa(opnd2)) { + /* + * invalid since operands are infinity + * and zero + */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd1); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + *dstptr = opnd1; + return(NOEXCEPTION); + } + } + /* + * check second operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(opnd2)) { + if (Sgl_iszero_mantissa(opnd2)) { + if (Sgl_iszero_exponentmantissa(opnd1)) { + /* invalid since operands are zero & infinity */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(opnd2); + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * return infinity + */ + Sgl_setinfinity_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + } + /* + * return quiet NaN + */ + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * Generate exponent + */ + dest_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; + + /* + * Generate mantissa + */ + if (Sgl_isnotzero_exponent(opnd1)) { + /* set hidden bit */ + Sgl_clear_signexponent_set_hidden(opnd1); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd1)) { + Sgl_setzero_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* is denormalized, adjust exponent */ + Sgl_clear_signexponent(opnd1); + Sgl_leftshiftby1(opnd1); + Sgl_normalize(opnd1,dest_exponent); + } + /* opnd2 needs to have hidden bit set with msb in hidden bit */ + if (Sgl_isnotzero_exponent(opnd2)) { + Sgl_clear_signexponent_set_hidden(opnd2); + } + else { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd2)) { + Sgl_setzero_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* is denormalized; want to normalize */ + Sgl_clear_signexponent(opnd2); + Sgl_leftshiftby1(opnd2); + Sgl_normalize(opnd2,dest_exponent); + } + + /* Multiply two source mantissas together */ + + Sgl_leftshiftby4(opnd2); /* make room for guard bits */ + Sgl_setzero(opnd3); + /* + * Four bits at a time are inspected in each loop, and a + * simple shift and add multiply algorithm is used. + */ + for (count=1;count<SGL_P;count+=4) { + stickybit |= Slow4(opnd3); + Sgl_rightshiftby4(opnd3); + if (Sbit28(opnd1)) Sall(opnd3) += (Sall(opnd2) << 3); + if (Sbit29(opnd1)) Sall(opnd3) += (Sall(opnd2) << 2); + if (Sbit30(opnd1)) Sall(opnd3) += (Sall(opnd2) << 1); + if (Sbit31(opnd1)) Sall(opnd3) += Sall(opnd2); + Sgl_rightshiftby4(opnd1); + } + /* make sure result is left-justified */ + if (Sgl_iszero_sign(opnd3)) { + Sgl_leftshiftby1(opnd3); + } + else { + /* result mantissa >= 2. */ + dest_exponent++; + } + /* check for denormalized result */ + while (Sgl_iszero_sign(opnd3)) { + Sgl_leftshiftby1(opnd3); + dest_exponent--; + } + /* + * check for guard, sticky and inexact bits + */ + stickybit |= Sgl_all(opnd3) << (SGL_BITLENGTH - SGL_EXP_LENGTH + 1); + guardbit = Sbit24(opnd3); + inexact = guardbit | stickybit; + + /* re-align mantissa */ + Sgl_rightshiftby8(opnd3); + + /* + * round result + */ + if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) { + Sgl_clear_signexponent(opnd3); + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) + Sgl_increment(opnd3); + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) + Sgl_increment(opnd3); + break; + case ROUNDNEAREST: + if (guardbit) { + if (stickybit || Sgl_isone_lowmantissa(opnd3)) + Sgl_increment(opnd3); + } + } + if (Sgl_isone_hidden(opnd3)) dest_exponent++; + } + Sgl_set_mantissa(result,opnd3); + + /* + * Test for overflow + */ + if (dest_exponent >= SGL_INFINITY_EXPONENT) { + /* trap if OVERFLOWTRAP enabled */ + if (Is_overflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,ovfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + inexact = TRUE; + Set_overflowflag(); + /* set result to infinity or largest number */ + Sgl_setoverflow(result); + } + /* + * Test for underflow + */ + else if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,unfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(UNDERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(UNDERFLOWEXCEPTION); + } + + /* Determine if should set underflow flag */ + is_tiny = TRUE; + if (dest_exponent == 0 && inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) { + Sgl_increment(opnd3); + if (Sgl_isone_hiddenoverflow(opnd3)) + is_tiny = FALSE; + Sgl_decrement(opnd3); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) { + Sgl_increment(opnd3); + if (Sgl_isone_hiddenoverflow(opnd3)) + is_tiny = FALSE; + Sgl_decrement(opnd3); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Sgl_isone_lowmantissa(opnd3))) { + Sgl_increment(opnd3); + if (Sgl_isone_hiddenoverflow(opnd3)) + is_tiny = FALSE; + Sgl_decrement(opnd3); + } + break; + } + } + + /* + * denormalize result or set to signed zero + */ + stickybit = inexact; + Sgl_denormalize(opnd3,dest_exponent,guardbit,stickybit,inexact); + + /* return zero or smallest number */ + if (inexact) { + switch (Rounding_mode()) { + case ROUNDPLUS: + if (Sgl_iszero_sign(result)) { + Sgl_increment(opnd3); + } + break; + case ROUNDMINUS: + if (Sgl_isone_sign(result)) { + Sgl_increment(opnd3); + } + break; + case ROUNDNEAREST: + if (guardbit && (stickybit || + Sgl_isone_lowmantissa(opnd3))) { + Sgl_increment(opnd3); + } + break; + } + if (is_tiny) Set_underflowflag(); + } + Sgl_set_exponentmantissa(result,opnd3); + } + else Sgl_set_exponent(result,dest_exponent); + *dstptr = result; + + /* check for inexact */ + if (inexact) { + if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + } + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/sfrem.c b/kernel/arch/parisc/math-emu/sfrem.c new file mode 100644 index 000000000..3a1b7a34d --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfrem.c @@ -0,0 +1,290 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfrem.c $Revision: 1.1 $ + * + * Purpose: + * Single Precision Floating-point Remainder + * + * External Interfaces: + * sgl_frem(srcptr1,srcptr2,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + + +#include "float.h" +#include "sgl_float.h" + +/* + * Single Precision Floating-point Remainder + */ + +int +sgl_frem (sgl_floating_point * srcptr1, sgl_floating_point * srcptr2, + sgl_floating_point * dstptr, unsigned int *status) +{ + register unsigned int opnd1, opnd2, result; + register int opnd1_exponent, opnd2_exponent, dest_exponent, stepcount; + register boolean roundup = FALSE; + + opnd1 = *srcptr1; + opnd2 = *srcptr2; + /* + * check first operand for NaN's or infinity + */ + if ((opnd1_exponent = Sgl_exponent(opnd1)) == SGL_INFINITY_EXPONENT) { + if (Sgl_iszero_mantissa(opnd1)) { + if (Sgl_isnotnan(opnd2)) { + /* invalid since first operand is infinity */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + } + else { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd1)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd1); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) + return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + *dstptr = opnd1; + return(NOEXCEPTION); + } + } + /* + * check second operand for NaN's or infinity + */ + if ((opnd2_exponent = Sgl_exponent(opnd2)) == SGL_INFINITY_EXPONENT) { + if (Sgl_iszero_mantissa(opnd2)) { + /* + * return first operand + */ + *dstptr = opnd1; + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(opnd2)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(opnd2); + } + /* + * return quiet NaN + */ + *dstptr = opnd2; + return(NOEXCEPTION); + } + /* + * check second operand for zero + */ + if (Sgl_iszero_exponentmantissa(opnd2)) { + /* invalid since second operand is zero */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + + /* + * get sign of result + */ + result = opnd1; + + /* + * check for denormalized operands + */ + if (opnd1_exponent == 0) { + /* check for zero */ + if (Sgl_iszero_mantissa(opnd1)) { + *dstptr = opnd1; + return(NOEXCEPTION); + } + /* normalize, then continue */ + opnd1_exponent = 1; + Sgl_normalize(opnd1,opnd1_exponent); + } + else { + Sgl_clear_signexponent_set_hidden(opnd1); + } + if (opnd2_exponent == 0) { + /* normalize, then continue */ + opnd2_exponent = 1; + Sgl_normalize(opnd2,opnd2_exponent); + } + else { + Sgl_clear_signexponent_set_hidden(opnd2); + } + + /* find result exponent and divide step loop count */ + dest_exponent = opnd2_exponent - 1; + stepcount = opnd1_exponent - opnd2_exponent; + + /* + * check for opnd1/opnd2 < 1 + */ + if (stepcount < 0) { + /* + * check for opnd1/opnd2 > 1/2 + * + * In this case n will round to 1, so + * r = opnd1 - opnd2 + */ + if (stepcount == -1 && Sgl_isgreaterthan(opnd1,opnd2)) { + Sgl_all(result) = ~Sgl_all(result); /* set sign */ + /* align opnd2 with opnd1 */ + Sgl_leftshiftby1(opnd2); + Sgl_subtract(opnd2,opnd1,opnd2); + /* now normalize */ + while (Sgl_iszero_hidden(opnd2)) { + Sgl_leftshiftby1(opnd2); + dest_exponent--; + } + Sgl_set_exponentmantissa(result,opnd2); + goto testforunderflow; + } + /* + * opnd1/opnd2 <= 1/2 + * + * In this case n will round to zero, so + * r = opnd1 + */ + Sgl_set_exponentmantissa(result,opnd1); + dest_exponent = opnd1_exponent; + goto testforunderflow; + } + + /* + * Generate result + * + * Do iterative subtract until remainder is less than operand 2. + */ + while (stepcount-- > 0 && Sgl_all(opnd1)) { + if (Sgl_isnotlessthan(opnd1,opnd2)) + Sgl_subtract(opnd1,opnd2,opnd1); + Sgl_leftshiftby1(opnd1); + } + /* + * Do last subtract, then determine which way to round if remainder + * is exactly 1/2 of opnd2 + */ + if (Sgl_isnotlessthan(opnd1,opnd2)) { + Sgl_subtract(opnd1,opnd2,opnd1); + roundup = TRUE; + } + if (stepcount > 0 || Sgl_iszero(opnd1)) { + /* division is exact, remainder is zero */ + Sgl_setzero_exponentmantissa(result); + *dstptr = result; + return(NOEXCEPTION); + } + + /* + * Check for cases where opnd1/opnd2 < n + * + * In this case the result's sign will be opposite that of + * opnd1. The mantissa also needs some correction. + */ + Sgl_leftshiftby1(opnd1); + if (Sgl_isgreaterthan(opnd1,opnd2)) { + Sgl_invert_sign(result); + Sgl_subtract((opnd2<<1),opnd1,opnd1); + } + /* check for remainder being exactly 1/2 of opnd2 */ + else if (Sgl_isequal(opnd1,opnd2) && roundup) { + Sgl_invert_sign(result); + } + + /* normalize result's mantissa */ + while (Sgl_iszero_hidden(opnd1)) { + dest_exponent--; + Sgl_leftshiftby1(opnd1); + } + Sgl_set_exponentmantissa(result,opnd1); + + /* + * Test for underflow + */ + testforunderflow: + if (dest_exponent <= 0) { + /* trap if UNDERFLOWTRAP enabled */ + if (Is_underflowtrap_enabled()) { + /* + * Adjust bias of result + */ + Sgl_setwrapped_exponent(result,dest_exponent,unfl); + *dstptr = result; + /* frem is always exact */ + return(UNDERFLOWEXCEPTION); + } + /* + * denormalize result or set to signed zero + */ + if (dest_exponent >= (1 - SGL_P)) { + Sgl_rightshift_exponentmantissa(result,1-dest_exponent); + } + else { + Sgl_setzero_exponentmantissa(result); + } + } + else Sgl_set_exponent(result,dest_exponent); + *dstptr = result; + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/sfsqrt.c b/kernel/arch/parisc/math-emu/sfsqrt.c new file mode 100644 index 000000000..4657a12c9 --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfsqrt.c @@ -0,0 +1,187 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfsqrt.c $Revision: 1.1 $ + * + * Purpose: + * Single Floating-point Square Root + * + * External Interfaces: + * sgl_fsqrt(srcptr,nullptr,dstptr,status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" + +/* + * Single Floating-point Square Root + */ + +/*ARGSUSED*/ +unsigned int +sgl_fsqrt( + sgl_floating_point *srcptr, + unsigned int *nullptr, + sgl_floating_point *dstptr, + unsigned int *status) +{ + register unsigned int src, result; + register int src_exponent; + register unsigned int newbit, sum; + register boolean guardbit = FALSE, even_exponent; + + src = *srcptr; + /* + * check source operand for NaN or infinity + */ + if ((src_exponent = Sgl_exponent(src)) == SGL_INFINITY_EXPONENT) { + /* + * is signaling NaN? + */ + if (Sgl_isone_signaling(src)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(src); + } + /* + * Return quiet NaN or positive infinity. + * Fall through to negative test if negative infinity. + */ + if (Sgl_iszero_sign(src) || Sgl_isnotzero_mantissa(src)) { + *dstptr = src; + return(NOEXCEPTION); + } + } + + /* + * check for zero source operand + */ + if (Sgl_iszero_exponentmantissa(src)) { + *dstptr = src; + return(NOEXCEPTION); + } + + /* + * check for negative source operand + */ + if (Sgl_isone_sign(src)) { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_makequietnan(src); + *dstptr = src; + return(NOEXCEPTION); + } + + /* + * Generate result + */ + if (src_exponent > 0) { + even_exponent = Sgl_hidden(src); + Sgl_clear_signexponent_set_hidden(src); + } + else { + /* normalize operand */ + Sgl_clear_signexponent(src); + src_exponent++; + Sgl_normalize(src,src_exponent); + even_exponent = src_exponent & 1; + } + if (even_exponent) { + /* exponent is even */ + /* Add comment here. Explain why odd exponent needs correction */ + Sgl_leftshiftby1(src); + } + /* + * Add comment here. Explain following algorithm. + * + * Trust me, it works. + * + */ + Sgl_setzero(result); + newbit = 1 << SGL_P; + while (newbit && Sgl_isnotzero(src)) { + Sgl_addition(result,newbit,sum); + if(sum <= Sgl_all(src)) { + /* update result */ + Sgl_addition(result,(newbit<<1),result); + Sgl_subtract(src,sum,src); + } + Sgl_rightshiftby1(newbit); + Sgl_leftshiftby1(src); + } + /* correct exponent for pre-shift */ + if (even_exponent) { + Sgl_rightshiftby1(result); + } + + /* check for inexact */ + if (Sgl_isnotzero(src)) { + if (!even_exponent && Sgl_islessthan(result,src)) + Sgl_increment(result); + guardbit = Sgl_lowmantissa(result); + Sgl_rightshiftby1(result); + + /* now round result */ + switch (Rounding_mode()) { + case ROUNDPLUS: + Sgl_increment(result); + break; + case ROUNDNEAREST: + /* stickybit is always true, so guardbit + * is enough to determine rounding */ + if (guardbit) { + Sgl_increment(result); + } + break; + } + /* increment result exponent by 1 if mantissa overflowed */ + if (Sgl_isone_hiddenoverflow(result)) src_exponent+=2; + + if (Is_inexacttrap_enabled()) { + Sgl_set_exponent(result, + ((src_exponent-SGL_BIAS)>>1)+SGL_BIAS); + *dstptr = result; + return(INEXACTEXCEPTION); + } + else Set_inexactflag(); + } + else { + Sgl_rightshiftby1(result); + } + Sgl_set_exponent(result,((src_exponent-SGL_BIAS)>>1)+SGL_BIAS); + *dstptr = result; + return(NOEXCEPTION); +} diff --git a/kernel/arch/parisc/math-emu/sfsub.c b/kernel/arch/parisc/math-emu/sfsub.c new file mode 100644 index 000000000..5f90d0f31 --- /dev/null +++ b/kernel/arch/parisc/math-emu/sfsub.c @@ -0,0 +1,521 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * BEGIN_DESC + * + * File: + * @(#) pa/spmath/sfsub.c $Revision: 1.1 $ + * + * Purpose: + * Single_subtract: subtract two single precision values. + * + * External Interfaces: + * sgl_fsub(leftptr, rightptr, dstptr, status) + * + * Internal Interfaces: + * + * Theory: + * <<please update with a overview of the operation of this file>> + * + * END_DESC +*/ + + +#include "float.h" +#include "sgl_float.h" + +/* + * Single_subtract: subtract two single precision values. + */ +int +sgl_fsub( + sgl_floating_point *leftptr, + sgl_floating_point *rightptr, + sgl_floating_point *dstptr, + unsigned int *status) + { + register unsigned int left, right, result, extent; + register unsigned int signless_upper_left, signless_upper_right, save; + + register int result_exponent, right_exponent, diff_exponent; + register int sign_save, jumpsize; + register boolean inexact = FALSE, underflowtrap; + + /* Create local copies of the numbers */ + left = *leftptr; + right = *rightptr; + + /* A zero "save" helps discover equal operands (for later), * + * and is used in swapping operands (if needed). */ + Sgl_xortointp1(left,right,/*to*/save); + + /* + * check first operand for NaN's or infinity + */ + if ((result_exponent = Sgl_exponent(left)) == SGL_INFINITY_EXPONENT) + { + if (Sgl_iszero_mantissa(left)) + { + if (Sgl_isnotnan(right)) + { + if (Sgl_isinfinity(right) && save==0) + { + /* + * invalid since operands are same signed infinity's + */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + Set_invalidflag(); + Sgl_makequietnan(result); + *dstptr = result; + return(NOEXCEPTION); + } + /* + * return infinity + */ + *dstptr = left; + return(NOEXCEPTION); + } + } + else + { + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(left)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(left); + } + /* + * is second operand a signaling NaN? + */ + else if (Sgl_is_signalingnan(right)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(right); + *dstptr = right; + return(NOEXCEPTION); + } + /* + * return quiet NaN + */ + *dstptr = left; + return(NOEXCEPTION); + } + } /* End left NaN or Infinity processing */ + /* + * check second operand for NaN's or infinity + */ + if (Sgl_isinfinity_exponent(right)) + { + if (Sgl_iszero_mantissa(right)) + { + /* return infinity */ + Sgl_invert_sign(right); + *dstptr = right; + return(NOEXCEPTION); + } + /* + * is NaN; signaling or quiet? + */ + if (Sgl_isone_signaling(right)) + { + /* trap if INVALIDTRAP enabled */ + if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); + /* make NaN quiet */ + Set_invalidflag(); + Sgl_set_quiet(right); + } + /* + * return quiet NaN + */ + *dstptr = right; + return(NOEXCEPTION); + } /* End right NaN or Infinity processing */ + + /* Invariant: Must be dealing with finite numbers */ + + /* Compare operands by removing the sign */ + Sgl_copytoint_exponentmantissa(left,signless_upper_left); + Sgl_copytoint_exponentmantissa(right,signless_upper_right); + + /* sign difference selects sub or add operation. */ + if(Sgl_ismagnitudeless(signless_upper_left,signless_upper_right)) + { + /* Set the left operand to the larger one by XOR swap * + * First finish the first word using "save" */ + Sgl_xorfromintp1(save,right,/*to*/right); + Sgl_xorfromintp1(save,left,/*to*/left); + result_exponent = Sgl_exponent(left); + Sgl_invert_sign(left); + } + /* Invariant: left is not smaller than right. */ + + if((right_exponent = Sgl_exponent(right)) == 0) + { + /* Denormalized operands. First look for zeroes */ + if(Sgl_iszero_mantissa(right)) + { + /* right is zero */ + if(Sgl_iszero_exponentmantissa(left)) + { + /* Both operands are zeros */ + Sgl_invert_sign(right); + if(Is_rounding_mode(ROUNDMINUS)) + { + Sgl_or_signs(left,/*with*/right); + } + else + { + Sgl_and_signs(left,/*with*/right); + } + } + else + { + /* Left is not a zero and must be the result. Trapped + * underflows are signaled if left is denormalized. Result + * is always exact. */ + if( (result_exponent == 0) && Is_underflowtrap_enabled() ) + { + /* need to normalize results mantissa */ + sign_save = Sgl_signextendedsign(left); + Sgl_leftshiftby1(left); + Sgl_normalize(left,result_exponent); + Sgl_set_sign(left,/*using*/sign_save); + Sgl_setwrapped_exponent(left,result_exponent,unfl); + *dstptr = left; + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + } + *dstptr = left; + return(NOEXCEPTION); + } + + /* Neither are zeroes */ + Sgl_clear_sign(right); /* Exponent is already cleared */ + if(result_exponent == 0 ) + { + /* Both operands are denormalized. The result must be exact + * and is simply calculated. A sum could become normalized and a + * difference could cancel to a true zero. */ + if( (/*signed*/int) save >= 0 ) + { + Sgl_subtract(left,/*minus*/right,/*into*/result); + if(Sgl_iszero_mantissa(result)) + { + if(Is_rounding_mode(ROUNDMINUS)) + { + Sgl_setone_sign(result); + } + else + { + Sgl_setzero_sign(result); + } + *dstptr = result; + return(NOEXCEPTION); + } + } + else + { + Sgl_addition(left,right,/*into*/result); + if(Sgl_isone_hidden(result)) + { + *dstptr = result; + return(NOEXCEPTION); + } + } + if(Is_underflowtrap_enabled()) + { + /* need to normalize result */ + sign_save = Sgl_signextendedsign(result); + Sgl_leftshiftby1(result); + Sgl_normalize(result,result_exponent); + Sgl_set_sign(result,/*using*/sign_save); + Sgl_setwrapped_exponent(result,result_exponent,unfl); + *dstptr = result; + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + *dstptr = result; + return(NOEXCEPTION); + } + right_exponent = 1; /* Set exponent to reflect different bias + * with denomalized numbers. */ + } + else + { + Sgl_clear_signexponent_set_hidden(right); + } + Sgl_clear_exponent_set_hidden(left); + diff_exponent = result_exponent - right_exponent; + + /* + * Special case alignment of operands that would force alignment + * beyond the extent of the extension. A further optimization + * could special case this but only reduces the path length for this + * infrequent case. + */ + if(diff_exponent > SGL_THRESHOLD) + { + diff_exponent = SGL_THRESHOLD; + } + + /* Align right operand by shifting to right */ + Sgl_right_align(/*operand*/right,/*shifted by*/diff_exponent, + /*and lower to*/extent); + + /* Treat sum and difference of the operands separately. */ + if( (/*signed*/int) save >= 0 ) + { + /* + * Difference of the two operands. Their can be no overflow. A + * borrow can occur out of the hidden bit and force a post + * normalization phase. + */ + Sgl_subtract_withextension(left,/*minus*/right,/*with*/extent,/*into*/result); + if(Sgl_iszero_hidden(result)) + { + /* Handle normalization */ + /* A straightforward algorithm would now shift the result + * and extension left until the hidden bit becomes one. Not + * all of the extension bits need participate in the shift. + * Only the two most significant bits (round and guard) are + * needed. If only a single shift is needed then the guard + * bit becomes a significant low order bit and the extension + * must participate in the rounding. If more than a single + * shift is needed, then all bits to the right of the guard + * bit are zeros, and the guard bit may or may not be zero. */ + sign_save = Sgl_signextendedsign(result); + Sgl_leftshiftby1_withextent(result,extent,result); + + /* Need to check for a zero result. The sign and exponent + * fields have already been zeroed. The more efficient test + * of the full object can be used. + */ + if(Sgl_iszero(result)) + /* Must have been "x-x" or "x+(-x)". */ + { + if(Is_rounding_mode(ROUNDMINUS)) Sgl_setone_sign(result); + *dstptr = result; + return(NOEXCEPTION); + } + result_exponent--; + /* Look to see if normalization is finished. */ + if(Sgl_isone_hidden(result)) + { + if(result_exponent==0) + { + /* Denormalized, exponent should be zero. Left operand * + * was normalized, so extent (guard, round) was zero */ + goto underflow; + } + else + { + /* No further normalization is needed. */ + Sgl_set_sign(result,/*using*/sign_save); + Ext_leftshiftby1(extent); + goto round; + } + } + + /* Check for denormalized, exponent should be zero. Left * + * operand was normalized, so extent (guard, round) was zero */ + if(!(underflowtrap = Is_underflowtrap_enabled()) && + result_exponent==0) goto underflow; + + /* Shift extension to complete one bit of normalization and + * update exponent. */ + Ext_leftshiftby1(extent); + + /* Discover first one bit to determine shift amount. Use a + * modified binary search. We have already shifted the result + * one position right and still not found a one so the remainder + * of the extension must be zero and simplifies rounding. */ + /* Scan bytes */ + while(Sgl_iszero_hiddenhigh7mantissa(result)) + { + Sgl_leftshiftby8(result); + if((result_exponent -= 8) <= 0 && !underflowtrap) + goto underflow; + } + /* Now narrow it down to the nibble */ + if(Sgl_iszero_hiddenhigh3mantissa(result)) + { + /* The lower nibble contains the normalizing one */ + Sgl_leftshiftby4(result); + if((result_exponent -= 4) <= 0 && !underflowtrap) + goto underflow; + } + /* Select case were first bit is set (already normalized) + * otherwise select the proper shift. */ + if((jumpsize = Sgl_hiddenhigh3mantissa(result)) > 7) + { + /* Already normalized */ + if(result_exponent <= 0) goto underflow; + Sgl_set_sign(result,/*using*/sign_save); + Sgl_set_exponent(result,/*using*/result_exponent); + *dstptr = result; + return(NOEXCEPTION); + } + Sgl_sethigh4bits(result,/*using*/sign_save); + switch(jumpsize) + { + case 1: + { + Sgl_leftshiftby3(result); + result_exponent -= 3; + break; + } + case 2: + case 3: + { + Sgl_leftshiftby2(result); + result_exponent -= 2; + break; + } + case 4: + case 5: + case 6: + case 7: + { + Sgl_leftshiftby1(result); + result_exponent -= 1; + break; + } + } + if(result_exponent > 0) + { + Sgl_set_exponent(result,/*using*/result_exponent); + *dstptr = result; /* Sign bit is already set */ + return(NOEXCEPTION); + } + /* Fixup potential underflows */ + underflow: + if(Is_underflowtrap_enabled()) + { + Sgl_set_sign(result,sign_save); + Sgl_setwrapped_exponent(result,result_exponent,unfl); + *dstptr = result; + /* inexact = FALSE */ + return(UNDERFLOWEXCEPTION); + } + /* + * Since we cannot get an inexact denormalized result, + * we can now return. + */ + Sgl_right_align(result,/*by*/(1-result_exponent),extent); + Sgl_clear_signexponent(result); + Sgl_set_sign(result,sign_save); + *dstptr = result; + return(NOEXCEPTION); + } /* end if(hidden...)... */ + /* Fall through and round */ + } /* end if(save >= 0)... */ + else + { + /* Add magnitudes */ + Sgl_addition(left,right,/*to*/result); + if(Sgl_isone_hiddenoverflow(result)) + { + /* Prenormalization required. */ + Sgl_rightshiftby1_withextent(result,extent,extent); + Sgl_arithrightshiftby1(result); + result_exponent++; + } /* end if hiddenoverflow... */ + } /* end else ...sub magnitudes... */ + + /* Round the result. If the extension is all zeros,then the result is + * exact. Otherwise round in the correct direction. No underflow is + * possible. If a postnormalization is necessary, then the mantissa is + * all zeros so no shift is needed. */ + round: + if(Ext_isnotzero(extent)) + { + inexact = TRUE; + switch(Rounding_mode()) + { + case ROUNDNEAREST: /* The default. */ + if(Ext_isone_sign(extent)) + { + /* at least 1/2 ulp */ + if(Ext_isnotzero_lower(extent) || + Sgl_isone_lowmantissa(result)) + { + /* either exactly half way and odd or more than 1/2ulp */ + Sgl_increment(result); + } + } + break; + + case ROUNDPLUS: + if(Sgl_iszero_sign(result)) + { + /* Round up positive results */ + Sgl_increment(result); + } + break; + + case ROUNDMINUS: + if(Sgl_isone_sign(result)) + { + /* Round down negative results */ + Sgl_increment(result); + } + + case ROUNDZERO:; + /* truncate is simple */ + } /* end switch... */ + if(Sgl_isone_hiddenoverflow(result)) result_exponent++; + } + if(result_exponent == SGL_INFINITY_EXPONENT) + { + /* Overflow */ + if(Is_overflowtrap_enabled()) + { + Sgl_setwrapped_exponent(result,result_exponent,ovfl); + *dstptr = result; + if (inexact) + if (Is_inexacttrap_enabled()) + return(OVERFLOWEXCEPTION | INEXACTEXCEPTION); + else Set_inexactflag(); + return(OVERFLOWEXCEPTION); + } + else + { + Set_overflowflag(); + inexact = TRUE; + Sgl_setoverflow(result); + } + } + else Sgl_set_exponent(result,result_exponent); + *dstptr = result; + if(inexact) + if(Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); + else Set_inexactflag(); + return(NOEXCEPTION); + } diff --git a/kernel/arch/parisc/math-emu/sgl_float.h b/kernel/arch/parisc/math-emu/sgl_float.h new file mode 100644 index 000000000..4ee4cc95e --- /dev/null +++ b/kernel/arch/parisc/math-emu/sgl_float.h @@ -0,0 +1,486 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Floating-point emulation code + * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef __NO_PA_HDRS + PA header file -- do not include this header file for non-PA builds. +#endif + +/* 32-bit word grabbing functions */ +#define Sgl_firstword(value) Sall(value) +#define Sgl_secondword(value) dummy_location +#define Sgl_thirdword(value) dummy_location +#define Sgl_fourthword(value) dummy_location + +#define Sgl_sign(object) Ssign(object) +#define Sgl_exponent(object) Sexponent(object) +#define Sgl_signexponent(object) Ssignexponent(object) +#define Sgl_mantissa(object) Smantissa(object) +#define Sgl_exponentmantissa(object) Sexponentmantissa(object) +#define Sgl_all(object) Sall(object) + +/* sgl_and_signs ANDs the sign bits of each argument and puts the result + * into the first argument. sgl_or_signs ors those same sign bits */ +#define Sgl_and_signs( src1dst, src2) \ + Sall(src1dst) = (Sall(src2)|~((unsigned int)1<<31)) & Sall(src1dst) +#define Sgl_or_signs( src1dst, src2) \ + Sall(src1dst) = (Sall(src2)&((unsigned int)1<<31)) | Sall(src1dst) + +/* The hidden bit is always the low bit of the exponent */ +#define Sgl_clear_exponent_set_hidden(srcdst) Deposit_sexponent(srcdst,1) +#define Sgl_clear_signexponent_set_hidden(srcdst) \ + Deposit_ssignexponent(srcdst,1) +#define Sgl_clear_sign(srcdst) Sall(srcdst) &= ~((unsigned int)1<<31) +#define Sgl_clear_signexponent(srcdst) Sall(srcdst) &= 0x007fffff + +/* varamount must be less than 32 for the next three functions */ +#define Sgl_rightshift(srcdst, varamount) \ + Sall(srcdst) >>= varamount +#define Sgl_leftshift(srcdst, varamount) \ + Sall(srcdst) <<= varamount +#define Sgl_rightshift_exponentmantissa(srcdst, varamount) \ + Sall(srcdst) = \ + (Sexponentmantissa(srcdst) >> varamount) | \ + (Sall(srcdst) & ((unsigned int)1<<31)) + +#define Sgl_leftshiftby1_withextent(left,right,result) \ + Shiftdouble(Sall(left),Extall(right),31,Sall(result)) + +#define Sgl_rightshiftby1_withextent(left,right,dst) \ + Shiftdouble(Sall(left),Extall(right),1,Extall(right)) +#define Sgl_arithrightshiftby1(srcdst) \ + Sall(srcdst) = (int)Sall(srcdst) >> 1 + +/* Sign extend the sign bit with an integer destination */ +#define Sgl_signextendedsign(value) Ssignedsign(value) + +#define Sgl_isone_hidden(sgl_value) (Shidden(sgl_value)) +#define Sgl_increment(sgl_value) Sall(sgl_value) += 1 +#define Sgl_increment_mantissa(sgl_value) \ + Deposit_smantissa(sgl_value,sgl_value+1) +#define Sgl_decrement(sgl_value) Sall(sgl_value) -= 1 + +#define Sgl_isone_sign(sgl_value) (Is_ssign(sgl_value)!=0) +#define Sgl_isone_hiddenoverflow(sgl_value) \ + (Is_shiddenoverflow(sgl_value)!=0) +#define Sgl_isone_lowmantissa(sgl_value) (Is_slow(sgl_value)!=0) +#define Sgl_isone_signaling(sgl_value) (Is_ssignaling(sgl_value)!=0) +#define Sgl_is_signalingnan(sgl_value) (Ssignalingnan(sgl_value)==0x1ff) +#define Sgl_isnotzero(sgl_value) (Sall(sgl_value)!=0) +#define Sgl_isnotzero_hiddenhigh7mantissa(sgl_value) \ + (Shiddenhigh7mantissa(sgl_value)!=0) +#define Sgl_isnotzero_low4(sgl_value) (Slow4(sgl_value)!=0) +#define Sgl_isnotzero_exponent(sgl_value) (Sexponent(sgl_value)!=0) +#define Sgl_isnotzero_mantissa(sgl_value) (Smantissa(sgl_value)!=0) +#define Sgl_isnotzero_exponentmantissa(sgl_value) \ + (Sexponentmantissa(sgl_value)!=0) +#define Sgl_iszero(sgl_value) (Sall(sgl_value)==0) +#define Sgl_iszero_signaling(sgl_value) (Is_ssignaling(sgl_value)==0) +#define Sgl_iszero_hidden(sgl_value) (Is_shidden(sgl_value)==0) +#define Sgl_iszero_hiddenoverflow(sgl_value) \ + (Is_shiddenoverflow(sgl_value)==0) +#define Sgl_iszero_hiddenhigh3mantissa(sgl_value) \ + (Shiddenhigh3mantissa(sgl_value)==0) +#define Sgl_iszero_hiddenhigh7mantissa(sgl_value) \ + (Shiddenhigh7mantissa(sgl_value)==0) +#define Sgl_iszero_sign(sgl_value) (Is_ssign(sgl_value)==0) +#define Sgl_iszero_exponent(sgl_value) (Sexponent(sgl_value)==0) +#define Sgl_iszero_mantissa(sgl_value) (Smantissa(sgl_value)==0) +#define Sgl_iszero_exponentmantissa(sgl_value) \ + (Sexponentmantissa(sgl_value)==0) +#define Sgl_isinfinity_exponent(sgl_value) \ + (Sgl_exponent(sgl_value)==SGL_INFINITY_EXPONENT) +#define Sgl_isnotinfinity_exponent(sgl_value) \ + (Sgl_exponent(sgl_value)!=SGL_INFINITY_EXPONENT) +#define Sgl_isinfinity(sgl_value) \ + (Sgl_exponent(sgl_value)==SGL_INFINITY_EXPONENT && \ + Sgl_mantissa(sgl_value)==0) +#define Sgl_isnan(sgl_value) \ + (Sgl_exponent(sgl_value)==SGL_INFINITY_EXPONENT && \ + Sgl_mantissa(sgl_value)!=0) +#define Sgl_isnotnan(sgl_value) \ + (Sgl_exponent(sgl_value)!=SGL_INFINITY_EXPONENT || \ + Sgl_mantissa(sgl_value)==0) +#define Sgl_islessthan(sgl_op1,sgl_op2) \ + (Sall(sgl_op1) < Sall(sgl_op2)) +#define Sgl_isgreaterthan(sgl_op1,sgl_op2) \ + (Sall(sgl_op1) > Sall(sgl_op2)) +#define Sgl_isnotlessthan(sgl_op1,sgl_op2) \ + (Sall(sgl_op1) >= Sall(sgl_op2)) +#define Sgl_isequal(sgl_op1,sgl_op2) \ + (Sall(sgl_op1) == Sall(sgl_op2)) + +#define Sgl_leftshiftby8(sgl_value) \ + Sall(sgl_value) <<= 8 +#define Sgl_leftshiftby4(sgl_value) \ + Sall(sgl_value) <<= 4 +#define Sgl_leftshiftby3(sgl_value) \ + Sall(sgl_value) <<= 3 +#define Sgl_leftshiftby2(sgl_value) \ + Sall(sgl_value) <<= 2 +#define Sgl_leftshiftby1(sgl_value) \ + Sall(sgl_value) <<= 1 +#define Sgl_rightshiftby1(sgl_value) \ + Sall(sgl_value) >>= 1 +#define Sgl_rightshiftby4(sgl_value) \ + Sall(sgl_value) >>= 4 +#define Sgl_rightshiftby8(sgl_value) \ + Sall(sgl_value) >>= 8 + +#define Sgl_ismagnitudeless(signlessleft,signlessright) \ +/* unsigned int signlessleft, signlessright; */ \ + (signlessleft < signlessright) + + +#define Sgl_copytoint_exponentmantissa(source,dest) \ + dest = Sexponentmantissa(source) + +/* A quiet NaN has the high mantissa bit clear and at least on other (in this + * case the adjacent bit) bit set. */ +#define Sgl_set_quiet(sgl_value) Deposit_shigh2mantissa(sgl_value,1) +#define Sgl_set_exponent(sgl_value,exp) Deposit_sexponent(sgl_value,exp) + +#define Sgl_set_mantissa(dest,value) Deposit_smantissa(dest,value) +#define Sgl_set_exponentmantissa(dest,value) \ + Deposit_sexponentmantissa(dest,value) + +/* An infinity is represented with the max exponent and a zero mantissa */ +#define Sgl_setinfinity_exponent(sgl_value) \ + Deposit_sexponent(sgl_value,SGL_INFINITY_EXPONENT) +#define Sgl_setinfinity_exponentmantissa(sgl_value) \ + Deposit_sexponentmantissa(sgl_value, \ + (SGL_INFINITY_EXPONENT << (32-(1+SGL_EXP_LENGTH)))) +#define Sgl_setinfinitypositive(sgl_value) \ + Sall(sgl_value) = (SGL_INFINITY_EXPONENT << (32-(1+SGL_EXP_LENGTH))) +#define Sgl_setinfinitynegative(sgl_value) \ + Sall(sgl_value) = (SGL_INFINITY_EXPONENT << (32-(1+SGL_EXP_LENGTH))) \ + | ((unsigned int)1<<31) +#define Sgl_setinfinity(sgl_value,sign) \ + Sall(sgl_value) = (SGL_INFINITY_EXPONENT << (32-(1+SGL_EXP_LENGTH))) | \ + ((unsigned int)sign << 31) +#define Sgl_sethigh4bits(sgl_value, extsign) \ + Deposit_shigh4(sgl_value,extsign) +#define Sgl_set_sign(sgl_value,sign) Deposit_ssign(sgl_value,sign) +#define Sgl_invert_sign(sgl_value) \ + Deposit_ssign(sgl_value,~Ssign(sgl_value)) +#define Sgl_setone_sign(sgl_value) Deposit_ssign(sgl_value,1) +#define Sgl_setone_lowmantissa(sgl_value) Deposit_slow(sgl_value,1) +#define Sgl_setzero_sign(sgl_value) Sall(sgl_value) &= 0x7fffffff +#define Sgl_setzero_exponent(sgl_value) Sall(sgl_value) &= 0x807fffff +#define Sgl_setzero_mantissa(sgl_value) Sall(sgl_value) &= 0xff800000 +#define Sgl_setzero_exponentmantissa(sgl_value) Sall(sgl_value) &= 0x80000000 +#define Sgl_setzero(sgl_value) Sall(sgl_value) = 0 +#define Sgl_setnegativezero(sgl_value) Sall(sgl_value) = (unsigned int)1 << 31 + +/* Use following macro for both overflow & underflow conditions */ +#define ovfl - +#define unfl + +#define Sgl_setwrapped_exponent(sgl_value,exponent,op) \ + Deposit_sexponent(sgl_value,(exponent op SGL_WRAP)) + +#define Sgl_setlargestpositive(sgl_value) \ + Sall(sgl_value) = ((SGL_EMAX+SGL_BIAS) << (32-(1+SGL_EXP_LENGTH))) \ + | ((1<<(32-(1+SGL_EXP_LENGTH))) - 1 ) +#define Sgl_setlargestnegative(sgl_value) \ + Sall(sgl_value) = ((SGL_EMAX+SGL_BIAS) << (32-(1+SGL_EXP_LENGTH))) \ + | ((1<<(32-(1+SGL_EXP_LENGTH))) - 1 ) \ + | ((unsigned int)1<<31) + +#define Sgl_setnegativeinfinity(sgl_value) \ + Sall(sgl_value) = \ + ((1<<SGL_EXP_LENGTH) | SGL_INFINITY_EXPONENT) << (32-(1+SGL_EXP_LENGTH)) +#define Sgl_setlargest(sgl_value,sign) \ + Sall(sgl_value) = (unsigned int)sign << 31 | \ + (((SGL_EMAX+SGL_BIAS) << (32-(1+SGL_EXP_LENGTH))) \ + | ((1 << (32-(1+SGL_EXP_LENGTH))) - 1 )) +#define Sgl_setlargest_exponentmantissa(sgl_value) \ + Sall(sgl_value) = Sall(sgl_value) & ((unsigned int)1<<31) | \ + (((SGL_EMAX+SGL_BIAS) << (32-(1+SGL_EXP_LENGTH))) \ + | ((1 << (32-(1+SGL_EXP_LENGTH))) - 1 )) + +/* The high bit is always zero so arithmetic or logical shifts will work. */ +#define Sgl_right_align(srcdst,shift,extent) \ + /* sgl_floating_point srcdst; int shift; extension extent */ \ + if (shift < 32) { \ + Extall(extent) = Sall(srcdst) << (32-(shift)); \ + Sall(srcdst) >>= shift; \ + } \ + else { \ + Extall(extent) = Sall(srcdst); \ + Sall(srcdst) = 0; \ + } +#define Sgl_hiddenhigh3mantissa(sgl_value) Shiddenhigh3mantissa(sgl_value) +#define Sgl_hidden(sgl_value) Shidden(sgl_value) +#define Sgl_lowmantissa(sgl_value) Slow(sgl_value) + +/* The left argument is never smaller than the right argument */ +#define Sgl_subtract(sgl_left,sgl_right,sgl_result) \ + Sall(sgl_result) = Sall(sgl_left) - Sall(sgl_right) + +/* Subtract right augmented with extension from left augmented with zeros and + * store into result and extension. */ +#define Sgl_subtract_withextension(left,right,extent,result) \ + /* sgl_floating_point left,right,result; extension extent */ \ + Sgl_subtract(left,right,result); \ + if((Extall(extent) = 0-Extall(extent))) \ + Sall(result) = Sall(result)-1 + +#define Sgl_addition(sgl_left,sgl_right,sgl_result) \ + Sall(sgl_result) = Sall(sgl_left) + Sall(sgl_right) + +#define Sgl_xortointp1(left,right,result) \ + result = Sall(left) XOR Sall(right); + +#define Sgl_xorfromintp1(left,right,result) \ + Sall(result) = left XOR Sall(right) + +/* Need to Initialize */ +#define Sgl_makequietnan(dest) \ + Sall(dest) = ((SGL_EMAX+SGL_BIAS)+1)<< (32-(1+SGL_EXP_LENGTH)) \ + | (1<<(32-(1+SGL_EXP_LENGTH+2))) +#define Sgl_makesignalingnan(dest) \ + Sall(dest) = ((SGL_EMAX+SGL_BIAS)+1)<< (32-(1+SGL_EXP_LENGTH)) \ + | (1<<(32-(1+SGL_EXP_LENGTH+1))) + +#define Sgl_normalize(sgl_opnd,exponent) \ + while(Sgl_iszero_hiddenhigh7mantissa(sgl_opnd)) { \ + Sgl_leftshiftby8(sgl_opnd); \ + exponent -= 8; \ + } \ + if(Sgl_iszero_hiddenhigh3mantissa(sgl_opnd)) { \ + Sgl_leftshiftby4(sgl_opnd); \ + exponent -= 4; \ + } \ + while(Sgl_iszero_hidden(sgl_opnd)) { \ + Sgl_leftshiftby1(sgl_opnd); \ + exponent -= 1; \ + } + +#define Sgl_setoverflow(sgl_opnd) \ + /* set result to infinity or largest number */ \ + switch (Rounding_mode()) { \ + case ROUNDPLUS: \ + if (Sgl_isone_sign(sgl_opnd)) { \ + Sgl_setlargestnegative(sgl_opnd); \ + } \ + else { \ + Sgl_setinfinitypositive(sgl_opnd); \ + } \ + break; \ + case ROUNDMINUS: \ + if (Sgl_iszero_sign(sgl_opnd)) { \ + Sgl_setlargestpositive(sgl_opnd); \ + } \ + else { \ + Sgl_setinfinitynegative(sgl_opnd); \ + } \ + break; \ + case ROUNDNEAREST: \ + Sgl_setinfinity_exponentmantissa(sgl_opnd); \ + break; \ + case ROUNDZERO: \ + Sgl_setlargest_exponentmantissa(sgl_opnd); \ + } + +#define Sgl_denormalize(opnd,exponent,guard,sticky,inexact) \ + Sgl_clear_signexponent_set_hidden(opnd); \ + if (exponent >= (1 - SGL_P)) { \ + guard = (Sall(opnd) >> -exponent) & 1; \ + if (exponent < 0) sticky |= Sall(opnd) << (32+exponent); \ + inexact = guard | sticky; \ + Sall(opnd) >>= (1-exponent); \ + } \ + else { \ + guard = 0; \ + sticky |= Sall(opnd); \ + inexact = sticky; \ + Sgl_setzero(opnd); \ + } + +/* + * The fused multiply add instructions requires a single extended format, + * with 48 bits of mantissa. + */ +#define SGLEXT_THRESHOLD 48 + +#define Sglext_setzero(valA,valB) \ + Sextallp1(valA) = 0; Sextallp2(valB) = 0 + +#define Sglext_isnotzero_mantissap2(valB) (Sextallp2(valB)!=0) +#define Sglext_isone_lowp1(val) (Sextlowp1(val)!=0) +#define Sglext_isone_highp2(val) (Sexthighp2(val)!=0) +#define Sglext_isnotzero_low31p2(val) (Sextlow31p2(val)!=0) +#define Sglext_iszero(valA,valB) (Sextallp1(valA)==0 && Sextallp2(valB)==0) + +#define Sgl_copytoptr(src,destptr) *destptr = src +#define Sgl_copyfromptr(srcptr,dest) dest = *srcptr +#define Sglext_copy(srca,srcb,desta,destb) \ + Sextallp1(desta) = Sextallp1(srca); \ + Sextallp2(destb) = Sextallp2(srcb) +#define Sgl_copyto_sglext(src1,dest1,dest2) \ + Sextallp1(dest1) = Sall(src1); Sextallp2(dest2) = 0 + +#define Sglext_swap_lower(leftp2,rightp2) \ + Sextallp2(leftp2) = Sextallp2(leftp2) XOR Sextallp2(rightp2); \ + Sextallp2(rightp2) = Sextallp2(leftp2) XOR Sextallp2(rightp2); \ + Sextallp2(leftp2) = Sextallp2(leftp2) XOR Sextallp2(rightp2) + +#define Sglext_setone_lowmantissap2(value) Deposit_dlowp2(value,1) + +/* The high bit is always zero so arithmetic or logical shifts will work. */ +#define Sglext_right_align(srcdstA,srcdstB,shift) \ + {int shiftamt, sticky; \ + shiftamt = shift % 32; \ + sticky = 0; \ + switch (shift/32) { \ + case 0: if (shiftamt > 0) { \ + sticky = Sextallp2(srcdstB) << 32 - (shiftamt); \ + Variable_shift_double(Sextallp1(srcdstA), \ + Sextallp2(srcdstB),shiftamt,Sextallp2(srcdstB)); \ + Sextallp1(srcdstA) >>= shiftamt; \ + } \ + break; \ + case 1: if (shiftamt > 0) { \ + sticky = (Sextallp1(srcdstA) << 32 - (shiftamt)) | \ + Sextallp2(srcdstB); \ + } \ + else { \ + sticky = Sextallp2(srcdstB); \ + } \ + Sextallp2(srcdstB) = Sextallp1(srcdstA) >> shiftamt; \ + Sextallp1(srcdstA) = 0; \ + break; \ + } \ + if (sticky) Sglext_setone_lowmantissap2(srcdstB); \ + } + +/* The left argument is never smaller than the right argument */ +#define Sglext_subtract(lefta,leftb,righta,rightb,resulta,resultb) \ + if( Sextallp2(rightb) > Sextallp2(leftb) ) Sextallp1(lefta)--; \ + Sextallp2(resultb) = Sextallp2(leftb) - Sextallp2(rightb); \ + Sextallp1(resulta) = Sextallp1(lefta) - Sextallp1(righta) + +#define Sglext_addition(lefta,leftb,righta,rightb,resulta,resultb) \ + /* If the sum of the low words is less than either source, then \ + * an overflow into the next word occurred. */ \ + if ((Sextallp2(resultb) = Sextallp2(leftb)+Sextallp2(rightb)) < \ + Sextallp2(rightb)) \ + Sextallp1(resulta) = Sextallp1(lefta)+Sextallp1(righta)+1; \ + else Sextallp1(resulta) = Sextallp1(lefta)+Sextallp1(righta) + + +#define Sglext_arithrightshiftby1(srcdstA,srcdstB) \ + Shiftdouble(Sextallp1(srcdstA),Sextallp2(srcdstB),1,Sextallp2(srcdstB)); \ + Sextallp1(srcdstA) = (int)Sextallp1(srcdstA) >> 1 + +#define Sglext_leftshiftby8(valA,valB) \ + Shiftdouble(Sextallp1(valA),Sextallp2(valB),24,Sextallp1(valA)); \ + Sextallp2(valB) <<= 8 +#define Sglext_leftshiftby4(valA,valB) \ + Shiftdouble(Sextallp1(valA),Sextallp2(valB),28,Sextallp1(valA)); \ + Sextallp2(valB) <<= 4 +#define Sglext_leftshiftby3(valA,valB) \ + Shiftdouble(Sextallp1(valA),Sextallp2(valB),29,Sextallp1(valA)); \ + Sextallp2(valB) <<= 3 +#define Sglext_leftshiftby2(valA,valB) \ + Shiftdouble(Sextallp1(valA),Sextallp2(valB),30,Sextallp1(valA)); \ + Sextallp2(valB) <<= 2 +#define Sglext_leftshiftby1(valA,valB) \ + Shiftdouble(Sextallp1(valA),Sextallp2(valB),31,Sextallp1(valA)); \ + Sextallp2(valB) <<= 1 + +#define Sglext_rightshiftby4(valueA,valueB) \ + Shiftdouble(Sextallp1(valueA),Sextallp2(valueB),4,Sextallp2(valueB)); \ + Sextallp1(valueA) >>= 4 +#define Sglext_rightshiftby3(valueA,valueB) \ + Shiftdouble(Sextallp1(valueA),Sextallp2(valueB),3,Sextallp2(valueB)); \ + Sextallp1(valueA) >>= 3 +#define Sglext_rightshiftby1(valueA,valueB) \ + Shiftdouble(Sextallp1(valueA),Sextallp2(valueB),1,Sextallp2(valueB)); \ + Sextallp1(valueA) >>= 1 + +#define Sglext_xortointp1(left,right,result) Sgl_xortointp1(left,right,result) +#define Sglext_xorfromintp1(left,right,result) \ + Sgl_xorfromintp1(left,right,result) +#define Sglext_copytoint_exponentmantissa(src,dest) \ + Sgl_copytoint_exponentmantissa(src,dest) +#define Sglext_ismagnitudeless(signlessleft,signlessright) \ + Sgl_ismagnitudeless(signlessleft,signlessright) + +#define Sglext_set_sign(dbl_value,sign) Sgl_set_sign(dbl_value,sign) +#define Sglext_clear_signexponent_set_hidden(srcdst) \ + Sgl_clear_signexponent_set_hidden(srcdst) +#define Sglext_clear_signexponent(srcdst) Sgl_clear_signexponent(srcdst) +#define Sglext_clear_sign(srcdst) Sgl_clear_sign(srcdst) +#define Sglext_isone_hidden(dbl_value) Sgl_isone_hidden(dbl_value) + +#define Sglext_denormalize(opndp1,opndp2,exponent,is_tiny) \ + {int sticky; \ + is_tiny = TRUE; \ + if (exponent == 0 && Sextallp2(opndp2)) { \ + switch (Rounding_mode()) { \ + case ROUNDPLUS: \ + if (Sgl_iszero_sign(opndp1)) \ + if (Sgl_isone_hiddenoverflow(opndp1 + 1)) \ + is_tiny = FALSE; \ + break; \ + case ROUNDMINUS: \ + if (Sgl_isone_sign(opndp1)) { \ + if (Sgl_isone_hiddenoverflow(opndp1 + 1)) \ + is_tiny = FALSE; \ + } \ + break; \ + case ROUNDNEAREST: \ + if (Sglext_isone_highp2(opndp2) && \ + (Sglext_isone_lowp1(opndp1) || \ + Sglext_isnotzero_low31p2(opndp2))) \ + if (Sgl_isone_hiddenoverflow(opndp1 + 1)) \ + is_tiny = FALSE; \ + break; \ + } \ + } \ + Sglext_clear_signexponent_set_hidden(opndp1); \ + if (exponent >= (1-DBL_P)) { \ + if (exponent >= -31) { \ + if (exponent > -31) { \ + sticky = Sextallp2(opndp2) << 31+exponent; \ + Variable_shift_double(opndp1,opndp2,1-exponent,opndp2); \ + Sextallp1(opndp1) >>= 1-exponent; \ + } \ + else { \ + sticky = Sextallp2(opndp2); \ + Sextallp2(opndp2) = Sextallp1(opndp1); \ + Sextallp1(opndp1) = 0; \ + } \ + } \ + else { \ + sticky = (Sextallp1(opndp1) << 31+exponent) | \ + Sextallp2(opndp2); \ + Sextallp2(opndp2) = Sextallp1(opndp1) >> -31-exponent; \ + Sextallp1(opndp1) = 0; \ + } \ + } \ + else { \ + sticky = Sextallp1(opndp1) | Sextallp2(opndp2); \ + Sglext_setzero(opndp1,opndp2); \ + } \ + if (sticky) Sglext_setone_lowmantissap2(opndp2); \ + exponent = 0; \ + } |