diff options
Diffstat (limited to 'kernel/arch/mips/math-emu')
36 files changed, 6342 insertions, 0 deletions
diff --git a/kernel/arch/mips/math-emu/Makefile b/kernel/arch/mips/math-emu/Makefile new file mode 100644 index 000000000..2e5f96275 --- /dev/null +++ b/kernel/arch/mips/math-emu/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the Linux/MIPS kernel FPU emulation. +# + +obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ + dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ + dp_tint.o dp_fint.o \ + sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ + sp_tint.o sp_fint.o \ + dsemul.o + +lib-y += ieee754d.o \ + dp_tlong.o dp_flong.o dp_sqrt.o \ + sp_tlong.o sp_flong.o sp_sqrt.o + +obj-$(CONFIG_DEBUG_FS) += me-debugfs.o diff --git a/kernel/arch/mips/math-emu/cp1emu.c b/kernel/arch/mips/math-emu/cp1emu.c new file mode 100644 index 000000000..22b9b2cb9 --- /dev/null +++ b/kernel/arch/mips/math-emu/cp1emu.c @@ -0,0 +1,2218 @@ +/* + * cp1emu.c: a MIPS coprocessor 1 (FPU) instruction emulator + * + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * A complete emulator for MIPS coprocessor 1 instructions. This is + * required for #float(switch) or #float(trap), where it catches all + * COP1 instructions via the "CoProcessor Unusable" exception. + * + * More surprisingly it is also required for #float(ieee), to help out + * the hardware FPU at the boundaries of the IEEE-754 representation + * (denormalised values, infinities, underflow, etc). It is made + * quite nasty because emulation of some non-COP1 instructions is + * required, e.g. in branch delay slots. + * + * Note if you know that you won't have an FPU, then you'll get much + * better performance by compiling with -msoft-float! + */ +#include <linux/sched.h> +#include <linux/debugfs.h> +#include <linux/kconfig.h> +#include <linux/percpu-defs.h> +#include <linux/perf_event.h> + +#include <asm/branch.h> +#include <asm/inst.h> +#include <asm/ptrace.h> +#include <asm/signal.h> +#include <asm/uaccess.h> + +#include <asm/cpu-info.h> +#include <asm/processor.h> +#include <asm/fpu_emulator.h> +#include <asm/fpu.h> +#include <asm/mips-r2-to-r6-emul.h> + +#include "ieee754.h" + +/* Function which emulates a floating point instruction. */ + +static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *, + mips_instruction); + +static int fpux_emu(struct pt_regs *, + struct mips_fpu_struct *, mips_instruction, void *__user *); + +/* Control registers */ + +#define FPCREG_RID 0 /* $0 = revision id */ +#define FPCREG_FCCR 25 /* $25 = fccr */ +#define FPCREG_FEXR 26 /* $26 = fexr */ +#define FPCREG_FENR 28 /* $28 = fenr */ +#define FPCREG_CSR 31 /* $31 = csr */ + +/* convert condition code register number to csr bit */ +const unsigned int fpucondbit[8] = { + FPU_CSR_COND, + FPU_CSR_COND1, + FPU_CSR_COND2, + FPU_CSR_COND3, + FPU_CSR_COND4, + FPU_CSR_COND5, + FPU_CSR_COND6, + FPU_CSR_COND7 +}; + +/* (microMIPS) Convert certain microMIPS instructions to MIPS32 format. */ +static const int sd_format[] = {16, 17, 0, 0, 0, 0, 0, 0}; +static const int sdps_format[] = {16, 17, 22, 0, 0, 0, 0, 0}; +static const int dwl_format[] = {17, 20, 21, 0, 0, 0, 0, 0}; +static const int swl_format[] = {16, 20, 21, 0, 0, 0, 0, 0}; + +/* + * This functions translates a 32-bit microMIPS instruction + * into a 32-bit MIPS32 instruction. Returns 0 on success + * and SIGILL otherwise. + */ +static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr) +{ + union mips_instruction insn = *insn_ptr; + union mips_instruction mips32_insn = insn; + int func, fmt, op; + + switch (insn.mm_i_format.opcode) { + case mm_ldc132_op: + mips32_insn.mm_i_format.opcode = ldc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_lwc132_op: + mips32_insn.mm_i_format.opcode = lwc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_sdc132_op: + mips32_insn.mm_i_format.opcode = sdc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_swc132_op: + mips32_insn.mm_i_format.opcode = swc1_op; + mips32_insn.mm_i_format.rt = insn.mm_i_format.rs; + mips32_insn.mm_i_format.rs = insn.mm_i_format.rt; + break; + case mm_pool32i_op: + /* NOTE: offset is << by 1 if in microMIPS mode. */ + if ((insn.mm_i_format.rt == mm_bc1f_op) || + (insn.mm_i_format.rt == mm_bc1t_op)) { + mips32_insn.fb_format.opcode = cop1_op; + mips32_insn.fb_format.bc = bc_op; + mips32_insn.fb_format.flag = + (insn.mm_i_format.rt == mm_bc1t_op) ? 1 : 0; + } else + return SIGILL; + break; + case mm_pool32f_op: + switch (insn.mm_fp0_format.func) { + case mm_32f_01_op: + case mm_32f_11_op: + case mm_32f_02_op: + case mm_32f_12_op: + case mm_32f_41_op: + case mm_32f_51_op: + case mm_32f_42_op: + case mm_32f_52_op: + op = insn.mm_fp0_format.func; + if (op == mm_32f_01_op) + func = madd_s_op; + else if (op == mm_32f_11_op) + func = madd_d_op; + else if (op == mm_32f_02_op) + func = nmadd_s_op; + else if (op == mm_32f_12_op) + func = nmadd_d_op; + else if (op == mm_32f_41_op) + func = msub_s_op; + else if (op == mm_32f_51_op) + func = msub_d_op; + else if (op == mm_32f_42_op) + func = nmsub_s_op; + else + func = nmsub_d_op; + mips32_insn.fp6_format.opcode = cop1x_op; + mips32_insn.fp6_format.fr = insn.mm_fp6_format.fr; + mips32_insn.fp6_format.ft = insn.mm_fp6_format.ft; + mips32_insn.fp6_format.fs = insn.mm_fp6_format.fs; + mips32_insn.fp6_format.fd = insn.mm_fp6_format.fd; + mips32_insn.fp6_format.func = func; + break; + case mm_32f_10_op: + func = -1; /* Invalid */ + op = insn.mm_fp5_format.op & 0x7; + if (op == mm_ldxc1_op) + func = ldxc1_op; + else if (op == mm_sdxc1_op) + func = sdxc1_op; + else if (op == mm_lwxc1_op) + func = lwxc1_op; + else if (op == mm_swxc1_op) + func = swxc1_op; + + if (func != -1) { + mips32_insn.r_format.opcode = cop1x_op; + mips32_insn.r_format.rs = + insn.mm_fp5_format.base; + mips32_insn.r_format.rt = + insn.mm_fp5_format.index; + mips32_insn.r_format.rd = 0; + mips32_insn.r_format.re = insn.mm_fp5_format.fd; + mips32_insn.r_format.func = func; + } else + return SIGILL; + break; + case mm_32f_40_op: + op = -1; /* Invalid */ + if (insn.mm_fp2_format.op == mm_fmovt_op) + op = 1; + else if (insn.mm_fp2_format.op == mm_fmovf_op) + op = 0; + if (op != -1) { + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp2_format.fmt]; + mips32_insn.fp0_format.ft = + (insn.mm_fp2_format.cc<<2) + op; + mips32_insn.fp0_format.fs = + insn.mm_fp2_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp2_format.fd; + mips32_insn.fp0_format.func = fmovc_op; + } else + return SIGILL; + break; + case mm_32f_60_op: + func = -1; /* Invalid */ + if (insn.mm_fp0_format.op == mm_fadd_op) + func = fadd_op; + else if (insn.mm_fp0_format.op == mm_fsub_op) + func = fsub_op; + else if (insn.mm_fp0_format.op == mm_fmul_op) + func = fmul_op; + else if (insn.mm_fp0_format.op == mm_fdiv_op) + func = fdiv_op; + if (func != -1) { + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp0_format.fmt]; + mips32_insn.fp0_format.ft = + insn.mm_fp0_format.ft; + mips32_insn.fp0_format.fs = + insn.mm_fp0_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp0_format.fd; + mips32_insn.fp0_format.func = func; + } else + return SIGILL; + break; + case mm_32f_70_op: + func = -1; /* Invalid */ + if (insn.mm_fp0_format.op == mm_fmovn_op) + func = fmovn_op; + else if (insn.mm_fp0_format.op == mm_fmovz_op) + func = fmovz_op; + if (func != -1) { + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp0_format.fmt]; + mips32_insn.fp0_format.ft = + insn.mm_fp0_format.ft; + mips32_insn.fp0_format.fs = + insn.mm_fp0_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp0_format.fd; + mips32_insn.fp0_format.func = func; + } else + return SIGILL; + break; + case mm_32f_73_op: /* POOL32FXF */ + switch (insn.mm_fp1_format.op) { + case mm_movf0_op: + case mm_movf1_op: + case mm_movt0_op: + case mm_movt1_op: + if ((insn.mm_fp1_format.op & 0x7f) == + mm_movf0_op) + op = 0; + else + op = 1; + mips32_insn.r_format.opcode = spec_op; + mips32_insn.r_format.rs = insn.mm_fp4_format.fs; + mips32_insn.r_format.rt = + (insn.mm_fp4_format.cc << 2) + op; + mips32_insn.r_format.rd = insn.mm_fp4_format.rt; + mips32_insn.r_format.re = 0; + mips32_insn.r_format.func = movc_op; + break; + case mm_fcvtd0_op: + case mm_fcvtd1_op: + case mm_fcvts0_op: + case mm_fcvts1_op: + if ((insn.mm_fp1_format.op & 0x7f) == + mm_fcvtd0_op) { + func = fcvtd_op; + fmt = swl_format[insn.mm_fp3_format.fmt]; + } else { + func = fcvts_op; + fmt = dwl_format[insn.mm_fp3_format.fmt]; + } + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = fmt; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp3_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp3_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_fmov0_op: + case mm_fmov1_op: + case mm_fabs0_op: + case mm_fabs1_op: + case mm_fneg0_op: + case mm_fneg1_op: + if ((insn.mm_fp1_format.op & 0x7f) == + mm_fmov0_op) + func = fmov_op; + else if ((insn.mm_fp1_format.op & 0x7f) == + mm_fabs0_op) + func = fabs_op; + else + func = fneg_op; + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp3_format.fmt]; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp3_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp3_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_ffloorl_op: + case mm_ffloorw_op: + case mm_fceill_op: + case mm_fceilw_op: + case mm_ftruncl_op: + case mm_ftruncw_op: + case mm_froundl_op: + case mm_froundw_op: + case mm_fcvtl_op: + case mm_fcvtw_op: + if (insn.mm_fp1_format.op == mm_ffloorl_op) + func = ffloorl_op; + else if (insn.mm_fp1_format.op == mm_ffloorw_op) + func = ffloor_op; + else if (insn.mm_fp1_format.op == mm_fceill_op) + func = fceill_op; + else if (insn.mm_fp1_format.op == mm_fceilw_op) + func = fceil_op; + else if (insn.mm_fp1_format.op == mm_ftruncl_op) + func = ftruncl_op; + else if (insn.mm_fp1_format.op == mm_ftruncw_op) + func = ftrunc_op; + else if (insn.mm_fp1_format.op == mm_froundl_op) + func = froundl_op; + else if (insn.mm_fp1_format.op == mm_froundw_op) + func = fround_op; + else if (insn.mm_fp1_format.op == mm_fcvtl_op) + func = fcvtl_op; + else + func = fcvtw_op; + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sd_format[insn.mm_fp1_format.fmt]; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp1_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp1_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_frsqrt_op: + case mm_fsqrt_op: + case mm_frecip_op: + if (insn.mm_fp1_format.op == mm_frsqrt_op) + func = frsqrt_op; + else if (insn.mm_fp1_format.op == mm_fsqrt_op) + func = fsqrt_op; + else + func = frecip_op; + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp1_format.fmt]; + mips32_insn.fp0_format.ft = 0; + mips32_insn.fp0_format.fs = + insn.mm_fp1_format.fs; + mips32_insn.fp0_format.fd = + insn.mm_fp1_format.rt; + mips32_insn.fp0_format.func = func; + break; + case mm_mfc1_op: + case mm_mtc1_op: + case mm_cfc1_op: + case mm_ctc1_op: + case mm_mfhc1_op: + case mm_mthc1_op: + if (insn.mm_fp1_format.op == mm_mfc1_op) + op = mfc_op; + else if (insn.mm_fp1_format.op == mm_mtc1_op) + op = mtc_op; + else if (insn.mm_fp1_format.op == mm_cfc1_op) + op = cfc_op; + else if (insn.mm_fp1_format.op == mm_ctc1_op) + op = ctc_op; + else if (insn.mm_fp1_format.op == mm_mfhc1_op) + op = mfhc_op; + else + op = mthc_op; + mips32_insn.fp1_format.opcode = cop1_op; + mips32_insn.fp1_format.op = op; + mips32_insn.fp1_format.rt = + insn.mm_fp1_format.rt; + mips32_insn.fp1_format.fs = + insn.mm_fp1_format.fs; + mips32_insn.fp1_format.fd = 0; + mips32_insn.fp1_format.func = 0; + break; + default: + return SIGILL; + } + break; + case mm_32f_74_op: /* c.cond.fmt */ + mips32_insn.fp0_format.opcode = cop1_op; + mips32_insn.fp0_format.fmt = + sdps_format[insn.mm_fp4_format.fmt]; + mips32_insn.fp0_format.ft = insn.mm_fp4_format.rt; + mips32_insn.fp0_format.fs = insn.mm_fp4_format.fs; + mips32_insn.fp0_format.fd = insn.mm_fp4_format.cc << 2; + mips32_insn.fp0_format.func = + insn.mm_fp4_format.cond | MM_MIPS32_COND_FC; + break; + default: + return SIGILL; + } + break; + default: + return SIGILL; + } + + *insn_ptr = mips32_insn; + return 0; +} + +/* + * Redundant with logic already in kernel/branch.c, + * embedded in compute_return_epc. At some point, + * a single subroutine should be used across both + * modules. + */ +static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, + unsigned long *contpc) +{ + union mips_instruction insn = (union mips_instruction)dec_insn.insn; + unsigned int fcr31; + unsigned int bit = 0; + + switch (insn.i_format.opcode) { + case spec_op: + switch (insn.r_format.func) { + case jalr_op: + regs->regs[insn.r_format.rd] = + regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + /* Fall through */ + case jr_op: + /* For R6, JR already emulated in jalr_op */ + if (NO_R6EMU && insn.r_format.opcode == jr_op) + break; + *contpc = regs->regs[insn.r_format.rs]; + return 1; + } + break; + case bcond_op: + switch (insn.i_format.rt) { + case bltzal_op: + case bltzall_op: + if (NO_R6EMU && (insn.i_format.rs || + insn.i_format.rt == bltzall_op)) + break; + + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + /* Fall through */ + case bltzl_op: + if (NO_R6EMU) + break; + case bltz_op: + if ((long)regs->regs[insn.i_format.rs] < 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case bgezal_op: + case bgezall_op: + if (NO_R6EMU && (insn.i_format.rs || + insn.i_format.rt == bgezall_op)) + break; + + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + /* Fall through */ + case bgezl_op: + if (NO_R6EMU) + break; + case bgez_op: + if ((long)regs->regs[insn.i_format.rs] >= 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + } + break; + case jalx_op: + set_isa16_mode(bit); + case jal_op: + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + /* Fall through */ + case j_op: + *contpc = regs->cp0_epc + dec_insn.pc_inc; + *contpc >>= 28; + *contpc <<= 28; + *contpc |= (insn.j_format.target << 2); + /* Set microMIPS mode bit: XOR for jalx. */ + *contpc ^= bit; + return 1; + case beql_op: + if (NO_R6EMU) + break; + case beq_op: + if (regs->regs[insn.i_format.rs] == + regs->regs[insn.i_format.rt]) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case bnel_op: + if (NO_R6EMU) + break; + case bne_op: + if (regs->regs[insn.i_format.rs] != + regs->regs[insn.i_format.rt]) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case blezl_op: + if (NO_R6EMU) + break; + case blez_op: + + /* + * Compact branches for R6 for the + * blez and blezl opcodes. + * BLEZ | rs = 0 | rt != 0 == BLEZALC + * BLEZ | rs = rt != 0 == BGEZALC + * BLEZ | rs != 0 | rt != 0 == BGEUC + * BLEZL | rs = 0 | rt != 0 == BLEZC + * BLEZL | rs = rt != 0 == BGEZC + * BLEZL | rs != 0 | rt != 0 == BGEC + * + * For real BLEZ{,L}, rt is always 0. + */ + if (cpu_has_mips_r6 && insn.i_format.rt) { + if ((insn.i_format.opcode == blez_op) && + ((!insn.i_format.rs && insn.i_format.rt) || + (insn.i_format.rs == insn.i_format.rt))) + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + } + if ((long)regs->regs[insn.i_format.rs] <= 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case bgtzl_op: + if (NO_R6EMU) + break; + case bgtz_op: + /* + * Compact branches for R6 for the + * bgtz and bgtzl opcodes. + * BGTZ | rs = 0 | rt != 0 == BGTZALC + * BGTZ | rs = rt != 0 == BLTZALC + * BGTZ | rs != 0 | rt != 0 == BLTUC + * BGTZL | rs = 0 | rt != 0 == BGTZC + * BGTZL | rs = rt != 0 == BLTZC + * BGTZL | rs != 0 | rt != 0 == BLTC + * + * *ZALC varint for BGTZ &&& rt != 0 + * For real GTZ{,L}, rt is always 0. + */ + if (cpu_has_mips_r6 && insn.i_format.rt) { + if ((insn.i_format.opcode == blez_op) && + ((!insn.i_format.rs && insn.i_format.rt) || + (insn.i_format.rs == insn.i_format.rt))) + regs->regs[31] = regs->cp0_epc + + dec_insn.pc_inc; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + } + + if ((long)regs->regs[insn.i_format.rs] > 0) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case cbcond0_op: + case cbcond1_op: + if (!cpu_has_mips_r6) + break; + if (insn.i_format.rt && !insn.i_format.rs) + regs->regs[31] = regs->cp0_epc + 4; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; +#ifdef CONFIG_CPU_CAVIUM_OCTEON + case lwc2_op: /* This is bbit0 on Octeon */ + if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; + case ldc2_op: /* This is bbit032 on Octeon */ + if ((regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32))) == 0) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; + case swc2_op: /* This is bbit1 on Octeon */ + if (regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; + case sdc2_op: /* This is bbit132 on Octeon */ + if (regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32))) + *contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + 8; + return 1; +#else + case bc6_op: + /* + * Only valid for MIPS R6 but we can still end up + * here from a broken userland so just tell emulator + * this is not a branch and let it break later on. + */ + if (!cpu_has_mips_r6) + break; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + case balc6_op: + if (!cpu_has_mips_r6) + break; + regs->regs[31] = regs->cp0_epc + 4; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + case beqzcjic_op: + if (!cpu_has_mips_r6) + break; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + case bnezcjialc_op: + if (!cpu_has_mips_r6) + break; + if (!insn.i_format.rs) + regs->regs[31] = regs->cp0_epc + 4; + *contpc = regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; +#endif + case cop0_op: + case cop1_op: + /* Need to check for R6 bc1nez and bc1eqz branches */ + if (cpu_has_mips_r6 && + ((insn.i_format.rs == bc1eqz_op) || + (insn.i_format.rs == bc1nez_op))) { + bit = 0; + switch (insn.i_format.rs) { + case bc1eqz_op: + if (get_fpr32(¤t->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1) + bit = 1; + break; + case bc1nez_op: + if (!(get_fpr32(¤t->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1)) + bit = 1; + break; + } + if (bit) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + + return 1; + } + /* R2/R6 compatible cop1 instruction. Fall through */ + case cop2_op: + case cop1x_op: + if (insn.i_format.rs == bc_op) { + preempt_disable(); + if (is_fpu_owner()) + fcr31 = read_32bit_cp1_register(CP1_STATUS); + else + fcr31 = current->thread.fpu.fcr31; + preempt_enable(); + + bit = (insn.i_format.rt >> 2); + bit += (bit != 0); + bit += 23; + switch (insn.i_format.rt & 3) { + case 0: /* bc1f */ + case 2: /* bc1fl */ + if (~fcr31 & (1 << bit)) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + case 1: /* bc1t */ + case 3: /* bc1tl */ + if (fcr31 & (1 << bit)) + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + (insn.i_format.simmediate << 2); + else + *contpc = regs->cp0_epc + + dec_insn.pc_inc + + dec_insn.next_pc_inc; + return 1; + } + } + break; + } + return 0; +} + +/* + * In the Linux kernel, we support selection of FPR format on the + * basis of the Status.FR bit. If an FPU is not present, the FR bit + * is hardwired to zero, which would imply a 32-bit FPU even for + * 64-bit CPUs so we rather look at TIF_32BIT_FPREGS. + * FPU emu is slow and bulky and optimizing this function offers fairly + * sizeable benefits so we try to be clever and make this function return + * a constant whenever possible, that is on 64-bit kernels without O32 + * compatibility enabled and on 32-bit without 64-bit FPU support. + */ +static inline int cop1_64bit(struct pt_regs *xcp) +{ + if (config_enabled(CONFIG_64BIT) && !config_enabled(CONFIG_MIPS32_O32)) + return 1; + else if (config_enabled(CONFIG_32BIT) && + !config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) + return 0; + + return !test_thread_flag(TIF_32BIT_FPREGS); +} + +static inline bool hybrid_fprs(void) +{ + return test_thread_flag(TIF_HYBRID_FPREGS); +} + +#define SIFROMREG(si, x) \ +do { \ + if (cop1_64bit(xcp) && !hybrid_fprs()) \ + (si) = (int)get_fpr32(&ctx->fpr[x], 0); \ + else \ + (si) = (int)get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \ +} while (0) + +#define SITOREG(si, x) \ +do { \ + if (cop1_64bit(xcp) && !hybrid_fprs()) { \ + unsigned i; \ + set_fpr32(&ctx->fpr[x], 0, si); \ + for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \ + set_fpr32(&ctx->fpr[x], i, 0); \ + } else { \ + set_fpr32(&ctx->fpr[(x) & ~1], (x) & 1, si); \ + } \ +} while (0) + +#define SIFROMHREG(si, x) ((si) = (int)get_fpr32(&ctx->fpr[x], 1)) + +#define SITOHREG(si, x) \ +do { \ + unsigned i; \ + set_fpr32(&ctx->fpr[x], 1, si); \ + for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \ + set_fpr32(&ctx->fpr[x], i, 0); \ +} while (0) + +#define DIFROMREG(di, x) \ + ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0)) + +#define DITOREG(di, x) \ +do { \ + unsigned fpr, i; \ + fpr = (x) & ~(cop1_64bit(xcp) == 0); \ + set_fpr64(&ctx->fpr[fpr], 0, di); \ + for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \ + set_fpr64(&ctx->fpr[fpr], i, 0); \ +} while (0) + +#define SPFROMREG(sp, x) SIFROMREG((sp).bits, x) +#define SPTOREG(sp, x) SITOREG((sp).bits, x) +#define DPFROMREG(dp, x) DIFROMREG((dp).bits, x) +#define DPTOREG(dp, x) DITOREG((dp).bits, x) + +/* + * Emulate a CFC1 instruction. + */ +static inline void cop1_cfc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir) +{ + u32 fcr31 = ctx->fcr31; + u32 value = 0; + + switch (MIPSInst_RD(ir)) { + case FPCREG_CSR: + value = fcr31; + pr_debug("%p gpr[%d]<-csr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_FENR: + if (!cpu_has_mips_r) + break; + value = (fcr31 >> (FPU_CSR_FS_S - MIPS_FENR_FS_S)) & + MIPS_FENR_FS; + value |= fcr31 & (FPU_CSR_ALL_E | FPU_CSR_RM); + pr_debug("%p gpr[%d]<-enr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_FEXR: + if (!cpu_has_mips_r) + break; + value = fcr31 & (FPU_CSR_ALL_X | FPU_CSR_ALL_S); + pr_debug("%p gpr[%d]<-exr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_FCCR: + if (!cpu_has_mips_r) + break; + value = (fcr31 >> (FPU_CSR_COND_S - MIPS_FCCR_COND0_S)) & + MIPS_FCCR_COND0; + value |= (fcr31 >> (FPU_CSR_COND1_S - MIPS_FCCR_COND1_S)) & + (MIPS_FCCR_CONDX & ~MIPS_FCCR_COND0); + pr_debug("%p gpr[%d]<-ccr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + break; + + case FPCREG_RID: + value = boot_cpu_data.fpu_id; + break; + + default: + break; + } + + if (MIPSInst_RT(ir)) + xcp->regs[MIPSInst_RT(ir)] = value; +} + +/* + * Emulate a CTC1 instruction. + */ +static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir) +{ + u32 fcr31 = ctx->fcr31; + u32 value; + u32 mask; + + if (MIPSInst_RT(ir) == 0) + value = 0; + else + value = xcp->regs[MIPSInst_RT(ir)]; + + switch (MIPSInst_RD(ir)) { + case FPCREG_CSR: + pr_debug("%p gpr[%d]->csr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + + /* Preserve read-only bits. */ + mask = boot_cpu_data.fpu_msk31; + fcr31 = (value & ~mask) | (fcr31 & mask); + break; + + case FPCREG_FENR: + if (!cpu_has_mips_r) + break; + pr_debug("%p gpr[%d]->enr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + fcr31 &= ~(FPU_CSR_FS | FPU_CSR_ALL_E | FPU_CSR_RM); + fcr31 |= (value << (FPU_CSR_FS_S - MIPS_FENR_FS_S)) & + FPU_CSR_FS; + fcr31 |= value & (FPU_CSR_ALL_E | FPU_CSR_RM); + break; + + case FPCREG_FEXR: + if (!cpu_has_mips_r) + break; + pr_debug("%p gpr[%d]->exr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + fcr31 &= ~(FPU_CSR_ALL_X | FPU_CSR_ALL_S); + fcr31 |= value & (FPU_CSR_ALL_X | FPU_CSR_ALL_S); + break; + + case FPCREG_FCCR: + if (!cpu_has_mips_r) + break; + pr_debug("%p gpr[%d]->ccr=%08x\n", + (void *)xcp->cp0_epc, MIPSInst_RT(ir), value); + fcr31 &= ~(FPU_CSR_CONDX | FPU_CSR_COND); + fcr31 |= (value << (FPU_CSR_COND_S - MIPS_FCCR_COND0_S)) & + FPU_CSR_COND; + fcr31 |= (value << (FPU_CSR_COND1_S - MIPS_FCCR_COND1_S)) & + FPU_CSR_CONDX; + break; + + default: + break; + } + + ctx->fcr31 = fcr31; +} + +/* + * Emulate the single floating point instruction pointed at by EPC. + * Two instructions if the instruction is in a branch delay slot. + */ + +static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + struct mm_decoded_insn dec_insn, void *__user *fault_addr) +{ + unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc; + unsigned int cond, cbit; + mips_instruction ir; + int likely, pc_inc; + u32 __user *wva; + u64 __user *dva; + u32 wval; + u64 dval; + int sig; + + /* + * These are giving gcc a gentle hint about what to expect in + * dec_inst in order to do better optimization. + */ + if (!cpu_has_mmips && dec_insn.micro_mips_mode) + unreachable(); + + /* XXX NEC Vr54xx bug workaround */ + if (delay_slot(xcp)) { + if (dec_insn.micro_mips_mode) { + if (!mm_isBranchInstr(xcp, dec_insn, &contpc)) + clear_delay_slot(xcp); + } else { + if (!isBranchInstr(xcp, dec_insn, &contpc)) + clear_delay_slot(xcp); + } + } + + if (delay_slot(xcp)) { + /* + * The instruction to be emulated is in a branch delay slot + * which means that we have to emulate the branch instruction + * BEFORE we do the cop1 instruction. + * + * This branch could be a COP1 branch, but in that case we + * would have had a trap for that instruction, and would not + * come through this route. + * + * Linux MIPS branch emulator operates on context, updating the + * cp0_epc. + */ + ir = dec_insn.next_insn; /* process delay slot instr */ + pc_inc = dec_insn.next_pc_inc; + } else { + ir = dec_insn.insn; /* process current instr */ + pc_inc = dec_insn.pc_inc; + } + + /* + * Since microMIPS FPU instructios are a subset of MIPS32 FPU + * instructions, we want to convert microMIPS FPU instructions + * into MIPS32 instructions so that we could reuse all of the + * FPU emulation code. + * + * NOTE: We cannot do this for branch instructions since they + * are not a subset. Example: Cannot emulate a 16-bit + * aligned target address with a MIPS32 instruction. + */ + if (dec_insn.micro_mips_mode) { + /* + * If next instruction is a 16-bit instruction, then it + * it cannot be a FPU instruction. This could happen + * since we can be called for non-FPU instructions. + */ + if ((pc_inc == 2) || + (microMIPS32_to_MIPS32((union mips_instruction *)&ir) + == SIGILL)) + return SIGILL; + } + +emul: + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0); + MIPS_FPU_EMU_INC_STATS(emulated); + switch (MIPSInst_OPCODE(ir)) { + case ldc1_op: + dva = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(loads); + + if (!access_ok(VERIFY_READ, dva, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGBUS; + } + if (__get_user(dval, dva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGSEGV; + } + DITOREG(dval, MIPSInst_RT(ir)); + break; + + case sdc1_op: + dva = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(stores); + DIFROMREG(dval, MIPSInst_RT(ir)); + if (!access_ok(VERIFY_WRITE, dva, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGBUS; + } + if (__put_user(dval, dva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = dva; + return SIGSEGV; + } + break; + + case lwc1_op: + wva = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(loads); + if (!access_ok(VERIFY_READ, wva, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGBUS; + } + if (__get_user(wval, wva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGSEGV; + } + SITOREG(wval, MIPSInst_RT(ir)); + break; + + case swc1_op: + wva = (u32 __user *) (xcp->regs[MIPSInst_RS(ir)] + + MIPSInst_SIMM(ir)); + MIPS_FPU_EMU_INC_STATS(stores); + SIFROMREG(wval, MIPSInst_RT(ir)); + if (!access_ok(VERIFY_WRITE, wva, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGBUS; + } + if (__put_user(wval, wva)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = wva; + return SIGSEGV; + } + break; + + case cop1_op: + switch (MIPSInst_RS(ir)) { + case dmfc_op: + if (!cpu_has_mips_3_4_5 && !cpu_has_mips64) + return SIGILL; + + /* copregister fs -> gpr[rt] */ + if (MIPSInst_RT(ir) != 0) { + DIFROMREG(xcp->regs[MIPSInst_RT(ir)], + MIPSInst_RD(ir)); + } + break; + + case dmtc_op: + if (!cpu_has_mips_3_4_5 && !cpu_has_mips64) + return SIGILL; + + /* copregister fs <- rt */ + DITOREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir)); + break; + + case mfhc_op: + if (!cpu_has_mips_r2) + goto sigill; + + /* copregister rd -> gpr[rt] */ + if (MIPSInst_RT(ir) != 0) { + SIFROMHREG(xcp->regs[MIPSInst_RT(ir)], + MIPSInst_RD(ir)); + } + break; + + case mthc_op: + if (!cpu_has_mips_r2) + goto sigill; + + /* copregister rd <- gpr[rt] */ + SITOHREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir)); + break; + + case mfc_op: + /* copregister rd -> gpr[rt] */ + if (MIPSInst_RT(ir) != 0) { + SIFROMREG(xcp->regs[MIPSInst_RT(ir)], + MIPSInst_RD(ir)); + } + break; + + case mtc_op: + /* copregister rd <- rt */ + SITOREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir)); + break; + + case cfc_op: + /* cop control register rd -> gpr[rt] */ + cop1_cfc(xcp, ctx, ir); + break; + + case ctc_op: + /* copregister rd <- rt */ + cop1_ctc(xcp, ctx, ir); + if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { + return SIGFPE; + } + break; + + case bc_op: + if (delay_slot(xcp)) + return SIGILL; + + if (cpu_has_mips_4_5_r) + cbit = fpucondbit[MIPSInst_RT(ir) >> 2]; + else + cbit = FPU_CSR_COND; + cond = ctx->fcr31 & cbit; + + likely = 0; + switch (MIPSInst_RT(ir) & 3) { + case bcfl_op: + if (cpu_has_mips_2_3_4_5_r) + likely = 1; + /* Fall through */ + case bcf_op: + cond = !cond; + break; + case bctl_op: + if (cpu_has_mips_2_3_4_5_r) + likely = 1; + /* Fall through */ + case bct_op: + break; + } + + set_delay_slot(xcp); + if (cond) { + /* + * Branch taken: emulate dslot instruction + */ + unsigned long bcpc; + + /* + * Remember EPC at the branch to point back + * at so that any delay-slot instruction + * signal is not silently ignored. + */ + bcpc = xcp->cp0_epc; + xcp->cp0_epc += dec_insn.pc_inc; + + contpc = MIPSInst_SIMM(ir); + ir = dec_insn.next_insn; + if (dec_insn.micro_mips_mode) { + contpc = (xcp->cp0_epc + (contpc << 1)); + + /* If 16-bit instruction, not FPU. */ + if ((dec_insn.next_pc_inc == 2) || + (microMIPS32_to_MIPS32((union mips_instruction *)&ir) == SIGILL)) { + + /* + * Since this instruction will + * be put on the stack with + * 32-bit words, get around + * this problem by putting a + * NOP16 as the second one. + */ + if (dec_insn.next_pc_inc == 2) + ir = (ir & (~0xffff)) | MM_NOP16; + + /* + * Single step the non-CP1 + * instruction in the dslot. + */ + sig = mips_dsemul(xcp, ir, + contpc); + if (sig) + xcp->cp0_epc = bcpc; + /* + * SIGILL forces out of + * the emulation loop. + */ + return sig ? sig : SIGILL; + } + } else + contpc = (xcp->cp0_epc + (contpc << 2)); + + switch (MIPSInst_OPCODE(ir)) { + case lwc1_op: + case swc1_op: + goto emul; + + case ldc1_op: + case sdc1_op: + if (cpu_has_mips_2_3_4_5_r) + goto emul; + + goto bc_sigill; + + case cop1_op: + goto emul; + + case cop1x_op: + if (cpu_has_mips_4_5_64_r2_r6) + /* its one of ours */ + goto emul; + + goto bc_sigill; + + case spec_op: + switch (MIPSInst_FUNC(ir)) { + case movc_op: + if (cpu_has_mips_4_5_r) + goto emul; + + goto bc_sigill; + } + break; + + bc_sigill: + xcp->cp0_epc = bcpc; + return SIGILL; + } + + /* + * Single step the non-cp1 + * instruction in the dslot + */ + sig = mips_dsemul(xcp, ir, contpc); + if (sig) + xcp->cp0_epc = bcpc; + /* SIGILL forces out of the emulation loop. */ + return sig ? sig : SIGILL; + } else if (likely) { /* branch not taken */ + /* + * branch likely nullifies + * dslot if not taken + */ + xcp->cp0_epc += dec_insn.pc_inc; + contpc += dec_insn.pc_inc; + /* + * else continue & execute + * dslot as normal insn + */ + } + break; + + default: + if (!(MIPSInst_RS(ir) & 0x10)) + return SIGILL; + + /* a real fpu computation instruction */ + if ((sig = fpu_emu(xcp, ctx, ir))) + return sig; + } + break; + + case cop1x_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + sig = fpux_emu(xcp, ctx, ir, fault_addr); + if (sig) + return sig; + break; + + case spec_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (MIPSInst_FUNC(ir) != movc_op) + return SIGILL; + cond = fpucondbit[MIPSInst_RT(ir) >> 2]; + if (((ctx->fcr31 & cond) != 0) == ((MIPSInst_RT(ir) & 1) != 0)) + xcp->regs[MIPSInst_RD(ir)] = + xcp->regs[MIPSInst_RS(ir)]; + break; + default: +sigill: + return SIGILL; + } + + /* we did it !! */ + xcp->cp0_epc = contpc; + clear_delay_slot(xcp); + + return 0; +} + +/* + * Conversion table from MIPS compare ops 48-63 + * cond = ieee754dp_cmp(x,y,IEEE754_UN,sig); + */ +static const unsigned char cmptab[8] = { + 0, /* cmp_0 (sig) cmp_sf */ + IEEE754_CUN, /* cmp_un (sig) cmp_ngle */ + IEEE754_CEQ, /* cmp_eq (sig) cmp_seq */ + IEEE754_CEQ | IEEE754_CUN, /* cmp_ueq (sig) cmp_ngl */ + IEEE754_CLT, /* cmp_olt (sig) cmp_lt */ + IEEE754_CLT | IEEE754_CUN, /* cmp_ult (sig) cmp_nge */ + IEEE754_CLT | IEEE754_CEQ, /* cmp_ole (sig) cmp_le */ + IEEE754_CLT | IEEE754_CEQ | IEEE754_CUN, /* cmp_ule (sig) cmp_ngt */ +}; + + +/* + * Additional MIPS4 instructions + */ + +#define DEF3OP(name, p, f1, f2, f3) \ +static union ieee754##p fpemu_##p##_##name(union ieee754##p r, \ + union ieee754##p s, union ieee754##p t) \ +{ \ + struct _ieee754_csr ieee754_csr_save; \ + s = f1(s, t); \ + ieee754_csr_save = ieee754_csr; \ + s = f2(s, r); \ + ieee754_csr_save.cx |= ieee754_csr.cx; \ + ieee754_csr_save.sx |= ieee754_csr.sx; \ + s = f3(s); \ + ieee754_csr.cx |= ieee754_csr_save.cx; \ + ieee754_csr.sx |= ieee754_csr_save.sx; \ + return s; \ +} + +static union ieee754dp fpemu_dp_recip(union ieee754dp d) +{ + return ieee754dp_div(ieee754dp_one(0), d); +} + +static union ieee754dp fpemu_dp_rsqrt(union ieee754dp d) +{ + return ieee754dp_div(ieee754dp_one(0), ieee754dp_sqrt(d)); +} + +static union ieee754sp fpemu_sp_recip(union ieee754sp s) +{ + return ieee754sp_div(ieee754sp_one(0), s); +} + +static union ieee754sp fpemu_sp_rsqrt(union ieee754sp s) +{ + return ieee754sp_div(ieee754sp_one(0), ieee754sp_sqrt(s)); +} + +DEF3OP(madd, sp, ieee754sp_mul, ieee754sp_add, ); +DEF3OP(msub, sp, ieee754sp_mul, ieee754sp_sub, ); +DEF3OP(nmadd, sp, ieee754sp_mul, ieee754sp_add, ieee754sp_neg); +DEF3OP(nmsub, sp, ieee754sp_mul, ieee754sp_sub, ieee754sp_neg); +DEF3OP(madd, dp, ieee754dp_mul, ieee754dp_add, ); +DEF3OP(msub, dp, ieee754dp_mul, ieee754dp_sub, ); +DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg); +DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg); + +static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir, void *__user *fault_addr) +{ + unsigned rcsr = 0; /* resulting csr */ + + MIPS_FPU_EMU_INC_STATS(cp1xops); + + switch (MIPSInst_FMA_FFMT(ir)) { + case s_fmt:{ /* 0 */ + + union ieee754sp(*handler) (union ieee754sp, union ieee754sp, union ieee754sp); + union ieee754sp fd, fr, fs, ft; + u32 __user *va; + u32 val; + + switch (MIPSInst_FUNC(ir)) { + case lwxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(loads); + if (!access_ok(VERIFY_READ, va, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + SITOREG(val, MIPSInst_FD(ir)); + break; + + case swxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(stores); + + SIFROMREG(val, MIPSInst_FS(ir)); + if (!access_ok(VERIFY_WRITE, va, sizeof(u32))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + break; + + case madd_s_op: + handler = fpemu_sp_madd; + goto scoptop; + case msub_s_op: + handler = fpemu_sp_msub; + goto scoptop; + case nmadd_s_op: + handler = fpemu_sp_nmadd; + goto scoptop; + case nmsub_s_op: + handler = fpemu_sp_nmsub; + goto scoptop; + + scoptop: + SPFROMREG(fr, MIPSInst_FR(ir)); + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + fd = (*handler) (fr, fs, ft); + SPTOREG(fd, MIPSInst_FD(ir)); + + copcsr: + if (ieee754_cxtest(IEEE754_INEXACT)) { + MIPS_FPU_EMU_INC_STATS(ieee754_inexact); + rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S; + } + if (ieee754_cxtest(IEEE754_UNDERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_underflow); + rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S; + } + if (ieee754_cxtest(IEEE754_OVERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_overflow); + rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S; + } + if (ieee754_cxtest(IEEE754_INVALID_OPERATION)) { + MIPS_FPU_EMU_INC_STATS(ieee754_invalidop); + rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S; + } + + ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr; + if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { + /*printk ("SIGFPE: FPU csr = %08x\n", + ctx->fcr31); */ + return SIGFPE; + } + + break; + + default: + return SIGILL; + } + break; + } + + case d_fmt:{ /* 1 */ + union ieee754dp(*handler) (union ieee754dp, union ieee754dp, union ieee754dp); + union ieee754dp fd, fr, fs, ft; + u64 __user *va; + u64 val; + + switch (MIPSInst_FUNC(ir)) { + case ldxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(loads); + if (!access_ok(VERIFY_READ, va, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + DITOREG(val, MIPSInst_FD(ir)); + break; + + case sdxc1_op: + va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + + xcp->regs[MIPSInst_FT(ir)]); + + MIPS_FPU_EMU_INC_STATS(stores); + DIFROMREG(val, MIPSInst_FS(ir)); + if (!access_ok(VERIFY_WRITE, va, sizeof(u64))) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGBUS; + } + if (__put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } + break; + + case madd_d_op: + handler = fpemu_dp_madd; + goto dcoptop; + case msub_d_op: + handler = fpemu_dp_msub; + goto dcoptop; + case nmadd_d_op: + handler = fpemu_dp_nmadd; + goto dcoptop; + case nmsub_d_op: + handler = fpemu_dp_nmsub; + goto dcoptop; + + dcoptop: + DPFROMREG(fr, MIPSInst_FR(ir)); + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + fd = (*handler) (fr, fs, ft); + DPTOREG(fd, MIPSInst_FD(ir)); + goto copcsr; + + default: + return SIGILL; + } + break; + } + + case 0x3: + if (MIPSInst_FUNC(ir) != pfetch_op) + return SIGILL; + + /* ignore prefx operation */ + break; + + default: + return SIGILL; + } + + return 0; +} + + + +/* + * Emulate a single COP1 arithmetic instruction. + */ +static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + mips_instruction ir) +{ + int rfmt; /* resulting format */ + unsigned rcsr = 0; /* resulting csr */ + unsigned int oldrm; + unsigned int cbit; + unsigned cond; + union { + union ieee754dp d; + union ieee754sp s; + int w; + s64 l; + } rv; /* resulting value */ + u64 bits; + + MIPS_FPU_EMU_INC_STATS(cp1ops); + switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) { + case s_fmt: { /* 0 */ + union { + union ieee754sp(*b) (union ieee754sp, union ieee754sp); + union ieee754sp(*u) (union ieee754sp); + } handler; + union ieee754sp fs, ft; + + switch (MIPSInst_FUNC(ir)) { + /* binary ops */ + case fadd_op: + handler.b = ieee754sp_add; + goto scopbop; + case fsub_op: + handler.b = ieee754sp_sub; + goto scopbop; + case fmul_op: + handler.b = ieee754sp_mul; + goto scopbop; + case fdiv_op: + handler.b = ieee754sp_div; + goto scopbop; + + /* unary ops */ + case fsqrt_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + handler.u = ieee754sp_sqrt; + goto scopuop; + + /* + * Note that on some MIPS IV implementations such as the + * R5000 and R8000 the FSQRT and FRECIP instructions do not + * achieve full IEEE-754 accuracy - however this emulator does. + */ + case frsqrt_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + handler.u = fpemu_sp_rsqrt; + goto scopuop; + + case frecip_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + handler.u = fpemu_sp_recip; + goto scopuop; + + case fmovc_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + cond = fpucondbit[MIPSInst_FT(ir) >> 2]; + if (((ctx->fcr31 & cond) != 0) != + ((MIPSInst_FT(ir) & 1) != 0)) + return 0; + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fmovz_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] != 0) + return 0; + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fmovn_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] == 0) + return 0; + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + + case fabs_op: + handler.u = ieee754sp_abs; + goto scopuop; + + case fneg_op: + handler.u = ieee754sp_neg; + goto scopuop; + + case fmov_op: + /* an easy one */ + SPFROMREG(rv.s, MIPSInst_FS(ir)); + goto copcsr; + + /* binary op on handler */ +scopbop: + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + + rv.s = (*handler.b) (fs, ft); + goto copcsr; +scopuop: + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = (*handler.u) (fs); + goto copcsr; +copcsr: + if (ieee754_cxtest(IEEE754_INEXACT)) { + MIPS_FPU_EMU_INC_STATS(ieee754_inexact); + rcsr |= FPU_CSR_INE_X | FPU_CSR_INE_S; + } + if (ieee754_cxtest(IEEE754_UNDERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_underflow); + rcsr |= FPU_CSR_UDF_X | FPU_CSR_UDF_S; + } + if (ieee754_cxtest(IEEE754_OVERFLOW)) { + MIPS_FPU_EMU_INC_STATS(ieee754_overflow); + rcsr |= FPU_CSR_OVF_X | FPU_CSR_OVF_S; + } + if (ieee754_cxtest(IEEE754_ZERO_DIVIDE)) { + MIPS_FPU_EMU_INC_STATS(ieee754_zerodiv); + rcsr |= FPU_CSR_DIV_X | FPU_CSR_DIV_S; + } + if (ieee754_cxtest(IEEE754_INVALID_OPERATION)) { + MIPS_FPU_EMU_INC_STATS(ieee754_invalidop); + rcsr |= FPU_CSR_INV_X | FPU_CSR_INV_S; + } + break; + + /* unary conv ops */ + case fcvts_op: + return SIGILL; /* not defined */ + + case fcvtd_op: + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fsp(fs); + rfmt = d_fmt; + goto copcsr; + + case fcvtw_op: + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.w = ieee754sp_tint(fs); + rfmt = w_fmt; + goto copcsr; + + case fround_op: + case ftrunc_op: + case fceil_op: + case ffloor_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + oldrm = ieee754_csr.rm; + SPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.w = ieee754sp_tint(fs); + ieee754_csr.rm = oldrm; + rfmt = w_fmt; + goto copcsr; + + case fcvtl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.l = ieee754sp_tlong(fs); + rfmt = l_fmt; + goto copcsr; + + case froundl_op: + case ftruncl_op: + case fceill_op: + case ffloorl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + oldrm = ieee754_csr.rm; + SPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.l = ieee754sp_tlong(fs); + ieee754_csr.rm = oldrm; + rfmt = l_fmt; + goto copcsr; + + default: + if (MIPSInst_FUNC(ir) >= fcmp_op) { + unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op; + union ieee754sp fs, ft; + + SPFROMREG(fs, MIPSInst_FS(ir)); + SPFROMREG(ft, MIPSInst_FT(ir)); + rv.w = ieee754sp_cmp(fs, ft, + cmptab[cmpop & 0x7], cmpop & 0x8); + rfmt = -1; + if ((cmpop & 0x8) && ieee754_cxtest + (IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + + } else + return SIGILL; + break; + } + break; + } + + case d_fmt: { + union ieee754dp fs, ft; + union { + union ieee754dp(*b) (union ieee754dp, union ieee754dp); + union ieee754dp(*u) (union ieee754dp); + } handler; + + switch (MIPSInst_FUNC(ir)) { + /* binary ops */ + case fadd_op: + handler.b = ieee754dp_add; + goto dcopbop; + case fsub_op: + handler.b = ieee754dp_sub; + goto dcopbop; + case fmul_op: + handler.b = ieee754dp_mul; + goto dcopbop; + case fdiv_op: + handler.b = ieee754dp_div; + goto dcopbop; + + /* unary ops */ + case fsqrt_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + handler.u = ieee754dp_sqrt; + goto dcopuop; + /* + * Note that on some MIPS IV implementations such as the + * R5000 and R8000 the FSQRT and FRECIP instructions do not + * achieve full IEEE-754 accuracy - however this emulator does. + */ + case frsqrt_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + handler.u = fpemu_dp_rsqrt; + goto dcopuop; + case frecip_op: + if (!cpu_has_mips_4_5_64_r2_r6) + return SIGILL; + + handler.u = fpemu_dp_recip; + goto dcopuop; + case fmovc_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + cond = fpucondbit[MIPSInst_FT(ir) >> 2]; + if (((ctx->fcr31 & cond) != 0) != + ((MIPSInst_FT(ir) & 1) != 0)) + return 0; + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fmovz_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] != 0) + return 0; + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fmovn_op: + if (!cpu_has_mips_4_5_r) + return SIGILL; + + if (xcp->regs[MIPSInst_FT(ir)] == 0) + return 0; + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fabs_op: + handler.u = ieee754dp_abs; + goto dcopuop; + + case fneg_op: + handler.u = ieee754dp_neg; + goto dcopuop; + + case fmov_op: + /* an easy one */ + DPFROMREG(rv.d, MIPSInst_FS(ir)); + goto copcsr; + + /* binary op on handler */ +dcopbop: + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + + rv.d = (*handler.b) (fs, ft); + goto copcsr; +dcopuop: + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = (*handler.u) (fs); + goto copcsr; + + /* + * unary conv ops + */ + case fcvts_op: + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fdp(fs); + rfmt = s_fmt; + goto copcsr; + + case fcvtd_op: + return SIGILL; /* not defined */ + + case fcvtw_op: + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.w = ieee754dp_tint(fs); /* wrong */ + rfmt = w_fmt; + goto copcsr; + + case fround_op: + case ftrunc_op: + case fceil_op: + case ffloor_op: + if (!cpu_has_mips_2_3_4_5_r) + return SIGILL; + + oldrm = ieee754_csr.rm; + DPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.w = ieee754dp_tint(fs); + ieee754_csr.rm = oldrm; + rfmt = w_fmt; + goto copcsr; + + case fcvtl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + DPFROMREG(fs, MIPSInst_FS(ir)); + rv.l = ieee754dp_tlong(fs); + rfmt = l_fmt; + goto copcsr; + + case froundl_op: + case ftruncl_op: + case fceill_op: + case ffloorl_op: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + oldrm = ieee754_csr.rm; + DPFROMREG(fs, MIPSInst_FS(ir)); + ieee754_csr.rm = MIPSInst_FUNC(ir); + rv.l = ieee754dp_tlong(fs); + ieee754_csr.rm = oldrm; + rfmt = l_fmt; + goto copcsr; + + default: + if (MIPSInst_FUNC(ir) >= fcmp_op) { + unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op; + union ieee754dp fs, ft; + + DPFROMREG(fs, MIPSInst_FS(ir)); + DPFROMREG(ft, MIPSInst_FT(ir)); + rv.w = ieee754dp_cmp(fs, ft, + cmptab[cmpop & 0x7], cmpop & 0x8); + rfmt = -1; + if ((cmpop & 0x8) + && + ieee754_cxtest + (IEEE754_INVALID_OPERATION)) + rcsr = FPU_CSR_INV_X | FPU_CSR_INV_S; + else + goto copcsr; + + } + else { + return SIGILL; + } + break; + } + break; + + case w_fmt: + switch (MIPSInst_FUNC(ir)) { + case fcvts_op: + /* convert word to single precision real */ + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.s = ieee754sp_fint(fs.bits); + rfmt = s_fmt; + goto copcsr; + case fcvtd_op: + /* convert word to double precision real */ + SPFROMREG(fs, MIPSInst_FS(ir)); + rv.d = ieee754dp_fint(fs.bits); + rfmt = d_fmt; + goto copcsr; + default: + return SIGILL; + } + break; + } + + case l_fmt: + + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + DIFROMREG(bits, MIPSInst_FS(ir)); + + switch (MIPSInst_FUNC(ir)) { + case fcvts_op: + /* convert long to single precision real */ + rv.s = ieee754sp_flong(bits); + rfmt = s_fmt; + goto copcsr; + case fcvtd_op: + /* convert long to double precision real */ + rv.d = ieee754dp_flong(bits); + rfmt = d_fmt; + goto copcsr; + default: + return SIGILL; + } + break; + + default: + return SIGILL; + } + + /* + * Update the fpu CSR register for this operation. + * If an exception is required, generate a tidy SIGFPE exception, + * without updating the result register. + * Note: cause exception bits do not accumulate, they are rewritten + * for each op; only the flag/sticky bits accumulate. + */ + ctx->fcr31 = (ctx->fcr31 & ~FPU_CSR_ALL_X) | rcsr; + if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { + /*printk ("SIGFPE: FPU csr = %08x\n",ctx->fcr31); */ + return SIGFPE; + } + + /* + * Now we can safely write the result back to the register file. + */ + switch (rfmt) { + case -1: + + if (cpu_has_mips_4_5_r) + cbit = fpucondbit[MIPSInst_FD(ir) >> 2]; + else + cbit = FPU_CSR_COND; + if (rv.w) + ctx->fcr31 |= cbit; + else + ctx->fcr31 &= ~cbit; + break; + + case d_fmt: + DPTOREG(rv.d, MIPSInst_FD(ir)); + break; + case s_fmt: + SPTOREG(rv.s, MIPSInst_FD(ir)); + break; + case w_fmt: + SITOREG(rv.w, MIPSInst_FD(ir)); + break; + case l_fmt: + if (!cpu_has_mips_3_4_5_64_r2_r6) + return SIGILL; + + DITOREG(rv.l, MIPSInst_FD(ir)); + break; + default: + return SIGILL; + } + + return 0; +} + +int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + int has_fpu, void *__user *fault_addr) +{ + unsigned long oldepc, prevepc; + struct mm_decoded_insn dec_insn; + u16 instr[4]; + u16 *instr_ptr; + int sig = 0; + + oldepc = xcp->cp0_epc; + do { + prevepc = xcp->cp0_epc; + + if (get_isa16_mode(prevepc) && cpu_has_mmips) { + /* + * Get next 2 microMIPS instructions and convert them + * into 32-bit instructions. + */ + if ((get_user(instr[0], (u16 __user *)msk_isa16_mode(xcp->cp0_epc))) || + (get_user(instr[1], (u16 __user *)msk_isa16_mode(xcp->cp0_epc + 2))) || + (get_user(instr[2], (u16 __user *)msk_isa16_mode(xcp->cp0_epc + 4))) || + (get_user(instr[3], (u16 __user *)msk_isa16_mode(xcp->cp0_epc + 6)))) { + MIPS_FPU_EMU_INC_STATS(errors); + return SIGBUS; + } + instr_ptr = instr; + + /* Get first instruction. */ + if (mm_insn_16bit(*instr_ptr)) { + /* Duplicate the half-word. */ + dec_insn.insn = (*instr_ptr << 16) | + (*instr_ptr); + /* 16-bit instruction. */ + dec_insn.pc_inc = 2; + instr_ptr += 1; + } else { + dec_insn.insn = (*instr_ptr << 16) | + *(instr_ptr+1); + /* 32-bit instruction. */ + dec_insn.pc_inc = 4; + instr_ptr += 2; + } + /* Get second instruction. */ + if (mm_insn_16bit(*instr_ptr)) { + /* Duplicate the half-word. */ + dec_insn.next_insn = (*instr_ptr << 16) | + (*instr_ptr); + /* 16-bit instruction. */ + dec_insn.next_pc_inc = 2; + } else { + dec_insn.next_insn = (*instr_ptr << 16) | + *(instr_ptr+1); + /* 32-bit instruction. */ + dec_insn.next_pc_inc = 4; + } + dec_insn.micro_mips_mode = 1; + } else { + if ((get_user(dec_insn.insn, + (mips_instruction __user *) xcp->cp0_epc)) || + (get_user(dec_insn.next_insn, + (mips_instruction __user *)(xcp->cp0_epc+4)))) { + MIPS_FPU_EMU_INC_STATS(errors); + return SIGBUS; + } + dec_insn.pc_inc = 4; + dec_insn.next_pc_inc = 4; + dec_insn.micro_mips_mode = 0; + } + + if ((dec_insn.insn == 0) || + ((dec_insn.pc_inc == 2) && + ((dec_insn.insn & 0xffff) == MM_NOP16))) + xcp->cp0_epc += dec_insn.pc_inc; /* Skip NOPs */ + else { + /* + * The 'ieee754_csr' is an alias of ctx->fcr31. + * No need to copy ctx->fcr31 to ieee754_csr. + */ + sig = cop1Emulate(xcp, ctx, dec_insn, fault_addr); + } + + if (has_fpu) + break; + if (sig) + break; + + cond_resched(); + } while (xcp->cp0_epc > prevepc); + + /* SIGILL indicates a non-fpu instruction */ + if (sig == SIGILL && xcp->cp0_epc != oldepc) + /* but if EPC has advanced, then ignore it */ + sig = 0; + + return sig; +} diff --git a/kernel/arch/mips/math-emu/dp_add.c b/kernel/arch/mips/math-emu/dp_add.c new file mode 100644 index 000000000..8954ef031 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_add.c @@ -0,0 +1,179 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y) +{ + int s; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs == ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return y; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs == ys) + return x; + else + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + + /* FALL THROUGH */ + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* + * Provide guard,round and stick bit space. + */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * Have to shift y fraction right to align. + */ + s = xe - ye; + ym = XDPSRS(ym, s); + ye += s; + } else if (ye > xe) { + /* + * Have to shift x fraction right to align. + */ + s = ye - xe; + xm = XDPSRS(xm, s); + xe += s; + } + assert(xe == ye); + assert(xe <= DP_EMAX); + + if (xs == ys) { + /* + * Generate 28 bit result of adding two 27 bit numbers + * leaving result in xm, xs and xe. + */ + xm = xm + ym; + + if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */ + xm = XDPSRS1(xm); + xe++; + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Normalize to rounding precision. + */ + while ((xm >> (DP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754dp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/dp_cmp.c b/kernel/arch/mips/math-emu/dp_cmp.c new file mode 100644 index 000000000..a29880e29 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_cmp.c @@ -0,0 +1,59 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cmp, int sig) +{ + s64 vx; + s64 vy; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + FLUSHXDP; + FLUSHYDP; + ieee754_clearcx(); /* Even clear inexact flag here */ + + if (ieee754_class_nan(xc) || ieee754_class_nan(yc)) { + if (sig || + xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN) + ieee754_setcx(IEEE754_INVALID_OPERATION); + return (cmp & IEEE754_CUN) != 0; + } else { + vx = x.bits; + vy = y.bits; + + if (vx < 0) + vx = -vx ^ DP_SIGN_BIT; + if (vy < 0) + vy = -vy ^ DP_SIGN_BIT; + + if (vx < vy) + return (cmp & IEEE754_CLT) != 0; + else if (vx == vy) + return (cmp & IEEE754_CEQ) != 0; + else + return (cmp & IEEE754_CGT) != 0; + } +} diff --git a/kernel/arch/mips/math-emu/dp_div.c b/kernel/arch/mips/math-emu/dp_div.c new file mode 100644 index 000000000..f4746f7c5 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_div.c @@ -0,0 +1,155 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y) +{ + u64 rm; + int re; + u64 bm; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return ieee754dp_zero(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return ieee754dp_inf(xs ^ ys); + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_ZERO_DIVIDE); + return ieee754dp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ieee754dp_zero(xs == ys ? 0 : 1); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + /* provide rounding space */ + xm <<= 3; + ym <<= 3; + + /* now the dirty work */ + + rm = 0; + re = xe - ye; + + for (bm = DP_MBIT(DP_FBITS + 2); bm; bm >>= 1) { + if (xm >= ym) { + xm -= ym; + rm |= bm; + if (xm == 0) + break; + } + xm <<= 1; + } + + rm <<= 1; + if (xm) + rm |= 1; /* have remainder, set sticky */ + + assert(rm); + + /* + * Normalise rm to rounding precision ? + */ + while ((rm >> (DP_FBITS + 3)) == 0) { + rm <<= 1; + re--; + } + + return ieee754dp_format(xs == ys ? 0 : 1, re, rm); +} diff --git a/kernel/arch/mips/math-emu/dp_fint.c b/kernel/arch/mips/math-emu/dp_fint.c new file mode 100644 index 000000000..10258f0af --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_fint.c @@ -0,0 +1,56 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_fint(int x) +{ + u64 xm; + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754dp_zero(0); + if (x == 1 || x == -1) + return ieee754dp_one(x < 0); + if (x == 10 || x == -10) + return ieee754dp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1 << 31)) + xm = ((unsigned) 1 << 31); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + + /* normalize - result can never be inexact or overflow */ + xe = DP_FBITS; + while ((xm >> DP_FBITS) == 0) { + xm <<= 1; + xe--; + } + return builddp(xs, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); +} diff --git a/kernel/arch/mips/math-emu/dp_flong.c b/kernel/arch/mips/math-emu/dp_flong.c new file mode 100644 index 000000000..a267c2e39 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_flong.c @@ -0,0 +1,65 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_flong(s64 x) +{ + u64 xm; + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754dp_zero(0); + if (x == 1 || x == -1) + return ieee754dp_one(x < 0); + if (x == 10 || x == -10) + return ieee754dp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1ULL << 63)) + xm = (1ULL << 63); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + + /* normalize */ + xe = DP_FBITS + 3; + if (xm >> (DP_FBITS + 1 + 3)) { + /* shunt out overflow bits */ + while (xm >> (DP_FBITS + 1 + 3)) { + XDPSRSX1(); + } + } else { + /* normalize in grs extended double precision */ + while ((xm >> (DP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754dp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/dp_fsp.c b/kernel/arch/mips/math-emu/dp_fsp.c new file mode 100644 index 000000000..57d09ca54 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_fsp.c @@ -0,0 +1,75 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" +#include "ieee754dp.h" + +static inline union ieee754dp ieee754dp_nan_fsp(int xs, u64 xm) +{ + return builddp(xs, DP_EMAX + 1 + DP_EBIAS, + xm << (DP_FBITS - SP_FBITS)); +} + +union ieee754dp ieee754dp_fsp(union ieee754sp x) +{ + COMPXSP; + + EXPLODEXSP; + + ieee754_clearcx(); + + FLUSHXSP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754dp_nanxcpt(ieee754dp_nan_fsp(xs, xm)); + + case IEEE754_CLASS_QNAN: + return ieee754dp_nan_fsp(xs, xm); + + case IEEE754_CLASS_INF: + return ieee754dp_inf(xs); + + case IEEE754_CLASS_ZERO: + return ieee754dp_zero(xs); + + case IEEE754_CLASS_DNORM: + /* normalize */ + while ((xm >> SP_FBITS) == 0) { + xm <<= 1; + xe--; + } + break; + + case IEEE754_CLASS_NORM: + break; + } + + /* + * Can't possibly overflow,underflow, or need rounding + */ + + /* drop the hidden bit */ + xm &= ~SP_HIDDEN_BIT; + + return builddp(xs, xe + DP_EBIAS, + (u64) xm << (DP_FBITS - SP_FBITS)); +} diff --git a/kernel/arch/mips/math-emu/dp_mul.c b/kernel/arch/mips/math-emu/dp_mul.c new file mode 100644 index 000000000..d0901f03f --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_mul.c @@ -0,0 +1,174 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) +{ + int re; + int rs; + u64 rm; + unsigned lxm; + unsigned hxm; + unsigned lym; + unsigned hym; + u64 lrm; + u64 hrm; + u64 t; + u64 at; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return ieee754dp_zero(xs ^ ys); + + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + re = xe + ye; + rs = xs ^ ys; + + /* shunt to top of word */ + xm <<= 64 - (DP_FBITS + 1); + ym <<= 64 - (DP_FBITS + 1); + + /* + * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + */ + + /* 32 * 32 => 64 */ +#define DPXMULT(x, y) ((u64)(x) * (u64)y) + + lxm = xm; + hxm = xm >> 32; + lym = ym; + hym = ym >> 32; + + lrm = DPXMULT(lxm, lym); + hrm = DPXMULT(hxm, hym); + + t = DPXMULT(lxm, hym); + + at = lrm + (t << 32); + hrm += at < lrm; + lrm = at; + + hrm = hrm + (t >> 32); + + t = DPXMULT(hxm, lym); + + at = lrm + (t << 32); + hrm += at < lrm; + lrm = at; + + hrm = hrm + (t >> 32); + + rm = hrm | (lrm != 0); + + /* + * Sticky shift down to normal rounding precision. + */ + if ((s64) rm < 0) { + rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | + ((rm << (DP_FBITS + 1 + 3)) != 0); + re++; + } else { + rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | + ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); + } + assert(rm & (DP_HIDDEN_BIT << 3)); + + return ieee754dp_format(rs, re, rm); +} diff --git a/kernel/arch/mips/math-emu/dp_simple.c b/kernel/arch/mips/math-emu/dp_simple.c new file mode 100644 index 000000000..926d56bf3 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_simple.c @@ -0,0 +1,49 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_neg(union ieee754dp x) +{ + unsigned int oldrm; + union ieee754dp y; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + y = ieee754dp_sub(ieee754dp_zero(0), x); + ieee754_csr.rm = oldrm; + return y; +} + +union ieee754dp ieee754dp_abs(union ieee754dp x) +{ + unsigned int oldrm; + union ieee754dp y; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + if (DPSIGN(x)) + y = ieee754dp_sub(ieee754dp_zero(0), x); + else + y = ieee754dp_add(ieee754dp_zero(0), x); + ieee754_csr.rm = oldrm; + return y; +} diff --git a/kernel/arch/mips/math-emu/dp_sqrt.c b/kernel/arch/mips/math-emu/dp_sqrt.c new file mode 100644 index 000000000..cd5bc0830 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_sqrt.c @@ -0,0 +1,163 @@ +/* IEEE754 floating point arithmetic + * double precision square root + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +static const unsigned table[] = { + 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, + 29598, 36145, 43202, 50740, 58733, 67158, 75992, + 85215, 83599, 71378, 60428, 50647, 41945, 34246, + 27478, 21581, 16499, 12183, 8588, 5674, 3403, + 1742, 661, 130 +}; + +union ieee754dp ieee754dp_sqrt(union ieee754dp x) +{ + struct _ieee754_csr oldcsr; + union ieee754dp y, z, t; + unsigned scalx, yh; + COMPXDP; + + EXPLODEXDP; + ieee754_clearcx(); + FLUSHXDP; + + /* x == INF or NAN? */ + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754dp_nanxcpt(x); + + case IEEE754_CLASS_QNAN: + /* sqrt(Nan) = Nan */ + return x; + + case IEEE754_CLASS_ZERO: + /* sqrt(0) = 0 */ + return x; + + case IEEE754_CLASS_INF: + if (xs) { + /* sqrt(-Inf) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + /* sqrt(+Inf) = Inf */ + return x; + + case IEEE754_CLASS_DNORM: + DPDNORMX; + /* fall through */ + + case IEEE754_CLASS_NORM: + if (xs) { + /* sqrt(-x) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + break; + } + + /* save old csr; switch off INX enable & flag; set RN rounding */ + oldcsr = ieee754_csr; + ieee754_csr.mx &= ~IEEE754_INEXACT; + ieee754_csr.sx &= ~IEEE754_INEXACT; + ieee754_csr.rm = FPU_CSR_RN; + + /* adjust exponent to prevent overflow */ + scalx = 0; + if (xe > 512) { /* x > 2**-512? */ + xe -= 512; /* x = x / 2**512 */ + scalx += 256; + } else if (xe < -512) { /* x < 2**-512? */ + xe += 512; /* x = x * 2**512 */ + scalx -= 256; + } + + y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + + /* magic initial approximation to almost 8 sig. bits */ + yh = y.bits >> 32; + yh = (yh >> 1) + 0x1ff80000; + yh = yh - table[(yh >> 15) & 31]; + y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff); + + /* Heron's rule once with correction to improve to ~18 sig. bits */ + /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */ + t = ieee754dp_div(x, y); + y = ieee754dp_add(y, t); + y.bits -= 0x0010000600000000LL; + y.bits &= 0xffffffff00000000LL; + + /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ + /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ + z = t = ieee754dp_mul(y, y); + t.bexp += 0x001; + t = ieee754dp_add(t, z); + z = ieee754dp_mul(ieee754dp_sub(x, z), y); + + /* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */ + t = ieee754dp_div(z, ieee754dp_add(t, x)); + t.bexp += 0x001; + y = ieee754dp_add(y, t); + + /* twiddle last bit to force y correctly rounded */ + + /* set RZ, clear INEX flag */ + ieee754_csr.rm = FPU_CSR_RZ; + ieee754_csr.sx &= ~IEEE754_INEXACT; + + /* t=x/y; ...chopped quotient, possibly inexact */ + t = ieee754dp_div(x, y); + + if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) { + + if (!(ieee754_csr.sx & IEEE754_INEXACT)) + /* t = t-ulp */ + t.bits -= 1; + + /* add inexact to result status */ + oldcsr.cx |= IEEE754_INEXACT; + oldcsr.sx |= IEEE754_INEXACT; + + switch (oldcsr.rm) { + case FPU_CSR_RU: + y.bits += 1; + /* drop through */ + case FPU_CSR_RN: + t.bits += 1; + break; + } + + /* y=y+t; ...chopped sum */ + y = ieee754dp_add(y, t); + + /* adjust scalx for correctly rounded sqrt(x) */ + scalx -= 1; + } + + /* py[n0]=py[n0]+scalx; ...scale back y */ + y.bexp += scalx; + + /* restore rounding mode, possibly set inexact */ + ieee754_csr = oldcsr; + + return y; +} diff --git a/kernel/arch/mips/math-emu/dp_sub.c b/kernel/arch/mips/math-emu/dp_sub.c new file mode 100644 index 000000000..fc17a781b --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_sub.c @@ -0,0 +1,185 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y) +{ + int s; + + COMPXDP; + COMPYDP; + + EXPLODEXDP; + EXPLODEYDP; + + ieee754_clearcx(); + + FLUSHXDP; + FLUSHYDP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754dp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754dp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs != ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + return ieee754dp_inf(ys ^ 1); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs != ys) + return x; + else + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + /* quick fix up */ + DPSIGN(y) ^= 1; + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + DPDNORMX; + /* FALL THROUGH */ + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + /* normalize ym,ye */ + DPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + /* normalize xm,xe */ + DPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* flip sign of y and handle as add */ + ys ^= 1; + + assert(xm & DP_HIDDEN_BIT); + assert(ym & DP_HIDDEN_BIT); + + + /* provide guard,round and stick bit dpace */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * Have to shift y fraction right to align + */ + s = xe - ye; + ym = XDPSRS(ym, s); + ye += s; + } else if (ye > xe) { + /* + * Have to shift x fraction right to align + */ + s = ye - xe; + xm = XDPSRS(xm, s); + xe += s; + } + assert(xe == ye); + assert(xe <= DP_EMAX); + + if (xs == ys) { + /* generate 28 bit result of adding two 27 bit numbers + */ + xm = xm + ym; + + if (xm >> (DP_FBITS + 1 + 3)) { /* carry out */ + xm = XDPSRS1(xm); /* shift preserving sticky */ + xe++; + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) { + if (ieee754_csr.rm == FPU_CSR_RD) + return ieee754dp_zero(1); /* round negative inf. => sign = -1 */ + else + return ieee754dp_zero(0); /* other round modes => sign = 1 */ + } + + /* normalize to rounding precision + */ + while ((xm >> (DP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754dp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/dp_tint.c b/kernel/arch/mips/math-emu/dp_tint.c new file mode 100644 index 000000000..6ffc336c5 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_tint.c @@ -0,0 +1,105 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +int ieee754dp_tint(union ieee754dp x) +{ + u64 residue; + int round; + int sticky; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe > 31) { + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + } + /* oh gawd */ + if (xe > DP_FBITS) { + xm <<= xe - DP_FBITS; + } else if (xe < DP_FBITS) { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (64 - DP_FBITS + xe); + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + /* Note: At this point upper 32 bits of xm are guaranteed + to be zero */ + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + /* look for valid corner case 0x80000000 */ + if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} diff --git a/kernel/arch/mips/math-emu/dp_tlong.c b/kernel/arch/mips/math-emu/dp_tlong.c new file mode 100644 index 000000000..9cdc145b7 --- /dev/null +++ b/kernel/arch/mips/math-emu/dp_tlong.c @@ -0,0 +1,109 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754dp.h" + +s64 ieee754dp_tlong(union ieee754dp x) +{ + u64 residue; + int round; + int sticky; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe >= 63) { + /* look for valid corner case */ + if (xe == 63 && xs && xm == DP_HIDDEN_BIT) + return -0x8000000000000000LL; + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + } + /* oh gawd */ + if (xe > DP_FBITS) { + xm <<= xe - DP_FBITS; + } else if (xe < DP_FBITS) { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + /* Shifting a u64 64 times does not work, + * so we do it in two steps. Be aware that xe + * may be -1 */ + residue = xm << (xe + 1); + residue <<= 63 - DP_FBITS; + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + if ((xm >> 63) != 0) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} diff --git a/kernel/arch/mips/math-emu/dsemul.c b/kernel/arch/mips/math-emu/dsemul.c new file mode 100644 index 000000000..e0b5cc27d --- /dev/null +++ b/kernel/arch/mips/math-emu/dsemul.c @@ -0,0 +1,163 @@ +#include <asm/branch.h> +#include <asm/cacheflush.h> +#include <asm/fpu_emulator.h> +#include <asm/inst.h> +#include <asm/mipsregs.h> +#include <asm/uaccess.h> + +#include "ieee754.h" + +/* + * Emulate the arbritrary instruction ir at xcp->cp0_epc. Required when + * we have to emulate the instruction in a COP1 branch delay slot. Do + * not change cp0_epc due to the instruction + * + * According to the spec: + * 1) it shouldn't be a branch :-) + * 2) it can be a COP instruction :-( + * 3) if we are tring to run a protected memory space we must take + * special care on memory access instructions :-( + */ + +/* + * "Trampoline" return routine to catch exception following + * execution of delay-slot instruction execution. + */ + +struct emuframe { + mips_instruction emul; + mips_instruction badinst; + mips_instruction cookie; + unsigned long epc; +}; + +int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) +{ + extern asmlinkage void handle_dsemulret(void); + struct emuframe __user *fr; + int err; + + if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) || + (ir == 0)) { + /* NOP is easy */ + regs->cp0_epc = cpc; + clear_delay_slot(regs); + return 0; + } + + pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc); + + /* + * The strategy is to push the instruction onto the user stack + * and put a trap after it which we can catch and jump to + * the required address any alternative apart from full + * instruction emulation!!. + * + * Algorithmics used a system call instruction, and + * borrowed that vector. MIPS/Linux version is a bit + * more heavyweight in the interests of portability and + * multiprocessor support. For Linux we generate a + * an unaligned access and force an address error exception. + * + * For embedded systems (stand-alone) we prefer to use a + * non-existing CP1 instruction. This prevents us from emulating + * branches, but gives us a cleaner interface to the exception + * handler (single entry point). + */ + + /* Ensure that the two instructions are in the same cache line */ + fr = (struct emuframe __user *) + ((regs->regs[29] - sizeof(struct emuframe)) & ~0x7); + + /* Verify that the stack pointer is not competely insane */ + if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe)))) + return SIGBUS; + + if (get_isa16_mode(regs->cp0_epc)) { + err = __put_user(ir >> 16, (u16 __user *)(&fr->emul)); + err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2)); + err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst)); + err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2)); + } else { + err = __put_user(ir, &fr->emul); + err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst); + } + + err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie); + err |= __put_user(cpc, &fr->epc); + + if (unlikely(err)) { + MIPS_FPU_EMU_INC_STATS(errors); + return SIGBUS; + } + + regs->cp0_epc = ((unsigned long) &fr->emul) | + get_isa16_mode(regs->cp0_epc); + + flush_cache_sigtramp((unsigned long)&fr->emul); + + return 0; +} + +int do_dsemulret(struct pt_regs *xcp) +{ + struct emuframe __user *fr; + unsigned long epc; + u32 insn, cookie; + int err = 0; + u16 instr[2]; + + fr = (struct emuframe __user *) + (msk_isa16_mode(xcp->cp0_epc) - sizeof(mips_instruction)); + + /* + * If we can't even access the area, something is very wrong, but we'll + * leave that to the default handling + */ + if (!access_ok(VERIFY_READ, fr, sizeof(struct emuframe))) + return 0; + + /* + * Do some sanity checking on the stackframe: + * + * - Is the instruction pointed to by the EPC an BREAK_MATH? + * - Is the following memory word the BD_COOKIE? + */ + if (get_isa16_mode(xcp->cp0_epc)) { + err = __get_user(instr[0], (u16 __user *)(&fr->badinst)); + err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2)); + insn = (instr[0] << 16) | instr[1]; + } else { + err = __get_user(insn, &fr->badinst); + } + err |= __get_user(cookie, &fr->cookie); + + if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) { + MIPS_FPU_EMU_INC_STATS(errors); + return 0; + } + + /* + * At this point, we are satisfied that it's a BD emulation trap. Yes, + * a user might have deliberately put two malformed and useless + * instructions in a row in his program, in which case he's in for a + * nasty surprise - the next instruction will be treated as a + * continuation address! Alas, this seems to be the only way that we + * can handle signals, recursion, and longjmps() in the context of + * emulating the branch delay instruction. + */ + + pr_debug("dsemulret\n"); + + if (__get_user(epc, &fr->epc)) { /* Saved EPC */ + /* This is not a good situation to be in */ + force_sig(SIGBUS, current); + + return 0; + } + + /* Set EPC to return to post-branch instruction */ + xcp->cp0_epc = epc; + MIPS_FPU_EMU_INC_STATS(ds_emul); + return 1; +} diff --git a/kernel/arch/mips/math-emu/ieee754.c b/kernel/arch/mips/math-emu/ieee754.c new file mode 100644 index 000000000..8e97acbbe --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754.c @@ -0,0 +1,94 @@ +/* ieee754 floating point arithmetic + * single and double precision + * + * BUGS + * not much dp done + * doesn't generate IEEE754_INEXACT + * + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/compiler.h> + +#include "ieee754.h" +#include "ieee754sp.h" +#include "ieee754dp.h" + +/* + * Special constants + */ + +/* + * Older GCC requires the inner braces for initialization of union ieee754dp's + * anonymous struct member. Without an error will result. + */ +#define xPCNST(s, b, m, ebias) \ +{ \ + { \ + .sign = (s), \ + .bexp = (b) + ebias, \ + .mant = (m) \ + } \ +} + +#define DPCNST(s, b, m) \ + xPCNST(s, b, m, DP_EBIAS) + +const union ieee754dp __ieee754dp_spcvals[] = { + DPCNST(0, DP_EMIN - 1, 0x0000000000000ULL), /* + zero */ + DPCNST(1, DP_EMIN - 1, 0x0000000000000ULL), /* - zero */ + DPCNST(0, 0, 0x0000000000000ULL), /* + 1.0 */ + DPCNST(1, 0, 0x0000000000000ULL), /* - 1.0 */ + DPCNST(0, 3, 0x4000000000000ULL), /* + 10.0 */ + DPCNST(1, 3, 0x4000000000000ULL), /* - 10.0 */ + DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL), /* + infinity */ + DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL), /* - infinity */ + DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + indef quiet Nan */ + DPCNST(0, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* + max */ + DPCNST(1, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* - max */ + DPCNST(0, DP_EMIN, 0x0000000000000ULL), /* + min normal */ + DPCNST(1, DP_EMIN, 0x0000000000000ULL), /* - min normal */ + DPCNST(0, DP_EMIN - 1, 0x0000000000001ULL), /* + min denormal */ + DPCNST(1, DP_EMIN - 1, 0x0000000000001ULL), /* - min denormal */ + DPCNST(0, 31, 0x0000000000000ULL), /* + 1.0e31 */ + DPCNST(0, 63, 0x0000000000000ULL), /* + 1.0e63 */ +}; + +#define SPCNST(s, b, m) \ + xPCNST(s, b, m, SP_EBIAS) + +const union ieee754sp __ieee754sp_spcvals[] = { + SPCNST(0, SP_EMIN - 1, 0x000000), /* + zero */ + SPCNST(1, SP_EMIN - 1, 0x000000), /* - zero */ + SPCNST(0, 0, 0x000000), /* + 1.0 */ + SPCNST(1, 0, 0x000000), /* - 1.0 */ + SPCNST(0, 3, 0x200000), /* + 10.0 */ + SPCNST(1, 3, 0x200000), /* - 10.0 */ + SPCNST(0, SP_EMAX + 1, 0x000000), /* + infinity */ + SPCNST(1, SP_EMAX + 1, 0x000000), /* - infinity */ + SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef quiet Nan */ + SPCNST(0, SP_EMAX, 0x7FFFFF), /* + max normal */ + SPCNST(1, SP_EMAX, 0x7FFFFF), /* - max normal */ + SPCNST(0, SP_EMIN, 0x000000), /* + min normal */ + SPCNST(1, SP_EMIN, 0x000000), /* - min normal */ + SPCNST(0, SP_EMIN - 1, 0x000001), /* + min denormal */ + SPCNST(1, SP_EMIN - 1, 0x000001), /* - min denormal */ + SPCNST(0, 31, 0x000000), /* + 1.0e31 */ + SPCNST(0, 63, 0x000000), /* + 1.0e63 */ +}; diff --git a/kernel/arch/mips/math-emu/ieee754.h b/kernel/arch/mips/math-emu/ieee754.h new file mode 100644 index 000000000..a5ca108ce --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754.h @@ -0,0 +1,269 @@ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Nov 7, 2000 + * Modification to allow integration with Linux kernel + * + * Kevin D. Kissell, kevink@mips.com and Carsten Langgard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. + */ +#ifndef __ARCH_MIPS_MATH_EMU_IEEE754_H +#define __ARCH_MIPS_MATH_EMU_IEEE754_H + +#include <linux/compiler.h> +#include <asm/byteorder.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <asm/bitfield.h> + +union ieee754dp { + struct { + __BITFIELD_FIELD(unsigned int sign:1, + __BITFIELD_FIELD(unsigned int bexp:11, + __BITFIELD_FIELD(u64 mant:52, + ;))) + }; + u64 bits; +}; + +union ieee754sp { + struct { + __BITFIELD_FIELD(unsigned sign:1, + __BITFIELD_FIELD(unsigned bexp:8, + __BITFIELD_FIELD(unsigned mant:23, + ;))) + }; + u32 bits; +}; + +/* + * single precision (often aka float) +*/ +int ieee754sp_class(union ieee754sp x); + +union ieee754sp ieee754sp_abs(union ieee754sp x); +union ieee754sp ieee754sp_neg(union ieee754sp x); + +union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y); + +union ieee754sp ieee754sp_fint(int x); +union ieee754sp ieee754sp_flong(s64 x); +union ieee754sp ieee754sp_fdp(union ieee754dp x); + +int ieee754sp_tint(union ieee754sp x); +s64 ieee754sp_tlong(union ieee754sp x); + +int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cop, int sig); + +union ieee754sp ieee754sp_sqrt(union ieee754sp x); + +/* + * double precision (often aka double) +*/ +int ieee754dp_class(union ieee754dp x); + +union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y); + +union ieee754dp ieee754dp_abs(union ieee754dp x); +union ieee754dp ieee754dp_neg(union ieee754dp x); + +union ieee754dp ieee754dp_fint(int x); +union ieee754dp ieee754dp_flong(s64 x); +union ieee754dp ieee754dp_fsp(union ieee754sp x); + +int ieee754dp_tint(union ieee754dp x); +s64 ieee754dp_tlong(union ieee754dp x); + +int ieee754dp_cmp(union ieee754dp x, union ieee754dp y, int cop, int sig); + +union ieee754dp ieee754dp_sqrt(union ieee754dp x); + + + +/* 5 types of floating point number +*/ +enum { + IEEE754_CLASS_NORM = 0x00, + IEEE754_CLASS_ZERO = 0x01, + IEEE754_CLASS_DNORM = 0x02, + IEEE754_CLASS_INF = 0x03, + IEEE754_CLASS_SNAN = 0x04, + IEEE754_CLASS_QNAN = 0x05, +}; + +/* exception numbers */ +#define IEEE754_INEXACT 0x01 +#define IEEE754_UNDERFLOW 0x02 +#define IEEE754_OVERFLOW 0x04 +#define IEEE754_ZERO_DIVIDE 0x08 +#define IEEE754_INVALID_OPERATION 0x10 + +/* cmp operators +*/ +#define IEEE754_CLT 0x01 +#define IEEE754_CEQ 0x02 +#define IEEE754_CGT 0x04 +#define IEEE754_CUN 0x08 + +/* + * The control status register + */ +struct _ieee754_csr { + __BITFIELD_FIELD(unsigned fcc:7, /* condition[7:1] */ + __BITFIELD_FIELD(unsigned nod:1, /* set 1 for no denormals */ + __BITFIELD_FIELD(unsigned c:1, /* condition[0] */ + __BITFIELD_FIELD(unsigned pad0:3, + __BITFIELD_FIELD(unsigned abs2008:1, /* IEEE 754-2008 ABS/NEG.fmt */ + __BITFIELD_FIELD(unsigned nan2008:1, /* IEEE 754-2008 NaN mode */ + __BITFIELD_FIELD(unsigned cx:6, /* exceptions this operation */ + __BITFIELD_FIELD(unsigned mx:5, /* exception enable mask */ + __BITFIELD_FIELD(unsigned sx:5, /* exceptions total */ + __BITFIELD_FIELD(unsigned rm:2, /* current rounding mode */ + ;)))))))))) +}; +#define ieee754_csr (*(struct _ieee754_csr *)(¤t->thread.fpu.fcr31)) + +static inline unsigned ieee754_getrm(void) +{ + return (ieee754_csr.rm); +} +static inline unsigned ieee754_setrm(unsigned rm) +{ + return (ieee754_csr.rm = rm); +} + +/* + * get current exceptions + */ +static inline unsigned ieee754_getcx(void) +{ + return (ieee754_csr.cx); +} + +/* test for current exception condition + */ +static inline int ieee754_cxtest(unsigned n) +{ + return (ieee754_csr.cx & n); +} + +/* + * get sticky exceptions + */ +static inline unsigned ieee754_getsx(void) +{ + return (ieee754_csr.sx); +} + +/* clear sticky conditions +*/ +static inline unsigned ieee754_clrsx(void) +{ + return (ieee754_csr.sx = 0); +} + +/* test for sticky exception condition + */ +static inline int ieee754_sxtest(unsigned n) +{ + return (ieee754_csr.sx & n); +} + +/* debugging */ +union ieee754sp ieee754sp_dump(char *s, union ieee754sp x); +union ieee754dp ieee754dp_dump(char *s, union ieee754dp x); + +#define IEEE754_SPCVAL_PZERO 0 /* +0.0 */ +#define IEEE754_SPCVAL_NZERO 1 /* -0.0 */ +#define IEEE754_SPCVAL_PONE 2 /* +1.0 */ +#define IEEE754_SPCVAL_NONE 3 /* -1.0 */ +#define IEEE754_SPCVAL_PTEN 4 /* +10.0 */ +#define IEEE754_SPCVAL_NTEN 5 /* -10.0 */ +#define IEEE754_SPCVAL_PINFINITY 6 /* +inf */ +#define IEEE754_SPCVAL_NINFINITY 7 /* -inf */ +#define IEEE754_SPCVAL_INDEF 8 /* quiet NaN */ +#define IEEE754_SPCVAL_PMAX 9 /* +max norm */ +#define IEEE754_SPCVAL_NMAX 10 /* -max norm */ +#define IEEE754_SPCVAL_PMIN 11 /* +min norm */ +#define IEEE754_SPCVAL_NMIN 12 /* -min norm */ +#define IEEE754_SPCVAL_PMIND 13 /* +min denorm */ +#define IEEE754_SPCVAL_NMIND 14 /* -min denorm */ +#define IEEE754_SPCVAL_P1E31 15 /* + 1.0e31 */ +#define IEEE754_SPCVAL_P1E63 16 /* + 1.0e63 */ + +extern const union ieee754dp __ieee754dp_spcvals[]; +extern const union ieee754sp __ieee754sp_spcvals[]; +#define ieee754dp_spcvals ((const union ieee754dp *)__ieee754dp_spcvals) +#define ieee754sp_spcvals ((const union ieee754sp *)__ieee754sp_spcvals) + +/* + * Return infinity with given sign + */ +#define ieee754dp_inf(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PINFINITY+(sn)]) +#define ieee754dp_zero(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)]) +#define ieee754dp_one(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)]) +#define ieee754dp_ten(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)]) +#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF]) +#define ieee754dp_max(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)]) +#define ieee754dp_min(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)]) +#define ieee754dp_mind(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)]) +#define ieee754dp_1e31() (ieee754dp_spcvals[IEEE754_SPCVAL_P1E31]) +#define ieee754dp_1e63() (ieee754dp_spcvals[IEEE754_SPCVAL_P1E63]) + +#define ieee754sp_inf(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PINFINITY+(sn)]) +#define ieee754sp_zero(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)]) +#define ieee754sp_one(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)]) +#define ieee754sp_ten(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)]) +#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF]) +#define ieee754sp_max(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)]) +#define ieee754sp_min(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)]) +#define ieee754sp_mind(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)]) +#define ieee754sp_1e31() (ieee754sp_spcvals[IEEE754_SPCVAL_P1E31]) +#define ieee754sp_1e63() (ieee754sp_spcvals[IEEE754_SPCVAL_P1E63]) + +/* + * Indefinite integer value + */ +static inline int ieee754si_indef(void) +{ + return INT_MAX; +} + +static inline s64 ieee754di_indef(void) +{ + return S64_MAX; +} + +/* result types for xctx.rt */ +#define IEEE754_RT_SP 0 +#define IEEE754_RT_DP 1 +#define IEEE754_RT_XP 2 +#define IEEE754_RT_SI 3 +#define IEEE754_RT_DI 4 + +/* compat */ +#define ieee754dp_fix(x) ieee754dp_tint(x) +#define ieee754sp_fix(x) ieee754sp_tint(x) + +#endif /* __ARCH_MIPS_MATH_EMU_IEEE754_H */ diff --git a/kernel/arch/mips/math-emu/ieee754d.c b/kernel/arch/mips/math-emu/ieee754d.c new file mode 100644 index 000000000..a04e8a7e5 --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754d.c @@ -0,0 +1,111 @@ +/* + * Some debug functions + * + * MIPS floating point support + * + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Nov 7, 2000 + * Modified to build and operate in Linux kernel environment. + * + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/printk.h> +#include "ieee754.h" +#include "ieee754sp.h" +#include "ieee754dp.h" + +union ieee754dp ieee754dp_dump(char *m, union ieee754dp x) +{ + int i; + + printk("%s", m); + printk("<%08x,%08x>\n", (unsigned) (x.bits >> 32), + (unsigned) x.bits); + printk("\t="); + switch (ieee754dp_class(x)) { + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_SNAN: + printk("Nan %c", DPSIGN(x) ? '-' : '+'); + for (i = DP_FBITS - 1; i >= 0; i--) + printk("%c", DPMANT(x) & DP_MBIT(i) ? '1' : '0'); + break; + case IEEE754_CLASS_INF: + printk("%cInfinity", DPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_ZERO: + printk("%cZero", DPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_DNORM: + printk("%c0.", DPSIGN(x) ? '-' : '+'); + for (i = DP_FBITS - 1; i >= 0; i--) + printk("%c", DPMANT(x) & DP_MBIT(i) ? '1' : '0'); + printk("e%d", DPBEXP(x) - DP_EBIAS); + break; + case IEEE754_CLASS_NORM: + printk("%c1.", DPSIGN(x) ? '-' : '+'); + for (i = DP_FBITS - 1; i >= 0; i--) + printk("%c", DPMANT(x) & DP_MBIT(i) ? '1' : '0'); + printk("e%d", DPBEXP(x) - DP_EBIAS); + break; + default: + printk("Illegal/Unknown IEEE754 value class"); + } + printk("\n"); + return x; +} + +union ieee754sp ieee754sp_dump(char *m, union ieee754sp x) +{ + int i; + + printk("%s=", m); + printk("<%08x>\n", (unsigned) x.bits); + printk("\t="); + switch (ieee754sp_class(x)) { + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_SNAN: + printk("Nan %c", SPSIGN(x) ? '-' : '+'); + for (i = SP_FBITS - 1; i >= 0; i--) + printk("%c", SPMANT(x) & SP_MBIT(i) ? '1' : '0'); + break; + case IEEE754_CLASS_INF: + printk("%cInfinity", SPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_ZERO: + printk("%cZero", SPSIGN(x) ? '-' : '+'); + break; + case IEEE754_CLASS_DNORM: + printk("%c0.", SPSIGN(x) ? '-' : '+'); + for (i = SP_FBITS - 1; i >= 0; i--) + printk("%c", SPMANT(x) & SP_MBIT(i) ? '1' : '0'); + printk("e%d", SPBEXP(x) - SP_EBIAS); + break; + case IEEE754_CLASS_NORM: + printk("%c1.", SPSIGN(x) ? '-' : '+'); + for (i = SP_FBITS - 1; i >= 0; i--) + printk("%c", SPMANT(x) & SP_MBIT(i) ? '1' : '0'); + printk("e%d", SPBEXP(x) - SP_EBIAS); + break; + default: + printk("Illegal/Unknown IEEE754 value class"); + } + printk("\n"); + return x; +} diff --git a/kernel/arch/mips/math-emu/ieee754dp.c b/kernel/arch/mips/math-emu/ieee754dp.c new file mode 100644 index 000000000..522d843f2 --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754dp.c @@ -0,0 +1,198 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/compiler.h> + +#include "ieee754dp.h" + +int ieee754dp_class(union ieee754dp x) +{ + COMPXDP; + EXPLODEXDP; + return xc; +} + +static inline int ieee754dp_isnan(union ieee754dp x) +{ + return ieee754_class_nan(ieee754dp_class(x)); +} + +static inline int ieee754dp_issnan(union ieee754dp x) +{ + assert(ieee754dp_isnan(x)); + return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1); +} + + +/* + * Raise the Invalid Operation IEEE 754 exception + * and convert the signaling NaN supplied to a quiet NaN. + */ +union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r) +{ + assert(ieee754dp_issnan(r)); + + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); +} + +static u64 ieee754dp_get_rounding(int sn, u64 xm) +{ + /* inexact must round of 3 bits + */ + if (xm & (DP_MBIT(3) - 1)) { + switch (ieee754_csr.rm) { + case FPU_CSR_RZ: + break; + case FPU_CSR_RN: + xm += 0x3 + ((xm >> 3) & 1); + /* xm += (xm&0x8)?0x4:0x3 */ + break; + case FPU_CSR_RU: /* toward +Infinity */ + if (!sn) /* ?? */ + xm += 0x8; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if (sn) /* ?? */ + xm += 0x8; + break; + } + } + return xm; +} + + +/* generate a normal/denormal number with over,under handling + * sn is sign + * xe is an unbiased exponent + * xm is 3bit extended precision value. + */ +union ieee754dp ieee754dp_format(int sn, int xe, u64 xm) +{ + assert(xm); /* we don't gen exact zeros (probably should) */ + + assert((xm >> (DP_FBITS + 1 + 3)) == 0); /* no execess */ + assert(xm & (DP_HIDDEN_BIT << 3)); + + if (xe < DP_EMIN) { + /* strip lower bits */ + int es = DP_EMIN - xe; + + if (ieee754_csr.nod) { + ieee754_setcx(IEEE754_UNDERFLOW); + ieee754_setcx(IEEE754_INEXACT); + + switch(ieee754_csr.rm) { + case FPU_CSR_RN: + case FPU_CSR_RZ: + return ieee754dp_zero(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754dp_min(0); + else + return ieee754dp_zero(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754dp_zero(0); + else + return ieee754dp_min(1); + } + } + + if (xe == DP_EMIN - 1 && + ieee754dp_get_rounding(sn, xm) >> (DP_FBITS + 1 + 3)) + { + /* Not tiny after rounding */ + ieee754_setcx(IEEE754_INEXACT); + xm = ieee754dp_get_rounding(sn, xm); + xm >>= 1; + /* Clear grs bits */ + xm &= ~(DP_MBIT(3) - 1); + xe++; + } + else { + /* sticky right shift es bits + */ + xm = XDPSRS(xm, es); + xe += es; + assert((xm & (DP_HIDDEN_BIT << 3)) == 0); + assert(xe == DP_EMIN); + } + } + if (xm & (DP_MBIT(3) - 1)) { + ieee754_setcx(IEEE754_INEXACT); + if ((xm & (DP_HIDDEN_BIT << 3)) == 0) { + ieee754_setcx(IEEE754_UNDERFLOW); + } + + /* inexact must round of 3 bits + */ + xm = ieee754dp_get_rounding(sn, xm); + /* adjust exponent for rounding add overflowing + */ + if (xm >> (DP_FBITS + 3 + 1)) { + /* add causes mantissa overflow */ + xm >>= 1; + xe++; + } + } + /* strip grs bits */ + xm >>= 3; + + assert((xm >> (DP_FBITS + 1)) == 0); /* no execess */ + assert(xe >= DP_EMIN); + + if (xe > DP_EMAX) { + ieee754_setcx(IEEE754_OVERFLOW); + ieee754_setcx(IEEE754_INEXACT); + /* -O can be table indexed by (rm,sn) */ + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + return ieee754dp_inf(sn); + case FPU_CSR_RZ: + return ieee754dp_max(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754dp_inf(0); + else + return ieee754dp_max(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754dp_max(0); + else + return ieee754dp_inf(1); + } + } + /* gen norm/denorm/zero */ + + if ((xm & DP_HIDDEN_BIT) == 0) { + /* we underflow (tiny/zero) */ + assert(xe == DP_EMIN); + if (ieee754_csr.mx & IEEE754_UNDERFLOW) + ieee754_setcx(IEEE754_UNDERFLOW); + return builddp(sn, DP_EMIN - 1 + DP_EBIAS, xm); + } else { + assert((xm >> (DP_FBITS + 1)) == 0); /* no execess */ + assert(xm & DP_HIDDEN_BIT); + + return builddp(sn, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + } +} diff --git a/kernel/arch/mips/math-emu/ieee754dp.h b/kernel/arch/mips/math-emu/ieee754dp.h new file mode 100644 index 000000000..e2babd98f --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754dp.h @@ -0,0 +1,81 @@ +/* + * IEEE754 floating point + * double precision internal header file + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/compiler.h> + +#include "ieee754int.h" + +#define assert(expr) ((void)0) + +#define DP_EBIAS 1023 +#define DP_EMIN (-1022) +#define DP_EMAX 1023 +#define DP_FBITS 52 +#define DP_MBITS 52 + +#define DP_MBIT(x) ((u64)1 << (x)) +#define DP_HIDDEN_BIT DP_MBIT(DP_FBITS) +#define DP_SIGN_BIT DP_MBIT(63) + +#define DPSIGN(dp) (dp.sign) +#define DPBEXP(dp) (dp.bexp) +#define DPMANT(dp) (dp.mant) + +static inline int ieee754dp_finite(union ieee754dp x) +{ + return DPBEXP(x) != DP_EMAX + 1 + DP_EBIAS; +} + +/* 3bit extended double precision sticky right shift */ +#define XDPSRS(v,rs) \ + ((rs > (DP_FBITS+3))?1:((v) >> (rs)) | ((v) << (64-(rs)) != 0)) + +#define XDPSRSX1() \ + (xe++, (xm = (xm >> 1) | (xm & 1))) + +#define XDPSRS1(v) \ + (((v) >> 1) | ((v) & 1)) + +/* convert denormal to normalized with extended exponent */ +#define DPDNORMx(m,e) \ + while ((m >> DP_FBITS) == 0) { m <<= 1; e--; } +#define DPDNORMX DPDNORMx(xm, xe) +#define DPDNORMY DPDNORMx(ym, ye) + +static inline union ieee754dp builddp(int s, int bx, u64 m) +{ + union ieee754dp r; + + assert((s) == 0 || (s) == 1); + assert((bx) >= DP_EMIN - 1 + DP_EBIAS + && (bx) <= DP_EMAX + 1 + DP_EBIAS); + assert(((m) >> DP_FBITS) == 0); + + r.sign = s; + r.bexp = bx; + r.mant = m; + + return r; +} + +extern union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp); +extern union ieee754dp ieee754dp_format(int, int, u64); diff --git a/kernel/arch/mips/math-emu/ieee754int.h b/kernel/arch/mips/math-emu/ieee754int.h new file mode 100644 index 000000000..05389d5e3 --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754int.h @@ -0,0 +1,146 @@ +/* + * IEEE754 floating point + * common internal header file + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef __IEEE754INT_H +#define __IEEE754INT_H + +#include "ieee754.h" + +#define CLPAIR(x, y) ((x)*6+(y)) + +static inline void ieee754_clearcx(void) +{ + ieee754_csr.cx = 0; +} + +static inline void ieee754_setcx(const unsigned int flags) +{ + ieee754_csr.cx |= flags; + ieee754_csr.sx |= flags; +} + +static inline int ieee754_setandtestcx(const unsigned int x) +{ + ieee754_setcx(x); + + return ieee754_csr.mx & x; +} + +static inline int ieee754_class_nan(int xc) +{ + return xc >= IEEE754_CLASS_SNAN; +} + +#define COMPXSP \ + unsigned xm; int xe; int xs __maybe_unused; int xc + +#define COMPYSP \ + unsigned ym; int ye; int ys; int yc + +#define EXPLODESP(v, vc, vs, ve, vm) \ +{ \ + vs = SPSIGN(v); \ + ve = SPBEXP(v); \ + vm = SPMANT(v); \ + if (ve == SP_EMAX+1+SP_EBIAS) { \ + if (vm == 0) \ + vc = IEEE754_CLASS_INF; \ + else if (vm & SP_MBIT(SP_FBITS-1)) \ + vc = IEEE754_CLASS_SNAN; \ + else \ + vc = IEEE754_CLASS_QNAN; \ + } else if (ve == SP_EMIN-1+SP_EBIAS) { \ + if (vm) { \ + ve = SP_EMIN; \ + vc = IEEE754_CLASS_DNORM; \ + } else \ + vc = IEEE754_CLASS_ZERO; \ + } else { \ + ve -= SP_EBIAS; \ + vm |= SP_HIDDEN_BIT; \ + vc = IEEE754_CLASS_NORM; \ + } \ +} +#define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm) +#define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym) + + +#define COMPXDP \ + u64 xm; int xe; int xs __maybe_unused; int xc + +#define COMPYDP \ + u64 ym; int ye; int ys; int yc + +#define EXPLODEDP(v, vc, vs, ve, vm) \ +{ \ + vm = DPMANT(v); \ + vs = DPSIGN(v); \ + ve = DPBEXP(v); \ + if (ve == DP_EMAX+1+DP_EBIAS) { \ + if (vm == 0) \ + vc = IEEE754_CLASS_INF; \ + else if (vm & DP_MBIT(DP_FBITS-1)) \ + vc = IEEE754_CLASS_SNAN; \ + else \ + vc = IEEE754_CLASS_QNAN; \ + } else if (ve == DP_EMIN-1+DP_EBIAS) { \ + if (vm) { \ + ve = DP_EMIN; \ + vc = IEEE754_CLASS_DNORM; \ + } else \ + vc = IEEE754_CLASS_ZERO; \ + } else { \ + ve -= DP_EBIAS; \ + vm |= DP_HIDDEN_BIT; \ + vc = IEEE754_CLASS_NORM; \ + } \ +} +#define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm) +#define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym) + +#define FLUSHDP(v, vc, vs, ve, vm) \ + if (vc==IEEE754_CLASS_DNORM) { \ + if (ieee754_csr.nod) { \ + ieee754_setcx(IEEE754_INEXACT); \ + vc = IEEE754_CLASS_ZERO; \ + ve = DP_EMIN-1+DP_EBIAS; \ + vm = 0; \ + v = ieee754dp_zero(vs); \ + } \ + } + +#define FLUSHSP(v, vc, vs, ve, vm) \ + if (vc==IEEE754_CLASS_DNORM) { \ + if (ieee754_csr.nod) { \ + ieee754_setcx(IEEE754_INEXACT); \ + vc = IEEE754_CLASS_ZERO; \ + ve = SP_EMIN-1+SP_EBIAS; \ + vm = 0; \ + v = ieee754sp_zero(vs); \ + } \ + } + +#define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm) +#define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym) +#define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm) +#define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym) + +#endif /* __IEEE754INT_H */ diff --git a/kernel/arch/mips/math-emu/ieee754sp.c b/kernel/arch/mips/math-emu/ieee754sp.c new file mode 100644 index 000000000..ca8e35e33 --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754sp.c @@ -0,0 +1,196 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/compiler.h> + +#include "ieee754sp.h" + +int ieee754sp_class(union ieee754sp x) +{ + COMPXSP; + EXPLODEXSP; + return xc; +} + +static inline int ieee754sp_isnan(union ieee754sp x) +{ + return ieee754_class_nan(ieee754sp_class(x)); +} + +static inline int ieee754sp_issnan(union ieee754sp x) +{ + assert(ieee754sp_isnan(x)); + return SPMANT(x) & SP_MBIT(SP_FBITS - 1); +} + + +/* + * Raise the Invalid Operation IEEE 754 exception + * and convert the signaling NaN supplied to a quiet NaN. + */ +union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r) +{ + assert(ieee754sp_issnan(r)); + + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); +} + +static unsigned ieee754sp_get_rounding(int sn, unsigned xm) +{ + /* inexact must round of 3 bits + */ + if (xm & (SP_MBIT(3) - 1)) { + switch (ieee754_csr.rm) { + case FPU_CSR_RZ: + break; + case FPU_CSR_RN: + xm += 0x3 + ((xm >> 3) & 1); + /* xm += (xm&0x8)?0x4:0x3 */ + break; + case FPU_CSR_RU: /* toward +Infinity */ + if (!sn) /* ?? */ + xm += 0x8; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if (sn) /* ?? */ + xm += 0x8; + break; + } + } + return xm; +} + + +/* generate a normal/denormal number with over,under handling + * sn is sign + * xe is an unbiased exponent + * xm is 3bit extended precision value. + */ +union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm) +{ + assert(xm); /* we don't gen exact zeros (probably should) */ + + assert((xm >> (SP_FBITS + 1 + 3)) == 0); /* no execess */ + assert(xm & (SP_HIDDEN_BIT << 3)); + + if (xe < SP_EMIN) { + /* strip lower bits */ + int es = SP_EMIN - xe; + + if (ieee754_csr.nod) { + ieee754_setcx(IEEE754_UNDERFLOW); + ieee754_setcx(IEEE754_INEXACT); + + switch(ieee754_csr.rm) { + case FPU_CSR_RN: + case FPU_CSR_RZ: + return ieee754sp_zero(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754sp_min(0); + else + return ieee754sp_zero(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754sp_zero(0); + else + return ieee754sp_min(1); + } + } + + if (xe == SP_EMIN - 1 && + ieee754sp_get_rounding(sn, xm) >> (SP_FBITS + 1 + 3)) + { + /* Not tiny after rounding */ + ieee754_setcx(IEEE754_INEXACT); + xm = ieee754sp_get_rounding(sn, xm); + xm >>= 1; + /* Clear grs bits */ + xm &= ~(SP_MBIT(3) - 1); + xe++; + } else { + /* sticky right shift es bits + */ + SPXSRSXn(es); + assert((xm & (SP_HIDDEN_BIT << 3)) == 0); + assert(xe == SP_EMIN); + } + } + if (xm & (SP_MBIT(3) - 1)) { + ieee754_setcx(IEEE754_INEXACT); + if ((xm & (SP_HIDDEN_BIT << 3)) == 0) { + ieee754_setcx(IEEE754_UNDERFLOW); + } + + /* inexact must round of 3 bits + */ + xm = ieee754sp_get_rounding(sn, xm); + /* adjust exponent for rounding add overflowing + */ + if (xm >> (SP_FBITS + 1 + 3)) { + /* add causes mantissa overflow */ + xm >>= 1; + xe++; + } + } + /* strip grs bits */ + xm >>= 3; + + assert((xm >> (SP_FBITS + 1)) == 0); /* no execess */ + assert(xe >= SP_EMIN); + + if (xe > SP_EMAX) { + ieee754_setcx(IEEE754_OVERFLOW); + ieee754_setcx(IEEE754_INEXACT); + /* -O can be table indexed by (rm,sn) */ + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + return ieee754sp_inf(sn); + case FPU_CSR_RZ: + return ieee754sp_max(sn); + case FPU_CSR_RU: /* toward +Infinity */ + if (sn == 0) + return ieee754sp_inf(0); + else + return ieee754sp_max(1); + case FPU_CSR_RD: /* toward -Infinity */ + if (sn == 0) + return ieee754sp_max(0); + else + return ieee754sp_inf(1); + } + } + /* gen norm/denorm/zero */ + + if ((xm & SP_HIDDEN_BIT) == 0) { + /* we underflow (tiny/zero) */ + assert(xe == SP_EMIN); + if (ieee754_csr.mx & IEEE754_UNDERFLOW) + ieee754_setcx(IEEE754_UNDERFLOW); + return buildsp(sn, SP_EMIN - 1 + SP_EBIAS, xm); + } else { + assert((xm >> (SP_FBITS + 1)) == 0); /* no execess */ + assert(xm & SP_HIDDEN_BIT); + + return buildsp(sn, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT); + } +} diff --git a/kernel/arch/mips/math-emu/ieee754sp.h b/kernel/arch/mips/math-emu/ieee754sp.h new file mode 100644 index 000000000..374a3f00a --- /dev/null +++ b/kernel/arch/mips/math-emu/ieee754sp.h @@ -0,0 +1,86 @@ +/* + * IEEE754 floating point + * double precision internal header file + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/compiler.h> + +#include "ieee754int.h" + +#define assert(expr) ((void)0) + +#define SP_EBIAS 127 +#define SP_EMIN (-126) +#define SP_EMAX 127 +#define SP_FBITS 23 +#define SP_MBITS 23 + +#define SP_MBIT(x) ((u32)1 << (x)) +#define SP_HIDDEN_BIT SP_MBIT(SP_FBITS) +#define SP_SIGN_BIT SP_MBIT(31) + +#define SPSIGN(sp) (sp.sign) +#define SPBEXP(sp) (sp.bexp) +#define SPMANT(sp) (sp.mant) + +static inline int ieee754sp_finite(union ieee754sp x) +{ + return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS; +} + +/* 3bit extended single precision sticky right shift */ +#define SPXSRSXn(rs) \ + (xe += rs, \ + xm = (rs > (SP_FBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0)) + +#define SPXSRSX1() \ + (xe++, (xm = (xm >> 1) | (xm & 1))) + +#define SPXSRSYn(rs) \ + (ye+=rs, \ + ym = (rs > (SP_FBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0)) + +#define SPXSRSY1() \ + (ye++, (ym = (ym >> 1) | (ym & 1))) + +/* convert denormal to normalized with extended exponent */ +#define SPDNORMx(m,e) \ + while ((m >> SP_FBITS) == 0) { m <<= 1; e--; } +#define SPDNORMX SPDNORMx(xm, xe) +#define SPDNORMY SPDNORMx(ym, ye) + +static inline union ieee754sp buildsp(int s, int bx, unsigned m) +{ + union ieee754sp r; + + assert((s) == 0 || (s) == 1); + assert((bx) >= SP_EMIN - 1 + SP_EBIAS + && (bx) <= SP_EMAX + 1 + SP_EBIAS); + assert(((m) >> SP_FBITS) == 0); + + r.sign = s; + r.bexp = bx; + r.mant = m; + + return r; +} + +extern union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp); +extern union ieee754sp ieee754sp_format(int, int, unsigned); diff --git a/kernel/arch/mips/math-emu/me-debugfs.c b/kernel/arch/mips/math-emu/me-debugfs.c new file mode 100644 index 000000000..f308e0f05 --- /dev/null +++ b/kernel/arch/mips/math-emu/me-debugfs.c @@ -0,0 +1,68 @@ +#include <linux/cpumask.h> +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <asm/fpu_emulator.h> +#include <asm/local.h> + +DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); + +static int fpuemu_stat_get(void *data, u64 *val) +{ + int cpu; + unsigned long sum = 0; + + for_each_online_cpu(cpu) { + struct mips_fpu_emulator_stats *ps; + local_t *pv; + + ps = &per_cpu(fpuemustats, cpu); + pv = (void *)ps + (unsigned long)data; + sum += local_read(pv); + } + *val = sum; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); + +extern struct dentry *mips_debugfs_dir; +static int __init debugfs_fpuemu(void) +{ + struct dentry *d, *dir; + + if (!mips_debugfs_dir) + return -ENODEV; + dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); + if (!dir) + return -ENOMEM; + +#define FPU_EMU_STAT_OFFSET(m) \ + offsetof(struct mips_fpu_emulator_stats, m) + +#define FPU_STAT_CREATE(m) \ +do { \ + d = debugfs_create_file(#m , S_IRUGO, dir, \ + (void *)FPU_EMU_STAT_OFFSET(m), \ + &fops_fpuemu_stat); \ + if (!d) \ + return -ENOMEM; \ +} while (0) + + FPU_STAT_CREATE(emulated); + FPU_STAT_CREATE(loads); + FPU_STAT_CREATE(stores); + FPU_STAT_CREATE(cp1ops); + FPU_STAT_CREATE(cp1xops); + FPU_STAT_CREATE(errors); + FPU_STAT_CREATE(ieee754_inexact); + FPU_STAT_CREATE(ieee754_underflow); + FPU_STAT_CREATE(ieee754_overflow); + FPU_STAT_CREATE(ieee754_zerodiv); + FPU_STAT_CREATE(ieee754_invalidop); + FPU_STAT_CREATE(ds_emul); + + return 0; +} +__initcall(debugfs_fpuemu); diff --git a/kernel/arch/mips/math-emu/sp_add.c b/kernel/arch/mips/math-emu/sp_add.c new file mode 100644 index 000000000..f1c87b07d --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_add.c @@ -0,0 +1,176 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) +{ + int s; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs == ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return y; + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs == ys) + return x; + else + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + + /* FALL THROUGH */ + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* + * Provide guard, round and stick bit space. + */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * Have to shift y fraction right to align. + */ + s = xe - ye; + SPXSRSYn(s); + } else if (ye > xe) { + /* + * Have to shift x fraction right to align. + */ + s = ye - xe; + SPXSRSXn(s); + } + assert(xe == ye); + assert(xe <= SP_EMAX); + + if (xs == ys) { + /* + * Generate 28 bit result of adding two 27 bit numbers + * leaving result in xm, xs and xe. + */ + xm = xm + ym; + + if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */ + SPXSRSX1(); + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Normalize in extended single precision + */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754sp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/sp_cmp.c b/kernel/arch/mips/math-emu/sp_cmp.c new file mode 100644 index 000000000..67b82f1e2 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_cmp.c @@ -0,0 +1,59 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +int ieee754sp_cmp(union ieee754sp x, union ieee754sp y, int cmp, int sig) +{ + int vx; + int vy; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + FLUSHXSP; + FLUSHYSP; + ieee754_clearcx(); /* Even clear inexact flag here */ + + if (ieee754_class_nan(xc) || ieee754_class_nan(yc)) { + if (sig || + xc == IEEE754_CLASS_SNAN || yc == IEEE754_CLASS_SNAN) + ieee754_setcx(IEEE754_INVALID_OPERATION); + return (cmp & IEEE754_CUN) != 0; + } else { + vx = x.bits; + vy = y.bits; + + if (vx < 0) + vx = -vx ^ SP_SIGN_BIT; + if (vy < 0) + vy = -vy ^ SP_SIGN_BIT; + + if (vx < vy) + return (cmp & IEEE754_CLT) != 0; + else if (vx == vy) + return (cmp & IEEE754_CEQ) != 0; + else + return (cmp & IEEE754_CGT) != 0; + } +} diff --git a/kernel/arch/mips/math-emu/sp_div.c b/kernel/arch/mips/math-emu/sp_div.c new file mode 100644 index 000000000..27f6db3a0 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_div.c @@ -0,0 +1,154 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) +{ + unsigned rm; + int re; + unsigned bm; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + return ieee754sp_zero(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return ieee754sp_inf(xs ^ ys); + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + ieee754_setcx(IEEE754_ZERO_DIVIDE); + return ieee754sp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + return ieee754sp_zero(xs == ys ? 0 : 1); + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + /* provide rounding space */ + xm <<= 3; + ym <<= 3; + + /* now the dirty work */ + + rm = 0; + re = xe - ye; + + for (bm = SP_MBIT(SP_FBITS + 2); bm; bm >>= 1) { + if (xm >= ym) { + xm -= ym; + rm |= bm; + if (xm == 0) + break; + } + xm <<= 1; + } + + rm <<= 1; + if (xm) + rm |= 1; /* have remainder, set sticky */ + + assert(rm); + + /* normalise rm to rounding precision ? + */ + while ((rm >> (SP_FBITS + 3)) == 0) { + rm <<= 1; + re--; + } + + return ieee754sp_format(xs == ys ? 0 : 1, re, rm); +} diff --git a/kernel/arch/mips/math-emu/sp_fdp.c b/kernel/arch/mips/math-emu/sp_fdp.c new file mode 100644 index 000000000..379714889 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_fdp.c @@ -0,0 +1,82 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" +#include "ieee754dp.h" + +static inline union ieee754sp ieee754sp_nan_fdp(int xs, u64 xm) +{ + return buildsp(xs, SP_EMAX + 1 + SP_EBIAS, + xm >> (DP_FBITS - SP_FBITS)); +} + +union ieee754sp ieee754sp_fdp(union ieee754dp x) +{ + union ieee754sp y; + u32 rm; + + COMPXDP; + COMPYSP; + + EXPLODEXDP; + + ieee754_clearcx(); + + FLUSHXDP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm)); + + case IEEE754_CLASS_QNAN: + y = ieee754sp_nan_fdp(xs, xm); + EXPLODEYSP; + if (!ieee754_class_nan(yc)) + y = ieee754sp_indef(); + return y; + + case IEEE754_CLASS_INF: + return ieee754sp_inf(xs); + + case IEEE754_CLASS_ZERO: + return ieee754sp_zero(xs); + + case IEEE754_CLASS_DNORM: + /* can't possibly be sp representable */ + ieee754_setcx(IEEE754_UNDERFLOW); + ieee754_setcx(IEEE754_INEXACT); + if ((ieee754_csr.rm == FPU_CSR_RU && !xs) || + (ieee754_csr.rm == FPU_CSR_RD && xs)) + return ieee754sp_mind(xs); + return ieee754sp_zero(xs); + + case IEEE754_CLASS_NORM: + break; + } + + /* + * Convert from DP_FBITS to SP_FBITS+3 with sticky right shift. + */ + rm = (xm >> (DP_FBITS - (SP_FBITS + 3))) | + ((xm << (64 - (DP_FBITS - (SP_FBITS + 3)))) != 0); + + return ieee754sp_format(xs, xe, rm); +} diff --git a/kernel/arch/mips/math-emu/sp_fint.c b/kernel/arch/mips/math-emu/sp_fint.c new file mode 100644 index 000000000..d5d8495b2 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_fint.c @@ -0,0 +1,65 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_fint(int x) +{ + unsigned xm; + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754sp_zero(0); + if (x == 1 || x == -1) + return ieee754sp_one(x < 0); + if (x == 10 || x == -10) + return ieee754sp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1 << 31)) + xm = ((unsigned) 1 << 31); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + xe = SP_FBITS + 3; + + if (xm >> (SP_FBITS + 1 + 3)) { + /* shunt out overflow bits + */ + while (xm >> (SP_FBITS + 1 + 3)) { + SPXSRSX1(); + } + } else { + /* normalize in grs extended single precision + */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + return ieee754sp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/sp_flong.c b/kernel/arch/mips/math-emu/sp_flong.c new file mode 100644 index 000000000..012e30ce7 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_flong.c @@ -0,0 +1,64 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_flong(s64 x) +{ + u64 xm; /* <--- need 64-bit mantissa temp */ + int xe; + int xs; + + ieee754_clearcx(); + + if (x == 0) + return ieee754sp_zero(0); + if (x == 1 || x == -1) + return ieee754sp_one(x < 0); + if (x == 10 || x == -10) + return ieee754sp_ten(x < 0); + + xs = (x < 0); + if (xs) { + if (x == (1ULL << 63)) + xm = (1ULL << 63); /* max neg can't be safely negated */ + else + xm = -x; + } else { + xm = x; + } + xe = SP_FBITS + 3; + + if (xm >> (SP_FBITS + 1 + 3)) { + /* shunt out overflow bits + */ + while (xm >> (SP_FBITS + 1 + 3)) { + SPXSRSX1(); + } + } else { + /* normalize in grs extended single precision */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + return ieee754sp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/sp_mul.c b/kernel/arch/mips/math-emu/sp_mul.c new file mode 100644 index 000000000..d910c43a6 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_mul.c @@ -0,0 +1,166 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) +{ + int re; + int rs; + unsigned rm; + unsigned short lxm; + unsigned short hxm; + unsigned short lym; + unsigned short hym; + unsigned lrm; + unsigned hrm; + unsigned t; + unsigned at; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return ieee754sp_zero(xs ^ ys); + + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + re = xe + ye; + rs = xs ^ ys; + + /* shunt to top of word */ + xm <<= 32 - (SP_FBITS + 1); + ym <<= 32 - (SP_FBITS + 1); + + /* + * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + */ + lxm = xm & 0xffff; + hxm = xm >> 16; + lym = ym & 0xffff; + hym = ym >> 16; + + lrm = lxm * lym; /* 16 * 16 => 32 */ + hrm = hxm * hym; /* 16 * 16 => 32 */ + + t = lxm * hym; /* 16 * 16 => 32 */ + at = lrm + (t << 16); + hrm += at < lrm; + lrm = at; + hrm = hrm + (t >> 16); + + t = hxm * lym; /* 16 * 16 => 32 */ + at = lrm + (t << 16); + hrm += at < lrm; + lrm = at; + hrm = hrm + (t >> 16); + + rm = hrm | (lrm != 0); + + /* + * Sticky shift down to normal rounding precision. + */ + if ((int) rm < 0) { + rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | + ((rm << (SP_FBITS + 1 + 3)) != 0); + re++; + } else { + rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | + ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); + } + assert(rm & (SP_HIDDEN_BIT << 3)); + + return ieee754sp_format(rs, re, rm); +} diff --git a/kernel/arch/mips/math-emu/sp_simple.c b/kernel/arch/mips/math-emu/sp_simple.c new file mode 100644 index 000000000..c50e9451f --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_simple.c @@ -0,0 +1,49 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_neg(union ieee754sp x) +{ + unsigned int oldrm; + union ieee754sp y; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + y = ieee754sp_sub(ieee754sp_zero(0), x); + ieee754_csr.rm = oldrm; + return y; +} + +union ieee754sp ieee754sp_abs(union ieee754sp x) +{ + unsigned int oldrm; + union ieee754sp y; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + if (SPSIGN(x)) + y = ieee754sp_sub(ieee754sp_zero(0), x); + else + y = ieee754sp_add(ieee754sp_zero(0), x); + ieee754_csr.rm = oldrm; + return y; +} diff --git a/kernel/arch/mips/math-emu/sp_sqrt.c b/kernel/arch/mips/math-emu/sp_sqrt.c new file mode 100644 index 000000000..67059c33a --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_sqrt.c @@ -0,0 +1,114 @@ +/* IEEE754 floating point arithmetic + * single precision square root + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_sqrt(union ieee754sp x) +{ + int ix, s, q, m, t, i; + unsigned int r; + COMPXSP; + + /* take care of Inf and NaN */ + + EXPLODEXSP; + ieee754_clearcx(); + FLUSHXSP; + + /* x == INF or NAN? */ + switch (xc) { + case IEEE754_CLASS_SNAN: + return ieee754sp_nanxcpt(x); + + case IEEE754_CLASS_QNAN: + /* sqrt(Nan) = Nan */ + return x; + + case IEEE754_CLASS_ZERO: + /* sqrt(0) = 0 */ + return x; + + case IEEE754_CLASS_INF: + if (xs) { + /* sqrt(-Inf) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* sqrt(+Inf) = Inf */ + return x; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + if (xs) { + /* sqrt(-x) = Nan */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + break; + } + + ix = x.bits; + + /* normalize x */ + m = (ix >> 23); + if (m == 0) { /* subnormal x */ + for (i = 0; (ix & 0x00800000) == 0; i++) + ix <<= 1; + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if (m & 1) /* odd m, double x to make it even */ + ix += ix; + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = s = 0; /* q = sqrt(x) */ + r = 0x01000000; /* r = moving bit from right to left */ + + while (r != 0) { + t = s + r; + if (t <= ix) { + s = t + r; + ix -= t; + q += r; + } + ix += ix; + r >>= 1; + } + + if (ix != 0) { + ieee754_setcx(IEEE754_INEXACT); + switch (ieee754_csr.rm) { + case FPU_CSR_RU: + q += 2; + break; + case FPU_CSR_RN: + q += (q & 1); + break; + } + } + ix = (q >> 1) + 0x3f000000; + ix += (m << 23); + x.bits = ix; + return x; +} diff --git a/kernel/arch/mips/math-emu/sp_sub.c b/kernel/arch/mips/math-emu/sp_sub.c new file mode 100644 index 000000000..ec5f937a8 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_sub.c @@ -0,0 +1,178 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) +{ + int s; + + COMPXSP; + COMPYSP; + + EXPLODEXSP; + EXPLODEYSP; + + ieee754_clearcx(); + + FLUSHXSP; + FLUSHYSP; + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (xs != ys) + return x; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + return ieee754sp_inf(ys ^ 1); + + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + return x; + + /* + * Zero handling + */ + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + if (xs != ys) + return x; + else + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + return x; + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + /* quick fix up */ + SPSIGN(y) ^= 1; + return y; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + break; + } + /* flip sign of y and handle as add */ + ys ^= 1; + + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + + /* provide guard,round and stick bit space */ + xm <<= 3; + ym <<= 3; + + if (xe > ye) { + /* + * have to shift y fraction right to align + */ + s = xe - ye; + SPXSRSYn(s); + } else if (ye > xe) { + /* + * have to shift x fraction right to align + */ + s = ye - xe; + SPXSRSXn(s); + } + assert(xe == ye); + assert(xe <= SP_EMAX); + + if (xs == ys) { + /* generate 28 bit result of adding two 27 bit numbers + */ + xm = xm + ym; + + if (xm >> (SP_FBITS + 1 + 3)) { /* carry out */ + SPXSRSX1(); /* shift preserving sticky */ + } + } else { + if (xm >= ym) { + xm = xm - ym; + } else { + xm = ym - xm; + xs = ys; + } + if (xm == 0) { + if (ieee754_csr.rm == FPU_CSR_RD) + return ieee754sp_zero(1); /* round negative inf. => sign = -1 */ + else + return ieee754sp_zero(0); /* other round modes => sign = 1 */ + } + /* normalize to rounding precision + */ + while ((xm >> (SP_FBITS + 3)) == 0) { + xm <<= 1; + xe--; + } + } + + return ieee754sp_format(xs, xe, xm); +} diff --git a/kernel/arch/mips/math-emu/sp_tint.c b/kernel/arch/mips/math-emu/sp_tint.c new file mode 100644 index 000000000..091299a31 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_tint.c @@ -0,0 +1,109 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" + +int ieee754sp_tint(union ieee754sp x) +{ + u32 residue; + int round; + int sticky; + int odd; + + COMPXSP; + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe >= 31) { + /* look for valid corner case */ + if (xe == 31 && xs && xm == SP_HIDDEN_BIT) + return -0x80000000; + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + } + /* oh gawd */ + if (xe > SP_FBITS) { + xm <<= xe - SP_FBITS; + } else { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + /* Shifting a u32 32 times does not work, + * so we do it in two steps. Be aware that xe + * may be -1 */ + residue = xm << (xe + 1); + residue <<= 31 - SP_FBITS; + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + if ((xm >> 31) != 0) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_indef(); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} diff --git a/kernel/arch/mips/math-emu/sp_tlong.c b/kernel/arch/mips/math-emu/sp_tlong.c new file mode 100644 index 000000000..9f3c742c1 --- /dev/null +++ b/kernel/arch/mips/math-emu/sp_tlong.c @@ -0,0 +1,106 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "ieee754sp.h" +#include "ieee754dp.h" + +s64 ieee754sp_tlong(union ieee754sp x) +{ + u32 residue; + int round; + int sticky; + int odd; + + COMPXDP; /* <-- need 64-bit mantissa tmp */ + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + switch (xc) { + case IEEE754_CLASS_SNAN: + case IEEE754_CLASS_QNAN: + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + + case IEEE754_CLASS_ZERO: + return 0; + + case IEEE754_CLASS_DNORM: + case IEEE754_CLASS_NORM: + break; + } + if (xe >= 63) { + /* look for valid corner case */ + if (xe == 63 && xs && xm == SP_HIDDEN_BIT) + return -0x8000000000000000LL; + /* Set invalid. We will only use overflow for floating + point overflow */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + } + /* oh gawd */ + if (xe > SP_FBITS) { + xm <<= xe - SP_FBITS; + } else if (xe < SP_FBITS) { + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (32 - SP_FBITS + xe); + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + odd = (xm & 0x1) != 0x0; + switch (ieee754_csr.rm) { + case FPU_CSR_RN: + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: + break; + case FPU_CSR_RU: /* toward +Infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -Infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + if ((xm >> 63) != 0) { + /* This can happen after rounding */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_indef(); + } + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + } + if (xs) + return -xm; + else + return xm; +} |