From 9a983c12a349a65f26c5833580a1fbfff3457ba0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 10 Apr 2017 18:42:34 -0700 Subject: [PATCH] Implement new FP encoding proposal https://groups.google.com/a/groups.riscv.org/d/msg/isa-dev/_r7hBlzsEd8/cWPyJKMzCQAJ --- riscv-tools | 2 +- src/main/scala/tile/FPU.scala | 523 +++++++++++++++++++--------------- 2 files changed, 296 insertions(+), 229 deletions(-) diff --git a/riscv-tools b/riscv-tools index 8a8d7e68..cd8bc479 160000 --- a/riscv-tools +++ b/riscv-tools @@ -1 +1 @@ -Subproject commit 8a8d7e680d48c4dc46b2ac0e8114d400d7afc4af +Subproject commit cd8bc4798c38ba11118492474e96baf717c7af36 diff --git a/src/main/scala/tile/FPU.scala b/src/main/scala/tile/FPU.scala index 96ab2178..df05a863 100644 --- a/src/main/scala/tile/FPU.scala +++ b/src/main/scala/tile/FPU.scala @@ -20,32 +20,11 @@ case class FPUParams( object FPConstants { - def FCMD_ADD = BitPat("b0??00") - def FCMD_SUB = BitPat("b0??01") - def FCMD_MUL = BitPat("b0??10") - def FCMD_MADD = BitPat("b1??00") - def FCMD_MSUB = BitPat("b1??01") - def FCMD_NMSUB = BitPat("b1??10") - def FCMD_NMADD = BitPat("b1??11") - def FCMD_DIV = BitPat("b?0011") - def FCMD_SQRT = BitPat("b?1011") - def FCMD_SGNJ = BitPat("b??1?0") - def FCMD_MINMAX = BitPat("b?01?1") - def FCMD_CVT_FF = BitPat("b??0??") - def FCMD_CVT_IF = BitPat("b?10??") - def FCMD_CMP = BitPat("b?01??") - def FCMD_MV_XF = BitPat("b?11??") - def FCMD_CVT_FI = BitPat("b??0??") - def FCMD_MV_FX = BitPat("b??1??") - def FCMD_X = BitPat("b?????") - val FCMD_WIDTH = 5 - val RM_SZ = 3 val FLAGS_SZ = 5 } trait HasFPUCtrlSigs { - val cmd = Bits(width = FCMD_WIDTH) val ldst = Bool() val wen = Bool() val ren1 = Bool() @@ -53,7 +32,8 @@ trait HasFPUCtrlSigs { val ren3 = Bool() val swap12 = Bool() val swap23 = Bool() - val single = Bool() + val singleIn = Bool() + val singleOut = Bool() val fromint = Bool() val toint = Bool() val fastpipe = Bool() @@ -71,71 +51,71 @@ class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) { val sigs = new FPUCtrlSigs().asOutput } - val default = List(FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X) + val default = List(X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X) val f = - Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N), - FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N), - FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,N), - FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), - FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), - FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), - FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), - FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,N), - FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,N), - FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), - FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), - FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), - FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), - FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,Y), - FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,Y), - FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,Y), - FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N), - FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N), - FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N), - FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,Y), - FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,Y), - FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), - FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), - FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y), - FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), - FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), - FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), - FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), - FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y), - FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y)) + Array(FLW -> List(Y,Y,N,N,N,X,X,X,X,N,N,N,N,N,N,N), + FSW -> List(Y,N,N,Y,N,Y,X,N,Y,N,Y,N,N,N,N,N), + FMV_S_X -> List(N,Y,N,N,N,X,X,Y,N,Y,N,N,N,N,N,N), + FCVT_S_W -> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y), + FCVT_S_WU-> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y), + FCVT_S_L -> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y), + FCVT_S_LU-> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y), + FMV_X_S -> List(N,N,Y,N,N,N,X,N,Y,N,Y,N,N,N,N,N), + FCLASS_S -> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,N), + FCVT_W_S -> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y), + FCVT_WU_S-> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y), + FCVT_L_S -> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y), + FCVT_LU_S-> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y), + FEQ_S -> List(N,N,Y,Y,N,N,N,Y,Y,N,Y,N,N,N,N,Y), + FLT_S -> List(N,N,Y,Y,N,N,N,Y,Y,N,Y,N,N,N,N,Y), + FLE_S -> List(N,N,Y,Y,N,N,N,Y,Y,N,Y,N,N,N,N,Y), + FSGNJ_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,N), + FSGNJN_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,N), + FSGNJX_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,N), + FMIN_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,Y), + FMAX_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,Y), + FADD_S -> List(N,Y,Y,Y,N,N,Y,Y,Y,N,N,N,Y,N,N,Y), + FSUB_S -> List(N,Y,Y,Y,N,N,Y,Y,Y,N,N,N,Y,N,N,Y), + FMUL_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,N,Y,N,N,Y), + FMADD_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), + FMSUB_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), + FNMADD_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), + FNMSUB_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), + FDIV_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,N,N,Y,N,Y), + FSQRT_S -> List(N,Y,Y,N,N,Y,X,Y,Y,N,N,N,N,N,Y,Y)) val d = - Array(FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N), - FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N), - FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,N), - FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), - FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), - FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), - FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), - FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,N), - FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,N), - FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), - FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), - FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), - FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), - FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y), - FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y), - FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,Y), - FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,Y), - FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,Y), - FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N), - FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N), - FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N), - FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,Y), - FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,Y), - FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y), - FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y), - FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y), - FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), - FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), - FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), - FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), - FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y), - FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y)) + Array(FLD -> List(Y,Y,N,N,N,X,X,X,N,N,N,N,N,N,N,N), + FSD -> List(Y,N,N,Y,N,Y,X,N,N,N,Y,N,N,N,N,N), + FMV_D_X -> List(N,Y,N,N,N,X,X,X,N,Y,N,N,N,N,N,N), + FCVT_D_W -> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y), + FCVT_D_WU-> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y), + FCVT_D_L -> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y), + FCVT_D_LU-> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y), + FMV_X_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,N), + FCLASS_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,N), + FCVT_W_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y), + FCVT_WU_D-> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y), + FCVT_L_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y), + FCVT_LU_D-> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y), + FCVT_S_D -> List(N,Y,Y,N,N,N,X,N,Y,N,N,Y,N,N,N,Y), + FCVT_D_S -> List(N,Y,Y,N,N,N,X,Y,N,N,N,Y,N,N,N,Y), + FEQ_D -> List(N,N,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), + FLT_D -> List(N,N,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), + FLE_D -> List(N,N,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), + FSGNJ_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,N), + FSGNJN_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,N), + FSGNJX_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,N), + FMIN_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,Y), + FMAX_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,Y), + FADD_D -> List(N,Y,Y,Y,N,N,Y,N,N,N,N,N,Y,N,N,Y), + FSUB_D -> List(N,Y,Y,Y,N,N,Y,N,N,N,N,N,Y,N,N,Y), + FMUL_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,N,Y), + FMADD_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y), + FMSUB_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y), + FNMADD_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y), + FNMSUB_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y), + FDIV_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,N,N,Y,N,Y), + FSQRT_D -> List(N,Y,Y,N,N,Y,X,N,N,N,N,N,N,N,Y,Y)) val insns = fLen match { case 32 => f @@ -143,9 +123,9 @@ class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) { } val decoder = DecodeLogic(io.inst, default, insns) val s = io.sigs - val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, - s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma, - s.div, s.sqrt, s.wflags) + val sigs = Seq(s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, + s.swap23, s.singleIn, s.singleOut, s.fromint, s.toint, + s.fastpipe, s.fma, s.div, s.sqrt, s.wflags) sigs zip decoder map {case(s,d) => s := d} } @@ -183,11 +163,12 @@ class FPUIO(implicit p: Parameters) extends FPUCoreIO ()(p) { class FPResult(implicit p: Parameters) extends CoreBundle()(p) { val data = Bits(width = fLen+1) - val exc = Bits(width = 5) + val exc = Bits(width = FPConstants.FLAGS_SZ) } class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs { - val rm = Bits(width = 3) + val rm = Bits(width = FPConstants.RM_SZ) + val fmaCmd = Bits(width = 2) val typ = Bits(width = 2) val in1 = Bits(width = fLen+1) val in2 = Bits(width = fLen+1) @@ -221,99 +202,187 @@ object ClassifyRecFN { } } -object IsNaNRecFN { - def apply(in: UInt, t: FType) = in(t.sig + t.exp - 1, t.sig + t.exp - 3).andR -} +case class FType(exp: Int, sig: Int) { + def ieeeWidth = exp + sig + def recodedWidth = ieeeWidth + 1 -object IsSNaNRecFN { - def apply(in: UInt, t: FType) = IsNaNRecFN(in, t) && !in(t.sig - 2) -} + def qNaN = UInt((BigInt(7) << (exp + sig - 3)) + (BigInt(1) << (sig - 2)), exp + sig + 1) + def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR + def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2) -class FType(val exp: Int, val sig: Int) + // convert between formats, ignoring rounding, range, NaN + def unsafeConvert(x: UInt, to: FType) = if (this == to) x else { + val sign = x(sig + exp) + val fractIn = x(sig - 2, 0) + val expIn = x(sig + exp - 1, sig - 1) + val fractOut = fractIn << to.sig >> sig + val expOut = { + val expCode = expIn(exp, exp - 2) + val commonCase = (expIn + (1 << to.exp)) - (1 << exp) + Mux(expCode === 0 || expCode >= 6, Cat(expCode, commonCase(to.exp - 3, 0)), commonCase(to.exp, 0)) + } + Cat(sign, expOut, fractOut) + } + + def recode(x: UInt) = hardfloat.recFNFromFN(exp, sig, x) + def ieee(x: UInt) = hardfloat.fNFromRecFN(exp, sig, x) +} object FType { val S = new FType(8, 24) val D = new FType(11, 53) -} -/** Format conversion without rounding or NaN handling */ -object RecFNToRecFN_noncompliant { - def apply(in: UInt, inExpWidth: Int, inSigWidth: Int, outExpWidth: Int, outSigWidth: Int) = { - val sign = in(inSigWidth + inExpWidth) - val fractIn = in(inSigWidth - 2, 0) - val expIn = in(inSigWidth + inExpWidth - 1, inSigWidth - 1) - val fractOut = fractIn << outSigWidth >> inSigWidth - val expOut = { - val expCode = expIn(inExpWidth, inExpWidth - 2) - val commonCase = (expIn + (1 << outExpWidth)) - (1 << inExpWidth) - Mux(expCode === 0 || expCode >= 6, Cat(expCode, commonCase(outExpWidth - 3, 0)), - commonCase(outExpWidth, 0)) - } - Cat(sign, expOut, fractOut) - } -} - -object CanonicalNaN { - def apply(t: FType): UInt = - UInt((BigInt(7) << (t.exp + t.sig - 3)) + (BigInt(1) << (t.sig - 2)), t.exp + t.sig + 1) - def signaling(t: FType): UInt = - UInt((BigInt(7) << (t.exp + t.sig - 3)) + (BigInt(1) << (t.sig - 3)), t.exp + t.sig + 1) + val all = List(S, D) } trait HasFPUParameters { val fLen: Int val (sExpWidth, sSigWidth) = (FType.S.exp, FType.S.sig) val (dExpWidth, dSigWidth) = (FType.D.exp, FType.D.sig) - val floatTypes = fLen match { - case 32 => List(FType.S) - case 64 => List(FType.S, FType.D) - } - val maxType = floatTypes.sortWith(_.exp > _.exp).head + val floatTypes = FType.all.filter(_.ieeeWidth <= fLen) + val maxType = floatTypes.last + def prevType(t: FType) = floatTypes(floatTypes.indexOf(t) - 1) val maxExpWidth = maxType.exp val maxSigWidth = maxType.sig - def expand(x: UInt, t: FType) = RecFNToRecFN_noncompliant(x, t.exp, t.sig, maxType.exp, maxType.sig) - def contract(x: UInt, t: FType) = RecFNToRecFN_noncompliant(x, maxType.exp, maxType.sig, t.exp, t.sig) + private def isBox(x: UInt, t: FType): Bool = x(t.sig + t.exp, t.sig + t.exp - 4).andR + + private def box(x: UInt, xt: FType, y: UInt, yt: FType): UInt = { + require(xt.ieeeWidth == 2 * yt.ieeeWidth) + val swizzledNaN = Cat( + x(xt.sig + xt.exp, xt.sig + xt.exp - 3), + x(xt.sig - 2, yt.recodedWidth - 1).andR, + x(xt.sig + xt.exp - 5, xt.sig), + y(yt.recodedWidth - 2), + x(xt.sig - 2, yt.recodedWidth - 1), + y(yt.recodedWidth - 1), + y(yt.recodedWidth - 3, 0)) + Mux(xt.isNaN(x), swizzledNaN, x) + } + + // implement NaN unboxing for FU inputs + def unbox(x: UInt, tag: UInt): UInt = { + def helper(x: UInt, t: FType): Seq[(Bool, UInt)] = { + val prev = + if (floatTypes.indexOf(t) == 0) { + Seq() + } else { + val prevT = prevType(t) + val unswizzled = Cat( + x(prevT.sig + prevT.exp - 1), + x(t.sig - 1), + x(prevT.sig + prevT.exp - 2, 0)) + val prev = helper(unswizzled, prevT) + val isbox = isBox(x, t) + prev.map(p => (isbox && p._1, p._2)) + } + prev :+ (true.B, t.unsafeConvert(x, maxType)) + } + + val res = helper(x, maxType) + val oks = res.map(_._1) + val floats = res.map(_._2) + Mux(oks(tag), floats(tag), maxType.qNaN) + } + + // make sure that the redundant bits in the NaN-boxed encoding are consistent + def consistent(x: UInt): Bool = { + def helper(x: UInt, t: FType): Bool = if (floatTypes.indexOf(t) == 0) true.B else { + val prevT = prevType(t) + val unswizzled = Cat( + x(prevT.sig + prevT.exp - 1), + x(t.sig - 1), + x(prevT.sig + prevT.exp - 2, 0)) + val prevOK = !isBox(x, t) || helper(unswizzled, prevT) + val curOK = !t.isNaN(x) || x(t.sig + t.exp - 4) === x(t.sig - 2, prevT.recodedWidth - 1).andR + prevOK && curOK + } + helper(x, maxType) + } + + // generate a NaN box from an FU result + def box(x: UInt, tag: UInt): UInt = { + def helper(y: UInt, yt: FType): UInt = { + if (yt == maxType) { + y + } else { + val nt = floatTypes(floatTypes.indexOf(yt) + 1) + val bigger = box(UInt((BigInt(1) << nt.recodedWidth)-1), nt, y, yt) + bigger | UInt((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth)) + } + } + val opts = floatTypes.map(t => helper(x, t)) + opts(tag) + } + + // zap bits that hardfloat thinks are don't-cares, but we do care about + def sanitizeNaN(x: UInt, t: FType): UInt = { + if (floatTypes.indexOf(t) == 0) { + x + } else { + val maskedNaN = x & ~UInt((BigInt(1) << (t.sig-1)) | (BigInt(1) << (t.sig+t.exp-4)), t.recodedWidth) + Mux(t.isNaN(x), maskedNaN, x) + } + } + + // implement NaN boxing and recoding for FL*/fmv.*.x + def recode(x: UInt, tag: UInt): UInt = { + def helper(x: UInt, t: FType): UInt = { + if (floatTypes.indexOf(t) == 0) { + t.recode(x) + } else { + val prevT = prevType(t) + box(t.recode(x), t, helper(x, prevT), prevT) + } + } + + // fill MSBs of subword loads to emulate a wider load of a NaN-boxed value + val boxes = floatTypes.map(t => UInt((BigInt(1) << maxType.ieeeWidth) - (BigInt(1) << t.ieeeWidth))) + helper(boxes(tag) | x, maxType) + } + + // implement NaN unboxing and un-recoding for FS*/fmv.x.* + def ieee(x: UInt, t: FType = maxType): UInt = { + if (floatTypes.indexOf(t) == 0) { + t.ieee(x) + } else { + val unrecoded = t.ieee(x) + val prevT = prevType(t) + val prevRecoded = Cat( + x(prevT.recodedWidth-2), + x(t.sig-1), + x(prevT.recodedWidth-3, 0)) + val prevUnrecoded = ieee(prevRecoded, prevT) + Cat(unrecoded >> prevT.ieeeWidth, Mux(t.isNaN(x), prevUnrecoded, unrecoded(prevT.ieeeWidth-1, 0))) + } + } } abstract class FPUModule(implicit p: Parameters) extends CoreModule()(p) with HasFPUParameters class FPToInt(implicit p: Parameters) extends FPUModule()(p) { class Output extends Bundle { + val in = new FPInput val lt = Bool() val store = Bits(width = fLen) val toint = Bits(width = xLen) - val exc = Bits(width = 5) + val exc = Bits(width = FPConstants.FLAGS_SZ) override def cloneType = new Output().asInstanceOf[this.type] } val io = new Bundle { val in = Valid(new FPInput).flip - val as_double = new FPInput().asOutput val out = Valid(new Output) } val in = RegEnable(io.in.bits, io.in.valid) val valid = Reg(next=io.in.valid) - val in1_s = contract(in.in1, FType.S) - val unrec_s = hardfloat.fNFromRecFN(sExpWidth, sSigWidth, in1_s).sextTo(xLen) - val unrec_mem = fLen match { - case 32 => unrec_s - case 64 => - val unrec_d = hardfloat.fNFromRecFN(dExpWidth, dSigWidth, in.in1).sextTo(xLen) - Mux(in.single, unrec_s, unrec_d) - } - val unrec_int = xLen match { - case 32 => unrec_s - case fLen => unrec_mem - } - - val classify_s = ClassifyRecFN(sExpWidth, sSigWidth, in1_s) + val classify_s = ClassifyRecFN(sExpWidth, sSigWidth, maxType.unsafeConvert(in.in1, FType.S)) val classify_out = fLen match { case 32 => classify_s case 64 => val classify_d = ClassifyRecFN(dExpWidth, dSigWidth, in.in1) - Mux(in.single, classify_s, classify_d) + Mux(in.singleIn, classify_s, classify_d) } val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth)) @@ -321,32 +390,34 @@ class FPToInt(implicit p: Parameters) extends FPUModule()(p) { dcmp.io.b := in.in2 dcmp.io.signaling := !in.rm(1) - io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_int) - io.out.bits.store := unrec_mem + val store = ieee(in.in1) + val toint = Mux(in.rm(0), classify_out, store) + io.out.bits.store := store + io.out.bits.toint := Mux(in.singleOut, toint(31, 0).sextTo(xLen), toint) io.out.bits.exc := Bits(0) - when (in.cmd === FCMD_CMP) { + when (in.wflags) { // feq/flt/fle, fcvt io.out.bits.toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR io.out.bits.exc := dcmp.io.exceptionFlags - } - when (in.cmd === FCMD_CVT_IF) { - val minXLen = 32 - val n = log2Ceil(xLen/minXLen) + 1 - for (i <- 0 until n) { - val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, minXLen << i)) - conv.io.in := in.in1 - conv.io.roundingMode := in.rm - conv.io.signedOut := ~in.typ(0) - when (in.typ.extract(log2Ceil(n), 1) === i) { - io.out.bits.toint := conv.io.out.sextTo(xLen) - io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0)) + when (!in.ren2) { // fcvt + val minXLen = 32 + val n = log2Ceil(xLen/minXLen) + 1 + for (i <- 0 until n) { + val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, minXLen << i)) + conv.io.in := in.in1 + conv.io.roundingMode := in.rm + conv.io.signedOut := ~in.typ(0) + when (in.typ.extract(log2Ceil(n), 1) === i) { + io.out.bits.toint := conv.io.out.sextTo(xLen) + io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0)) + } } } } io.out.valid := valid io.out.bits.lt := dcmp.io.lt - io.as_double := in + io.out.bits.in := in } class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) { @@ -359,10 +430,7 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) { val mux = Wire(new FPResult) mux.exc := Bits(0) - mux.data := expand(hardfloat.recFNFromFN(sExpWidth, sSigWidth, in.bits.in1), FType.S) - if (fLen > 32) when (!in.bits.single) { - mux.data := hardfloat.recFNFromFN(dExpWidth, dSigWidth, in.bits.in1) - } + mux.data := recode(in.bits.in1, !in.bits.singleIn) val intValue = { val minXLen = 32 @@ -377,12 +445,12 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) { res.asUInt } - when (in.bits.cmd === FCMD_CVT_FI) { + when (in.bits.wflags) { // fcvt val l2s = Module(new hardfloat.INToRecFN(xLen, sExpWidth, sSigWidth)) l2s.io.signedIn := ~in.bits.typ(0) l2s.io.in := intValue l2s.io.roundingMode := in.bits.rm - mux.data := expand(l2s.io.out, FType.S) + mux.data := sanitizeNaN(l2s.io.out, FType.S) mux.exc := l2s.io.exceptionFlags fLen match { @@ -392,8 +460,9 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) { l2d.io.signedIn := ~in.bits.typ(0) l2d.io.in := intValue l2d.io.roundingMode := in.bits.rm - when (!in.bits.single) { - mux.data := l2d.io.out + mux.data := Cat(l2d.io.out >> l2s.io.out.getWidth, l2s.io.out) + when (!in.bits.singleIn) { + mux.data := sanitizeNaN(l2d.io.out, FType.D) mux.exc := l2d.io.exceptionFlags } } @@ -414,37 +483,46 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) { val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2)) val fsgnj = Cat(signNum(fLen), in.bits.in1(fLen-1, 0)) - val mux = Wire(new FPResult) - mux.exc := UInt(0) - mux.data := fsgnj + val fsgnjMux = Wire(new FPResult) + fsgnjMux.exc := UInt(0) + fsgnjMux.data := fsgnj - when (in.bits.cmd === FCMD_MINMAX) { - val isnan1 = IsNaNRecFN(in.bits.in1, maxType) - val isnan2 = IsNaNRecFN(in.bits.in2, maxType) - val isInvalid = IsSNaNRecFN(in.bits.in1, maxType) || IsSNaNRecFN(in.bits.in2, maxType) + when (in.bits.wflags) { // fmin/fmax + val isnan1 = maxType.isNaN(in.bits.in1) + val isnan2 = maxType.isNaN(in.bits.in2) + val isInvalid = maxType.isSNaN(in.bits.in1) || maxType.isSNaN(in.bits.in2) val isNaNOut = isInvalid || (isnan1 && isnan2) val isLHS = isnan2 || in.bits.rm(0) =/= io.lt && !isnan1 - mux.exc := isInvalid << 4 - mux.data := Mux(isNaNOut, CanonicalNaN(maxType), Mux(isLHS, in.bits.in1, in.bits.in2)) + fsgnjMux.exc := isInvalid << 4 + fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2)) } + val mux = Wire(new FPResult) + mux.exc := fsgnjMux.exc + fLen match { case 32 => + mux.data := fsgnjMux.data case 64 => - when (in.bits.cmd === FCMD_CVT_FF) { + val fsgnjSingle = maxType.unsafeConvert(fsgnjMux.data, FType.S) + mux.data := Mux(in.bits.singleOut, Cat(fsgnjMux.data >> fsgnjSingle.getWidth, fsgnjSingle), fsgnjMux.data) + + when (in.bits.wflags && !in.bits.ren2) { // fcvt val d2s = Module(new hardfloat.RecFNToRecFN(dExpWidth, dSigWidth, sExpWidth, sSigWidth)) d2s.io.in := in.bits.in1 d2s.io.roundingMode := in.bits.rm + val d2sOut = sanitizeNaN(d2s.io.out, FType.S) val s2d = Module(new hardfloat.RecFNToRecFN(sExpWidth, sSigWidth, dExpWidth, dSigWidth)) - s2d.io.in := contract(in.bits.in1, FType.S) + s2d.io.in := maxType.unsafeConvert(in.bits.in1, FType.S) s2d.io.roundingMode := in.bits.rm + val s2dOut = sanitizeNaN(s2d.io.out, FType.D) - when (in.bits.single) { - mux.data := expand(d2s.io.out, FType.S) + when (in.bits.singleOut) { + mux.data := Cat(s2dOut >> d2sOut.getWidth, d2sOut) mux.exc := d2s.io.exceptionFlags }.otherwise { - mux.data := s2d.io.out + mux.data := s2dOut mux.exc := s2d.io.exceptionFlags } } @@ -459,31 +537,29 @@ class FPUFMAPipe(val latency: Int, t: FType)(implicit p: Parameters) extends FPU val out = Valid(new FPResult) } - val width = t.sig + t.exp - val one = UInt(1) << (width-1) - val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width - val valid = Reg(next=io.in.valid) val in = Reg(new FPInput) when (io.in.valid) { + val signProd = io.in.bits.in1(maxType.sig + maxType.exp) ^ io.in.bits.in2(maxType.sig + maxType.exp) + val one = UInt(1) << (t.sig + t.exp - 1) + val zero = signProd << (t.sig + t.exp) val cmd_fma = io.in.bits.ren3 val cmd_addsub = io.in.bits.swap23 in := io.in.bits - in.in1 := contract(io.in.bits.in1, t) - in.in2 := Mux(cmd_addsub, one, contract(io.in.bits.in2, t)) - in.in3 := Mux(cmd_fma || cmd_addsub, contract(io.in.bits.in3, t), zero) - in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0)) + in.in1 := maxType.unsafeConvert(io.in.bits.in1, t) + in.in2 := Mux(cmd_addsub, one, maxType.unsafeConvert(io.in.bits.in2, t)) + in.in3 := Mux(cmd_fma || cmd_addsub, maxType.unsafeConvert(io.in.bits.in3, t), zero) } val fma = Module(new hardfloat.MulAddRecFN(t.exp, t.sig)) - fma.io.op := in.cmd + fma.io.op := in.fmaCmd fma.io.roundingMode := in.rm fma.io.a := in.in1 fma.io.b := in.in2 fma.io.c := in.in3 val res = Wire(new FPResult) - res.data := expand(fma.io.out, t) + res.data := sanitizeNaN(fma.io.out, t) res.exc := fma.io.exceptionFlags io.out := Pipe(valid, res, latency-1) } @@ -517,23 +593,18 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { // load response val load_wb = Reg(next=io.dmem_resp_val) - val load_wb_single = RegEnable(!io.dmem_resp_type(0), io.dmem_resp_val) + val load_wb_double = RegEnable(io.dmem_resp_type(0), io.dmem_resp_val) val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val) val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val) - val rec_s = hardfloat.recFNFromFN(sExpWidth, sSigWidth, load_wb_data) - val load_wb_data_recoded = fLen match { - case 32 => rec_s - case 64 => - val rec_d = hardfloat.recFNFromFN(dExpWidth, dSigWidth, load_wb_data) - Mux(load_wb_single, expand(rec_s, FType.S), rec_d) - } // regfile val regfile = Mem(32, Bits(width = fLen+1)) when (load_wb) { - regfile(load_wb_tag) := load_wb_data_recoded + val wdata = recode(load_wb_data, load_wb_double) + regfile(load_wb_tag) := wdata + assert(consistent(wdata)) if (enableCommitLog) - printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, Mux(load_wb_single, load_wb_data(31,0), load_wb_data)) + printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, load_wb_data) } val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) @@ -552,12 +623,14 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12)) val req = Wire(new FPInput) + def readAndUnbox(addr: UInt) = unbox(regfile(addr), !ex_ctrl.singleIn) req := ex_ctrl req.rm := ex_rm - req.in1 := regfile(ex_ra1) - req.in2 := regfile(ex_ra2) - req.in3 := regfile(ex_ra3) + req.in1 := readAndUnbox(ex_ra1) + req.in2 := readAndUnbox(ex_ra2) + req.in3 := readAndUnbox(ex_ra3) req.typ := ex_reg_inst(21,20) + req.fmaCmd := ex_reg_inst(3,2) | (!ex_ctrl.ren3 && ex_reg_inst(27)) when (ex_cp_valid) { req := io.cp_req.bits when (io.cp_req.bits.swap23) { @@ -567,11 +640,11 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { } val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.S)) - sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single + sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.singleOut sfma.io.in.bits := req val fpiu = Module(new FPToInt) - fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX) + fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags)) fpiu.io.in.bits := req io.store_data := fpiu.io.out.bits.store io.toint_data := fpiu.io.out.bits.toint @@ -595,7 +668,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { val divSqrt_waddr = Reg(UInt(width = 5)) val divSqrt_single = Reg(Bool()) val divSqrt_wdata = Wire(UInt(width = fLen+1)) - val divSqrt_flags = Wire(UInt(width = 5)) + val divSqrt_flags = Wire(UInt(width = FPConstants.FLAGS_SZ)) val divSqrt_in_flight = Reg(init=Bool(false)) val divSqrt_killed = Reg(Bool()) @@ -604,12 +677,12 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { val pipes = List( Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), - Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits)) ++ + Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.singleOut, sfma.io.out.bits)) ++ (fLen > 32).option({ val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, FType.D)) - dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single + dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.singleOut dfma.io.in.bits := req - Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits) + Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.singleOut, dfma.io.out.bits) }) def latencyMask(c: FPUCtrlSigs, offset: Int) = { require(pipes.forall(_.lat >= offset)) @@ -643,7 +716,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { for (i <- 0 until maxLatency-1) { when (!write_port_busy && memLatencyMask(i)) { wbInfo(i).cp := mem_cp_valid - wbInfo(i).single := mem_ctrl.single + wbInfo(i).single := mem_ctrl.singleOut wbInfo(i).pipeid := pipeid(mem_ctrl) wbInfo(i).rd := mem_reg_inst(11,7) } @@ -651,20 +724,14 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { } val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd) - val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)) + val wdouble = Mux(divSqrt_wen, !divSqrt_single, !wbInfo(0).single) + val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)), wdouble) val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid) when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) { + assert(consistent(wdata)) regfile(waddr) := wdata if (enableCommitLog) { - val wsingle = Mux(divSqrt_wen, divSqrt_single, wbInfo(0).single) - val wdata_unrec_s = hardfloat.fNFromRecFN(sExpWidth, sSigWidth, contract(wdata, FType.S)) - val unrec = fLen match { - case 32 => wdata_unrec_s - case 64 => - val wdata_unrec_d = hardfloat.fNFromRecFN(dExpWidth, dSigWidth, wdata) - Mux(wsingle, wdata_unrec_s, wdata_unrec_d) - } - printf("f%d p%d 0x%x\n", waddr, waddr + 32, unrec) + printf("f%d p%d 0x%x\n", waddr, waddr + 32, ieee(wdata)) } } when (wbInfo(0).cp && wen(0)) { @@ -705,21 +772,21 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight divSqrt.io.sqrtOp := mem_ctrl.sqrt - divSqrt.io.a := fpiu.io.as_double.in1 - divSqrt.io.b := fpiu.io.as_double.in2 - divSqrt.io.roundingMode := fpiu.io.as_double.rm + divSqrt.io.a := fpiu.io.out.bits.in.in1 + divSqrt.io.b := fpiu.io.out.bits.in.in2 + divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm when (divSqrt.io.inValid && divSqrt_inReady) { divSqrt_in_flight := true divSqrt_killed := killm - divSqrt_single := mem_ctrl.single + divSqrt_single := mem_ctrl.singleOut divSqrt_waddr := mem_reg_inst(11,7) divSqrt_rm := divSqrt.io.roundingMode } when (divSqrt_outValid) { divSqrt_wen := !divSqrt_killed - divSqrt_wdata_double := divSqrt.io.out + divSqrt_wdata_double := sanitizeNaN(divSqrt.io.out, FType.D) divSqrt_in_flight := false divSqrt_flags_double := divSqrt.io.exceptionFlags } @@ -727,7 +794,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24)) divSqrt_toSingle.io.in := divSqrt_wdata_double divSqrt_toSingle.io.roundingMode := divSqrt_rm - divSqrt_wdata := Mux(divSqrt_single, expand(divSqrt_toSingle.io.out, FType.S), divSqrt_wdata_double) + divSqrt_wdata := Mux(divSqrt_single, Cat(divSqrt_wdata_double >> divSqrt_toSingle.io.out.getWidth, sanitizeNaN(divSqrt_toSingle.io.out, FType.S)), divSqrt_wdata_double) divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0)) } else { when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true }