76af15a6ff
I was examining a WB-stage control signal instead of a MEM-stage control signal. I refactored the code to group the signals together, so that this sort of bug is less likely going forward.
814 lines
30 KiB
Scala
814 lines
30 KiB
Scala
// See LICENSE.Berkeley for license details.
|
|
// See LICENSE.SiFive for license details.
|
|
|
|
package tile
|
|
|
|
import Chisel._
|
|
import Chisel.ImplicitConversions._
|
|
import FPConstants._
|
|
import rocket.DecodeLogic
|
|
import rocket.Instructions._
|
|
import uncore.constants.MemoryOpConstants._
|
|
import config._
|
|
import util._
|
|
|
|
case class FPUParams(
|
|
divSqrt: Boolean = true,
|
|
sfmaLatency: Int = 3,
|
|
dfmaLatency: Int = 4
|
|
)
|
|
|
|
object FPConstants
|
|
{
|
|
val RM_SZ = 3
|
|
val FLAGS_SZ = 5
|
|
}
|
|
|
|
trait HasFPUCtrlSigs {
|
|
val ldst = Bool()
|
|
val wen = Bool()
|
|
val ren1 = Bool()
|
|
val ren2 = Bool()
|
|
val ren3 = Bool()
|
|
val swap12 = Bool()
|
|
val swap23 = Bool()
|
|
val singleIn = Bool()
|
|
val singleOut = Bool()
|
|
val fromint = Bool()
|
|
val toint = Bool()
|
|
val fastpipe = Bool()
|
|
val fma = Bool()
|
|
val div = Bool()
|
|
val sqrt = Bool()
|
|
val wflags = Bool()
|
|
}
|
|
|
|
class FPUCtrlSigs extends Bundle with HasFPUCtrlSigs
|
|
|
|
class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) {
|
|
val io = new Bundle {
|
|
val inst = Bits(INPUT, 32)
|
|
val sigs = new FPUCtrlSigs().asOutput
|
|
}
|
|
|
|
val default = List(X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X)
|
|
val f =
|
|
Array(FLW -> List(Y,Y,N,N,N,X,X,X,X,N,N,N,N,N,N,N),
|
|
FSW -> List(Y,N,N,Y,N,Y,X,N,Y,N,Y,N,N,N,N,N),
|
|
FMV_S_X -> List(N,Y,N,N,N,X,X,Y,N,Y,N,N,N,N,N,N),
|
|
FCVT_S_W -> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y),
|
|
FCVT_S_WU-> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y),
|
|
FCVT_S_L -> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y),
|
|
FCVT_S_LU-> List(N,Y,N,N,N,X,X,Y,Y,Y,N,N,N,N,N,Y),
|
|
FMV_X_S -> List(N,N,Y,N,N,N,X,N,Y,N,Y,N,N,N,N,N),
|
|
FCLASS_S -> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,N),
|
|
FCVT_W_S -> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y),
|
|
FCVT_WU_S-> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y),
|
|
FCVT_L_S -> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y),
|
|
FCVT_LU_S-> List(N,N,Y,N,N,N,X,Y,Y,N,Y,N,N,N,N,Y),
|
|
FEQ_S -> List(N,N,Y,Y,N,N,N,Y,Y,N,Y,N,N,N,N,Y),
|
|
FLT_S -> List(N,N,Y,Y,N,N,N,Y,Y,N,Y,N,N,N,N,Y),
|
|
FLE_S -> List(N,N,Y,Y,N,N,N,Y,Y,N,Y,N,N,N,N,Y),
|
|
FSGNJ_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,N),
|
|
FSGNJN_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,N),
|
|
FSGNJX_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,N),
|
|
FMIN_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,Y),
|
|
FMAX_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,Y,N,N,N,Y),
|
|
FADD_S -> List(N,Y,Y,Y,N,N,Y,Y,Y,N,N,N,Y,N,N,Y),
|
|
FSUB_S -> List(N,Y,Y,Y,N,N,Y,Y,Y,N,N,N,Y,N,N,Y),
|
|
FMUL_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
|
FMADD_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
|
FMSUB_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
|
FNMADD_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
|
FNMSUB_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
|
FDIV_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,N,N,Y,N,Y),
|
|
FSQRT_S -> List(N,Y,Y,N,N,N,X,Y,Y,N,N,N,N,N,Y,Y))
|
|
val d =
|
|
Array(FLD -> List(Y,Y,N,N,N,X,X,X,N,N,N,N,N,N,N,N),
|
|
FSD -> List(Y,N,N,Y,N,Y,X,N,N,N,Y,N,N,N,N,N),
|
|
FMV_D_X -> List(N,Y,N,N,N,X,X,X,N,Y,N,N,N,N,N,N),
|
|
FCVT_D_W -> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y),
|
|
FCVT_D_WU-> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y),
|
|
FCVT_D_L -> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y),
|
|
FCVT_D_LU-> List(N,Y,N,N,N,X,X,N,N,Y,N,N,N,N,N,Y),
|
|
FMV_X_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,N),
|
|
FCLASS_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,N),
|
|
FCVT_W_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y),
|
|
FCVT_WU_D-> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y),
|
|
FCVT_L_D -> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y),
|
|
FCVT_LU_D-> List(N,N,Y,N,N,N,X,N,N,N,Y,N,N,N,N,Y),
|
|
FCVT_S_D -> List(N,Y,Y,N,N,N,X,N,Y,N,N,Y,N,N,N,Y),
|
|
FCVT_D_S -> List(N,Y,Y,N,N,N,X,Y,N,N,N,Y,N,N,N,Y),
|
|
FEQ_D -> List(N,N,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
|
|
FLT_D -> List(N,N,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
|
|
FLE_D -> List(N,N,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
|
|
FSGNJ_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,N),
|
|
FSGNJN_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,N),
|
|
FSGNJX_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,N),
|
|
FMIN_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,Y),
|
|
FMAX_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,N,Y),
|
|
FADD_D -> List(N,Y,Y,Y,N,N,Y,N,N,N,N,N,Y,N,N,Y),
|
|
FSUB_D -> List(N,Y,Y,Y,N,N,Y,N,N,N,N,N,Y,N,N,Y),
|
|
FMUL_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,N,Y),
|
|
FMADD_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
|
FMSUB_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
|
FNMADD_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
|
FNMSUB_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
|
FDIV_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,N,N,Y,N,Y),
|
|
FSQRT_D -> List(N,Y,Y,N,N,N,X,N,N,N,N,N,N,N,Y,Y))
|
|
|
|
val insns = fLen match {
|
|
case 32 => f
|
|
case 64 => f ++ d
|
|
}
|
|
val decoder = DecodeLogic(io.inst, default, insns)
|
|
val s = io.sigs
|
|
val sigs = Seq(s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
|
|
s.swap23, s.singleIn, s.singleOut, s.fromint, s.toint,
|
|
s.fastpipe, s.fma, s.div, s.sqrt, s.wflags)
|
|
sigs zip decoder map {case(s,d) => s := d}
|
|
}
|
|
|
|
class FPUCoreIO(implicit p: Parameters) extends CoreBundle()(p) {
|
|
val inst = Bits(INPUT, 32)
|
|
val fromint_data = Bits(INPUT, xLen)
|
|
|
|
val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
|
|
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
|
|
|
|
val store_data = Bits(OUTPUT, fLen)
|
|
val toint_data = Bits(OUTPUT, xLen)
|
|
|
|
val dmem_resp_val = Bool(INPUT)
|
|
val dmem_resp_type = Bits(INPUT, 3)
|
|
val dmem_resp_tag = UInt(INPUT, 5)
|
|
val dmem_resp_data = Bits(INPUT, fLen)
|
|
|
|
val valid = Bool(INPUT)
|
|
val fcsr_rdy = Bool(OUTPUT)
|
|
val nack_mem = Bool(OUTPUT)
|
|
val illegal_rm = Bool(OUTPUT)
|
|
val killx = Bool(INPUT)
|
|
val killm = Bool(INPUT)
|
|
val dec = new FPUCtrlSigs().asOutput
|
|
val sboard_set = Bool(OUTPUT)
|
|
val sboard_clr = Bool(OUTPUT)
|
|
val sboard_clra = UInt(OUTPUT, 5)
|
|
}
|
|
|
|
class FPUIO(implicit p: Parameters) extends FPUCoreIO ()(p) {
|
|
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
|
|
val cp_resp = Decoupled(new FPResult())
|
|
}
|
|
|
|
class FPResult(implicit p: Parameters) extends CoreBundle()(p) {
|
|
val data = Bits(width = fLen+1)
|
|
val exc = Bits(width = FPConstants.FLAGS_SZ)
|
|
}
|
|
|
|
class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs {
|
|
val rm = Bits(width = FPConstants.RM_SZ)
|
|
val fmaCmd = Bits(width = 2)
|
|
val typ = Bits(width = 2)
|
|
val in1 = Bits(width = fLen+1)
|
|
val in2 = Bits(width = fLen+1)
|
|
val in3 = Bits(width = fLen+1)
|
|
|
|
override def cloneType = new FPInput().asInstanceOf[this.type]
|
|
}
|
|
|
|
case class FType(exp: Int, sig: Int) {
|
|
def ieeeWidth = exp + sig
|
|
def recodedWidth = ieeeWidth + 1
|
|
|
|
def qNaN = UInt((BigInt(7) << (exp + sig - 3)) + (BigInt(1) << (sig - 2)), exp + sig + 1)
|
|
def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR
|
|
def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2)
|
|
|
|
def classify(x: UInt) = {
|
|
val sign = x(sig + exp)
|
|
val code = x(exp + sig - 1, exp + sig - 3)
|
|
val codeHi = code(2, 1)
|
|
val isSpecial = codeHi === UInt(3)
|
|
|
|
val isHighSubnormalIn = x(exp + sig - 3, sig - 1) < UInt(2)
|
|
val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
|
|
val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
|
|
val isZero = code === UInt(0)
|
|
val isInf = isSpecial && !code(0)
|
|
val isNaN = code.andR
|
|
val isSNaN = isNaN && !x(sig-2)
|
|
val isQNaN = isNaN && x(sig-2)
|
|
|
|
Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
|
|
isSubnormal && !sign, isZero && !sign, isZero && sign,
|
|
isSubnormal && sign, isNormal && sign, isInf && sign)
|
|
}
|
|
|
|
// convert between formats, ignoring rounding, range, NaN
|
|
def unsafeConvert(x: UInt, to: FType) = if (this == to) x else {
|
|
val sign = x(sig + exp)
|
|
val fractIn = x(sig - 2, 0)
|
|
val expIn = x(sig + exp - 1, sig - 1)
|
|
val fractOut = fractIn << to.sig >> sig
|
|
val expOut = {
|
|
val expCode = expIn(exp, exp - 2)
|
|
val commonCase = (expIn + (1 << to.exp)) - (1 << exp)
|
|
Mux(expCode === 0 || expCode >= 6, Cat(expCode, commonCase(to.exp - 3, 0)), commonCase(to.exp, 0))
|
|
}
|
|
Cat(sign, expOut, fractOut)
|
|
}
|
|
|
|
def recode(x: UInt) = hardfloat.recFNFromFN(exp, sig, x)
|
|
def ieee(x: UInt) = hardfloat.fNFromRecFN(exp, sig, x)
|
|
}
|
|
|
|
object FType {
|
|
val S = new FType(8, 24)
|
|
val D = new FType(11, 53)
|
|
|
|
val all = List(S, D)
|
|
}
|
|
|
|
trait HasFPUParameters {
|
|
val fLen: Int
|
|
val xLen: Int
|
|
val minXLen = 32
|
|
val nIntTypes = log2Ceil(xLen/minXLen) + 1
|
|
val floatTypes = FType.all.filter(_.ieeeWidth <= fLen)
|
|
val minType = floatTypes.head
|
|
val maxType = floatTypes.last
|
|
def prevType(t: FType) = floatTypes(typeTag(t) - 1)
|
|
val maxExpWidth = maxType.exp
|
|
val maxSigWidth = maxType.sig
|
|
def typeTag(t: FType) = floatTypes.indexOf(t)
|
|
|
|
private def isBox(x: UInt, t: FType): Bool = x(t.sig + t.exp, t.sig + t.exp - 4).andR
|
|
|
|
private def box(x: UInt, xt: FType, y: UInt, yt: FType): UInt = {
|
|
require(xt.ieeeWidth == 2 * yt.ieeeWidth)
|
|
val swizzledNaN = Cat(
|
|
x(xt.sig + xt.exp, xt.sig + xt.exp - 3),
|
|
x(xt.sig - 2, yt.recodedWidth - 1).andR,
|
|
x(xt.sig + xt.exp - 5, xt.sig),
|
|
y(yt.recodedWidth - 2),
|
|
x(xt.sig - 2, yt.recodedWidth - 1),
|
|
y(yt.recodedWidth - 1),
|
|
y(yt.recodedWidth - 3, 0))
|
|
Mux(xt.isNaN(x), swizzledNaN, x)
|
|
}
|
|
|
|
// implement NaN unboxing for FU inputs
|
|
def unbox(x: UInt, tag: UInt, exactType: Option[FType]): UInt = {
|
|
val outType = exactType.getOrElse(maxType)
|
|
def helper(x: UInt, t: FType): Seq[(Bool, UInt)] = {
|
|
val prev =
|
|
if (t == minType) {
|
|
Seq()
|
|
} else {
|
|
val prevT = prevType(t)
|
|
val unswizzled = Cat(
|
|
x(prevT.sig + prevT.exp - 1),
|
|
x(t.sig - 1),
|
|
x(prevT.sig + prevT.exp - 2, 0))
|
|
val prev = helper(unswizzled, prevT)
|
|
val isbox = isBox(x, t)
|
|
prev.map(p => (isbox && p._1, p._2))
|
|
}
|
|
prev :+ (true.B, t.unsafeConvert(x, outType))
|
|
}
|
|
|
|
val (oks, floats) = helper(x, maxType).unzip
|
|
if (exactType.isEmpty || floatTypes.size == 1) {
|
|
Mux(oks(tag), floats(tag), maxType.qNaN)
|
|
} else {
|
|
val t = exactType.get
|
|
floats(typeTag(t)) | Mux(oks(typeTag(t)), 0.U, t.qNaN)
|
|
}
|
|
}
|
|
|
|
// make sure that the redundant bits in the NaN-boxed encoding are consistent
|
|
def consistent(x: UInt): Bool = {
|
|
def helper(x: UInt, t: FType): Bool = if (typeTag(t) == 0) true.B else {
|
|
val prevT = prevType(t)
|
|
val unswizzled = Cat(
|
|
x(prevT.sig + prevT.exp - 1),
|
|
x(t.sig - 1),
|
|
x(prevT.sig + prevT.exp - 2, 0))
|
|
val prevOK = !isBox(x, t) || helper(unswizzled, prevT)
|
|
val curOK = !t.isNaN(x) || x(t.sig + t.exp - 4) === x(t.sig - 2, prevT.recodedWidth - 1).andR
|
|
prevOK && curOK
|
|
}
|
|
helper(x, maxType)
|
|
}
|
|
|
|
// generate a NaN box from an FU result
|
|
def box(x: UInt, tag: UInt): UInt = {
|
|
def helper(y: UInt, yt: FType): UInt = {
|
|
if (yt == maxType) {
|
|
y
|
|
} else {
|
|
val nt = floatTypes(typeTag(yt) + 1)
|
|
val bigger = box(UInt((BigInt(1) << nt.recodedWidth)-1), nt, y, yt)
|
|
bigger | UInt((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth))
|
|
}
|
|
}
|
|
val opts = floatTypes.map(t => helper(x, t))
|
|
opts(tag)
|
|
}
|
|
|
|
// zap bits that hardfloat thinks are don't-cares, but we do care about
|
|
def sanitizeNaN(x: UInt, t: FType): UInt = {
|
|
if (typeTag(t) == 0) {
|
|
x
|
|
} else {
|
|
val maskedNaN = x & ~UInt((BigInt(1) << (t.sig-1)) | (BigInt(1) << (t.sig+t.exp-4)), t.recodedWidth)
|
|
Mux(t.isNaN(x), maskedNaN, x)
|
|
}
|
|
}
|
|
|
|
// implement NaN boxing and recoding for FL*/fmv.*.x
|
|
def recode(x: UInt, tag: UInt): UInt = {
|
|
def helper(x: UInt, t: FType): UInt = {
|
|
if (typeTag(t) == 0) {
|
|
t.recode(x)
|
|
} else {
|
|
val prevT = prevType(t)
|
|
box(t.recode(x), t, helper(x, prevT), prevT)
|
|
}
|
|
}
|
|
|
|
// fill MSBs of subword loads to emulate a wider load of a NaN-boxed value
|
|
val boxes = floatTypes.map(t => UInt((BigInt(1) << maxType.ieeeWidth) - (BigInt(1) << t.ieeeWidth)))
|
|
helper(boxes(tag) | x, maxType)
|
|
}
|
|
|
|
// implement NaN unboxing and un-recoding for FS*/fmv.x.*
|
|
def ieee(x: UInt, t: FType = maxType): UInt = {
|
|
if (typeTag(t) == 0) {
|
|
t.ieee(x)
|
|
} else {
|
|
val unrecoded = t.ieee(x)
|
|
val prevT = prevType(t)
|
|
val prevRecoded = Cat(
|
|
x(prevT.recodedWidth-2),
|
|
x(t.sig-1),
|
|
x(prevT.recodedWidth-3, 0))
|
|
val prevUnrecoded = ieee(prevRecoded, prevT)
|
|
Cat(unrecoded >> prevT.ieeeWidth, Mux(t.isNaN(x), prevUnrecoded, unrecoded(prevT.ieeeWidth-1, 0)))
|
|
}
|
|
}
|
|
}
|
|
|
|
abstract class FPUModule(implicit p: Parameters) extends CoreModule()(p) with HasFPUParameters
|
|
|
|
class FPToInt(implicit p: Parameters) extends FPUModule()(p) {
|
|
class Output extends Bundle {
|
|
val in = new FPInput
|
|
val lt = Bool()
|
|
val store = Bits(width = fLen)
|
|
val toint = Bits(width = xLen)
|
|
val exc = Bits(width = FPConstants.FLAGS_SZ)
|
|
override def cloneType = new Output().asInstanceOf[this.type]
|
|
}
|
|
val io = new Bundle {
|
|
val in = Valid(new FPInput).flip
|
|
val out = Valid(new Output)
|
|
}
|
|
|
|
val in = RegEnable(io.in.bits, io.in.valid)
|
|
val valid = Reg(next=io.in.valid)
|
|
|
|
val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
|
|
dcmp.io.a := in.in1
|
|
dcmp.io.b := in.in2
|
|
dcmp.io.signaling := !in.rm(1)
|
|
|
|
val tag = !in.singleOut // TODO typeTag
|
|
val store = ieee(in.in1)
|
|
val toint = Wire(init = store)
|
|
val intType = Wire(init = tag)
|
|
io.out.bits.store := ((0 until nIntTypes).map(i => Fill(1 << (nIntTypes - i - 1), store((minXLen << i) - 1, 0))): Seq[UInt])(tag)
|
|
io.out.bits.toint := ((0 until nIntTypes).map(i => toint((minXLen << i) - 1, 0).sextTo(xLen)): Seq[UInt])(intType)
|
|
io.out.bits.exc := Bits(0)
|
|
|
|
when (in.rm(0)) {
|
|
val classify_out = (floatTypes.map(t => t.classify(maxType.unsafeConvert(in.in1, t))): Seq[UInt])(tag)
|
|
toint := classify_out | (store >> minXLen << minXLen)
|
|
intType := 0
|
|
}
|
|
|
|
when (in.wflags) { // feq/flt/fle, fcvt
|
|
toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR | (store >> minXLen << minXLen)
|
|
io.out.bits.exc := dcmp.io.exceptionFlags
|
|
intType := 0
|
|
|
|
when (!in.ren2) { // fcvt
|
|
val cvtType = in.typ.extract(log2Ceil(nIntTypes), 1)
|
|
intType := cvtType
|
|
|
|
val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, xLen))
|
|
conv.io.in := in.in1
|
|
conv.io.roundingMode := in.rm
|
|
conv.io.signedOut := ~in.typ(0)
|
|
toint := conv.io.out
|
|
io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0))
|
|
|
|
for (i <- 0 until nIntTypes-1) {
|
|
val w = minXLen << i
|
|
when (cvtType === i) {
|
|
val narrow = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, w))
|
|
narrow.io.in := in.in1
|
|
narrow.io.roundingMode := in.rm
|
|
narrow.io.signedOut := ~in.typ(0)
|
|
|
|
val excSign = in.in1(maxExpWidth + maxSigWidth) && !maxType.isNaN(in.in1)
|
|
val excOut = Cat(conv.io.signedOut === excSign, Fill(w-1, !excSign))
|
|
val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1)
|
|
when (invalid) { toint := Cat(conv.io.out >> w, excOut) }
|
|
io.out.bits.exc := Cat(invalid, UInt(0, 3), !invalid && conv.io.intExceptionFlags(0))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
io.out.valid := valid
|
|
io.out.bits.lt := dcmp.io.lt || (dcmp.io.a.asSInt < 0.S && dcmp.io.b.asSInt >= 0.S)
|
|
io.out.bits.in := in
|
|
}
|
|
|
|
class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
|
|
val io = new Bundle {
|
|
val in = Valid(new FPInput).flip
|
|
val out = Valid(new FPResult)
|
|
}
|
|
|
|
val in = Pipe(io.in)
|
|
val tag = !in.bits.singleIn // TODO typeTag
|
|
|
|
val mux = Wire(new FPResult)
|
|
mux.exc := Bits(0)
|
|
mux.data := recode(in.bits.in1, !in.bits.singleIn)
|
|
|
|
val intValue = {
|
|
val res = Wire(init = in.bits.in1.asSInt)
|
|
for (i <- 0 until nIntTypes-1) {
|
|
val smallInt = in.bits.in1((minXLen << i) - 1, 0)
|
|
when (in.bits.typ.extract(log2Ceil(nIntTypes), 1) === i) {
|
|
res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt)
|
|
}
|
|
}
|
|
res.asUInt
|
|
}
|
|
|
|
when (in.bits.wflags) { // fcvt
|
|
// could be improved for RVD/RVQ with a single variable-position rounding
|
|
// unit, rather than N fixed-position ones
|
|
val i2fResults = for (t <- floatTypes) yield {
|
|
val i2f = Module(new hardfloat.INToRecFN(xLen, t.exp, t.sig))
|
|
i2f.io.signedIn := ~in.bits.typ(0)
|
|
i2f.io.in := intValue
|
|
i2f.io.roundingMode := in.bits.rm
|
|
i2f.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
|
(sanitizeNaN(i2f.io.out, t), i2f.io.exceptionFlags)
|
|
}
|
|
|
|
val (data, exc) = i2fResults.unzip
|
|
val dataPadded = data.init.map(d => Cat(data.last >> d.getWidth, d)) :+ data.last
|
|
mux.data := dataPadded(tag)
|
|
mux.exc := exc(tag)
|
|
}
|
|
|
|
io.out <> Pipe(in.valid, mux, latency-1)
|
|
}
|
|
|
|
class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
|
|
val io = new Bundle {
|
|
val in = Valid(new FPInput).flip
|
|
val out = Valid(new FPResult)
|
|
val lt = Bool(INPUT) // from FPToInt
|
|
}
|
|
|
|
val in = Pipe(io.in)
|
|
|
|
val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2))
|
|
val fsgnj = Cat(signNum(fLen), in.bits.in1(fLen-1, 0))
|
|
|
|
val fsgnjMux = Wire(new FPResult)
|
|
fsgnjMux.exc := UInt(0)
|
|
fsgnjMux.data := fsgnj
|
|
|
|
when (in.bits.wflags) { // fmin/fmax
|
|
val isnan1 = maxType.isNaN(in.bits.in1)
|
|
val isnan2 = maxType.isNaN(in.bits.in2)
|
|
val isInvalid = maxType.isSNaN(in.bits.in1) || maxType.isSNaN(in.bits.in2)
|
|
val isNaNOut = isnan1 && isnan2
|
|
val isLHS = isnan2 || in.bits.rm(0) =/= io.lt && !isnan1
|
|
fsgnjMux.exc := isInvalid << 4
|
|
fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2))
|
|
}
|
|
|
|
val inTag = !in.bits.singleIn // TODO typeTag
|
|
val outTag = !in.bits.singleOut // TODO typeTag
|
|
val mux = Wire(init = fsgnjMux)
|
|
for (t <- floatTypes.init) {
|
|
when (outTag === typeTag(t)) {
|
|
mux.data := Cat(fsgnjMux.data >> t.recodedWidth, maxType.unsafeConvert(fsgnjMux.data, t))
|
|
}
|
|
}
|
|
|
|
when (in.bits.wflags && !in.bits.ren2) { // fcvt
|
|
if (floatTypes.size > 1) {
|
|
// widening conversions simply canonicalize NaN operands
|
|
val widened = Mux(maxType.isNaN(in.bits.in1), maxType.qNaN, in.bits.in1)
|
|
fsgnjMux.data := widened
|
|
fsgnjMux.exc := maxType.isSNaN(in.bits.in1) << 4
|
|
|
|
// narrowing conversions require rounding (for RVQ, this could be
|
|
// optimized to use a single variable-position rounding unit, rather
|
|
// than two fixed-position ones)
|
|
for (outType <- floatTypes.init) when (outTag === typeTag(outType) && (typeTag(outType) == 0 || outTag < inTag)) {
|
|
val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig))
|
|
narrower.io.in := in.bits.in1
|
|
narrower.io.roundingMode := in.bits.rm
|
|
narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
|
val narrowed = sanitizeNaN(narrower.io.out, outType)
|
|
mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed)
|
|
mux.exc := narrower.io.exceptionFlags
|
|
}
|
|
}
|
|
}
|
|
|
|
io.out <> Pipe(in.valid, mux, latency-1)
|
|
}
|
|
|
|
class FPUFMAPipe(val latency: Int, val t: FType)(implicit p: Parameters) extends FPUModule()(p) {
|
|
val io = new Bundle {
|
|
val in = Valid(new FPInput).flip
|
|
val out = Valid(new FPResult)
|
|
}
|
|
|
|
val valid = Reg(next=io.in.valid)
|
|
val in = Reg(new FPInput)
|
|
when (io.in.valid) {
|
|
val one = UInt(1) << (t.sig + t.exp - 1)
|
|
val zero = (io.in.bits.in1 ^ io.in.bits.in2) & (UInt(1) << (t.sig + t.exp))
|
|
val cmd_fma = io.in.bits.ren3
|
|
val cmd_addsub = io.in.bits.swap23
|
|
in := io.in.bits
|
|
when (cmd_addsub) { in.in2 := one }
|
|
when (!(cmd_fma || cmd_addsub)) { in.in3 := zero }
|
|
}
|
|
|
|
val fma = Module(new hardfloat.MulAddRecFN(t.exp, t.sig))
|
|
fma.io.op := in.fmaCmd
|
|
fma.io.roundingMode := in.rm
|
|
fma.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
|
fma.io.a := in.in1
|
|
fma.io.b := in.in2
|
|
fma.io.c := in.in3
|
|
|
|
val res = Wire(new FPResult)
|
|
res.data := sanitizeNaN(fma.io.out, t)
|
|
res.exc := fma.io.exceptionFlags
|
|
io.out := Pipe(valid, res, latency-1)
|
|
}
|
|
|
|
class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|
val io = new FPUIO
|
|
|
|
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
|
|
val req_valid = ex_reg_valid || io.cp_req.valid
|
|
val ex_reg_inst = RegEnable(io.inst, io.valid)
|
|
val ex_cp_valid = io.cp_req.fire()
|
|
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
|
|
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
|
|
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
|
|
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
|
|
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
|
|
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
|
|
|
|
val fp_decoder = Module(new FPUDecoder)
|
|
fp_decoder.io.inst := io.inst
|
|
|
|
val cp_ctrl = Wire(new FPUCtrlSigs)
|
|
cp_ctrl <> io.cp_req.bits
|
|
io.cp_resp.valid := Bool(false)
|
|
io.cp_resp.bits.data := UInt(0)
|
|
|
|
val id_ctrl = fp_decoder.io.sigs
|
|
val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, RegEnable(id_ctrl, io.valid))
|
|
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
|
|
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
|
|
|
|
// load response
|
|
val load_wb = Reg(next=io.dmem_resp_val)
|
|
val load_wb_double = RegEnable(io.dmem_resp_type(0), io.dmem_resp_val)
|
|
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
|
|
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
|
|
|
|
// regfile
|
|
val regfile = Mem(32, Bits(width = fLen+1))
|
|
when (load_wb) {
|
|
val wdata = recode(load_wb_data, load_wb_double)
|
|
regfile(load_wb_tag) := wdata
|
|
assert(consistent(wdata))
|
|
if (enableCommitLog)
|
|
printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, load_wb_data)
|
|
}
|
|
|
|
val ex_ra = List.fill(3)(Reg(UInt()))
|
|
val ex_rs = ex_ra.map(a => regfile(a))
|
|
when (io.valid) {
|
|
when (id_ctrl.ren1) {
|
|
when (!id_ctrl.swap12) { ex_ra(0) := io.inst(19,15) }
|
|
when (id_ctrl.swap12) { ex_ra(1) := io.inst(19,15) }
|
|
}
|
|
when (id_ctrl.ren2) {
|
|
when (id_ctrl.swap12) { ex_ra(0) := io.inst(24,20) }
|
|
when (id_ctrl.swap23) { ex_ra(2) := io.inst(24,20) }
|
|
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra(1) := io.inst(24,20) }
|
|
}
|
|
when (id_ctrl.ren3) { ex_ra(2) := io.inst(31,27) }
|
|
}
|
|
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
|
|
|
|
val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.S))
|
|
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.singleOut
|
|
sfma.io.in.bits := fuInput(Some(sfma.t))
|
|
|
|
val fpiu = Module(new FPToInt)
|
|
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags))
|
|
fpiu.io.in.bits := fuInput(None)
|
|
io.store_data := fpiu.io.out.bits.store
|
|
io.toint_data := fpiu.io.out.bits.toint
|
|
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
|
|
io.cp_resp.bits.data := fpiu.io.out.bits.toint
|
|
io.cp_resp.valid := Bool(true)
|
|
}
|
|
|
|
val ifpu = Module(new IntToFP(2))
|
|
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
|
|
ifpu.io.in.bits := fpiu.io.in.bits
|
|
ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data)
|
|
|
|
val fpmu = Module(new FPToFP(2))
|
|
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
|
|
fpmu.io.in.bits := fpiu.io.in.bits
|
|
fpmu.io.lt := fpiu.io.out.bits.lt
|
|
|
|
val divSqrt_wen = Wire(init = false.B)
|
|
val divSqrt_inFlight = Wire(init = false.B)
|
|
val divSqrt_waddr = Reg(UInt(width = 5))
|
|
val divSqrt_typeTag = Wire(UInt(width = log2Up(floatTypes.size)))
|
|
val divSqrt_wdata = Wire(UInt(width = fLen+1))
|
|
val divSqrt_flags = Wire(UInt(width = FPConstants.FLAGS_SZ))
|
|
|
|
// writeback arbitration
|
|
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
|
|
val pipes = List(
|
|
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
|
|
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
|
|
Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.singleOut, sfma.io.out.bits)) ++
|
|
(fLen > 32).option({
|
|
val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, FType.D))
|
|
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.singleOut
|
|
dfma.io.in.bits := fuInput(Some(dfma.t))
|
|
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.singleOut, dfma.io.out.bits)
|
|
})
|
|
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
|
|
require(pipes.forall(_.lat >= offset))
|
|
pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
|
|
}
|
|
def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
|
|
val maxLatency = pipes.map(_.lat).max
|
|
val memLatencyMask = latencyMask(mem_ctrl, 2)
|
|
|
|
class WBInfo extends Bundle {
|
|
val rd = UInt(width = 5)
|
|
val single = Bool()
|
|
val cp = Bool()
|
|
val pipeid = UInt(width = log2Ceil(pipes.size))
|
|
override def cloneType: this.type = new WBInfo().asInstanceOf[this.type]
|
|
}
|
|
|
|
val wen = Reg(init=Bits(0, maxLatency-1))
|
|
val wbInfo = Reg(Vec(maxLatency-1, new WBInfo))
|
|
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
|
|
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
|
|
|
|
for (i <- 0 until maxLatency-2) {
|
|
when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) }
|
|
}
|
|
wen := wen >> 1
|
|
when (mem_wen) {
|
|
when (!killm) {
|
|
wen := wen >> 1 | memLatencyMask
|
|
}
|
|
for (i <- 0 until maxLatency-1) {
|
|
when (!write_port_busy && memLatencyMask(i)) {
|
|
wbInfo(i).cp := mem_cp_valid
|
|
wbInfo(i).single := mem_ctrl.singleOut
|
|
wbInfo(i).pipeid := pipeid(mem_ctrl)
|
|
wbInfo(i).rd := mem_reg_inst(11,7)
|
|
}
|
|
}
|
|
}
|
|
|
|
val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
|
|
val wdouble = Mux(divSqrt_wen, divSqrt_typeTag, !wbInfo(0).single)
|
|
val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)), wdouble)
|
|
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
|
|
when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
|
|
assert(consistent(wdata))
|
|
regfile(waddr) := wdata
|
|
if (enableCommitLog) {
|
|
printf("f%d p%d 0x%x\n", waddr, waddr + 32, ieee(wdata))
|
|
}
|
|
}
|
|
when (wbInfo(0).cp && wen(0)) {
|
|
io.cp_resp.bits.data := wdata
|
|
io.cp_resp.valid := Bool(true)
|
|
}
|
|
io.cp_req.ready := !ex_reg_valid
|
|
|
|
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
|
|
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
|
|
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
|
|
io.fcsr_flags.bits :=
|
|
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
|
|
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
|
|
Mux(wen(0), wexc, UInt(0))
|
|
|
|
val divSqrt_write_port_busy = (mem_ctrl.div || mem_ctrl.sqrt) && wen.orR
|
|
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_inFlight)
|
|
io.nack_mem := write_port_busy || divSqrt_write_port_busy || divSqrt_inFlight
|
|
io.dec <> fp_decoder.io.sigs
|
|
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
|
|
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
|
|
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === UInt(x._2))))
|
|
io.sboard_clra := waddr
|
|
// we don't currently support round-max-magnitude (rm=4)
|
|
io.illegal_rm := io.inst(14,12).isOneOf(5, 6) || io.inst(14,12) === 7 && io.fcsr_rm >= 5
|
|
|
|
if (cfg.divSqrt) {
|
|
val divSqrt_killed = Reg(Bool())
|
|
|
|
for (t <- floatTypes) {
|
|
val tag = !mem_ctrl.singleOut // TODO typeTag
|
|
val divSqrt = Module(new hardfloat.DivSqrtRecFN_small(t.exp, t.sig, 0))
|
|
divSqrt.io.inValid := mem_reg_valid && tag === typeTag(t) && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_inFlight
|
|
divSqrt.io.sqrtOp := mem_ctrl.sqrt
|
|
divSqrt.io.a := maxType.unsafeConvert(fpiu.io.out.bits.in.in1, t)
|
|
divSqrt.io.b := maxType.unsafeConvert(fpiu.io.out.bits.in.in2, t)
|
|
divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm
|
|
divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
|
|
|
when (!divSqrt.io.inReady) { divSqrt_inFlight := true } // only 1 in flight
|
|
|
|
when (divSqrt.io.inValid && divSqrt.io.inReady) {
|
|
divSqrt_killed := killm
|
|
divSqrt_waddr := mem_reg_inst(11,7)
|
|
}
|
|
|
|
when (divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt) {
|
|
divSqrt_wen := !divSqrt_killed
|
|
divSqrt_wdata := sanitizeNaN(divSqrt.io.out, t)
|
|
divSqrt_flags := divSqrt.io.exceptionFlags
|
|
divSqrt_typeTag := typeTag(t)
|
|
}
|
|
}
|
|
} else {
|
|
when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true }
|
|
}
|
|
|
|
def fuInput(minT: Option[FType]): FPInput = {
|
|
val req = Wire(new FPInput)
|
|
val tag = !ex_ctrl.singleIn // TODO typeTag
|
|
req := ex_ctrl
|
|
req.rm := ex_rm
|
|
req.in1 := unbox(ex_rs(0), tag, minT)
|
|
req.in2 := unbox(ex_rs(1), tag, minT)
|
|
req.in3 := unbox(ex_rs(2), tag, minT)
|
|
req.typ := ex_reg_inst(21,20)
|
|
req.fmaCmd := ex_reg_inst(3,2) | (!ex_ctrl.ren3 && ex_reg_inst(27))
|
|
when (ex_cp_valid) {
|
|
req := io.cp_req.bits
|
|
when (io.cp_req.bits.swap23) {
|
|
req.in2 := io.cp_req.bits.in3
|
|
req.in3 := io.cp_req.bits.in2
|
|
}
|
|
}
|
|
req
|
|
}
|
|
}
|
|
|
|
/** Mix-ins for constructing tiles that may have an FPU external to the core pipeline */
|
|
trait CanHaveSharedFPU extends HasTileParameters
|
|
|
|
trait CanHaveSharedFPUModule {
|
|
val outer: CanHaveSharedFPU
|
|
val fpuOpt = outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p)))
|
|
// TODO fpArb could go here instead of inside LegacyRoccComplex
|
|
}
|