1
0

Merge master into rocc-fpu-port

ebb33f2f4b658211960a4c6c023c139420c67212
This commit is contained in:
Colin Schmidt
2015-08-06 08:03:10 -07:00
23 changed files with 1045 additions and 1293 deletions

View File

@ -13,24 +13,24 @@ case object DFMALatency
object FPConstants
{
val FCMD_ADD = Bits("b0??00")
val FCMD_SUB = Bits("b0??01")
val FCMD_MUL = Bits("b0??10")
val FCMD_MADD = Bits("b1??00")
val FCMD_MSUB = Bits("b1??01")
val FCMD_NMSUB = Bits("b1??10")
val FCMD_NMADD = Bits("b1??11")
val FCMD_DIV = Bits("b?0011")
val FCMD_SQRT = Bits("b?1011")
val FCMD_SGNJ = Bits("b??1?0")
val FCMD_MINMAX = Bits("b?01?1")
val FCMD_CVT_FF = Bits("b??0??")
val FCMD_CVT_IF = Bits("b?10??")
val FCMD_CMP = Bits("b?01??")
val FCMD_MV_XF = Bits("b?11??")
val FCMD_CVT_FI = Bits("b??0??")
val FCMD_MV_FX = Bits("b??1??")
val FCMD_X = Bits("b?????")
val FCMD_ADD = BitPat("b0??00")
val FCMD_SUB = BitPat("b0??01")
val FCMD_MUL = BitPat("b0??10")
val FCMD_MADD = BitPat("b1??00")
val FCMD_MSUB = BitPat("b1??01")
val FCMD_NMSUB = BitPat("b1??10")
val FCMD_NMADD = BitPat("b1??11")
val FCMD_DIV = BitPat("b?0011")
val FCMD_SQRT = BitPat("b?1011")
val FCMD_SGNJ = BitPat("b??1?0")
val FCMD_MINMAX = BitPat("b?01?1")
val FCMD_CVT_FF = BitPat("b??0??")
val FCMD_CVT_IF = BitPat("b?10??")
val FCMD_CMP = BitPat("b?01??")
val FCMD_MV_XF = BitPat("b?11??")
val FCMD_CVT_FI = BitPat("b??0??")
val FCMD_MV_FX = BitPat("b??1??")
val FCMD_X = BitPat("b?????")
val FCMD_WIDTH = 5
val RM_SZ = 3
@ -65,9 +65,6 @@ class FPUDecoder extends Module
val sigs = new FPUCtrlSigs().asOutput
}
val N = Bool(false)
val Y = Bool(true)
val X = Bool(false)
val decoder = DecodeLogic(io.inst,
List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X),
Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
@ -138,33 +135,34 @@ class FPUDecoder extends Module
s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder
}
class DpathFPUIO extends Bundle {
val inst = Bits(OUTPUT, 32)
val fromint_data = Bits(OUTPUT, 64)
class FPUIO extends Bundle {
val inst = Bits(INPUT, 32)
val fromint_data = Bits(INPUT, 64)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
val store_data = Bits(INPUT, 64)
val toint_data = Bits(INPUT, 64)
val store_data = Bits(OUTPUT, 64)
val toint_data = Bits(OUTPUT, 64)
val dmem_resp_val = Bool(OUTPUT)
val dmem_resp_type = Bits(OUTPUT, 3)
val dmem_resp_tag = UInt(OUTPUT, 5)
val dmem_resp_data = Bits(OUTPUT, 64)
}
val dmem_resp_val = Bool(INPUT)
val dmem_resp_type = Bits(INPUT, 3)
val dmem_resp_tag = UInt(INPUT, 5)
val dmem_resp_data = Bits(INPUT, 64)
class CtrlFPUIO extends Bundle {
val valid = Bool(OUTPUT)
val fcsr_rdy = Bool(INPUT)
val nack_mem = Bool(INPUT)
val illegal_rm = Bool(INPUT)
val killx = Bool(OUTPUT)
val killm = Bool(OUTPUT)
val dec = new FPUCtrlSigs().asInput
val sboard_set = Bool(INPUT)
val sboard_clr = Bool(INPUT)
val sboard_clra = UInt(INPUT, 5)
val valid = Bool(INPUT)
val fcsr_rdy = Bool(OUTPUT)
val nack_mem = Bool(OUTPUT)
val illegal_rm = Bool(OUTPUT)
val killx = Bool(INPUT)
val killm = Bool(INPUT)
val dec = new FPUCtrlSigs().asOutput
val sboard_set = Bool(OUTPUT)
val sboard_clr = Bool(OUTPUT)
val sboard_clra = UInt(OUTPUT, 5)
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
val cp_resp = Decoupled(new FPResult())
}
class FPResult extends Bundle
@ -230,7 +228,7 @@ class FPToInt extends Module
io.out.bits.exc := dcmp_exc
}
when (in.cmd === FCMD_CVT_IF) {
io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt)
io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt).toUInt
io.out.bits.exc := d2i._2
}
@ -248,7 +246,7 @@ class IntToFP(val latency: Int) extends Module
val in = Pipe(io.in)
val mux = new FPResult
val mux = Wire(new FPResult)
mux.exc := Bits(0)
mux.data := hardfloat.floatNToRecodedFloatN(in.bits.in1, 52, 12)
when (in.bits.single) {
@ -299,7 +297,7 @@ class FPToFP(val latency: Int) extends Module
val isMax = in.bits.rm(0)
val isLHS = isnan2 || isMax != io.lt && !isnan1
val mux = new FPResult
val mux = Wire(new FPResult)
mux.exc := minmax_exc
mux.data := in.bits.in2
@ -347,7 +345,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module
fma.io.b := in.in2
fma.io.c := in.in3
val res = new FPResult
val res = Wire(new FPResult)
res.data := fma.io.out
res.exc := fma.io.exceptionFlags
io.out := Pipe(valid, res, latency-1)
@ -355,26 +353,21 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module
class FPU extends Module
{
val io = new Bundle {
val ctrl = (new CtrlFPUIO).flip
val dpath = (new DpathFPUIO).flip
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
val cp_resp = Decoupled(new FPResult())
}
val io = new FPUIO
val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false))
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
val req_valid = ex_reg_valid || io.cp_req.valid
val ex_reg_inst = RegEnable(io.dpath.inst, io.ctrl.valid)
val ex_reg_inst = RegEnable(io.inst, io.valid)
val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx || ex_cp_valid, init=Bool(false))
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
val killm = (io.ctrl.killm || io.ctrl.nack_mem) && !mem_cp_valid
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
val fp_decoder = Module(new FPUDecoder)
fp_decoder.io.inst := io.dpath.inst
fp_decoder.io.inst := io.inst
val cp_ctrl = new FPUCtrlSigs
cp_ctrl <> io.cp_req.bits
@ -382,15 +375,15 @@ class FPU extends Module
io.cp_resp.bits.data := UInt(0)
val id_ctrl = fp_decoder.io.sigs
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.ctrl.valid), cp_ctrl)
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
// load response
val load_wb = Reg(next=io.dpath.dmem_resp_val)
val load_wb_single = RegEnable(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val)
val load_wb_data = RegEnable(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val)
val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val)
val load_wb = Reg(next=io.dmem_resp_val)
val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val)
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9)
val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12)
val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
@ -400,26 +393,26 @@ class FPU extends Module
when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded }
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.ctrl.valid) {
when (io.valid) {
when (id_ctrl.ren1) {
when (!id_ctrl.swap12) { ex_ra1 := io.dpath.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.dpath.inst(19,15) }
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
}
when (id_ctrl.ren2) {
when (id_ctrl.swap12) { ex_ra1 := io.dpath.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) }
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
}
when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) }
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
}
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
val cp_rs1 = io.cp_req.bits.in1
val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)
val req = new FPInput
val req = Wire(new FPInput)
req := ex_ctrl
req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
@ -438,8 +431,8 @@ class FPU extends Module
val fpiu = Module(new FPToInt)
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
fpiu.io.in.bits := req
io.dpath.store_data := fpiu.io.out.bits.store
io.dpath.toint_data := fpiu.io.out.bits.toint
io.store_data := fpiu.io.out.bits.store
io.toint_data := fpiu.io.out.bits.toint
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
io.cp_resp.bits.data := fpiu.io.out.bits.toint
io.cp_resp.valid := Bool(true)
@ -448,25 +441,23 @@ class FPU extends Module
val ifpu = Module(new IntToFP(3))
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
ifpu.io.in.bits := req
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.dpath.fromint_data, cp_rs1)
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)
val fpmu = Module(new FPToFP(2))
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
fpmu.io.in.bits := req
fpmu.io.lt := fpiu.io.out.bits.lt
val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64)
val divSqrt_inReady = Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
val divSqrt_wen = Reg(next=Bool(false))
val divSqrt_inReady = Wire(init=Bool(false))
val divSqrt_waddr = Reg(Bits())
val divSqrt_wdata = Bits()
val divSqrt_flags = Bits()
val divSqrt_wdata = Wire(Bits())
val divSqrt_flags = Wire(Bits())
val divSqrt_in_flight = Reg(init=Bool(false))
val divSqrt_cp = Reg(init=Bool(false))
// writeback arbitration
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits)
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: UInt, wexc: UInt)
val pipes = List(
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc),
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc),
@ -481,7 +472,7 @@ class FPU extends Module
val memLatencyMask = latencyMask(mem_ctrl, 2)
val wen = Reg(init=Bits(0, maxLatency-1))
val winfo = Vec.fill(maxLatency-1){Reg(Bits())}
val winfo = Reg(Vec.fill(maxLatency-1){Bits()})
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_reg_inst(11,7))
@ -515,22 +506,22 @@ class FPU extends Module
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
io.dpath.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.dpath.fcsr_flags.bits :=
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.fcsr_flags.bits :=
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
Mux(wen(0), wexc, UInt(0))
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
io.ctrl.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.ctrl.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.ctrl.dec <> fp_decoder.io.sigs
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.dec <> fp_decoder.io.sigs
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
io.ctrl.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.ctrl.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
io.ctrl.sboard_clra := waddr
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
io.sboard_clra := waddr
// we don't currently support round-max-magnitude (rm=4)
io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round
io.illegal_rm := ex_rm(2) && ex_ctrl.round
divSqrt_wdata := 0
divSqrt_flags := 0
@ -540,9 +531,11 @@ class FPU extends Module
val divSqrt_flags_double = Reg(Bits())
val divSqrt_wdata_double = Reg(Bits())
def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1
val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64)
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
val divSqrt_wb_hazard = wen.orR
divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && (!io.ctrl.killm || mem_cp_valid) && (mem_ctrl.div || mem_ctrl.sqrt)
divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && (!io.killm || mem_cp_valid) && (mem_ctrl.div || mem_ctrl.sqrt)
divSqrt.io.sqrtOp := mem_ctrl.sqrt
divSqrt.io.a := fpiu.io.as_double.in1
divSqrt.io.b := fpiu.io.as_double.in2