Merge branch 'rocc-fpu-port'
This commit is contained in:
commit
f67b02fadb
@ -5,6 +5,7 @@ package rocket
|
|||||||
import Chisel._
|
import Chisel._
|
||||||
import uncore._
|
import uncore._
|
||||||
import cde.{Parameters, Field}
|
import cde.{Parameters, Field}
|
||||||
|
import junctions.ParameterizedBundle
|
||||||
|
|
||||||
class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
||||||
{
|
{
|
||||||
@ -53,3 +54,51 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
|||||||
io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> log2Up(n)
|
io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> log2Up(n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Shares one downstream request/response channel among `n` requesters and
  * hands each response back to the requester whose forwarded request is the
  * oldest one still waiting for an answer.
  *
  * For `n > 1`, a round-robin arbiter picks among `io.in_req`; whenever a
  * request is forwarded on `io.out_req`, the index of the chosen requester is
  * pushed into a small routing queue in the same cycle. Responses arriving on
  * `io.out_resp` are steered to the requester named by the head of that queue.
  * Routing is therefore purely FIFO: it is only correct when the downstream
  * unit returns responses in the same order it accepted the requests.
  *
  * For `n == 1` both channels are wired straight through with no extra logic.
  *
  * @param reqTyp  payload type of the request channels
  * @param respTyp payload type of the response channels
  * @param n       number of requesters; must be at least 1
  */
class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int)
    (implicit p: Parameters) extends Module {
  // Fail fast with a clear message: with n == 0 the pass-through branch below
  // would otherwise call `.head` on an empty Vec during elaboration.
  require(n > 0, s"InOrderArbiter needs at least one requester, got n = $n")

  val io = new Bundle {
    val in_req = Vec(n, Decoupled(reqTyp)).flip
    val in_resp = Vec(n, Decoupled(respTyp))
    val out_req = Decoupled(reqTyp)
    val out_resp = Decoupled(respTyp).flip
  }

  if (n > 1) {
    // Holds the requester index of every request that has been forwarded but
    // not yet answered. NOTE(review): the second Queue argument is presumably
    // the entry count, which would cap in-flight requests at 2 - confirm.
    val route_q = Module(new Queue(UInt(width = log2Up(n)), 2))
    val req_arb = Module(new RRArbiter(reqTyp, n))
    req_arb.io.in <> io.in_req

    // A request moves only when the arbiter has a winner, the routing queue
    // can record it, and the downstream is ready. NOTE(review): `fire(x)` is
    // presumably the AND of every listed condition except `x` - confirm
    // against DecoupledHelper.
    val req_helper = DecoupledHelper(
      req_arb.io.out.valid,
      route_q.io.enq.ready,
      io.out_req.ready)

    io.out_req.bits := req_arb.io.out.bits
    io.out_req.valid := req_helper.fire(io.out_req.ready)

    route_q.io.enq.bits := req_arb.io.chosen
    route_q.io.enq.valid := req_helper.fire(route_q.io.enq.ready)

    req_arb.io.out.ready := req_helper.fire(req_arb.io.out.valid)

    // Response side: the head of the routing queue selects the destination.
    val resp_sel = route_q.io.deq.bits
    val resp_ready = io.in_resp(resp_sel).ready
    val resp_helper = DecoupledHelper(
      resp_ready,
      route_q.io.deq.valid,
      io.out_resp.valid)

    // Every requester sees the response payload; only the selected one sees
    // `valid` asserted.
    val resp_valid = resp_helper.fire(resp_ready)
    for (i <- 0 until n) {
      io.in_resp(i).bits := io.out_resp.bits
      io.in_resp(i).valid := resp_valid && resp_sel === UInt(i)
    }

    // The routing entry is popped in the same handshake that consumes the
    // downstream response.
    route_q.io.deq.ready := resp_helper.fire(route_q.io.deq.valid)
    io.out_resp.ready := resp_helper.fire(io.out_resp.valid)
  } else {
    // Single requester: nothing to arbitrate or route.
    io.out_req <> io.in_req.head
    io.in_resp.head <> io.out_resp
  }
}
|
||||||
|
@ -163,9 +163,9 @@ class FPUIO extends Bundle {
|
|||||||
val sboard_set = Bool(OUTPUT)
|
val sboard_set = Bool(OUTPUT)
|
||||||
val sboard_clr = Bool(OUTPUT)
|
val sboard_clr = Bool(OUTPUT)
|
||||||
val sboard_clra = UInt(OUTPUT, 5)
|
val sboard_clra = UInt(OUTPUT, 5)
|
||||||
}
|
|
||||||
|
|
||||||
class CtrlFPUIO extends Bundle {
|
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
|
||||||
|
val cp_resp = Decoupled(new FPResult())
|
||||||
}
|
}
|
||||||
|
|
||||||
class FPResult extends Bundle
|
class FPResult extends Bundle
|
||||||
@ -439,18 +439,27 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
val io = new FPUIO
|
val io = new FPUIO
|
||||||
|
|
||||||
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
|
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
|
||||||
|
val req_valid = ex_reg_valid || io.cp_req.valid
|
||||||
val ex_reg_inst = RegEnable(io.inst, io.valid)
|
val ex_reg_inst = RegEnable(io.inst, io.valid)
|
||||||
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx, init=Bool(false))
|
val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
|
||||||
|
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
|
||||||
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
|
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
|
||||||
val killm = io.killm || io.nack_mem
|
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
|
||||||
val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false))
|
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
|
||||||
|
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
|
||||||
|
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
|
||||||
|
|
||||||
val fp_decoder = Module(new FPUDecoder)
|
val fp_decoder = Module(new FPUDecoder)
|
||||||
fp_decoder.io.inst := io.inst
|
fp_decoder.io.inst := io.inst
|
||||||
|
|
||||||
|
val cp_ctrl = new FPUCtrlSigs
|
||||||
|
cp_ctrl <> io.cp_req.bits
|
||||||
|
io.cp_resp.valid := Bool(false)
|
||||||
|
io.cp_resp.bits.data := UInt(0)
|
||||||
|
|
||||||
val id_ctrl = fp_decoder.io.sigs
|
val id_ctrl = fp_decoder.io.sigs
|
||||||
val ex_ctrl = RegEnable(id_ctrl, io.valid)
|
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
|
||||||
val mem_ctrl = RegEnable(ex_ctrl, ex_reg_valid)
|
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
|
||||||
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
|
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
|
||||||
|
|
||||||
// load response
|
// load response
|
||||||
@ -488,35 +497,43 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
|
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
|
||||||
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
|
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
|
||||||
|
|
||||||
|
val cp_rs1 = io.cp_req.bits.in1
|
||||||
|
val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
|
||||||
|
val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)
|
||||||
|
|
||||||
val req = Wire(new FPInput)
|
val req = Wire(new FPInput)
|
||||||
req := ex_ctrl
|
req := ex_ctrl
|
||||||
req.rm := ex_rm
|
req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
|
||||||
req.in1 := ex_rs1
|
req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
|
||||||
req.in2 := ex_rs2
|
req.in2 := Mux(ex_reg_valid, ex_rs2, cp_rs2)
|
||||||
req.in3 := ex_rs3
|
req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3)
|
||||||
req.typ := ex_reg_inst(21,20)
|
req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ)
|
||||||
|
|
||||||
val sfma = Module(new FPUFMAPipe(p(SFMALatency), 8, 24))
|
val sfma = Module(new FPUFMAPipe(p(SFMALatency), 8, 24))
|
||||||
sfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && ex_ctrl.single
|
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
|
||||||
sfma.io.in.bits := req
|
sfma.io.in.bits := req
|
||||||
|
|
||||||
val dfma = Module(new FPUFMAPipe(p(DFMALatency), 11, 53))
|
val dfma = Module(new FPUFMAPipe(p(DFMALatency), 11, 53))
|
||||||
dfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && !ex_ctrl.single
|
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
|
||||||
dfma.io.in.bits := req
|
dfma.io.in.bits := req
|
||||||
|
|
||||||
val fpiu = Module(new FPToInt)
|
val fpiu = Module(new FPToInt)
|
||||||
fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
|
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
|
||||||
fpiu.io.in.bits := req
|
fpiu.io.in.bits := req
|
||||||
io.store_data := fpiu.io.out.bits.store
|
io.store_data := fpiu.io.out.bits.store
|
||||||
io.toint_data := fpiu.io.out.bits.toint
|
io.toint_data := fpiu.io.out.bits.toint
|
||||||
|
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
|
||||||
|
io.cp_resp.bits.data := fpiu.io.out.bits.toint
|
||||||
|
io.cp_resp.valid := Bool(true)
|
||||||
|
}
|
||||||
|
|
||||||
val ifpu = Module(new IntToFP(3))
|
val ifpu = Module(new IntToFP(3))
|
||||||
ifpu.io.in.valid := ex_reg_valid && ex_ctrl.fromint
|
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
|
||||||
ifpu.io.in.bits := req
|
ifpu.io.in.bits := req
|
||||||
ifpu.io.in.bits.in1 := io.fromint_data
|
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)
|
||||||
|
|
||||||
val fpmu = Module(new FPToFP(2))
|
val fpmu = Module(new FPToFP(2))
|
||||||
fpmu.io.in.valid := ex_reg_valid && ex_ctrl.fastpipe
|
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
|
||||||
fpmu.io.in.bits := req
|
fpmu.io.in.bits := req
|
||||||
fpmu.io.lt := fpiu.io.out.bits.lt
|
fpmu.io.lt := fpiu.io.out.bits.lt
|
||||||
|
|
||||||
@ -546,8 +563,8 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
val wen = Reg(init=Bits(0, maxLatency-1))
|
val wen = Reg(init=Bits(0, maxLatency-1))
|
||||||
val winfo = Reg(Vec(Bits(), maxLatency-1))
|
val winfo = Reg(Vec(Bits(), maxLatency-1))
|
||||||
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
|
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
|
||||||
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid)
|
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
|
||||||
val mem_winfo = Cat(pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging
|
val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging
|
||||||
|
|
||||||
for (i <- 0 until maxLatency-2) {
|
for (i <- 0 until maxLatency-2) {
|
||||||
when (wen(i+1)) { winfo(i) := winfo(i+1) }
|
when (wen(i+1)) { winfo(i) := winfo(i+1) }
|
||||||
@ -566,9 +583,10 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
|
|
||||||
val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt)
|
val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt)
|
||||||
val wsrc = (winfo(0) >> 6)
|
val wsrc = (winfo(0) >> 6)
|
||||||
|
val wcp = winfo(0)(6+log2Up(pipes.size))
|
||||||
val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc))
|
val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc))
|
||||||
val wexc = Vec(pipes.map(_.res.exc))(wsrc)
|
val wexc = Vec(pipes.map(_.res.exc))(wsrc)
|
||||||
when (wen(0) || divSqrt_wen) {
|
when ((!wcp && wen(0)) || divSqrt_wen) {
|
||||||
regfile(waddr) := wdata
|
regfile(waddr) := wdata
|
||||||
if (enableCommitLog) {
|
if (enableCommitLog) {
|
||||||
val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0))
|
val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0))
|
||||||
@ -578,6 +596,11 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
|
Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
when (wcp && wen(0)) {
|
||||||
|
io.cp_resp.bits.data := wdata
|
||||||
|
io.cp_resp.valid := Bool(true)
|
||||||
|
}
|
||||||
|
io.cp_req.ready := !ex_reg_valid
|
||||||
|
|
||||||
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
|
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
|
||||||
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
|
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
|
||||||
@ -592,8 +615,8 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
|
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
|
||||||
io.dec <> fp_decoder.io.sigs
|
io.dec <> fp_decoder.io.sigs
|
||||||
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
|
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
|
||||||
io.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
|
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
|
||||||
io.sboard_clr := divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2)))
|
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
|
||||||
io.sboard_clra := waddr
|
io.sboard_clra := waddr
|
||||||
// we don't currently support round-max-magnitude (rm=4)
|
// we don't currently support round-max-magnitude (rm=4)
|
||||||
io.illegal_rm := ex_rm(2) && ex_ctrl.round
|
io.illegal_rm := ex_rm(2) && ex_ctrl.round
|
||||||
|
@ -46,13 +46,14 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
|||||||
val s2_btb_resp_valid = Reg(init=Bool(false))
|
val s2_btb_resp_valid = Reg(init=Bool(false))
|
||||||
val s2_btb_resp_bits = Reg(btb.io.resp.bits)
|
val s2_btb_resp_bits = Reg(btb.io.resp.bits)
|
||||||
val s2_xcpt_if = Reg(init=Bool(false))
|
val s2_xcpt_if = Reg(init=Bool(false))
|
||||||
|
val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true))
|
||||||
|
|
||||||
val msb = vaddrBits-1
|
val msb = vaddrBits-1
|
||||||
val lsb = log2Up(fetchWidth*coreInstBytes)
|
val lsb = log2Up(fetchWidth*coreInstBytes)
|
||||||
val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target)
|
val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target)
|
||||||
val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth)
|
val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth)
|
||||||
val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure
|
val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure
|
||||||
val icmiss = s2_valid && !icache.io.resp.valid
|
val icmiss = s2_valid && !icbuf.io.deq.valid
|
||||||
val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc)
|
val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc)
|
||||||
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
|
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
|
||||||
val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes))
|
val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes))
|
||||||
@ -100,14 +101,17 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
|||||||
icmiss || io.ptw.invalidate
|
icmiss || io.ptw.invalidate
|
||||||
icache.io.resp.ready := !stall && !s1_same_block
|
icache.io.resp.ready := !stall && !s1_same_block
|
||||||
|
|
||||||
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid)
|
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid)
|
||||||
io.cpu.resp.bits.pc := s2_pc
|
io.cpu.resp.bits.pc := s2_pc
|
||||||
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
|
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
|
||||||
|
|
||||||
|
icbuf.io.enq <> icache.io.resp
|
||||||
|
icbuf.io.deq.ready := !stall && !s1_same_block
|
||||||
|
|
||||||
require(fetchWidth * coreInstBytes <= rowBytes)
|
require(fetchWidth * coreInstBytes <= rowBytes)
|
||||||
val fetch_data =
|
val fetch_data =
|
||||||
if (fetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock
|
if (fetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock
|
||||||
else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
|
else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
|
||||||
|
|
||||||
for (i <- 0 until fetchWidth) {
|
for (i <- 0 until fetchWidth) {
|
||||||
io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
|
io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
|
||||||
|
@ -40,9 +40,8 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
|||||||
val stall = !io.resp.ready
|
val stall = !io.resp.ready
|
||||||
val rdy = Wire(Bool())
|
val rdy = Wire(Bool())
|
||||||
|
|
||||||
val s2_valid = Reg(init=Bool(false))
|
val refill_addr = Reg(UInt(width = paddrBits))
|
||||||
val s2_addr = Reg(UInt(width = paddrBits))
|
val s1_any_tag_hit = Bool()
|
||||||
val s2_any_tag_hit = Wire(Bool())
|
|
||||||
|
|
||||||
val s1_valid = Reg(init=Bool(false))
|
val s1_valid = Reg(init=Bool(false))
|
||||||
val s1_pgoff = Reg(UInt(width = pgIdxBits))
|
val s1_pgoff = Reg(UInt(width = pgIdxBits))
|
||||||
@ -57,91 +56,85 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
|||||||
s1_pgoff := io.req.bits.idx
|
s1_pgoff := io.req.bits.idx
|
||||||
}
|
}
|
||||||
|
|
||||||
s2_valid := s1_valid && rdy && !io.req.bits.kill || io.resp.valid && stall
|
val out_valid = s1_valid && !io.req.bits.kill && state === s_ready
|
||||||
when (s1_valid && rdy && !stall) {
|
val s1_idx = s1_addr(untagBits-1,blockOffBits)
|
||||||
s2_addr := s1_addr
|
val s1_offset = s1_addr(blockOffBits-1,0)
|
||||||
}
|
val s1_hit = out_valid && s1_any_tag_hit
|
||||||
|
val s1_miss = out_valid && !s1_any_tag_hit
|
||||||
|
rdy := state === s_ready && !s1_miss
|
||||||
|
|
||||||
val s2_tag = s2_addr(tagBits+untagBits-1,untagBits)
|
when (s1_valid && state === s_ready && s1_miss) {
|
||||||
val s2_idx = s2_addr(untagBits-1,blockOffBits)
|
refill_addr := s1_addr
|
||||||
val s2_offset = s2_addr(blockOffBits-1,0)
|
}
|
||||||
val s2_hit = s2_valid && s2_any_tag_hit
|
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
||||||
val s2_miss = s2_valid && !s2_any_tag_hit
|
|
||||||
rdy := state === s_ready && !s2_miss
|
|
||||||
|
|
||||||
val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
|
val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
|
||||||
val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles) //TODO Zero width wire
|
val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles) //TODO Zero width wire
|
||||||
val refill_done = state === s_refill && refill_wrap
|
val refill_done = state === s_refill && refill_wrap
|
||||||
narrow_grant.ready := Bool(true)
|
narrow_grant.ready := Bool(true)
|
||||||
|
|
||||||
val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0)
|
val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
|
||||||
val entagbits = code.width(tagBits)
|
val entagbits = code.width(tagBits)
|
||||||
val tag_array = SeqMem(Vec(Bits(width = entagbits), nWays), nSets)
|
val tag_array = SeqMem(Vec(Bits(width = entagbits), nWays), nSets)
|
||||||
val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid)
|
val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid)
|
||||||
when (refill_done) {
|
when (refill_done) {
|
||||||
val tag = code.encode(s2_tag).toUInt
|
val tag = code.encode(refill_tag).toUInt
|
||||||
tag_array.write(s2_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
|
tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
|
||||||
}
|
}
|
||||||
|
|
||||||
val vb_array = Reg(init=Bits(0, nSets*nWays))
|
val vb_array = Reg(init=Bits(0, nSets*nWays))
|
||||||
when (refill_done && !invalidated) {
|
when (refill_done && !invalidated) {
|
||||||
vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true))
|
vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true))
|
||||||
}
|
}
|
||||||
when (io.invalidate) {
|
when (io.invalidate) {
|
||||||
vb_array := Bits(0)
|
vb_array := Bits(0)
|
||||||
invalidated := Bool(true)
|
invalidated := Bool(true)
|
||||||
}
|
}
|
||||||
val s2_disparity = Wire(Vec(Bool(), nWays))
|
val s1_disparity = Vec.fill(nWays){Bool()}
|
||||||
for (i <- 0 until nWays)
|
for (i <- 0 until nWays)
|
||||||
when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s2_idx), Bool(false)) }
|
when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) }
|
||||||
|
|
||||||
val s1_tag_match = Wire(Vec(Bool(), nWays))
|
val s1_tag_match = Vec.fill(nWays){Bool()}
|
||||||
val s2_tag_hit = Wire(Vec(Bool(), nWays))
|
val s1_tag_hit = Vec.fill(nWays){Bool()}
|
||||||
val s2_dout = Reg(Vec(Bits(width = code.width(rowBits)), nWays))
|
val s1_dout = Vec.fill(nWays){(Bits())}
|
||||||
|
|
||||||
for (i <- 0 until nWays) {
|
for (i <- 0 until nWays) {
|
||||||
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool
|
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool
|
||||||
val s2_vb = Reg(Bool())
|
|
||||||
val s2_tag_disparity = Reg(Bool())
|
|
||||||
val s2_tag_match = Reg(Bool())
|
|
||||||
val tag_out = tag_rdata(i)
|
val tag_out = tag_rdata(i)
|
||||||
|
val s1_tag_disparity = code.decode(tag_out).error
|
||||||
when (s1_valid && rdy && !stall) {
|
when (s1_valid && rdy && !stall) {
|
||||||
s2_vb := s1_vb
|
|
||||||
s2_tag_disparity := code.decode(tag_out).error
|
|
||||||
s2_tag_match := s1_tag_match(i)
|
|
||||||
}
|
}
|
||||||
s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag
|
s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag
|
||||||
s2_tag_hit(i) := s2_vb && s2_tag_match
|
s1_tag_hit(i) := s1_vb && s1_tag_match(i)
|
||||||
s2_disparity(i) := s2_vb && (s2_tag_disparity || code.decode(s2_dout(i)).error)
|
s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error)
|
||||||
}
|
}
|
||||||
s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_)
|
s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_)
|
||||||
|
|
||||||
for (i <- 0 until nWays) {
|
for (i <- 0 until nWays) {
|
||||||
val data_array = SeqMem(Bits(width = code.width(rowBits)), nSets*refillCycles)
|
val data_array = SeqMem(Bits(width = code.width(rowBits)), nSets*refillCycles)
|
||||||
val wen = narrow_grant.valid && repl_way === UInt(i)
|
val wen = narrow_grant.valid && repl_way === UInt(i)
|
||||||
when (wen) {
|
when (wen) {
|
||||||
val e_d = code.encode(narrow_grant.bits.data).toUInt
|
val e_d = code.encode(narrow_grant.bits.data).toUInt
|
||||||
if(refillCycles > 1) data_array.write(Cat(s2_idx, refill_cnt), e_d)
|
if(refillCycles > 1) data_array.write(Cat(s1_idx, refill_cnt), e_d)
|
||||||
else data_array.write(s2_idx, e_d)
|
else data_array.write(s1_idx, e_d)
|
||||||
}
|
}
|
||||||
val s0_raddr = s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0))
|
val s0_raddr = s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0))
|
||||||
val s1_rdata = data_array.read(s0_raddr, !wen && s0_valid)
|
val s1_rdata = data_array.read(s0_raddr, !wen && s0_valid)
|
||||||
// if s1_tag_match is critical, replace with partial tag check
|
// if s1_tag_match is critical, replace with partial tag check
|
||||||
when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s2_dout(i) := s1_rdata }
|
s1_dout(i) := 0
|
||||||
|
when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata }
|
||||||
}
|
}
|
||||||
val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)))(coreInstBits-1,0))
|
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
|
||||||
io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word)
|
|
||||||
io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
|
|
||||||
|
|
||||||
// output signals
|
// output signals
|
||||||
io.resp.valid := s2_hit
|
io.resp.valid := s1_hit
|
||||||
io.mem.acquire.valid := (state === s_request)
|
io.mem.acquire.valid := (state === s_request)
|
||||||
io.mem.acquire.bits := GetBlock(addr_block = s2_addr >> blockOffBits)
|
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
|
||||||
|
|
||||||
// control state machine
|
// control state machine
|
||||||
switch (state) {
|
switch (state) {
|
||||||
is (s_ready) {
|
is (s_ready) {
|
||||||
when (s2_miss) { state := s_request }
|
when (s1_miss) { state := s_request }
|
||||||
invalidated := Bool(false)
|
invalidated := Bool(false)
|
||||||
}
|
}
|
||||||
is (s_request) {
|
is (s_request) {
|
||||||
|
@ -47,6 +47,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle {
|
|||||||
val iptw = new TLBPTWIO
|
val iptw = new TLBPTWIO
|
||||||
val dptw = new TLBPTWIO
|
val dptw = new TLBPTWIO
|
||||||
val pptw = new TLBPTWIO
|
val pptw = new TLBPTWIO
|
||||||
|
val fpu_req = Decoupled(new FPInput)
|
||||||
|
val fpu_resp = Decoupled(new FPResult).flip
|
||||||
val exception = Bool(INPUT)
|
val exception = Bool(INPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,13 +13,15 @@ case object BuildRoCC extends Field[Seq[RoccParameters]]
|
|||||||
case class RoccParameters(
|
case class RoccParameters(
|
||||||
opcodes: OpcodeSet,
|
opcodes: OpcodeSet,
|
||||||
generator: Parameters => RoCC,
|
generator: Parameters => RoCC,
|
||||||
nMemChannels: Int = 1)
|
nMemChannels: Int = 1,
|
||||||
|
useFPU: Boolean = false)
|
||||||
|
|
||||||
abstract class Tile(resetSignal: Bool = null)
|
abstract class Tile(resetSignal: Bool = null)
|
||||||
(implicit p: Parameters) extends Module(_reset = resetSignal) {
|
(implicit p: Parameters) extends Module(_reset = resetSignal) {
|
||||||
val buildRocc = p(BuildRoCC)
|
val buildRocc = p(BuildRoCC)
|
||||||
val usingRocc = !buildRocc.isEmpty
|
val usingRocc = !buildRocc.isEmpty
|
||||||
val nRocc = buildRocc.size
|
val nRocc = buildRocc.size
|
||||||
|
val nFPUPorts = buildRocc.filter(_.useFPU).size
|
||||||
val nDCachePorts = 2 + nRocc
|
val nDCachePorts = 2 + nRocc
|
||||||
val nPTWPorts = 2 + 3 * nRocc
|
val nPTWPorts = 2 + 3 * nRocc
|
||||||
val nCachedTileLinkPorts = 1
|
val nCachedTileLinkPorts = 1
|
||||||
@ -53,8 +55,8 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(
|
|||||||
icache.io.cpu <> core.io.imem
|
icache.io.cpu <> core.io.imem
|
||||||
core.io.ptw <> ptw.io.dpath
|
core.io.ptw <> ptw.io.dpath
|
||||||
|
|
||||||
//If so specified, build an FPU module and wire it in
|
val fpuOpt = if (p(UseFPU)) Some(Module(new FPU)) else None
|
||||||
if (p(UseFPU)) core.io.fpu <> Module(new FPU()(p)).io
|
fpuOpt.foreach(fpu => core.io.fpu <> fpu.io)
|
||||||
|
|
||||||
// Connect the caches and ROCC to the outer memory system
|
// Connect the caches and ROCC to the outer memory system
|
||||||
io.cached.head <> dcache.io.mem
|
io.cached.head <> dcache.io.mem
|
||||||
@ -87,10 +89,32 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(
|
|||||||
rocc
|
rocc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nFPUPorts > 0) {
|
||||||
|
fpuOpt.foreach { fpu =>
|
||||||
|
val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
|
||||||
|
val fp_roccs = roccs.zip(buildRocc)
|
||||||
|
.filter { case (_, params) => params.useFPU }
|
||||||
|
.map { case (rocc, _) => rocc.io }
|
||||||
|
fpArb.io.in_req <> fp_roccs.map(_.fpu_req)
|
||||||
|
fp_roccs.zip(fpArb.io.in_resp).foreach {
|
||||||
|
case (rocc, fpu_resp) => rocc.fpu_resp <> fpu_resp
|
||||||
|
}
|
||||||
|
fpu.io.cp_req <> fpArb.io.out_req
|
||||||
|
fpArb.io.out_resp <> fpu.io.cp_resp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
|
core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
|
||||||
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
|
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
|
||||||
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
|
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
|
||||||
|
|
||||||
roccs.flatMap(_.io.dmem) :+ iMemArb.io.out
|
roccs.flatMap(_.io.dmem) :+ iMemArb.io.out
|
||||||
} else { Seq(icache.io.mem) })
|
} else { Seq(icache.io.mem) })
|
||||||
|
|
||||||
|
if (!usingRocc || nFPUPorts == 0) {
|
||||||
|
fpuOpt.foreach { fpu =>
|
||||||
|
fpu.io.cp_req.valid := Bool(false)
|
||||||
|
fpu.io.cp_resp.ready := Bool(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user