From 52e31f3298c4d10f5cea6fcea48cf05ad774bae7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 14:39:23 -0700 Subject: [PATCH] Bypass scoreboard updates This reduces div/mul/D$ miss latency by 1 cycle. --- rocket/src/main/scala/ctrl.scala | 30 ++++++++++++++++++++---------- rocket/src/main/scala/dpath.scala | 19 +++++++------------ rocket/src/main/scala/tlb.scala | 2 +- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 46f4ad3b..d899bbf3 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,6 +37,8 @@ class CtrlDpathIO extends Bundle() val ex_mem_type = Bits(OUTPUT, 3) val ex_rs2_val = Bool(OUTPUT) val mem_rs2_val = Bool(OUTPUT) + val mem_ll_bypass_rs1 = Bool(OUTPUT) + val mem_ll_bypass_rs2 = Bool(OUTPUT) // exception handling val exception = Bool(OUTPUT); val cause = UInt(OUTPUT, 6); @@ -605,11 +607,17 @@ class Control(implicit conf: RocketConfiguration) extends Module class Scoreboard(n: Int) { val r = Reg(init=Bits(0, n)) - var _next = r + private var _next = r + private var cur = r var ens = Bool(false) - def apply(addr: UInt) = r(addr) def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) - def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = { + val m = ~mask(en, addr) + update(en, _next & m) + //cur = cur & m + } + def read(addr: UInt) = r(addr) + def readBypassed(addr: UInt) = cur(addr) private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) private def update(en: Bool, update: UInt) = { _next = update @@ -628,10 +636,10 @@ class Control(implicit conf: RocketConfiguration) extends Module fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) - io.fpu.dec.ren1 && fp_sboard(id_raddr1) || - io.fpu.dec.ren2 && fp_sboard(id_raddr2) || - io.fpu.dec.ren3 && fp_sboard(id_raddr3) || - io.fpu.dec.wen && fp_sboard(id_waddr) + io.fpu.dec.ren1 && fp_sboard.readBypassed(id_raddr1) || + io.fpu.dec.ren2 && fp_sboard.readBypassed(id_raddr2) || + io.fpu.dec.ren3 && fp_sboard.readBypassed(id_raddr3) || + io.fpu.dec.wen && fp_sboard.readBypassed(id_waddr) } else Bool(false) // write cause to PCR on an exception @@ -699,10 +707,12 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1 + io.dpath.mem_ll_bypass_rs2 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr2 val id_sboard_hazard = - (id_raddr1 != UInt(0) && id_renx1 && sboard(id_raddr1) || - id_raddr2 != UInt(0) && id_renx2 && sboard(id_raddr2) || - id_waddr != UInt(0) && id_wen && sboard(id_waddr)) + (id_raddr1 != UInt(0) && id_renx1 && sboard.read(id_raddr1) && !io.dpath.mem_ll_bypass_rs1 || + id_raddr2 != UInt(0) && id_renx2 && sboard.read(id_raddr2) && !io.dpath.mem_ll_bypass_rs2 || + id_waddr != UInt(0) && id_wen && sboard.read(id_waddr)) val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f62e3327..f71f0d99 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -71,22 +71,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // bypass muxes val id_rs1_zero = id_raddr1 === UInt(0) val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr - val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr - val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass - val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), UInt(2) | io.ctrl.mem_load)) - val id_rs1 = - Mux(id_raddr1 === UInt(0), UInt(0), - Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, - readRF(id_raddr1))) + val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr + val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass || io.ctrl.mem_ll_bypass_rs1 + val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) + val id_rs1 = Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, readRF(id_raddr1)) val id_rs2_zero = id_raddr2 === UInt(0) val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr - val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass - val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), UInt(2) | io.ctrl.mem_load)) - val id_rs2 = Mux(id_raddr2 === UInt(0), UInt(0), - Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, - readRF(id_raddr2))) + val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass || io.ctrl.mem_ll_bypass_rs2 + val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) + val id_rs2 = Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, readRF(id_raddr2)) // immediate generation def imm(sel: Bits, inst: Bits) = { diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 586e646c..0e9d6112 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -120,7 +120,7 @@ class TLB(entries: Int) extends Module val sx_array = Reg(Bits()) // supervisor execute permission when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn - val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm(5,0) + val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm ur_array := ur_array.bitSet(r_refill_waddr, perm(0)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) ux_array := ux_array.bitSet(r_refill_waddr, perm(2))