From ffe23a1ee815329fa613c14fa6765747be13bef2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 2 Jan 2012 00:25:11 -0800 Subject: [PATCH] fix WAW hazard handling --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ctrl.scala | 82 +++++++++++---------------- rocket/src/main/scala/ctrl_util.scala | 2 - rocket/src/main/scala/dcache.scala | 1 + rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/nbdcache.scala | 1 + 6 files changed, 37 insertions(+), 51 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 08e5a0ea..950adc54 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -103,6 +103,7 @@ class rocketProc extends Component ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; dpath.io.dmem.resp_val := arb.io.cpu.resp_val; + dpath.io.dmem.resp_replay := io.dmem.resp_replay; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ee18bc63..e6406f81 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -302,10 +302,7 @@ class rocketCtrl extends Component val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); - val id_waddr = io.dpath.inst(31,27); - - val id_ren2 = id_renx2.toBool; - val id_ren1 = id_renx1.toBool; + val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE); @@ -329,7 +326,6 @@ class rocketCtrl extends Component val id_stall_raddr2 = sboard.io.stalla; val id_stall_raddr1 = sboard.io.stallb; val id_stall_waddr = sboard.io.stallc; - val id_stall_ra = sboard.io.stallra; val id_reg_btb_hit = Reg(resetVal = Bool(false)); val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); @@ -581,62 +577,50 @@ class rocketCtrl extends Component io.dpath.stalld ); - // check for loads and amos in execute and mem stages to detect load/use hazards + // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); - - val lu_stall_ex = - ex_mem_cmd_load && - ((id_ren1 && (id_raddr1 === io.dpath.ex_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.ex_waddr))); + val data_hazard_ex = + (ex_mem_cmd_load || ex_reg_div_mul_val) && + ((id_renx1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.ex_waddr)) || + (id_wen.toBool && (id_waddr === io.dpath.ex_waddr))); + // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. + // stall for WAW-but-not-RAW hazards on LW/LD/AMO. + val mem_mem_cmd_load = + mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); val mem_mem_cmd_load_bh = - mem_reg_mem_val && - (mem_reg_mem_cmd === M_XRD) && + mem_mem_cmd_load && ((mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU)); - - val lu_stall_mem = - mem_mem_cmd_load_bh && - ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); + val raw_hazard_mem = + (id_renx1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.mem_waddr)); + val waw_hazard_mem = + (id_wen.toBool && (id_waddr === io.dpath.mem_waddr)); + val data_hazard_mem = + (mem_mem_cmd_load_bh || mem_reg_div_mul_val) && (raw_hazard_mem || waw_hazard_mem) || + mem_mem_cmd_load && (!raw_hazard_mem && waw_hazard_mem) - val lu_stall_wb = - dcache_miss && - ((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.wb_waddr))); + // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. + val data_hazard_wb = + (dcache_miss || wb_reg_div_mul_val) && + ((id_renx1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.wb_waddr)) || + (id_wen.toBool && (id_waddr === io.dpath.wb_waddr))); - val lu_stall = lu_stall_ex || lu_stall_mem || lu_stall_wb; - - // check for divide and multiply instructions in ex,mem,wb stages - val dm_stall_ex = - ex_reg_div_mul_val && - ((id_ren1 && (id_raddr1 === io.dpath.ex_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.ex_waddr))); - - val dm_stall_mem = - mem_reg_div_mul_val && - ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); - - val dm_stall_wb = - wb_reg_div_mul_val && - ((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.wb_waddr))); - - val dm_stall = dm_stall_ex || dm_stall_mem || dm_stall_wb; + val data_hazard = data_hazard_ex || data_hazard_mem || data_hazard_wb; val ctrl_stalld = !take_pc && ( - dm_stall || - lu_stall || - id_ren2 && id_stall_raddr2 || - id_ren1 && id_stall_raddr1 || - (id_sel_wa === WA_RD) && id_stall_waddr || - (id_sel_wa === WA_RA) && id_stall_ra || + data_hazard || + id_renx2.toBool && id_stall_raddr2 || + id_renx1.toBool && id_stall_raddr1 || + id_wen.toBool && id_stall_waddr || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy || @@ -662,8 +646,8 @@ class rocketCtrl extends Component io.dpath.killm := kill_mem; io.dpath.mem_load := mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); - io.dpath.ren2 := id_ren2; - io.dpath.ren1 := id_ren1; + io.dpath.ren2 := id_renx2.toBool; + io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2; io.dpath.sel_alu1 := id_sel_alu1.toBool; io.dpath.fn_dw := id_fn_dw.toBool; diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 73661fa6..55744650 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -18,7 +18,6 @@ class ioCtrlSboard extends Bundle() val stalla = Bool('output); val stallb = Bool('output); val stallc = Bool('output); - val stallra = Bool('output); } class rocketCtrlSboard extends Component @@ -34,7 +33,6 @@ class rocketCtrlSboard extends Component io.stalla := reg_busy(io.raddra).toBool; io.stallb := reg_busy(io.raddrb).toBool; io.stallc := reg_busy(io.raddrc).toBool; - io.stallra := reg_busy(RA).toBool; } class ioCtrlCnt extends Bundle() diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 31266888..fd7bfdea 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -21,6 +21,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_miss = Bool('output); val resp_nack = Bool('output); val resp_val = Bool('output); + val resp_replay = Bool('output); val resp_data = Bits(64, 'output); val resp_data_subword = Bits(64, 'output); val resp_tag = Bits(DCACHE_TAG_BITS, 'output); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 42afd337..4f506cf3 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -11,6 +11,7 @@ class ioDpathDmem extends Bundle() val req_tag = UFix(CPU_TAG_BITS, 'output); val req_data = Bits(64, 'output); val resp_val = Bool('input); + val resp_replay = Bool('input); val resp_tag = Bits(CPU_TAG_BITS, 'input); val resp_data = Bits(64, 'input); val resp_data_subword = Bits(64, 'input); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 498d8e44..70977b4d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -770,6 +770,7 @@ class HellaCache(lines: Int) extends Component { io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val + io.cpu.resp_replay := replayer.io.cpu_resp_val io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_data := loadgen.io.dout