From 0369b05deb41f2eef3c4edd031d5d16c3f18664e Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Tue, 17 Jan 2012 21:12:31 -0800
Subject: [PATCH] move replays to writeback stage

---
 rocket/src/main/scala/consts.scala     |  2 +-
 rocket/src/main/scala/ctrl.scala       | 59 ++++++++++++++------------
 rocket/src/main/scala/dpath.scala      | 10 +++--
 rocket/src/main/scala/dpath_util.scala | 13 +++---
 rocket/src/main/scala/dtlb.scala       |  6 +--
 rocket/src/main/scala/itlb.scala       |  5 ++-
 6 files changed, 54 insertions(+), 41 deletions(-)

diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala
index 6d0b92ce..bada957b 100644
--- a/rocket/src/main/scala/consts.scala
+++ b/rocket/src/main/scala/consts.scala
@@ -21,7 +21,7 @@ object Constants
   val PC_BR = UFix(3, 4);
   val PC_JR = UFix(4, 4);
   val PC_PCR = UFix(5, 4);
-  val PC_MEM = UFix(6, 4);
+  val PC_WB = UFix(6, 4);
   val PC_EVEC = UFix(7, 4);
 
   val KF_Y = UFix(1, 1);
diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala
index 7d9ba6d9..8955aa6a 100644
--- a/rocket/src/main/scala/ctrl.scala
+++ b/rocket/src/main/scala/ctrl.scala
@@ -11,6 +11,7 @@ class ioCtrlDpath extends Bundle()
   // outputs to datapath
   val sel_pc = UFix(4, 'output);
   val wen_btb = Bool('output);
+  val clr_btb = Bool('output);
   val stallf = Bool('output);
   val stalld = Bool('output);
   val killf = Bool('output);
@@ -327,6 +328,7 @@ class rocketCtrl extends Component
   val id_reg_xcpt_itlb = Reg(resetVal = Bool(false));
   val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false));
   val id_reg_icmiss = Reg(resetVal = Bool(false));
+  val id_reg_replay = Reg(resetVal = Bool(false));
 
   val ex_reg_br_type = Reg(){UFix(width = 4)};
   val ex_reg_btb_hit = Reg(){Bool()};
@@ -345,7 +347,7 @@ class rocketCtrl extends Component
   val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false));
   val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false));
   val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false));
-  val ex_reg_icmiss = Reg(resetVal = Bool(false));
+  val ex_reg_replay = Reg(resetVal = Bool(false));
 
   val mem_reg_inst_di = Reg(resetVal = Bool(false));
   val mem_reg_inst_ei = Reg(resetVal = Bool(false));
@@ -364,22 +366,24 @@ class rocketCtrl extends Component
   val wb_reg_eret = Reg(resetVal = Bool(false));
   val wb_reg_exception = Reg(resetVal = Bool(false));
   val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false));
+  val wb_reg_replay = Reg(resetVal = Bool(false));
   val wb_reg_cause = Reg(){UFix()};
 
   val take_pc = Wire() { Bool() };
 
   when (!io.dpath.stalld) {
     when (io.dpath.killf) {
+      id_reg_btb_hit <== Bool(false);
       id_reg_xcpt_ma_inst <== Bool(false);
       id_reg_xcpt_itlb <== Bool(false);
-      id_reg_btb_hit <== Bool(false);
     }
     otherwise{
+      id_reg_btb_hit <== io.dpath.btb_hit;
       id_reg_xcpt_ma_inst <== if_reg_xcpt_ma_inst;
       id_reg_xcpt_itlb <== io.xcpt_itlb;
-      id_reg_btb_hit <== io.dpath.btb_hit;
     }
-    id_reg_icmiss <== !take_pc && !io.imem.resp_val;
+    id_reg_icmiss <== !io.imem.resp_val;
+    id_reg_replay <== !take_pc && !io.imem.resp_val;
   }
 
   // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim)
@@ -402,7 +406,7 @@ class rocketCtrl extends Component
     ex_reg_xcpt_privileged <== Bool(false);
     ex_reg_xcpt_fpu <== Bool(false);
     ex_reg_xcpt_syscall <== Bool(false);
-    ex_reg_icmiss <== Bool(false);
+    ex_reg_replay <== Bool(false);
   }
   otherwise {
     ex_reg_br_type <== id_br_type;
@@ -421,7 +425,7 @@ class rocketCtrl extends Component
     // ex_reg_xcpt_fpu <== id_fp_val.toBool;
     ex_reg_xcpt_fpu <== Bool(false);
     ex_reg_xcpt_syscall <== id_syscall.toBool;
-    ex_reg_icmiss <== id_reg_icmiss;
+    ex_reg_replay <== id_reg_replay;
   }
   ex_reg_mem_cmd <== id_mem_cmd;
   ex_reg_mem_type <== id_mem_type;
@@ -492,14 +496,14 @@ class rocketCtrl extends Component
     wb_reg_eret <== Bool(false);
     wb_reg_inst_di <== Bool(false);
     wb_reg_inst_ei <== Bool(false);
+    wb_reg_div_mul_val <== Bool(false);
   }
   otherwise {
     wb_reg_eret <== mem_reg_eret;
     wb_reg_inst_di <== mem_reg_inst_di;
     wb_reg_inst_ei <== mem_reg_inst_ei;
+    wb_reg_div_mul_val <== mem_reg_div_mul_val;
   }
-
-  wb_reg_div_mul_val <== mem_reg_div_mul_val;
 
   // exception handling
   // FIXME: verify PC in MEM stage points to valid, restartable instruction
@@ -547,40 +551,42 @@ class rocketCtrl extends Component
     Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault
     UFix(0,5))))))))))); // instruction address misaligned
 
-  wb_reg_exception <== mem_exception;
-  wb_reg_badvaddr_wen <== mem_xcpt_dtlb_ld || mem_xcpt_dtlb_st;
-  wb_reg_cause <== mem_cause;
-
   // write cause to PCR on an exception
   io.dpath.exception := wb_reg_exception;
   io.dpath.cause := wb_reg_cause;
   io.dpath.badvaddr_wen := wb_reg_badvaddr_wen;
 
-  // replay mem stage PC on a DTLB miss
-  val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay;
-  val kill_mem = io.dtlb_miss || io.dmem.resp_nack || mem_exception;
-  val kill_dcache = io.dtlb_miss || mem_reg_kill || mem_exception;
-
   // control transfer from ex/mem
   val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match
   val br_jr_taken = br_taken || jr_taken
   val take_pc_ex = !ex_btb_match && br_jr_taken || ex_reg_btb_hit && !br_jr_taken
-  val take_pc_mem = mem_exception || mem_reg_eret || replay_mem
-  take_pc <== take_pc_ex || take_pc_mem
+  val take_pc_mem = Bool(false) //mem_exception || mem_reg_eret;
+  val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret;
+  take_pc <== take_pc_ex || take_pc_mem || take_pc_wb;
+
+  // replay mem stage PC on a DTLB miss
+  val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay;
+  val kill_mem = io.dtlb_miss || io.dmem.resp_nack || take_pc_wb || mem_exception || mem_reg_kill;
+  val kill_dcache = io.dtlb_miss || take_pc_wb || mem_exception || mem_reg_kill;
 
   // replay execute stage PC when the D$ is blocked, when the D$ misses,
   // for privileged instructions, and for fence.i instructions
   val replay_ex = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst ||
-    ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy)
-  val kill_ex = take_pc_mem || replay_ex
+    ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy)
+  val kill_ex = take_pc_mem || take_pc_wb || replay_ex
 
-  mem_reg_replay <== (ex_reg_icmiss || replay_ex) && !take_pc_mem;
-  mem_reg_kill <== kill_ex
+  mem_reg_replay <== replay_ex && !(take_pc_mem || take_pc_wb);
+  mem_reg_kill <== kill_ex;
+
+  wb_reg_replay <== replay_mem && !take_pc_wb;
+  wb_reg_exception <== mem_exception && !take_pc_wb;
+  wb_reg_badvaddr_wen <== (mem_xcpt_dtlb_ld || mem_xcpt_dtlb_st) && !take_pc_wb;
+  wb_reg_cause <== mem_cause;
 
   io.dpath.sel_pc :=
-    Mux(replay_mem, PC_MEM, // dtlb miss
-    Mux(mem_exception, PC_EVEC, // exception
-    Mux(mem_reg_eret, PC_PCR, // eret instruction
+    Mux(wb_reg_exception, PC_EVEC, // exception
+    Mux(wb_reg_replay, PC_WB, // replay
+    Mux(wb_reg_eret, PC_PCR, // eret instruction
     Mux(ex_reg_btb_hit && !br_jr_taken, PC_EX4, // mispredicted not taken branch
     Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch
     Mux(!ex_btb_match && jr_taken, PC_JR, // mispredicted jump register
@@ -588,6 +594,7 @@ class rocketCtrl extends Component
     PC_4))))))); // PC+4
 
   io.dpath.wen_btb := !ex_btb_match && br_jr_taken && !kill_ex;
+  io.dpath.clr_btb := ex_reg_btb_hit && !br_jr_taken || id_reg_icmiss;
 
   // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage.
   val ex_mem_cmd_load =
diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala
index bb5aae43..4ef112f6 100644
--- a/rocket/src/main/scala/dpath.scala
+++ b/rocket/src/main/scala/dpath.scala
@@ -111,6 +111,7 @@ class rocketDpath extends Component
   val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false));
 
   // writeback definitions
+  val wb_reg_valid = Reg(resetVal = Bool(false));
   val wb_reg_pc = Reg() { UFix() };
   val wb_reg_waddr = Reg() { UFix() };
   val wb_reg_wdata = Reg() { Bits() };
@@ -145,9 +146,9 @@ class rocketDpath extends Component
     Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4,
     Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target,
     Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix,
-    Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata(VADDR_BITS-1,0), // only used for ERET
+    Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS-1,0), // only used for ERET
     Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec,
-    Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc,
+    Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc,
     if_pc_plus4))))))); // PC_4
 
   when (!io.ctrl.stallf) {
@@ -164,6 +165,7 @@ class rocketDpath extends Component
   btb.io.current_pc4 := if_pc_plus4;
   btb.io.hit ^^ io.ctrl.btb_hit;
   btb.io.wen ^^ io.ctrl.wen_btb;
+  btb.io.clr ^^ io.ctrl.clr_btb;
   btb.io.correct_pc4 := ex_reg_pc_plus4;
   io.ctrl.btb_match := id_reg_pc === jr_br_target;
 
@@ -345,7 +347,7 @@ class rocketDpath extends Component
   tsc_reg <== tsc_reg + UFix(1);
   // instructions retired counter
   val irt_reg = Reg(resetVal = UFix(0,64));
-  when (mem_reg_valid) { irt_reg <== irt_reg + UFix(1); }
+  when (wb_reg_valid) { irt_reg <== irt_reg + UFix(1); }
 
   // writeback select mux
   ex_wdata :=
@@ -394,10 +396,12 @@ class rocketDpath extends Component
   wb_reg_raddr2 <== mem_reg_raddr2;
 
   when (io.ctrl.killm) {
+    wb_reg_valid <== Bool(false);
     wb_reg_ctrl_wen <== Bool(false);
     wb_reg_ctrl_wen_pcr <== Bool(false);
   }
   otherwise {
+    wb_reg_valid <== mem_reg_valid;
     wb_reg_ctrl_wen <== mem_reg_ctrl_wen && !io.dmem.resp_miss;
     wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr;
   }
diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala
index bf13d0c0..c011b614 100644
--- a/rocket/src/main/scala/dpath_util.scala
+++ b/rocket/src/main/scala/dpath_util.scala
@@ -12,6 +12,7 @@ class ioDpathBTB extends Bundle()
   val hit = Bool('output);
   val target = UFix(VADDR_BITS, 'output);
   val wen = Bool('input);
+  val clr = Bool('input);
   val correct_pc4 = UFix(VADDR_BITS, 'input);
   val correct_target = UFix(VADDR_BITS, 'input);
 }
@@ -27,13 +28,13 @@ class rocketDpathBTB(entries: Int) extends Component
   val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1;
   val taglsb = (VADDR_BITS-idxlsb);
 
-  val rst_lwlr_pf = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), UFix(1,1), resetVal = UFix(0,1));
-  val lwlr_pf = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb),
-    Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb)), resetVal = UFix(0,1));
-  val is_val = rst_lwlr_pf(io.current_pc4(idxmsb,idxlsb));
-  val tag_target = lwlr_pf(io.current_pc4(idxmsb, idxlsb));
+  val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc4(idxmsb,idxlsb), !io.clr, resetVal = Bool(false));
+  val tag_target_array = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb),
+    Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb)))
+  val is_val = vb_array(io.current_pc4(idxmsb,idxlsb));
+  val tag_target = tag_target_array(io.current_pc4(idxmsb, idxlsb));
 
-  io.hit := (is_val & (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1))).toBool;
+  io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1));
   io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix;
 }
 
diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala
index cea13c60..372cc4fd 100644
--- a/rocket/src/main/scala/dtlb.scala
+++ b/rocket/src/main/scala/dtlb.scala
@@ -55,10 +55,10 @@ class rocketDTLB(entries: Int) extends Component
     r_cpu_req_vpn <== io.cpu.req_vpn;
     r_cpu_req_cmd <== io.cpu.req_cmd;
     r_cpu_req_asid <== io.cpu.req_asid;
+    r_cpu_req_val <== Bool(true);
   }
-
-  when (io.cpu.req_rdy) {
-    r_cpu_req_val <== io.cpu.req_val;
+  otherwise {
+    r_cpu_req_val <== Bool(false);
   }
 
   val req_load = (r_cpu_req_cmd === M_XRD);
diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala
index 827d731c..f63fd1c7 100644
--- a/rocket/src/main/scala/itlb.scala
+++ b/rocket/src/main/scala/itlb.scala
@@ -106,9 +106,10 @@ class rocketITLB(entries: Int) extends Component
   when (io.cpu.req_val && io.cpu.req_rdy) {
     r_cpu_req_vpn <== io.cpu.req_vpn;
     r_cpu_req_asid <== io.cpu.req_asid;
+    r_cpu_req_val <== Bool(true);
   }
-  when (io.cpu.req_rdy) {
-    r_cpu_req_val <== io.cpu.req_val;
+  otherwise {
+    r_cpu_req_val <== Bool(false);
   }
 
   val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn);