From 128ec567edfcf295abeea878220fe07caa7f5e28 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Feb 2012 01:32:52 -0800 Subject: [PATCH] make BTB fully associative; don't use it for JALR JALR created a long path from the ALU in execute stage to an address comparator to the next-PC mux. the benefit was close to nil, anyway. --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/ctrl.scala | 20 ++++----- rocket/src/main/scala/dpath.scala | 14 +++---- rocket/src/main/scala/dpath_util.scala | 57 +++++++++++++++++--------- rocket/src/main/scala/top.scala | 4 +- 6 files changed, 57 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1e25faf8..6ce9df0e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -22,6 +22,7 @@ object Constants val PC_PCR = UFix(4, 3); val PC_WB = UFix(5, 3); val PC_EVEC = UFix(6, 3); + val PC_JR = UFix(7, 3); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index beed47df..978dc409 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -60,7 +60,7 @@ class rocketProc extends Component io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); io.imem.req_ppn := itlb.io.cpu.resp_ppn; io.imem.req_val := ctrl.io.imem.req_val; - io.imem.invalidate := ctrl.io.flush_inst; + io.imem.invalidate := ctrl.io.dpath.flush_inst; ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2880ae5b..26fce4fa 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -40,6 +40,7 @@ class ioCtrlDpath extends Bundle() val ex_wen = Bool(OUTPUT); val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); + val 
flush_inst = Bool(OUTPUT); // enable/disable interrupts val irq_enable = Bool(OUTPUT); val irq_disable = Bool(OUTPUT); @@ -50,7 +51,6 @@ class ioCtrlDpath extends Bundle() // inputs from datapath val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC val btb_hit = Bool(INPUT); - val btb_match = Bool(INPUT); val inst = Bits(32, INPUT); val br_eq = Bool(INPUT); val br_lt = Bool(INPUT); @@ -84,7 +84,6 @@ class ioCtrlAll extends Bundle() val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); val dtlb_miss = Bool(INPUT); - val flush_inst = Bool(OUTPUT); val xcpt_dtlb_ld = Bool(INPUT); val xcpt_dtlb_st = Bool(INPUT); val xcpt_itlb = Bool(INPUT); @@ -422,8 +421,8 @@ class rocketCtrl extends Component (ex_reg_br_type === BR_LTU) & bltu | (ex_reg_br_type === BR_GE) & bge | (ex_reg_br_type === BR_GEU) & bgeu | - (ex_reg_br_type === BR_J) | - (ex_reg_br_type === BR_JR); // treat J/JAL/JALR like a taken branch + (ex_reg_br_type === BR_J); // treat J/JAL like taken branches + val jr_taken = ex_reg_br_type === BR_JR val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; @@ -573,8 +572,7 @@ class rocketCtrl extends Component UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem - val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match - val take_pc_ex = !ex_btb_match && br_taken || ex_reg_btb_hit && !br_taken + val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; take_pc := take_pc_ex || take_pc_wb; @@ -612,11 +610,12 @@ class rocketCtrl extends Component Mux(wb_reg_replay, PC_WB, // replay Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch - Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch + Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch + Mux(jr_taken, PC_JR, // taken JALR Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB 
- PC_4)))))); // PC+4 + PC_4))))))); // PC+4 - io.dpath.wen_btb := !ex_btb_match && br_taken; + io.dpath.wen_btb := !ex_reg_btb_hit && br_taken io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay) @@ -678,8 +677,7 @@ class rocketCtrl extends Component val ctrl_killd = take_pc || ctrl_stalld; val ctrl_killf = take_pc || !io.imem.resp_val; - io.flush_inst := wb_reg_flush_inst; - + io.dpath.flush_inst := wb_reg_flush_inst; io.dpath.stallf := ctrl_stallf; io.dpath.stalld := ctrl_stalld; io.dpath.killf := ctrl_killf; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d5981227..6dfa0a16 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -47,7 +47,7 @@ class rocketDpath extends Component { val io = new ioDpathAll(); - val btb = new rocketDpathBTB(8); // # of entries in BTB + val btb = new rocketDpathBTB(4); // # of entries in BTB val if_btb_target = btb.io.target; @@ -142,19 +142,16 @@ class rocketDpath extends Component val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - - val ex_br_target_sel = Reg(io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE) - val ex_br_target = Mux(ex_br_target_sel, ex_branch_target, ex_effective_address) - btb.io.correct_target := ex_br_target val if_next_pc = Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BR, ex_br_target, + Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, + Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address, Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), 
pcr.io.evec), Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, - if_pc_plus4)))))); // PC_4 + if_pc_plus4))))))); // PC_4 when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc.toUFix; @@ -171,7 +168,8 @@ class rocketDpath extends Component btb.io.wen <> io.ctrl.wen_btb; btb.io.clr <> io.ctrl.clr_btb; btb.io.correct_pc := ex_reg_pc; - io.ctrl.btb_match := id_reg_pc === ex_br_target; + btb.io.correct_target := ex_branch_target + btb.io.invalidate := io.ctrl.flush_inst // instruction decode stage when (!io.ctrl.stalld) { diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 32197f7b..4472c919 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -13,31 +13,50 @@ class ioDpathBTB extends Bundle() val target = UFix(VADDR_BITS, OUTPUT); val wen = Bool(INPUT); val clr = Bool(INPUT); + val invalidate = Bool(INPUT); val correct_pc = UFix(VADDR_BITS, INPUT); val correct_target = UFix(VADDR_BITS, INPUT); } -// basic direct-mapped branch target buffer +// fully-associative branch target buffer class rocketDpathBTB(entries: Int) extends Component { - val io = new ioDpathBTB(); - - val addr_bits = ceil(log10(entries)/log10(2)).toInt; - val idxlsb = 2; - val idxmsb = idxlsb+addr_bits-1; - val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1; - val taglsb = (VADDR_BITS-idxlsb); - - val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc(idxmsb,idxlsb), !io.clr, resetVal = Bool(false)); - val tag_target_array = Mem4(entries, io.wen, io.correct_pc(idxmsb,idxlsb), - Cat(io.correct_pc(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) - tag_target_array.setReadLatency(0); - tag_target_array.setTarget('inst); - val is_val = vb_array(io.current_pc(idxmsb,idxlsb)); - val tag_target = tag_target_array(io.current_pc(idxmsb, idxlsb)); - - io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc(VADDR_BITS-1, idxmsb+1)); - io.target := Cat(tag_target(taglsb-1, 0), 
Bits(0,idxlsb)).toUFix; + val io = new ioDpathBTB(); + + val do_update = io.wen || io.clr + val expected_tag = Mux(do_update, io.correct_pc, io.current_pc) + + val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU + + var hit_reduction = Bool(false) + val hit = Wire() { Bool() } + val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) } + + for (i <- 0 until entries) { + val tag = Reg() { UFix() } + val target = Reg() { UFix() } + val valid = Reg(resetVal = Bool(false)) + val my_hit = valid && tag === expected_tag + val my_clr = io.clr && my_hit || io.invalidate + val my_wen = io.wen && (my_hit || !hit && UFix(i) === repl_way) + + when (my_clr) { + valid <== Bool(false) + } + when (my_wen) { + valid <== Bool(true) + tag <== io.correct_pc + target <== io.correct_target + } + + hit_reduction = hit_reduction || my_hit + mux.io.sel(i) := my_hit + mux.io.in(i) := target + } + hit := hit_reduction + + io.hit := hit + io.target := mux.io.out.toUFix } class ioDpathPCR extends Bundle() diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index a85b7245..2e5e0311 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance - val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); +// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); -// val cpu_args = args ++ Array("--target-dir", "generated-src"); + val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match {