1
0

make BTB fully associative; don't use it for JALR

JALR created a long path from the ALU in the execute stage
to an address comparator to the next-PC mux.  The benefit
was close to nil, anyway.
This commit is contained in:
Andrew Waterman 2012-02-09 01:32:52 -08:00
parent fcc8081c4d
commit 128ec567ed
6 changed files with 57 additions and 41 deletions

View File

@ -22,6 +22,7 @@ object Constants
val PC_PCR = UFix(4, 3);
val PC_WB = UFix(5, 3);
val PC_EVEC = UFix(6, 3);
val PC_JR = UFix(7, 3);
val KF_Y = UFix(1, 1);
val KF_N = UFix(0, 1);

View File

@ -60,7 +60,7 @@ class rocketProc extends Component
io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0);
io.imem.req_ppn := itlb.io.cpu.resp_ppn;
io.imem.req_val := ctrl.io.imem.req_val;
io.imem.invalidate := ctrl.io.flush_inst;
io.imem.invalidate := ctrl.io.dpath.flush_inst;
ctrl.io.imem.resp_val := io.imem.resp_val;
dpath.io.imem.resp_data := io.imem.resp_data;
ctrl.io.xcpt_itlb := itlb.io.cpu.exception;

View File

@ -40,6 +40,7 @@ class ioCtrlDpath extends Bundle()
val ex_wen = Bool(OUTPUT);
val mem_wen = Bool(OUTPUT);
val wb_wen = Bool(OUTPUT);
val flush_inst = Bool(OUTPUT);
// enable/disable interrupts
val irq_enable = Bool(OUTPUT);
val irq_disable = Bool(OUTPUT);
@ -50,7 +51,6 @@ class ioCtrlDpath extends Bundle()
// inputs from datapath
val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC
val btb_hit = Bool(INPUT);
val btb_match = Bool(INPUT);
val inst = Bits(32, INPUT);
val br_eq = Bool(INPUT);
val br_lt = Bool(INPUT);
@ -84,7 +84,6 @@ class ioCtrlAll extends Bundle()
val dtlb_kill = Bool(OUTPUT);
val dtlb_rdy = Bool(INPUT);
val dtlb_miss = Bool(INPUT);
val flush_inst = Bool(OUTPUT);
val xcpt_dtlb_ld = Bool(INPUT);
val xcpt_dtlb_st = Bool(INPUT);
val xcpt_itlb = Bool(INPUT);
@ -422,8 +421,8 @@ class rocketCtrl extends Component
(ex_reg_br_type === BR_LTU) & bltu |
(ex_reg_br_type === BR_GE) & bge |
(ex_reg_br_type === BR_GEU) & bgeu |
(ex_reg_br_type === BR_J) |
(ex_reg_br_type === BR_JR); // treat J/JAL/JALR like a taken branch
(ex_reg_br_type === BR_J); // treat J/JAL like taken branches
val jr_taken = ex_reg_br_type === BR_JR
val mem_reg_div_mul_val = Reg(){Bool()};
val mem_reg_eret = Reg(){Bool()};
@ -573,8 +572,7 @@ class rocketCtrl extends Component
UFix(0,5)))))))))))); // instruction address misaligned
// control transfer from ex/mem
val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match
val take_pc_ex = !ex_btb_match && br_taken || ex_reg_btb_hit && !br_taken
val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken
val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret;
take_pc := take_pc_ex || take_pc_wb;
@ -612,11 +610,12 @@ class rocketCtrl extends Component
Mux(wb_reg_replay, PC_WB, // replay
Mux(wb_reg_eret, PC_PCR, // eret instruction
Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch
Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch
Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch
Mux(jr_taken, PC_JR, // taken JALR
Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB
PC_4)))))); // PC+4
PC_4))))))); // PC+4
io.dpath.wen_btb := !ex_btb_match && br_taken;
io.dpath.wen_btb := !ex_reg_btb_hit && br_taken
io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss;
io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)
@ -678,8 +677,7 @@ class rocketCtrl extends Component
val ctrl_killd = take_pc || ctrl_stalld;
val ctrl_killf = take_pc || !io.imem.resp_val;
io.flush_inst := wb_reg_flush_inst;
io.dpath.flush_inst := wb_reg_flush_inst;
io.dpath.stallf := ctrl_stallf;
io.dpath.stalld := ctrl_stalld;
io.dpath.killf := ctrl_killf;

View File

@ -47,7 +47,7 @@ class rocketDpath extends Component
{
val io = new ioDpathAll();
val btb = new rocketDpathBTB(8); // # of entries in BTB
val btb = new rocketDpathBTB(4); // # of entries in BTB
val if_btb_target = btb.io.target;
@ -143,18 +143,15 @@ class rocketDpath extends Component
val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0))
val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix
val ex_br_target_sel = Reg(io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE)
val ex_br_target = Mux(ex_br_target_sel, ex_branch_target, ex_effective_address)
btb.io.correct_target := ex_br_target
val if_next_pc =
Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target),
Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4,
Mux(io.ctrl.sel_pc === PC_BR, ex_br_target,
Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target,
Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address,
Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET
Mux(io.ctrl.sel_pc === PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec),
Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc,
if_pc_plus4)))))); // PC_4
if_pc_plus4))))))); // PC_4
when (!io.ctrl.stallf) {
if_reg_pc <== if_next_pc.toUFix;
@ -171,7 +168,8 @@ class rocketDpath extends Component
btb.io.wen <> io.ctrl.wen_btb;
btb.io.clr <> io.ctrl.clr_btb;
btb.io.correct_pc := ex_reg_pc;
io.ctrl.btb_match := id_reg_pc === ex_br_target;
btb.io.correct_target := ex_branch_target
btb.io.invalidate := io.ctrl.flush_inst
// instruction decode stage
when (!io.ctrl.stalld) {

View File

@ -13,31 +13,50 @@ class ioDpathBTB extends Bundle()
val target = UFix(VADDR_BITS, OUTPUT);
val wen = Bool(INPUT);
val clr = Bool(INPUT);
val invalidate = Bool(INPUT);
val correct_pc = UFix(VADDR_BITS, INPUT);
val correct_target = UFix(VADDR_BITS, INPUT);
}
// basic direct-mapped branch target buffer
// fully-associative branch target buffer
class rocketDpathBTB(entries: Int) extends Component
{
val io = new ioDpathBTB();
val addr_bits = ceil(log10(entries)/log10(2)).toInt;
val idxlsb = 2;
val idxmsb = idxlsb+addr_bits-1;
val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1;
val taglsb = (VADDR_BITS-idxlsb);
val do_update = io.wen || io.clr
val expected_tag = Mux(do_update, io.correct_pc, io.current_pc)
val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc(idxmsb,idxlsb), !io.clr, resetVal = Bool(false));
val tag_target_array = Mem4(entries, io.wen, io.correct_pc(idxmsb,idxlsb),
Cat(io.correct_pc(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb)))
tag_target_array.setReadLatency(0);
tag_target_array.setTarget('inst);
val is_val = vb_array(io.current_pc(idxmsb,idxlsb));
val tag_target = tag_target_array(io.current_pc(idxmsb, idxlsb));
val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU
io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc(VADDR_BITS-1, idxmsb+1));
io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix;
var hit_reduction = Bool(false)
val hit = Wire() { Bool() }
val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) }
for (i <- 0 until entries) {
val tag = Reg() { UFix() }
val target = Reg() { UFix() }
val valid = Reg(resetVal = Bool(false))
val my_hit = valid && tag === expected_tag
val my_clr = io.clr && my_hit || io.invalidate
val my_wen = io.wen && (my_hit || !hit && UFix(i) === repl_way)
when (my_clr) {
valid <== Bool(false)
}
when (my_wen) {
valid <== Bool(true)
tag <== io.correct_pc
target <== io.correct_target
}
hit_reduction = hit_reduction || my_hit
mux.io.sel(i) := my_hit
mux.io.in(i) := target
}
hit := hit_reduction
io.hit := hit
io.target := mux.io.out.toUFix
}
class ioDpathPCR extends Bundle()

View File

@ -37,9 +37,9 @@ class Top() extends Component {
object top_main {
def main(args: Array[String]) = {
// Can turn off --debug and --vcd when done with debugging to improve emulator performance
val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd");
// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd");
// val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug");
// val cpu_args = args ++ Array("--target-dir", "generated-src");
val cpu_args = args ++ Array("--target-dir", "generated-src");
// Set variables based off of command flags
// for(a <- args) {
// a match {