make BTB fully associative; don't use it for JALR
JALR created a long path from the ALU in the execute stage to an address comparator to the next-PC mux. The benefit was close to nil, anyway.
This commit is contained in:
parent
fcc8081c4d
commit
128ec567ed
@ -22,6 +22,7 @@ object Constants
|
||||
val PC_PCR = UFix(4, 3);
|
||||
val PC_WB = UFix(5, 3);
|
||||
val PC_EVEC = UFix(6, 3);
|
||||
val PC_JR = UFix(7, 3);
|
||||
|
||||
val KF_Y = UFix(1, 1);
|
||||
val KF_N = UFix(0, 1);
|
||||
|
@ -60,7 +60,7 @@ class rocketProc extends Component
|
||||
io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0);
|
||||
io.imem.req_ppn := itlb.io.cpu.resp_ppn;
|
||||
io.imem.req_val := ctrl.io.imem.req_val;
|
||||
io.imem.invalidate := ctrl.io.flush_inst;
|
||||
io.imem.invalidate := ctrl.io.dpath.flush_inst;
|
||||
ctrl.io.imem.resp_val := io.imem.resp_val;
|
||||
dpath.io.imem.resp_data := io.imem.resp_data;
|
||||
ctrl.io.xcpt_itlb := itlb.io.cpu.exception;
|
||||
|
@ -40,6 +40,7 @@ class ioCtrlDpath extends Bundle()
|
||||
val ex_wen = Bool(OUTPUT);
|
||||
val mem_wen = Bool(OUTPUT);
|
||||
val wb_wen = Bool(OUTPUT);
|
||||
val flush_inst = Bool(OUTPUT);
|
||||
// enable/disable interrupts
|
||||
val irq_enable = Bool(OUTPUT);
|
||||
val irq_disable = Bool(OUTPUT);
|
||||
@ -50,7 +51,6 @@ class ioCtrlDpath extends Bundle()
|
||||
// inputs from datapath
|
||||
val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC
|
||||
val btb_hit = Bool(INPUT);
|
||||
val btb_match = Bool(INPUT);
|
||||
val inst = Bits(32, INPUT);
|
||||
val br_eq = Bool(INPUT);
|
||||
val br_lt = Bool(INPUT);
|
||||
@ -84,7 +84,6 @@ class ioCtrlAll extends Bundle()
|
||||
val dtlb_kill = Bool(OUTPUT);
|
||||
val dtlb_rdy = Bool(INPUT);
|
||||
val dtlb_miss = Bool(INPUT);
|
||||
val flush_inst = Bool(OUTPUT);
|
||||
val xcpt_dtlb_ld = Bool(INPUT);
|
||||
val xcpt_dtlb_st = Bool(INPUT);
|
||||
val xcpt_itlb = Bool(INPUT);
|
||||
@ -422,8 +421,8 @@ class rocketCtrl extends Component
|
||||
(ex_reg_br_type === BR_LTU) & bltu |
|
||||
(ex_reg_br_type === BR_GE) & bge |
|
||||
(ex_reg_br_type === BR_GEU) & bgeu |
|
||||
(ex_reg_br_type === BR_J) |
|
||||
(ex_reg_br_type === BR_JR); // treat J/JAL/JALR like a taken branch
|
||||
(ex_reg_br_type === BR_J); // treat J/JAL like taken branches
|
||||
val jr_taken = ex_reg_br_type === BR_JR
|
||||
|
||||
val mem_reg_div_mul_val = Reg(){Bool()};
|
||||
val mem_reg_eret = Reg(){Bool()};
|
||||
@ -573,8 +572,7 @@ class rocketCtrl extends Component
|
||||
UFix(0,5)))))))))))); // instruction address misaligned
|
||||
|
||||
// control transfer from ex/mem
|
||||
val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match
|
||||
val take_pc_ex = !ex_btb_match && br_taken || ex_reg_btb_hit && !br_taken
|
||||
val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken
|
||||
val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret;
|
||||
take_pc := take_pc_ex || take_pc_wb;
|
||||
|
||||
@ -612,11 +610,12 @@ class rocketCtrl extends Component
|
||||
Mux(wb_reg_replay, PC_WB, // replay
|
||||
Mux(wb_reg_eret, PC_PCR, // eret instruction
|
||||
Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch
|
||||
Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch
|
||||
Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch
|
||||
Mux(jr_taken, PC_JR, // taken JALR
|
||||
Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB
|
||||
PC_4)))))); // PC+4
|
||||
PC_4))))))); // PC+4
|
||||
|
||||
io.dpath.wen_btb := !ex_btb_match && br_taken;
|
||||
io.dpath.wen_btb := !ex_reg_btb_hit && br_taken
|
||||
io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss;
|
||||
|
||||
io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)
|
||||
@ -678,8 +677,7 @@ class rocketCtrl extends Component
|
||||
val ctrl_killd = take_pc || ctrl_stalld;
|
||||
val ctrl_killf = take_pc || !io.imem.resp_val;
|
||||
|
||||
io.flush_inst := wb_reg_flush_inst;
|
||||
|
||||
io.dpath.flush_inst := wb_reg_flush_inst;
|
||||
io.dpath.stallf := ctrl_stallf;
|
||||
io.dpath.stalld := ctrl_stalld;
|
||||
io.dpath.killf := ctrl_killf;
|
||||
|
@ -47,7 +47,7 @@ class rocketDpath extends Component
|
||||
{
|
||||
val io = new ioDpathAll();
|
||||
|
||||
val btb = new rocketDpathBTB(8); // # of entries in BTB
|
||||
val btb = new rocketDpathBTB(4); // # of entries in BTB
|
||||
|
||||
val if_btb_target = btb.io.target;
|
||||
|
||||
@ -142,19 +142,16 @@ class rocketDpath extends Component
|
||||
|
||||
val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0))
|
||||
val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix
|
||||
|
||||
val ex_br_target_sel = Reg(io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE)
|
||||
val ex_br_target = Mux(ex_br_target_sel, ex_branch_target, ex_effective_address)
|
||||
btb.io.correct_target := ex_br_target
|
||||
|
||||
val if_next_pc =
|
||||
Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target),
|
||||
Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4,
|
||||
Mux(io.ctrl.sel_pc === PC_BR, ex_br_target,
|
||||
Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target,
|
||||
Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address,
|
||||
Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET
|
||||
Mux(io.ctrl.sel_pc === PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec),
|
||||
Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc,
|
||||
if_pc_plus4)))))); // PC_4
|
||||
if_pc_plus4))))))); // PC_4
|
||||
|
||||
when (!io.ctrl.stallf) {
|
||||
if_reg_pc <== if_next_pc.toUFix;
|
||||
@ -171,7 +168,8 @@ class rocketDpath extends Component
|
||||
btb.io.wen <> io.ctrl.wen_btb;
|
||||
btb.io.clr <> io.ctrl.clr_btb;
|
||||
btb.io.correct_pc := ex_reg_pc;
|
||||
io.ctrl.btb_match := id_reg_pc === ex_br_target;
|
||||
btb.io.correct_target := ex_branch_target
|
||||
btb.io.invalidate := io.ctrl.flush_inst
|
||||
|
||||
// instruction decode stage
|
||||
when (!io.ctrl.stalld) {
|
||||
|
@ -13,31 +13,50 @@ class ioDpathBTB extends Bundle()
|
||||
val target = UFix(VADDR_BITS, OUTPUT);
|
||||
val wen = Bool(INPUT);
|
||||
val clr = Bool(INPUT);
|
||||
val invalidate = Bool(INPUT);
|
||||
val correct_pc = UFix(VADDR_BITS, INPUT);
|
||||
val correct_target = UFix(VADDR_BITS, INPUT);
|
||||
}
|
||||
|
||||
// basic direct-mapped branch target buffer
|
||||
// fully-associative branch target buffer
|
||||
class rocketDpathBTB(entries: Int) extends Component
|
||||
{
|
||||
val io = new ioDpathBTB();
|
||||
|
||||
val addr_bits = ceil(log10(entries)/log10(2)).toInt;
|
||||
val idxlsb = 2;
|
||||
val idxmsb = idxlsb+addr_bits-1;
|
||||
val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1;
|
||||
val taglsb = (VADDR_BITS-idxlsb);
|
||||
|
||||
val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc(idxmsb,idxlsb), !io.clr, resetVal = Bool(false));
|
||||
val tag_target_array = Mem4(entries, io.wen, io.correct_pc(idxmsb,idxlsb),
|
||||
Cat(io.correct_pc(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb)))
|
||||
tag_target_array.setReadLatency(0);
|
||||
tag_target_array.setTarget('inst);
|
||||
val is_val = vb_array(io.current_pc(idxmsb,idxlsb));
|
||||
val tag_target = tag_target_array(io.current_pc(idxmsb, idxlsb));
|
||||
|
||||
io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc(VADDR_BITS-1, idxmsb+1));
|
||||
io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix;
|
||||
val io = new ioDpathBTB();
|
||||
|
||||
val do_update = io.wen || io.clr
|
||||
val expected_tag = Mux(do_update, io.correct_pc, io.current_pc)
|
||||
|
||||
val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU
|
||||
|
||||
var hit_reduction = Bool(false)
|
||||
val hit = Wire() { Bool() }
|
||||
val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) }
|
||||
|
||||
for (i <- 0 until entries) {
|
||||
val tag = Reg() { UFix() }
|
||||
val target = Reg() { UFix() }
|
||||
val valid = Reg(resetVal = Bool(false))
|
||||
val my_hit = valid && tag === expected_tag
|
||||
val my_clr = io.clr && my_hit || io.invalidate
|
||||
val my_wen = io.wen && (my_hit || !hit && UFix(i) === repl_way)
|
||||
|
||||
when (my_clr) {
|
||||
valid <== Bool(false)
|
||||
}
|
||||
when (my_wen) {
|
||||
valid <== Bool(true)
|
||||
tag <== io.correct_pc
|
||||
target <== io.correct_target
|
||||
}
|
||||
|
||||
hit_reduction = hit_reduction || my_hit
|
||||
mux.io.sel(i) := my_hit
|
||||
mux.io.in(i) := target
|
||||
}
|
||||
hit := hit_reduction
|
||||
|
||||
io.hit := hit
|
||||
io.target := mux.io.out.toUFix
|
||||
}
|
||||
|
||||
class ioDpathPCR extends Bundle()
|
||||
|
@ -37,9 +37,9 @@ class Top() extends Component {
|
||||
object top_main {
|
||||
def main(args: Array[String]) = {
|
||||
// Can turn off --debug and --vcd when done with debugging to improve emulator performance
|
||||
val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd");
|
||||
// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd");
|
||||
// val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug");
|
||||
// val cpu_args = args ++ Array("--target-dir", "generated-src");
|
||||
val cpu_args = args ++ Array("--target-dir", "generated-src");
|
||||
// Set variables based off of command flags
|
||||
// for(a <- args) {
|
||||
// a match {
|
||||
|
Loading…
Reference in New Issue
Block a user