1
0

reduce superfluous replays

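We now replay after a cache miss only if we mis-scheduled the use of a load, i.e. a dependent instruction had already picked up the load's data through the memory-stage bypass.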
This commit is contained in:
Andrew Waterman 2012-01-01 21:28:38 -08:00
parent efc623cc36
commit eb657dd250
4 changed files with 32 additions and 19 deletions

View File

@ -68,9 +68,10 @@ object Constants
val WB_X = UFix(0, 3);
val WB_PC = UFix(0, 3);
val WB_ALU = UFix(1, 3);
val WB_PCR = UFix(2, 3);
val WB_TSC = UFix(3, 3);
val WB_PCR = UFix(1, 3);
val WB_ALU = UFix(2, 3);
val WB_TSC = UFix(4, 3);
val WB_IRT = UFix(5, 3);
val N = UFix(0, 1);
val Y = UFix(1, 1);

View File

@ -59,6 +59,7 @@ class ioCtrlDpath extends Bundle()
val div_result_val = Bool('input);
val mul_rdy = Bool('input);
val mul_result_val = Bool('input);
val mem_lu_bypass = Bool('input);
val ex_waddr = UFix(5,'input); // write addr from execute stage
val mem_waddr = UFix(5,'input); // write addr from memory stage
val wb_waddr = UFix(5,'input); // write addr from writeback stage
@ -261,6 +262,8 @@ class rocketCtrl extends Component
MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y),
MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y),
RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N),
RDCYCLE-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N),
RDINSTRET->List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N),
// Instructions that have not yet been implemented
// Faking these for now so akaros will boot
@ -549,7 +552,7 @@ class rocketCtrl extends Component
// replay execute stage PC when the D$ is blocked, when the D$ misses on a
// load whose data was bypassed to a dependent instruction (mem_lu_bypass),
// for privileged instructions, and for fence.i instructions
val ex_hazard = io.dmem.resp_miss || mem_reg_privileged || mem_reg_flush_inst
val ex_hazard = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst
val mem_kill_ex = kill_mem || take_pc_mem
val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val
val ex_kill_dtlb = mem_kill_ex || ex_hazard || !io.dmem.req_rdy
@ -600,7 +603,12 @@ class rocketCtrl extends Component
((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) ||
(id_ren2 && (id_raddr2 === io.dpath.mem_waddr)));
val lu_stall = lu_stall_ex || lu_stall_mem;
val lu_stall_wb =
dcache_miss &&
((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) ||
(id_ren2 && (id_raddr2 === io.dpath.wb_waddr)));
val lu_stall = lu_stall_ex || lu_stall_mem || lu_stall_wb;
// check for divide and multiply instructions in ex,mem,wb stages
val dm_stall_ex =

View File

@ -194,24 +194,27 @@ class rocketDpath extends Component
UFix(0, 5)))));
// bypass muxes
val rs1_mem_lu_bypass = id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr;
val id_rs1 =
Mux(io.ctrl.div_wb, div_result,
Mux(io.ctrl.mul_wb, mul_result,
Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata,
Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata,
Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, io.dmem.resp_data,
Mux(rs1_mem_lu_bypass, io.dmem.resp_data,
Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, io.dmem.resp_data_subword,
Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata,
id_rdata1)))))));
val rs2_mem_lu_bypass = id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr;
val id_rs2 =
Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata,
Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata,
Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, io.dmem.resp_data,
Mux(rs2_mem_lu_bypass, io.dmem.resp_data,
Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, io.dmem.resp_data_subword,
Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata,
id_rdata2)))));
io.ctrl.mem_lu_bypass := rs1_mem_lu_bypass || rs2_mem_lu_bypass;
io.ctrl.inst := id_reg_inst;
// execute stage
@ -251,16 +254,14 @@ class rocketDpath extends Component
}
val ex_alu_in2 =
Mux(ex_reg_ctrl_sel_alu2 === A2_0, UFix(0, 64),
Mux(ex_reg_ctrl_sel_alu2 === A2_SEXT, ex_sign_extend,
Mux(ex_reg_ctrl_sel_alu2 === A2_SPLIT, ex_sign_extend_split,
Mux(ex_reg_ctrl_sel_alu2 === A2_RS2, ex_reg_rs2,
UFix(0, 64)))));
UFix(0, 64)))); // A2_0
val ex_alu_in1 =
Mux(ex_reg_ctrl_sel_alu1 === A1_RS1, ex_reg_rs1,
Mux(ex_reg_ctrl_sel_alu1 === A1_LUI, Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12)),
UFix(0, 64)));
Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12))); // A1_LUI
val ex_alu_shamt =
Cat(ex_alu_in2(5) & ex_reg_ctrl_fn_dw === DW_64, ex_alu_in2(4,0)).toUFix;
@ -330,15 +331,18 @@ class rocketDpath extends Component
// time stamp counter
val tsc_reg = Reg(resetVal = UFix(0,64));
tsc_reg <== tsc_reg + UFix(1);
// instructions retired counter
val irt_reg = Reg(resetVal = UFix(0,64));
when (mem_reg_valid) { irt_reg <== irt_reg + UFix(1); }
// writeback select mux
ex_wdata :=
Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1,
Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_reg_pc_plus4(VADDR_BITS-1)), ex_reg_pc_plus4),
Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out,
Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr,
Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg,
Bits(0, 64)))))).toBits;
Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg,
ex_alu_out))))).toBits; // WB_ALU
// memory stage
mem_reg_pc <== ex_reg_pc;

View File

@ -24,11 +24,11 @@ class rocketDpathALU extends Component
// ADD, SUB
val sub = (io.fn === FN_SUB) || (io.fn === FN_SLT) || (io.fn === FN_SLTU)
val adder_rhs = Mux(sub, ~io.in2, io.in2)
val adder_out = (io.in1 + adder_rhs + sub.toUFix)(63,0)
val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0)
// SLT, SLTU
val less = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in1(63))
val lessu = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in2(63))
val less = Mux(io.in1(63) === io.in2(63), sum(63), io.in1(63))
val lessu = Mux(io.in1(63) === io.in2(63), sum(63), io.in2(63))
// SLL, SRL, SRA
val sra = (io.fn === FN_SRA)
@ -42,8 +42,8 @@ class rocketDpathALU extends Component
val out64 = Wire { Bits(64) }
switch(io.fn)
{
is(FN_ADD) { out64 <== adder_out }
is(FN_SUB) { out64 <== adder_out }
is(FN_ADD) { out64 <== sum }
is(FN_SUB) { out64 <== sum }
is(FN_SLT) { out64 <== less }
is(FN_SLTU) { out64 <== lessu }
is(FN_AND) { out64 <== io.in1 & io.in2 }
@ -55,7 +55,7 @@ class rocketDpathALU extends Component
val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31)))
io.out := Cat(out_hi, out64(31,0)).toUFix
io.adder_out := adder_out
io.adder_out := sum
}
}