reduce superfluous replays
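We only replay after a D$ miss if we mis-scheduled the use of a load, i.e. a dependent instruction already consumed the missing load's data through the memory-stage load-use bypass.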
This commit is contained in:
parent
efc623cc36
commit
eb657dd250
@ -68,9 +68,10 @@ object Constants
|
||||
|
||||
val WB_X = UFix(0, 3);
|
||||
val WB_PC = UFix(0, 3);
|
||||
val WB_ALU = UFix(1, 3);
|
||||
val WB_PCR = UFix(2, 3);
|
||||
val WB_TSC = UFix(3, 3);
|
||||
val WB_PCR = UFix(1, 3);
|
||||
val WB_ALU = UFix(2, 3);
|
||||
val WB_TSC = UFix(4, 3);
|
||||
val WB_IRT = UFix(5, 3);
|
||||
|
||||
val N = UFix(0, 1);
|
||||
val Y = UFix(1, 1);
|
||||
|
@ -59,6 +59,7 @@ class ioCtrlDpath extends Bundle()
|
||||
val div_result_val = Bool('input);
|
||||
val mul_rdy = Bool('input);
|
||||
val mul_result_val = Bool('input);
|
||||
val mem_lu_bypass = Bool('input);
|
||||
val ex_waddr = UFix(5,'input); // write addr from execute stage
|
||||
val mem_waddr = UFix(5,'input); // write addr from memory stage
|
||||
val wb_waddr = UFix(5,'input); // write addr from writeback stage
|
||||
@ -261,6 +262,8 @@ class rocketCtrl extends Component
|
||||
MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y),
|
||||
MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y),
|
||||
RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N),
|
||||
RDCYCLE-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N),
|
||||
RDINSTRET->List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N),
|
||||
|
||||
// Instructions that have not yet been implemented
|
||||
// Faking these for now so akaros will boot
|
||||
@ -549,7 +552,7 @@ class rocketCtrl extends Component
|
||||
|
||||
// replay execute stage PC when the D$ is blocked, when the D$ misses,
|
||||
// for privileged instructions, and for fence.i instructions
|
||||
val ex_hazard = io.dmem.resp_miss || mem_reg_privileged || mem_reg_flush_inst
|
||||
val ex_hazard = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst
|
||||
val mem_kill_ex = kill_mem || take_pc_mem
|
||||
val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val
|
||||
val ex_kill_dtlb = mem_kill_ex || ex_hazard || !io.dmem.req_rdy
|
||||
@ -600,7 +603,12 @@ class rocketCtrl extends Component
|
||||
((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) ||
|
||||
(id_ren2 && (id_raddr2 === io.dpath.mem_waddr)));
|
||||
|
||||
val lu_stall = lu_stall_ex || lu_stall_mem;
|
||||
val lu_stall_wb =
|
||||
dcache_miss &&
|
||||
((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) ||
|
||||
(id_ren2 && (id_raddr2 === io.dpath.wb_waddr)));
|
||||
|
||||
val lu_stall = lu_stall_ex || lu_stall_mem || lu_stall_wb;
|
||||
|
||||
// check for divide and multiply instructions in ex,mem,wb stages
|
||||
val dm_stall_ex =
|
||||
|
@ -194,24 +194,27 @@ class rocketDpath extends Component
|
||||
UFix(0, 5)))));
|
||||
|
||||
// bypass muxes
|
||||
val rs1_mem_lu_bypass = id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr;
|
||||
val id_rs1 =
|
||||
Mux(io.ctrl.div_wb, div_result,
|
||||
Mux(io.ctrl.mul_wb, mul_result,
|
||||
Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata,
|
||||
Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata,
|
||||
Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, io.dmem.resp_data,
|
||||
Mux(rs1_mem_lu_bypass, io.dmem.resp_data,
|
||||
Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, io.dmem.resp_data_subword,
|
||||
Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata,
|
||||
id_rdata1)))))));
|
||||
|
||||
val rs2_mem_lu_bypass = id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr;
|
||||
val id_rs2 =
|
||||
Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata,
|
||||
Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata,
|
||||
Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, io.dmem.resp_data,
|
||||
Mux(rs2_mem_lu_bypass, io.dmem.resp_data,
|
||||
Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, io.dmem.resp_data_subword,
|
||||
Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata,
|
||||
id_rdata2)))));
|
||||
|
||||
io.ctrl.mem_lu_bypass := rs1_mem_lu_bypass || rs2_mem_lu_bypass;
|
||||
io.ctrl.inst := id_reg_inst;
|
||||
|
||||
// execute stage
|
||||
@ -251,16 +254,14 @@ class rocketDpath extends Component
|
||||
}
|
||||
|
||||
val ex_alu_in2 =
|
||||
Mux(ex_reg_ctrl_sel_alu2 === A2_0, UFix(0, 64),
|
||||
Mux(ex_reg_ctrl_sel_alu2 === A2_SEXT, ex_sign_extend,
|
||||
Mux(ex_reg_ctrl_sel_alu2 === A2_SPLIT, ex_sign_extend_split,
|
||||
Mux(ex_reg_ctrl_sel_alu2 === A2_RS2, ex_reg_rs2,
|
||||
UFix(0, 64)))));
|
||||
UFix(0, 64)))); // A2_0
|
||||
|
||||
val ex_alu_in1 =
|
||||
Mux(ex_reg_ctrl_sel_alu1 === A1_RS1, ex_reg_rs1,
|
||||
Mux(ex_reg_ctrl_sel_alu1 === A1_LUI, Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12)),
|
||||
UFix(0, 64)));
|
||||
Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12))); // A1_LUI
|
||||
|
||||
val ex_alu_shamt =
|
||||
Cat(ex_alu_in2(5) & ex_reg_ctrl_fn_dw === DW_64, ex_alu_in2(4,0)).toUFix;
|
||||
@ -330,15 +331,18 @@ class rocketDpath extends Component
|
||||
// time stamp counter
|
||||
val tsc_reg = Reg(resetVal = UFix(0,64));
|
||||
tsc_reg <== tsc_reg + UFix(1);
|
||||
// instructions retired counter
|
||||
val irt_reg = Reg(resetVal = UFix(0,64));
|
||||
when (mem_reg_valid) { irt_reg <== irt_reg + UFix(1); }
|
||||
|
||||
// writeback select mux
|
||||
ex_wdata :=
|
||||
Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1,
|
||||
Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_reg_pc_plus4(VADDR_BITS-1)), ex_reg_pc_plus4),
|
||||
Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out,
|
||||
Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr,
|
||||
Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg,
|
||||
Bits(0, 64)))))).toBits;
|
||||
Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg,
|
||||
ex_alu_out))))).toBits; // WB_ALU
|
||||
|
||||
// memory stage
|
||||
mem_reg_pc <== ex_reg_pc;
|
||||
|
@ -24,11 +24,11 @@ class rocketDpathALU extends Component
|
||||
// ADD, SUB
|
||||
val sub = (io.fn === FN_SUB) || (io.fn === FN_SLT) || (io.fn === FN_SLTU)
|
||||
val adder_rhs = Mux(sub, ~io.in2, io.in2)
|
||||
val adder_out = (io.in1 + adder_rhs + sub.toUFix)(63,0)
|
||||
val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0)
|
||||
|
||||
// SLT, SLTU
|
||||
val less = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in1(63))
|
||||
val lessu = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in2(63))
|
||||
val less = Mux(io.in1(63) === io.in2(63), sum(63), io.in1(63))
|
||||
val lessu = Mux(io.in1(63) === io.in2(63), sum(63), io.in2(63))
|
||||
|
||||
// SLL, SRL, SRA
|
||||
val sra = (io.fn === FN_SRA)
|
||||
@ -42,8 +42,8 @@ class rocketDpathALU extends Component
|
||||
val out64 = Wire { Bits(64) }
|
||||
switch(io.fn)
|
||||
{
|
||||
is(FN_ADD) { out64 <== adder_out }
|
||||
is(FN_SUB) { out64 <== adder_out }
|
||||
is(FN_ADD) { out64 <== sum }
|
||||
is(FN_SUB) { out64 <== sum }
|
||||
is(FN_SLT) { out64 <== less }
|
||||
is(FN_SLTU) { out64 <== lessu }
|
||||
is(FN_AND) { out64 <== io.in1 & io.in2 }
|
||||
@ -55,7 +55,7 @@ class rocketDpathALU extends Component
|
||||
|
||||
val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31)))
|
||||
io.out := Cat(out_hi, out64(31,0)).toUFix
|
||||
io.adder_out := adder_out
|
||||
io.adder_out := sum
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user