1
0

move store data generation into EX stage

doing so removes it from the critical path of FP store unrecoding.
This commit is contained in:
Andrew Waterman 2012-02-12 01:35:55 -08:00
parent 725190d0ee
commit 50a283d311
4 changed files with 68 additions and 47 deletions

View File

@ -41,6 +41,7 @@ class ioCtrlDpath extends Bundle()
val mem_wen = Bool(OUTPUT);
val wb_wen = Bool(OUTPUT);
val flush_inst = Bool(OUTPUT);
val ex_mem_type = UFix(3,OUTPUT)
// enable/disable interrupts
val irq_enable = Bool(OUTPUT);
val irq_disable = Bool(OUTPUT);
@ -202,7 +203,7 @@ class rocketCtrl extends Component
ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N),
FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N),
FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N),
CFLUSH-> List(Y, N,BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y),
CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y),
MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N),
MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y),
RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
@ -244,21 +245,21 @@ class rocketCtrl extends Component
VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N)
VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N),
VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N)
))
val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs
@ -708,6 +709,7 @@ class rocketCtrl extends Component
io.dpath.wb_eret := wb_reg_eret;
io.dpath.irq_disable := wb_reg_inst_di;
io.dpath.irq_enable := wb_reg_inst_ei;
io.dpath.ex_mem_type := ex_reg_mem_type
io.dtlb_val := ex_reg_mem_val;
io.dtlb_kill := mem_reg_kill;

View File

@ -301,7 +301,7 @@ class rocketDpath extends Component
// D$ request interface (registered inside D$ module)
// other signals (req_val, req_rdy) connect to control module
io.dmem.req_addr := ex_effective_address.toUFix;
io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) else ex_reg_rs2)
io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2)
io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix
// processor control regfile read
@ -342,11 +342,16 @@ class rocketDpath extends Component
Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg,
Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg,
ex_alu_out)))).toBits; // WB_ALU
// subword store data generation
val storegen = new StoreDataGen
storegen.io.typ := io.ctrl.ex_mem_type
storegen.io.din := ex_reg_rs2
// memory stage
mem_reg_pc := ex_reg_pc;
mem_reg_inst := ex_reg_inst
mem_reg_rs2 := ex_reg_rs2
mem_reg_rs2 := storegen.io.dout
mem_reg_waddr := ex_reg_waddr;
mem_reg_wdata := ex_wdata;
mem_reg_raddr1 := ex_reg_raddr1

View File

@ -14,6 +14,9 @@ class rocketFPUDecoder extends Component
val ren1 = Bool(OUTPUT)
val ren2 = Bool(OUTPUT)
val ren3 = Bool(OUTPUT)
val fromint = Bool(OUTPUT)
val toint = Bool(OUTPUT)
val store = Bool(OUTPUT)
}
// val fp =
// ListLookup(io.dpath.inst,
@ -87,22 +90,27 @@ class rocketFPUDecoder extends Component
val N = Bool(false)
val Y = Bool(true)
val X = Bool(false)
val decoder = ListLookup(io.inst,
List (N, N, N, N, N),
Array(FLW -> List(Y, Y, N, N, N),
FLD -> List(Y, Y, N, N, N),
FSW -> List(Y, N, N, Y, N),
FSD -> List(Y, N, N, Y, N),
MTFSR -> List(Y, N, N, N, N),
MFFSR -> List(Y, N, N, N, N)
List (N,X,X,X,X,X,X,X,X),
Array(FLW -> List(Y,Y,N,N,N,Y,N,N,N),
FLD -> List(Y,Y,N,N,N,N,N,N,N),
FSW -> List(Y,N,N,Y,N,Y,N,N,Y),
FSD -> List(Y,N,N,Y,N,N,N,N,Y),
MTFSR -> List(Y,N,N,N,N,X,N,Y,N),
MFFSR -> List(Y,N,N,N,N,X,N,Y,N)
))
val valid :: wen :: ren1 :: ren2 :: ren3 :: Nil = decoder
val valid :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: Nil = decoder
io.valid := valid.toBool
io.wen := wen.toBool
io.ren1 := ren1.toBool
io.ren2 := ren2.toBool
io.ren3 := ren3.toBool
io.single := single.toBool
io.fromint := fromint.toBool
io.toint := toint.toBool
io.store := store.toBool
}
class ioDpathFPU extends Bundle {
@ -129,6 +137,9 @@ class rocketFPU extends Component
ex_reg_inst := io.req_inst
}
val fpdec = new rocketFPUDecoder
fpdec.io.inst := ex_reg_inst
// load response
val dmem_resp_val_fpu = io.dmem.resp_val && io.dmem.resp_tag(0).toBool
val load_wb = Reg(dmem_resp_val_fpu, resetVal = Bool(false))
@ -147,5 +158,18 @@ class rocketFPU extends Component
io.req_ready := Bool(true)
io.dpath.store_data := regfile(ex_reg_inst(21,17))
val ex_rs1 = regfile(ex_reg_inst(16,12))
val ex_rs2 = regfile(ex_reg_inst(21,17))
val ex_rs3 = regfile(ex_reg_inst(26,22))
val fp_toint_data = Reg() { Bits() }
when (fpdec.io.toint) {
fp_toint_data := ex_rs1
}
when (fpdec.io.store) {
fp_toint_data := ex_rs2
}
io.dpath.store_data := fp_toint_data
}

View File

@ -673,7 +673,7 @@ class HellaCacheDM extends Component {
val r_cpu_req_cmd = Reg() { Bits() }
val r_cpu_req_type = Reg() { Bits() }
val r_cpu_req_tag = Reg() { Bits() }
val r_cpu_req_data = Reg() { Bits() }
val r_amo_replay_data = Reg() { Bits() }
val p_store_valid = Reg(resetVal = Bool(false))
val p_store_data = Reg() { Bits() }
@ -705,16 +705,14 @@ class HellaCacheDM extends Component {
r_cpu_req_cmd := io.cpu.req_cmd
r_cpu_req_type := io.cpu.req_type
r_cpu_req_tag := io.cpu.req_tag
when (req_write) {
r_cpu_req_data := io.cpu.req_data
}
}
when (replay_amo_val) {
r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset)
r_cpu_req_cmd := replayer.io.data_req.bits.cmd
r_cpu_req_type := replayer.io.data_req.bits.typ
r_cpu_req_data := replayer.io.data_req.bits.data
r_amo_replay_data := replayer.io.data_req.bits.data
}
val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data)
// refill counter
val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES)))
@ -813,15 +811,12 @@ class HellaCacheDM extends Component {
meta.io.state_req.bits.data.dirty := tag_match
// pending store data, also used for AMO RHS
val storegen = new StoreDataGen
val amoalu = new AMOALU
storegen.io.typ := r_cpu_req_type
storegen.io.din := r_cpu_req_data
when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) {
p_store_idx := r_cpu_req_idx
p_store_type := r_cpu_req_type
p_store_cmd := r_cpu_req_cmd
p_store_data := storegen.io.dout
p_store_data := cpu_req_data
}
when (p_amo) {
p_store_data := amoalu.io.out
@ -845,7 +840,7 @@ class HellaCacheDM extends Component {
meta_arb.io.in(1).valid := mshr.io.meta_req.valid
mshr.io.replay <> replayer.io.replay
replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy
replayer.io.sdq_enq.bits := storegen.io.dout
replayer.io.sdq_enq.bits := cpu_req_data
data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx
// replays
@ -952,7 +947,7 @@ class HellaCacheAssoc extends Component {
val r_cpu_req_cmd = Reg() { Bits() }
val r_cpu_req_type = Reg() { Bits() }
val r_cpu_req_tag = Reg() { Bits() }
val r_cpu_req_data = Reg() { Bits() }
val r_amo_replay_data = Reg() { Bits() }
val p_store_valid = Reg(resetVal = Bool(false))
val p_store_data = Reg() { Bits() }
@ -985,16 +980,14 @@ class HellaCacheAssoc extends Component {
r_cpu_req_cmd := io.cpu.req_cmd
r_cpu_req_type := io.cpu.req_type
r_cpu_req_tag := io.cpu.req_tag
when (req_write) {
r_cpu_req_data := io.cpu.req_data
}
}
when (replay_amo_val) {
r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset)
r_cpu_req_cmd := replayer.io.data_req.bits.cmd
r_cpu_req_type := replayer.io.data_req.bits.typ
r_cpu_req_data := replayer.io.data_req.bits.data
r_amo_replay_data := replayer.io.data_req.bits.data
}
val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data)
// refill counter
val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES)))
@ -1107,16 +1100,13 @@ class HellaCacheAssoc extends Component {
meta.io.state_req.bits.way_en := Mux(clear_valid, replaced_way_oh, hit_way_oh)
// pending store data, also used for AMO RHS
val storegen = new StoreDataGen
val amoalu = new AMOALU
storegen.io.typ := r_cpu_req_type
storegen.io.din := r_cpu_req_data
when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) {
p_store_idx := r_cpu_req_idx
p_store_type := r_cpu_req_type
p_store_cmd := r_cpu_req_cmd
p_store_way_oh := Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh)
p_store_data := storegen.io.dout
p_store_data := cpu_req_data
}
when (p_amo) {
p_store_data := amoalu.io.out
@ -1139,7 +1129,7 @@ class HellaCacheAssoc extends Component {
mshr.io.meta_req <> meta_arb.io.in(1)
mshr.io.replay <> replayer.io.replay
replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy
replayer.io.sdq_enq.bits := storegen.io.dout
replayer.io.sdq_enq.bits := cpu_req_data
data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx
data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh
replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy