From 50a283d311d3ec3162315d1b1f43ed10f51f54b8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 01:35:55 -0800 Subject: [PATCH] move store data generation into EX stage doing so removes it from the critical path of FP store unrecoding. --- rocket/src/main/scala/ctrl.scala | 34 +++++++++++----------- rocket/src/main/scala/dpath.scala | 9 ++++-- rocket/src/main/scala/fpu.scala | 42 ++++++++++++++++++++++------ rocket/src/main/scala/nbdcache.scala | 30 +++++++------------- 4 files changed, 68 insertions(+), 47 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 29c374b2..508c82b2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -41,6 +41,7 @@ class ioCtrlDpath extends Bundle() val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); val flush_inst = Bool(OUTPUT); + val ex_mem_type = UFix(3,OUTPUT) // enable/disable interrupts val irq_enable = Bool(OUTPUT); val irq_disable = Bool(OUTPUT); @@ -202,7 +203,7 @@ class rocketCtrl extends Component ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), - CFLUSH-> List(Y, N,BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), + CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), @@ -244,21 +245,21 @@ class rocketCtrl extends Component VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) )) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs @@ -708,6 +709,7 @@ class rocketCtrl extends Component io.dpath.wb_eret := wb_reg_eret; io.dpath.irq_disable := wb_reg_inst_di; io.dpath.irq_enable := wb_reg_inst_ei; + io.dpath.ex_mem_type := ex_reg_mem_type io.dtlb_val := ex_reg_mem_val; io.dtlb_kill := mem_reg_kill; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 2815fc48..acd8de2b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -301,7 +301,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; - io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) else ex_reg_rs2) + io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2) io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read @@ -342,11 +342,16 @@ class rocketDpath extends Component Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, ex_alu_out)))).toBits; // WB_ALU + + // subword store data generation + val storegen = new StoreDataGen + storegen.io.typ := io.ctrl.ex_mem_type + storegen.io.din := ex_reg_rs2 // memory stage mem_reg_pc := ex_reg_pc; mem_reg_inst := ex_reg_inst - mem_reg_rs2 := ex_reg_rs2 + mem_reg_rs2 := storegen.io.dout mem_reg_waddr := ex_reg_waddr; mem_reg_wdata := ex_wdata; mem_reg_raddr1 := ex_reg_raddr1 diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index b3f78003..878c0480 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -14,6 +14,9 @@ class rocketFPUDecoder extends Component val ren1 = Bool(OUTPUT) val ren2 = Bool(OUTPUT) val ren3 = Bool(OUTPUT) + val fromint = Bool(OUTPUT) + val toint = Bool(OUTPUT) + val store = Bool(OUTPUT) } // val fp = // ListLookup(io.dpath.inst, @@ -87,22 +90,27 @@ class rocketFPUDecoder extends Component val N = Bool(false) val Y = Bool(true) + val X = Bool(false) val decoder = ListLookup(io.inst, - List (N, N, N, N, N), - Array(FLW -> List(Y, Y, N, N, N), - FLD -> List(Y, Y, N, N, N), - FSW -> List(Y, N, N, Y, N), - FSD -> List(Y, N, N, Y, N), - MTFSR -> List(Y, N, N, N, N), - MFFSR -> List(Y, N, N, N, N) + List (N,X,X,X,X,X,X,X,X), + Array(FLW -> List(Y,Y,N,N,N,Y,N,N,N), + FLD -> List(Y,Y,N,N,N,N,N,N,N), + FSW -> List(Y,N,N,Y,N,Y,N,N,Y), + FSD -> List(Y,N,N,Y,N,N,N,N,Y), + MTFSR -> List(Y,N,N,N,N,X,N,Y,N), + MFFSR -> List(Y,N,N,N,N,X,N,Y,N) )) - val valid :: wen :: ren1 :: ren2 :: ren3 :: Nil = decoder + val valid :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: Nil = decoder io.valid := valid.toBool io.wen := wen.toBool io.ren1 := ren1.toBool io.ren2 := ren2.toBool io.ren3 := ren3.toBool + io.single := single.toBool + io.fromint := fromint.toBool + io.toint := toint.toBool + io.store := store.toBool } class ioDpathFPU extends Bundle { @@ -129,6 +137,9 @@ class rocketFPU extends Component ex_reg_inst := io.req_inst } + val fpdec = new rocketFPUDecoder + fpdec.io.inst := ex_reg_inst + // load response val dmem_resp_val_fpu = io.dmem.resp_val && io.dmem.resp_tag(0).toBool val load_wb = Reg(dmem_resp_val_fpu, resetVal = Bool(false)) @@ -147,5 +158,18 @@ class rocketFPU extends Component io.req_ready := Bool(true) - io.dpath.store_data := regfile(ex_reg_inst(21,17)) + val ex_rs1 = regfile(ex_reg_inst(16,12)) + val ex_rs2 = regfile(ex_reg_inst(21,17)) + val ex_rs3 = regfile(ex_reg_inst(26,22)) + + val fp_toint_data = Reg() { Bits() } + + when (fpdec.io.toint) { + fp_toint_data := ex_rs1 + } + when (fpdec.io.store) { + fp_toint_data := ex_rs2 + } + + io.dpath.store_data := fp_toint_data } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d72fbdfb..a7cd6748 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -673,7 +673,7 @@ class HellaCacheDM extends Component { val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } - val r_cpu_req_data = Reg() { Bits() } + val r_amo_replay_data = Reg() { Bits() } val p_store_valid = Reg(resetVal = Bool(false)) val p_store_data = Reg() { Bits() } @@ -705,16 +705,14 @@ class HellaCacheDM extends Component { r_cpu_req_cmd := io.cpu.req_cmd r_cpu_req_type := io.cpu.req_type r_cpu_req_tag := io.cpu.req_tag - when (req_write) { - r_cpu_req_data := io.cpu.req_data - } } when (replay_amo_val) { r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) r_cpu_req_cmd := replayer.io.data_req.bits.cmd r_cpu_req_type := replayer.io.data_req.bits.typ - r_cpu_req_data := replayer.io.data_req.bits.data + r_amo_replay_data := replayer.io.data_req.bits.data } + val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) @@ -813,15 +811,12 @@ class HellaCacheDM extends Component { meta.io.state_req.bits.data.dirty := tag_match // pending store data, also used for AMO RHS - val storegen = new StoreDataGen val amoalu = new AMOALU - storegen.io.typ := r_cpu_req_type - storegen.io.din := r_cpu_req_data when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd - p_store_data := storegen.io.dout + p_store_data := cpu_req_data } when (p_amo) { p_store_data := amoalu.io.out @@ -845,7 +840,7 @@ class HellaCacheDM extends Component { meta_arb.io.in(1).valid := mshr.io.meta_req.valid mshr.io.replay <> replayer.io.replay replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy - replayer.io.sdq_enq.bits := storegen.io.dout + replayer.io.sdq_enq.bits := cpu_req_data data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx // replays @@ -952,7 +947,7 @@ class HellaCacheAssoc extends Component { val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } - val r_cpu_req_data = Reg() { Bits() } + val r_amo_replay_data = Reg() { Bits() } val p_store_valid = Reg(resetVal = Bool(false)) val p_store_data = Reg() { Bits() } @@ -985,16 +980,14 @@ class HellaCacheAssoc extends Component { r_cpu_req_cmd := io.cpu.req_cmd r_cpu_req_type := io.cpu.req_type r_cpu_req_tag := io.cpu.req_tag - when (req_write) { - r_cpu_req_data := io.cpu.req_data - } } when (replay_amo_val) { r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) r_cpu_req_cmd := replayer.io.data_req.bits.cmd r_cpu_req_type := replayer.io.data_req.bits.typ - r_cpu_req_data := replayer.io.data_req.bits.data + r_amo_replay_data := replayer.io.data_req.bits.data } + val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) @@ -1107,16 +1100,13 @@ class HellaCacheAssoc extends Component { meta.io.state_req.bits.way_en := Mux(clear_valid, replaced_way_oh, hit_way_oh) // pending store data, also used for AMO RHS - val storegen = new StoreDataGen val amoalu = new AMOALU - storegen.io.typ := r_cpu_req_type - storegen.io.din := r_cpu_req_data when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd p_store_way_oh := Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh) - p_store_data := storegen.io.dout + p_store_data := cpu_req_data } when (p_amo) { p_store_data := amoalu.io.out @@ -1139,7 +1129,7 @@ class HellaCacheAssoc extends Component { mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy - replayer.io.sdq_enq.bits := storegen.io.dout + replayer.io.sdq_enq.bits := cpu_req_data data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy