1
0

Fix fubar long-latency writeback control logic

Load miss writebacks that happened at the same time as a multiplication
weren't working.  Hopefully this does it.
This commit is contained in:
Andrew Waterman 2014-01-14 04:02:43 -08:00
parent e8486817e6
commit 31060ea8ae
3 changed files with 8 additions and 4 deletions

View File

@ -44,5 +44,6 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu
resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n))
resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.nack := io.mem.resp.bits.nack && tag_hit
resp.bits.replay := io.mem.resp.bits.replay && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit
resp.bits.load_replay_next := io.mem.resp.bits.load_replay_next && tag_hit
} }
} }

View File

@ -538,9 +538,10 @@ class Control(implicit conf: RocketConfiguration) extends Module
(mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(10)), (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(10)),
(mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11))))
val dcache_kill_mem = mem_reg_wen && io.dmem.resp.bits.load_replay_next // structural hazard on writeback port
val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem
val replay_mem = mem_reg_replay || fpu_kill_mem val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = take_pc_wb || mem_reg_xcpt || !mem_reg_valid val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem
wb_reg_replay := replay_mem && !take_pc_wb wb_reg_replay := replay_mem && !take_pc_wb
@ -573,8 +574,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
} }
val replay_wb_common = val replay_wb_common =
io.dmem.resp.bits.nack || wb_reg_replay || io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay
io.dpath.ll_wen && wb_reg_wen || io.dpath.csr_replay
val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common
val replay_wb = replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready val replay_wb = replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready

View File

@ -697,6 +697,7 @@ class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle {
class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle { class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle {
val nack = Bool() // comes 2 cycles after req.fire val nack = Bool() // comes 2 cycles after req.fire
val replay = Bool() val replay = Bool()
val load_replay_next = Bool() // next cycle, replay and has_data will be true
val typ = Bits(width = 3) val typ = Bits(width = 3)
val has_data = Bool() val has_data = Bool()
val data = Bits(width = conf.databits) val data = Bits(width = conf.databits)
@ -762,6 +763,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
val s1_recycled = RegEnable(s2_recycle, s1_clk_en) val s1_recycled = RegEnable(s2_recycle, s1_clk_en)
val s1_read = isRead(s1_req.cmd) val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd) val s1_write = isWrite(s1_req.cmd)
val s1_sc = s1_req.cmd === M_XSC
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
val dtlb = Module(new TLB(8)) val dtlb = Module(new TLB(8))
@ -1032,6 +1034,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits.nack := s2_valid && s2_nack
io.cpu.resp.bits := s2_req io.cpu.resp.bits := s2_req
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
io.cpu.resp.bits.load_replay_next := s1_replay && (s1_read || s1_sc)
io.cpu.resp.bits.replay := s2_replay io.cpu.resp.bits.replay := s2_replay
io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data := loadgen.word
io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte)