diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index d0b64d9f..579ea6f8 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -21,20 +21,26 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module for (i <- 1 until n) io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid - io.mem.req.bits := io.requestor(n-1).req.bits - io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UInt(n-1, log2Up(n))) - for (i <- n-2 to 0 by -1) { + for (i <- n-1 to 0 by -1) { val req = io.requestor(i).req - when (req.valid) { + def connect_s0() = { io.mem.req.bits.cmd := req.bits.cmd io.mem.req.bits.typ := req.bits.typ io.mem.req.bits.addr := req.bits.addr io.mem.req.bits.phys := req.bits.phys io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n))) } - when (r_valid(i)) { - io.mem.req.bits.kill := req.bits.kill - io.mem.req.bits.data := req.bits.data + def connect_s1() = { + io.mem.s1_kill := io.requestor(i).s1_kill + io.mem.s1_data := io.requestor(i).s1_data + } + + if (i == n-1) { + connect_s0() + connect_s1() + } else { + when (req.valid) { connect_s0() } + when (r_valid(i)) { connect_s1() } } } @@ -44,10 +50,9 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).xcpt := io.mem.xcpt io.requestor(i).ordered := io.mem.ordered + io.requestor(i).s2_nack := io.mem.s2_nack && tag_hit resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) - resp.bits.nack := io.mem.resp.bits.nack && tag_hit - resp.bits.replay := io.mem.resp.bits.replay && tag_hit io.requestor(i).replay_next.valid := io.mem.replay_next.valid && io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index e0d7a1fb..602b13e0 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -104,10 +104,8 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.idx := io.cpu.npc icache.io.invalidate := io.cpu.invalidate - icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || - tlb.io.resp.miss || tlb.io.resp.xcpt_if || - icmiss || io.ptw.invalidate + icache.io.s1_ppn := tlb.io.resp.ppn + icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.bits.pc := s2_pc diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index df6c6c36..94c7822e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -16,8 +16,6 @@ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) { val idx = UInt(width = pgIdxBits) - val ppn = UInt(width = ppnBits) // delayed one cycle - val kill = Bool() // delayed one cycle } class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters { @@ -28,6 +26,9 @@ class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1Cache class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { val io = new Bundle { val req = Valid(new ICacheReq).flip + val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req + val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req + val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) val mem = new ClientUncachedTileLinkIO @@ -47,18 +48,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val s1_valid = Reg(init=Bool(false)) val s1_pgoff = Reg(UInt(width = pgIdxBits)) - val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt + val s1_addr = Cat(io.s1_ppn, s1_pgoff).toUInt val s1_tag = s1_addr(tagBits+untagBits-1,untagBits) val s0_valid = io.req.valid || s1_valid && stall val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx) - s1_valid := io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill + s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill when (io.req.valid && rdy) { s1_pgoff := io.req.bits.idx } - val out_valid = s1_valid && !io.req.bits.kill && state === s_ready + val out_valid = s1_valid && !io.s1_kill && state === s_ready val s1_idx = s1_addr(untagBits-1,blockOffBits) val s1_offset = s1_addr(blockOffBits-1,0) val s1_hit = out_valid && s1_any_tag_hit diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9f691008..83d7b9da 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -33,6 +33,11 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters { val nMSHRs = p(NMSHRs) val nIOMSHRs = 1 val lrscCycles = p(LRSCCycles) + + require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed + require(isPow2(nSets)) + require(rowBits <= outerDataBits) + require(untagBits <= pgIdxBits) } abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module @@ -63,7 +68,6 @@ trait HasMissInfo extends HasL1HellaCacheParameters { class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p) with HasCoreMemOp { - val kill = Bool() val phys = Bool() } @@ -72,7 +76,6 @@ class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) w class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p) with HasCoreMemOp with HasCoreData { - val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val has_data = Bool() val data_word_bypass = Bits(width = coreDataBits) @@ -92,6 +95,10 @@ class HellaCacheExceptions extends Bundle { // interface between D$ and processor/DTLB class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new HellaCacheReq) + val s1_kill = Bool(OUTPUT) // kill previous cycle's req + val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req + val s2_nack = Bool(INPUT) // req from two cycles ago is rejected + val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val xcpt = (new HellaCacheExceptions).asInput @@ -207,8 +214,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.resp.bits.has_data := isRead(req.cmd) io.resp.bits.data := loadgen.data | req_cmd_sc io.resp.bits.store_data := req.data - io.resp.bits.nack := Bool(false) - io.resp.bits.replay := io.resp.valid + io.resp.bits.replay := Bool(true) when (io.req.fire()) { req := io.req.bits @@ -764,11 +770,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val mem = new ClientTileLinkIO } - require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed - require(isPow2(nSets)) require(isPow2(nWays)) // TODO: relax this - require(rowBits <= outerDataBits) - require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) val prober = Module(new ProbeUnit) @@ -777,7 +779,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.req.ready := Bool(true) val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) val s1_req = Reg(io.cpu.req.bits) - val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill + val s1_valid_masked = s1_valid && !io.cpu.s1_kill val s1_replay = Reg(init=Bool(false)) val s1_clk_en = Reg(Bool()) @@ -826,12 +828,11 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) when (s1_clk_en) { - s2_req.kill := s1_req.kill s2_req.typ := s1_req.typ s2_req.phys := s1_req.phys s2_req.addr := s1_addr when (s1_write) { - s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data) + s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data) } when (s1_recycled) { s2_req.data := s1_req.data } s2_req.tag := s1_req.tag @@ -1075,7 +1076,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { cache_resp.bits.has_data := isRead(s2_req.cmd) cache_resp.bits.data := loadgen.data | s2_sc_fail cache_resp.bits.store_data := s2_req.data - cache_resp.bits.nack := s2_valid && s2_nack cache_resp.bits.replay := s2_replay val uncache_resp = Wire(Valid(new HellaCacheResp)) @@ -1083,6 +1083,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { uncache_resp.valid := mshrs.io.resp.valid mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay)) + io.cpu.s2_nack := s2_valid && s2_nack io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp) io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid @@ -1111,17 +1112,15 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module req_arb.io.in(1).bits := io.requestor.req.bits io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready - val s2_nack = io.cache.resp.bits.nack - val s3_nack = Reg(next=s2_nack) - val s0_req_fire = io.cache.req.fire() val s1_req_fire = Reg(next=s0_req_fire) val s2_req_fire = Reg(next=s1_req_fire) + val s3_nack = Reg(next=io.cache.s2_nack) io.cache.req <> req_arb.io.out - io.cache.req.bits.kill := s2_nack io.cache.req.bits.phys := Bool(true) - io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) + io.cache.s1_kill := io.cache.s2_nack + io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) /* replay queues: replayq1 holds the older request. @@ -1147,13 +1146,13 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module replayq2.io.enq.bits.data := io.cache.resp.bits.store_data replayq2.io.deq.ready := Bool(false) - when (s2_nack) { + when (io.cache.s2_nack) { replayq1.io.enq.valid := Bool(true) replaying_cmb := Bool(true) } // when replaying request got sunk into the d$ - when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) { + when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !io.cache.s2_nack) { // see if there's a stashed request in replayq2 when (replayq2.io.deq.valid) { replayq1.io.enq.valid := Bool(true) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 6ec57d5c..09b2328d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -118,8 +118,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD) io.mem.req.bits.typ := MT_D io.mem.req.bits.addr := pte_addr - io.mem.req.bits.kill := Bool(false) - io.mem.req.bits.data := pte_wdata.toBits + io.mem.s1_data := pte_wdata.toBits + io.mem.s1_kill := Bool(false) val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) @@ -152,7 +152,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { } } is (s_wait) { - when (io.mem.resp.bits.nack) { + when (io.mem.s2_nack) { state := s_req } when (io.mem.resp.valid) { @@ -172,7 +172,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { } } is (s_wait_dirty) { - when (io.mem.resp.bits.nack) { + when (io.mem.s2_nack) { state := s_set_dirty } when (io.mem.resp.valid) { diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 351c1244..81778134 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -376,7 +376,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc - val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay + val replay_wb_common = io.dmem.s2_nack || wb_reg_replay val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt @@ -388,9 +388,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool - val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1) + val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1 val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data - val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data + val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) val ll_wdata = Wire(init = div.io.resp.bits.data) @@ -532,14 +532,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.fpu.dmem_resp_tag := dmem_resp_waddr io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem - io.dmem.req.bits.kill := killm_common || mem_xcpt + val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp) + require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth) + io.dmem.req.bits.tag := ex_dcache_tag io.dmem.req.bits.cmd := ex_ctrl.mem_cmd io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out) - io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) - io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) - require(coreDCacheReqTagBits >= 6) + io.dmem.s1_kill := killm_common || mem_xcpt + io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) io.dmem.invalidate_lr := wb_xcpt io.rocc.cmd.valid := wb_rocc_val