diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index c9e1675e..353cdde9 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -90,8 +90,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val q_depth = if (rational) (2 min maxUncachedInFlight-1) else 0 val tl_out_a = Wire(tl_out.a) tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true)) - val tl_out_c = Wire(tl_out.c) - tl_out.c <> (if (cacheParams.acquireBeforeRelease) Queue(tl_out_c, cacheDataBeats, flow = true) else tl_out_c) + val (tl_out_c, release_queue_empty) = + if (cacheParams.acquireBeforeRelease) { + val q = Module(new Queue(tl_out.c.bits, cacheDataBeats, flow = true)) + tl_out.c <> q.io.deq + (q.io.enq, q.io.count === 0) + } else { + (tl_out.c, true.B) + } val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false)) @@ -116,6 +122,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_retry :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8) val cached_grant_wait = Reg(init=Bool(false)) val release_ack_wait = Reg(init=Bool(false)) + val can_acquire_before_release = !release_ack_wait && release_queue_empty val release_state = Reg(init=s_ready) val any_pstore_valid = Wire(Bool()) val inWriteback = release_state.isOneOf(s_voluntary_writeback, s_probe_rep_dirty) @@ -238,9 +245,9 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s2_data_corrected = (s2_data_decoded.map(_.corrected): Seq[UInt]).asUInt val s2_data_uncorrected = (s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt val s2_valid_hit_pre_data_ecc = s2_valid_masked && s2_readwrite && !s2_meta_error && s2_hit - val s2_valid_data_error = s2_valid_hit_pre_data_ecc && s2_data_error && !release_ack_wait + val s2_valid_data_error = s2_valid_hit_pre_data_ecc && s2_data_error && can_acquire_before_release val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error && (!s2_waw_hazard || s2_store_merge) - val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit && !release_ack_wait + val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit && can_acquire_before_release val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR val s2_victimize = Bool(!usingDataScratchpad) && (s2_valid_cached_miss || s2_valid_data_error || s2_flush_valid) val s2_valid_uncached_pending = s2_valid_miss && s2_uncached && !uncachedInFlight.asUInt.andR @@ -497,8 +504,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { metaArb.io.in(6).bits.data := metaArb.io.in(4).bits.data // release - val (c_first, c_last, releaseDone, c_count) = edge.count(tl_out.c) - val releaseRejected = tl_out.c.valid && !tl_out.c.ready + val (c_first, c_last, releaseDone, c_count) = edge.count(tl_out_c) + val releaseRejected = tl_out_c.valid && !tl_out_c.ready val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) val releaseDataBeat = Cat(UInt(0), c_count) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) @@ -507,8 +514,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val cleanReleaseMessage = edge.ProbeAck(b = probe_bits, reportPermissions = s2_report_param) val dirtyReleaseMessage = edge.ProbeAck(b = probe_bits, reportPermissions = s2_report_param, data = 0.U) - tl_out.c.valid := s2_release_data_valid - tl_out.c.bits := nackResponseMessage + tl_out_c.valid := s2_release_data_valid + tl_out_c.bits := nackResponseMessage val newCoh = Wire(init = probeNewCoh) releaseWay := s2_probe_way @@ -524,11 +531,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { }.elsewhen (s2_prb_ack_data) { release_state := s_probe_rep_dirty }.elsewhen (s2_probe_state.isValid()) { - tl_out.c.valid := true - tl_out.c.bits := cleanReleaseMessage + tl_out_c.valid := true + tl_out_c.bits := cleanReleaseMessage release_state := Mux(releaseDone, s_probe_write_meta, s_probe_rep_clean) }.otherwise { - tl_out.c.valid := true + tl_out_c.valid := true probeNack := !releaseDone release_state := Mux(releaseDone, s_ready, s_probe_rep_miss) } @@ -543,21 +550,21 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } } when (release_state === s_probe_rep_miss) { - tl_out.c.valid := true + tl_out_c.valid := true when (releaseDone) { release_state := s_ready } } when (release_state === s_probe_rep_clean) { - tl_out.c.valid := true - tl_out.c.bits := cleanReleaseMessage + tl_out_c.valid := true + tl_out_c.bits := cleanReleaseMessage when (releaseDone) { release_state := s_probe_write_meta } } when (release_state === s_probe_rep_dirty) { - tl_out.c.bits := dirtyReleaseMessage + tl_out_c.bits := dirtyReleaseMessage when (releaseDone) { release_state := s_probe_write_meta } } when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) { if (edge.manager.anySupportAcquireB) - tl_out.c.bits := edge.Release(fromSource = 0.U, + tl_out_c.bits := edge.Release(fromSource = 0.U, toAddress = 0.U, lgSize = lgCacheBlockBytes, shrinkPermissions = s2_shrink_param, @@ -565,23 +572,23 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { newCoh := voluntaryNewCoh releaseWay := s2_victim_way when (releaseDone) { release_state := s_voluntary_write_meta } - when (tl_out.c.fire() && c_first) { release_ack_wait := true } + when (tl_out_c.fire() && c_first) { release_ack_wait := true } } - tl_out.c.bits.address := probe_bits.address - tl_out.c.bits.data := s2_data_corrected + tl_out_c.bits.address := probe_bits.address + tl_out_c.bits.data := s2_data_corrected dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles dataArb.io.in(2).bits.write := false - dataArb.io.in(2).bits.addr := tl_out.c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits) + dataArb.io.in(2).bits.addr := tl_out_c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits) dataArb.io.in(2).bits.wordMask := ~UInt(0, rowBytes / wordBytes) dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) metaArb.io.in(4).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta) metaArb.io.in(4).bits.write := true metaArb.io.in(4).bits.way_en := releaseWay - metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, tl_out.c.bits.address(idxMSB, 0)) + metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, tl_out_c.bits.address(idxMSB, 0)) metaArb.io.in(4).bits.data.coh := newCoh - metaArb.io.in(4).bits.data.tag := tl_out.c.bits.address >> untagBits + metaArb.io.in(4).bits.data.tag := tl_out_c.bits.address >> untagBits when (metaArb.io.in(4).fire()) { release_state := s_ready } // cached response @@ -690,7 +697,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // performance events io.cpu.perf.acquire := edge.done(tl_out_a) - io.cpu.perf.release := edge.done(tl_out.c) + io.cpu.perf.release := edge.done(tl_out_c) io.cpu.perf.tlbMiss := io.ptw.req.fire() def encodeData(x: UInt) = x.grouped(eccBits).map(dECC.encode(_)).asUInt