From 4807ce7cedae3a3394f9aae9349a8faa67011af0 Mon Sep 17 00:00:00 2001
From: "Wesley W. Terpstra"
Date: Mon, 24 Apr 2017 23:28:04 -0700
Subject: [PATCH] dcache: put a flow Q to absorb back-pressure without
 restarting pipeline (#701)

* dcache: put a flow Q to absorb back-pressure without restarting pipeline

When used with a RationalCrossing, pipelined MMIO does not come out
cleanly. The first beat works, but if the second beat gets stalled, the
pipeline is restarted. This is a quick, hacky fix to absorb the beats;
perhaps a better fix can be made to achieve the same effect.

* dcache: provision as few stages as possible
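For reference, a minimal standalone sketch of the flow-queue construct this
patch places in front of tl_out.a (module and port names below are
illustrative only, not taken from DCache.scala):

    import Chisel._

    // A producer/consumer pair decoupled by a 2-entry flow queue.
    // flow = true lets a beat pass through combinationally while the queue is
    // empty, so the unstalled case adds no latency; pipe = true lets a full
    // queue accept a new beat in the same cycle its head is drained.
    class FlowQExample extends Module {
      val io = new Bundle {
        val enq = Decoupled(UInt(width = 8)).flip
        val deq = Decoupled(UInt(width = 8))
      }
      io.deq <> Queue(io.enq, 2, flow = true, pipe = true)
    }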
---
 src/main/scala/rocket/DCache.scala | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala
index 72b68fac..868c31c5 100644
--- a/src/main/scala/rocket/DCache.scala
+++ b/src/main/scala/rocket/DCache.scala
@@ -11,6 +11,7 @@ import uncore.tilelink2._
 import uncore.util._
 import util._
 import TLMessages._
+import scala.math.min
 
 class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
   val addr = Bits(width = untagBits)
@@ -57,6 +58,19 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   data.io.req <> dataArb.io.out
   dataArb.io.out.ready := true
 
+  val rational = p(coreplex.RocketCrossing) match {
+    case coreplex.RationalCrossing(_) => true
+    case _ => false
+  }
+
+  val tl_out_a = Wire(tl_out.a)
+  val q_depth = if (rational) min(2, maxUncachedInFlight-1) else 0
+  if (q_depth <= 0) {
+    tl_out.a <> tl_out_a
+  } else {
+    tl_out.a <> Queue(tl_out_a, q_depth, flow = true, pipe = true)
+  }
+
   val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
   val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false))
   val probe_bits = RegEnable(tl_out.b.bits, tl_out.b.fire()) // TODO has data now :(
@@ -176,7 +190,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val (s2_prb_ack_data, s2_report_param, probeNewCoh)= s2_probe_state.onProbe(probe_bits.param)
   val (s2_victim_dirty, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH)
   val s2_update_meta = s2_hit_state =/= s2_new_hit_state
-  io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR)
+  io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out_a.ready && !uncachedInFlight.asUInt.andR)
   when (io.cpu.s2_nack || (s2_valid_hit && s2_update_meta)) { s1_nack := true }
 
   val s3_valid = Reg(next = s2_valid, init=Bool(false))
@@ -285,17 +299,17 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
       M_XA_MAXU -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MAXU)._2))
   } else {
     // If no managers support atomics, assert fail if processor asks for them
-    assert (!(tl_out.a.valid && pstore1_amo && s2_write && s2_uncached))
+    assert (!(tl_out_a.valid && pstore1_amo && s2_write && s2_uncached))
     Wire(new TLBundleA(edge.bundle))
   }
 
-  tl_out.a.valid := (s2_valid_cached_miss && !s2_victim_dirty) ||
+  tl_out_a.valid := (s2_valid_cached_miss && !s2_victim_dirty) ||
                     (s2_valid_uncached && !uncachedInFlight.asUInt.andR)
-  tl_out.a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
+  tl_out_a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
 
   // Set pending bits for outstanding TileLink transaction
   val a_sel = UIntToOH(a_source, maxUncachedInFlight+mmioOffset) >> mmioOffset
-  when (tl_out.a.fire()) {
+  when (tl_out_a.fire()) {
     when (s2_uncached) {
       (a_sel.toBools zip (uncachedInFlight zip uncachedReqs)) foreach { case (s, (f, r)) =>
         when (s) {
@@ -518,7 +532,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val flushed = Reg(init=Bool(true))
   val flushing = Reg(init=Bool(false))
   val flushCounter = Counter(nSets * nWays)
-  when (tl_out.a.fire() && !s2_uncached) { flushed := false }
+  when (tl_out_a.fire() && !s2_uncached) { flushed := false }
   when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
     io.cpu.s2_nack := !flushed
     when (!flushed) {
@@ -542,6 +556,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   }
 
   // performance events
-  io.cpu.acquire := edge.done(tl_out.a)
+  io.cpu.acquire := edge.done(tl_out_a)
   io.cpu.release := edge.done(tl_out.c)
 }