diff --git a/src/main/scala/rocket/dcache.scala b/src/main/scala/rocket/dcache.scala index ed6a4015..4b2c7116 100644 --- a/src/main/scala/rocket/dcache.scala +++ b/src/main/scala/rocket/dcache.scala @@ -318,13 +318,13 @@ class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends L } // grant - val (d_first, d_last, d_address_inc) = edge.firstlast(tl_out.d) + val (d_first, d_last, d_done, d_address_inc) = edge.addr_inc(tl_out.d) val grantIsCached = tl_out.d.bits.opcode.isOneOf(Grant, GrantData) val grantIsUncached = tl_out.d.bits.opcode.isOneOf(AccessAck, AccessAckData, HintAck) val grantIsVoluntary = tl_out.d.bits.opcode === ReleaseAck // Clears a different pending bit val grantIsRefill = tl_out.d.bits.opcode === GrantData // Writes the data array tl_out.d.ready := true - when (tl_out.d.fire() && d_last) { + when (d_done) { when (grantIsCached) { assert(cached_grant_wait, "A GrantData was unexpected by the dcache.") cached_grant_wait := false @@ -390,15 +390,11 @@ class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends L metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays) // release - val (_, c_last, c_address_inc) = edge.firstlast(tl_out.c) - val releaseDone = tl_out.c.fire() && Mux(inWriteback, c_last, Bool(true)) + val (_, c_last, releaseDone, c_count) = edge.count(tl_out.c) val releaseRejected = tl_out.c.valid && !tl_out.c.ready val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) - - // TODO refactor these counters - val (writebackCount, _) = Counter(tl_out.c.fire() && inWriteback, refillCycles) - val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) + val releaseDataBeat = Cat(UInt(0), c_count) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) val nackResponseMessage = edge.ProbeAck( b = probe_bits, @@ -421,7 +417,7 @@ class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends L data = s2_data)) tl_out.c.valid := s2_release_data_valid - tl_out.c.bits := nackResponseMessage // TODO was ClientMetadata.onReset.makeRelease(probe_bits) ... ok? + tl_out.c.bits := nackResponseMessage val newCoh = Wire(init = probeNewCoh) releaseWay := s2_probe_way @@ -461,7 +457,8 @@ class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends L dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles dataArb.io.in(2).bits.write := false - dataArb.io.in(2).bits.addr := tl_out.c.bits.address | c_address_inc + dataArb.io.in(2).bits.addr := Cat(tl_out.c.bits.address(paddrBits-1, untagBits), + releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta) diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index 1921c37a..14a749bc 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -102,7 +102,7 @@ class AXI4ToTL extends LazyModule val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY) val d_hasData = edgeOut.hasData(out.d.bits) - val (_, d_last, _) = edgeOut.firstlast(out.d.bits, out.d.fire()) + val d_last = edgeOut.last(out.d) out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready) ok_r.valid := out.d.valid && d_hasData diff --git a/src/main/scala/uncore/tilelink2/Broadcast.scala b/src/main/scala/uncore/tilelink2/Broadcast.scala index fdc1c14c..34c6a589 100644 --- a/src/main/scala/uncore/tilelink2/Broadcast.scala +++ b/src/main/scala/uncore/tilelink2/Broadcast.scala @@ -100,7 +100,7 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa // A tracker response is anything neither dropped nor a ReleaseAck val d_response = d_hasData || !d_what(1) - val (_, d_last, _) = edgeIn.firstlast(d_normal) + val d_last = edgeIn.last(d_normal) (trackers zip d_trackerOH.toBools) foreach { case (tracker, select) => tracker.d_last := select && d_normal.fire() && d_response && d_last } @@ -118,7 +118,7 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa // Decrement the tracker's outstanding probe counter val c_decrement = in.c.fire() && (c_probeack || c_probeackdata) - val (_, c_last, _) = edgeIn.firstlast(in.c) + val c_last = edgeIn.last(in.c) trackers foreach { tracker => tracker.probeack := c_decrement && c_last && tracker.line === (in.c.bits.address >> lineShift) } @@ -159,7 +159,7 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa // Which cache does a request come from? val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt - val (a_first, _, _) = edgeIn.firstlast(in.a) + val a_first = edgeIn.first(in.a) // To accept a request from A, the probe FSM must be idle and there must be a matching tracker val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt diff --git a/src/main/scala/uncore/tilelink2/Edges.scala b/src/main/scala/uncore/tilelink2/Edges.scala index 35730b78..f2c6af63 100644 --- a/src/main/scala/uncore/tilelink2/Edges.scala +++ b/src/main/scala/uncore/tilelink2/Edges.scala @@ -175,19 +175,43 @@ class TLEdge( } } - def firstlast(bits: TLChannel, fire: Bool): (Bool, Bool, UInt) = { + def firstlastHelper(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = { val beats1 = numBeats1(bits) val counter = RegInit(UInt(0, width = log2Up(maxTransfer / manager.beatBytes))) val counter1 = counter - UInt(1) val first = counter === UInt(0) val last = counter === UInt(1) || beats1 === UInt(0) + val done = last && fire + val count = (beats1 & ~counter1) when (fire) { counter := Mux(first, beats1, counter1) } - (first, last, (beats1 & ~counter1) << log2Ceil(manager.beatBytes)) + (first, last, done, count) } - def firstlast(x: DecoupledIO[TLChannel]): (Bool, Bool, UInt) = firstlast(x.bits, x.fire()) + def first(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._1 + def first(x: DecoupledIO[TLChannel]): Bool = first(x.bits, x.fire()) + + def last(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._2 + def last(x: DecoupledIO[TLChannel]): Bool = last(x.bits, x.fire()) + + def firstlast(bits: TLChannel, fire: Bool): (Bool, Bool, Bool) = { + val r = firstlastHelper(bits, fire) + (r._1, r._2, r._3) + } + def firstlast(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool) = firstlast(x.bits, x.fire()) + + def count(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = { + val r = firstlastHelper(bits, fire) + (r._1, r._2, r._3, r._4) + } + def count(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool, UInt) = count(x.bits, x.fire()) + + def addr_inc(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = { + val r = firstlastHelper(bits, fire) + (r._1, r._2, r._3, r._4 << log2Ceil(manager.beatBytes)) + } + def addr_inc(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool, UInt) = addr_inc(x.bits, x.fire()) } class TLEdgeOut( diff --git a/src/main/scala/uncore/tilelink2/Fuzzer.scala b/src/main/scala/uncore/tilelink2/Fuzzer.scala index de9c9fa5..c7ff80b9 100644 --- a/src/main/scala/uncore/tilelink2/Fuzzer.scala +++ b/src/main/scala/uncore/tilelink2/Fuzzer.scala @@ -113,12 +113,10 @@ class TLFuzzer( // Progress within each operation val a = out.a.bits - val (a_first, a_last, _) = edge.firstlast(out.a) - val req_done = out.a.fire() && a_last + val (a_first, a_last, req_done) = edge.firstlast(out.a) val d = out.d.bits - val (d_first, d_last, _) = edge.firstlast(out.d) - val resp_done = out.d.fire() && d_last + val (d_first, d_last, resp_done) = edge.firstlast(out.d) // Source ID generation val idMap = Module(new IDMapGenerator(inFlight)) diff --git a/src/main/scala/uncore/tilelink2/Monitor.scala b/src/main/scala/uncore/tilelink2/Monitor.scala index f9d66988..4a6e9d50 100644 --- a/src/main/scala/uncore/tilelink2/Monitor.scala +++ b/src/main/scala/uncore/tilelink2/Monitor.scala @@ -283,7 +283,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatA(a: DecoupledSnoop[TLBundleA], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (a_first, _, _) = edge.firstlast(a.bits, a.fire()) + val a_first = edge.first(a.bits, a.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -306,7 +306,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatB(b: DecoupledSnoop[TLBundleB], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (b_first, _, _) = edge.firstlast(b.bits, b.fire()) + val b_first = edge.first(b.bits, b.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -329,7 +329,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatC(c: DecoupledSnoop[TLBundleC], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (c_first, _, _) = edge.firstlast(c.bits, c.fire()) + val c_first = edge.first(c.bits, c.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -352,7 +352,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatD(d: DecoupledSnoop[TLBundleD], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (d_first, _, _) = edge.firstlast(d.bits, d.fire()) + val d_first = edge.first(d.bits, d.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -387,8 +387,8 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source def legalizeSourceUnique(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) { val inflight = RegInit(UInt(0, width = edge.client.endSourceId)) - val (_, a_last, _) = edge.firstlast(bundle.a.bits, bundle.a.fire()) - val (_, d_last, _) = edge.firstlast(bundle.d.bits, bundle.d.fire()) + val a_last = edge.last(bundle.a.bits, bundle.a.fire()) + val d_last = edge.last(bundle.d.bits, bundle.d.fire()) if (edge.manager.minLatency > 0) { assert(bundle.a.bits.source =/= bundle.d.bits.source || !bundle.a.valid || !bundle.d.valid, s"'A' and 'D' concurrent, despite minlatency ${edge.manager.minLatency}" + extra) diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala index 37dc2878..ec5c8d90 100644 --- a/src/main/scala/uncore/tilelink2/RAMModel.scala +++ b/src/main/scala/uncore/tilelink2/RAMModel.scala @@ -110,7 +110,7 @@ class TLRAMModel(log: String = "") extends LazyModule // Process A access requests val a = Reg(next = in.a.bits) val a_fire = Reg(next = in.a.fire(), init = Bool(false)) - val (a_first, a_last, a_address_inc) = edge.firstlast(a, a_fire) + val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire) val a_size = edge.size(a) val a_sizeOH = UIntToOH(a_size) val a_address = a.address | a_address_inc @@ -196,7 +196,7 @@ class TLRAMModel(log: String = "") extends LazyModule // Process D access responses val d = RegNext(out.d.bits) val d_fire = Reg(next = out.d.fire(), init = Bool(false)) - val (d_first, d_last, d_address_inc) = edge.firstlast(d, d_fire) + val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire) val d_size = edge.size(d) val d_sizeOH = UIntToOH(d_size) val d_base = d_flight.base diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index 6b476815..ae2f41e3 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -80,7 +80,7 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true) extends LazyModule val a_source = in.a.bits.source val a_size = edgeIn.size(in.a.bits) val a_isPut = edgeIn.hasData(in.a.bits) - val (_, a_last, _) = edgeIn.firstlast(in.a) + val a_last = edgeIn.last(in.a) // Make sure the fields are within the bounds we assumed assert (a_source < UInt(1 << sourceBits))