diff --git a/src/main/scala/coreplex/RocketTiles.scala b/src/main/scala/coreplex/RocketTiles.scala
index 0ee07d79..7f7493e2 100644
--- a/src/main/scala/coreplex/RocketTiles.scala
+++ b/src/main/scala/coreplex/RocketTiles.scala
@@ -47,8 +47,10 @@ trait HasRocketTiles extends CoreplexRISCVPlatform {
       case Synchronous => {
         val tile = LazyModule(new RocketTile(c, i)(pWithExtra))
         val buffer = LazyModule(new TLBuffer)
+        val fixer = LazyModule(new TLFIFOFixer)
         buffer.node :=* tile.masterNode
-        l1tol2.node :=* buffer.node
+        fixer.node :=* buffer.node
+        l1tol2.node :=* fixer.node
         tile.slaveNode :*= cbus.node
         tile.intNode := intBar.intnode
         (io: HasRocketTilesBundle) => {
@@ -62,8 +64,10 @@ trait HasRocketTiles extends CoreplexRISCVPlatform {
         val wrapper = LazyModule(new AsyncRocketTile(c, i)(pWithExtra))
         val sink = LazyModule(new TLAsyncCrossingSink(depth, sync))
         val source = LazyModule(new TLAsyncCrossingSource(sync))
+        val fixer = LazyModule(new TLFIFOFixer)
         sink.node :=* wrapper.masterNode
-        l1tol2.node :=* sink.node
+        fixer.node :=* sink.node
+        l1tol2.node :=* fixer.node
         wrapper.slaveNode :*= source.node
         wrapper.intNode := intBar.intnode
         source.node :*= cbus.node
@@ -79,8 +83,10 @@ trait HasRocketTiles extends CoreplexRISCVPlatform {
         val wrapper = LazyModule(new RationalRocketTile(c, i)(pWithExtra))
         val sink = LazyModule(new TLRationalCrossingSink(util.FastToSlow))
         val source = LazyModule(new TLRationalCrossingSource)
+        val fixer = LazyModule(new TLFIFOFixer)
         sink.node :=* wrapper.masterNode
-        l1tol2.node :=* sink.node
+        fixer.node :=* sink.node
+        l1tol2.node :=* fixer.node
         wrapper.slaveNode :*= source.node
         wrapper.intNode := intBar.intnode
         source.node :*= cbus.node
diff --git a/src/main/scala/groundtest/Coreplex.scala b/src/main/scala/groundtest/Coreplex.scala
index fb0b8756..60848787 100644
--- a/src/main/scala/groundtest/Coreplex.scala
+++ b/src/main/scala/groundtest/Coreplex.scala
@@ -40,7 +40,9 @@ class GroundTestCoreplex(implicit p: Parameters) extends BaseCoreplex {
     }}))
   }
 
-  tiles.foreach { l1tol2.node :=* _.masterNode }
+  val fixer = LazyModule(new TLFIFOFixer)
+  l1tol2.node :=* fixer.node
+  tiles.foreach { fixer.node :=* _.masterNode }
 
   val cbusRAM = LazyModule(new TLRAM(AddressSet(testRamAddr, 0xffff), false, cbus_beatBytes))
   cbusRAM.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node)
diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala
index ebb017fd..6f4ec90d 100644
--- a/src/main/scala/rocket/DCache.scala
+++ b/src/main/scala/rocket/DCache.scala
@@ -86,8 +86,9 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack
 
   // I/O MSHRs
-  val uncachedInFlight = Reg(init=Vec.fill(maxUncachedInFlight)(Bool(false)))
-  val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq))
+  val mmioOffset = if (outer.scratch().isDefined) 0 else 1
+  val uncachedInFlight = Seq.fill(maxUncachedInFlight) { RegInit(Bool(false)) }
+  val uncachedReqs = Seq.fill(maxUncachedInFlight) { Reg(new HellaCacheReq) }
 
   // hit initiation path
   dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
@@ -171,7 +172,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val (s2_victim_dirty, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH)
   val s2_update_meta = s2_hit_state =/= s2_new_hit_state
   io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR)
-  when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
+  when (io.cpu.s2_nack || (s2_valid_hit && s2_update_meta)) { s1_nack := true }
 
   // exceptions
   val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes)
@@ -244,13 +245,13 @@
   metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
 
   // Prepare a TileLink request message that initiates a transaction
-  val a_source = PriorityEncoder(~uncachedInFlight.asUInt)
+  val a_source = PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR
   val acquire_address = s2_req_block_addr
   val access_address = s2_req.addr
   val a_size = s2_req.typ(MT_SZ-2, 0)
   val a_data = Fill(beatWords, pstore1_storegen.data)
   val acquire = if (edge.manager.anySupportAcquireB) {
-    edge.Acquire(a_source, acquire_address, lgCacheBlockBytes, s2_grow_param)._2 // Cacheability checked by tlb
+    edge.Acquire(UInt(0), acquire_address, lgCacheBlockBytes, s2_grow_param)._2 // Cacheability checked by tlb
   } else {
     Wire(new TLBundleA(edge.bundle))
   }
@@ -278,10 +279,15 @@
   tl_out.a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
 
   // Set pending bits for outstanding TileLink transaction
+  val a_sel = UIntToOH(a_source, maxUncachedInFlight+mmioOffset) >> mmioOffset
   when (tl_out.a.fire()) {
     when (s2_uncached) {
-      uncachedInFlight(a_source) := true
-      uncachedReqs(a_source) := s2_req
+      (a_sel.toBools zip (uncachedInFlight zip uncachedReqs)) foreach { case (s, (f, r)) =>
+        when (s) {
+          f := Bool(true)
+          r := s2_req
+        }
+      }
     }.otherwise {
       cached_grant_wait := true
     }
@@ -291,6 +297,7 @@
   val (d_first, d_last, d_done, d_address_inc) = edge.addr_inc(tl_out.d)
   val grantIsCached = tl_out.d.bits.opcode.isOneOf(Grant, GrantData)
   val grantIsUncached = tl_out.d.bits.opcode.isOneOf(AccessAck, AccessAckData, HintAck)
+  val grantIsUncachedData = tl_out.d.bits.opcode === AccessAckData
   val grantIsVoluntary = tl_out.d.bits.opcode === ReleaseAck // Clears a different pending bit
   val grantIsRefill = tl_out.d.bits.opcode === GrantData // Writes the data array
   tl_out.d.ready := true
@@ -299,15 +306,21 @@
     assert(cached_grant_wait, "A GrantData was unexpected by the dcache.")
     when(d_last) { cached_grant_wait := false }
   } .elsewhen (grantIsUncached) {
-    val id = tl_out.d.bits.source
-    val req = uncachedReqs(id)
-    assert(uncachedInFlight(id), "An AccessAck was unexpected by the dcache.") // TODO must handle Ack coming back on same cycle!
-    when(d_last) { uncachedInFlight(id) := false }
-    s2_data := tl_out.d.bits.data
-    s2_req.cmd := req.cmd
-    s2_req.typ := req.typ
-    s2_req.tag := req.tag
-    s2_req.addr := Cat(s1_paddr >> beatOffBits /* don't-care */, req.addr(beatOffBits-1, 0))
+    val d_sel = UIntToOH(tl_out.d.bits.source, maxUncachedInFlight+mmioOffset) >> mmioOffset
+    val req = Mux1H(d_sel, uncachedReqs)
+    (d_sel.toBools zip uncachedInFlight) foreach { case (s, f) =>
+      when (s && d_last) {
+        assert(f, "An AccessAck was unexpected by the dcache.") // TODO must handle Ack coming back on same cycle!
+        f := false
+      }
+    }
+    when (grantIsUncachedData) {
+      s2_data := tl_out.d.bits.data
+      s2_req.cmd := req.cmd
+      s2_req.typ := req.typ
+      s2_req.tag := req.tag
+      s2_req.addr := Cat(s1_paddr >> beatOffBits /* don't-care */, req.addr(beatOffBits-1, 0))
+    }
   } .elsewhen (grantIsVoluntary) {
     assert(release_ack_wait, "A ReleaseAck was unexpected by the dcache.") // TODO should handle Ack coming back on same cycle!
     release_ack_wait := false
@@ -333,7 +346,7 @@
   // don't accept uncached grants if there's a structural hazard on s2_data...
   val blockUncachedGrant = Reg(Bool())
   blockUncachedGrant := dataArb.io.out.valid
-  when (grantIsUncached) {
+  when (grantIsUncachedData) {
     tl_out.d.ready := !(blockUncachedGrant || s1_valid)
     // ...but insert bubble to guarantee grant's eventual forward progress
     when (tl_out.d.valid && !tl_out.d.ready) {
@@ -448,7 +461,7 @@
   io.cpu.ordered := !(s1_valid || s2_valid || cached_grant_wait || uncachedInFlight.asUInt.orR)
 
   // uncached response
-  io.cpu.replay_next := tl_out.d.fire() && grantIsUncached
+  io.cpu.replay_next := tl_out.d.fire() && grantIsUncachedData
   val doUncachedResp = Reg(next = io.cpu.replay_next)
   when (doUncachedResp) {
     assert(!s2_valid_hit)
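The subtle part of the DCache hunks above is the source-ID remapping: when there is no scratchpad, source 0 is reserved for the cached-refill MSHR (mmioOffset = 1), the I/O MSHRs occupy sources mmioOffset through mmioOffset + maxUncachedInFlight - 1, and UIntToOH(source, maxUncachedInFlight + mmioOffset) >> mmioOffset turns a source ID back into a one-hot slot select. A runnable plain-Scala model of that mapping follows; the constants and object name are illustrative, not from the patch:

// Models the patch's a_sel/d_sel computation in ordinary Scala.
object MmioSlotModel extends App {
  val mmioOffset = 1          // 0 when a scratchpad replaces the cache
  val maxUncachedInFlight = 4 // illustrative value

  // One Boolean per I/O MSHR slot, like (UIntToOH(source) >> mmioOffset).toBools
  def slotSelect(source: Int): Seq[Boolean] =
    Seq.tabulate(maxUncachedInFlight)(slot => source == slot + mmioOffset)

  for (source <- 0 to maxUncachedInFlight) {
    val sel = slotSelect(source).map(b => if (b) '1' else '0').mkString
    println(s"source $source -> slot select $sel")
  }
  // source 0 selects no slot: that ID now belongs to the cached-refill MSHR,
  // which is why edge.Acquire above uses UInt(0) rather than a_source.
}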
diff --git a/src/main/scala/rocket/HellaCache.scala b/src/main/scala/rocket/HellaCache.scala
index f0b23107..9fcbdc15 100644
--- a/src/main/scala/rocket/HellaCache.scala
+++ b/src/main/scala/rocket/HellaCache.scala
@@ -13,6 +13,7 @@ import uncore.tilelink2._
 import uncore.util.Code
 import util.{ParameterizedBundle, RandomReplacement}
 import scala.collection.mutable.ListBuffer
+import scala.math.max
 
 case class DCacheParams(
   nSets: Int = 64,
@@ -136,13 +137,22 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
 
 abstract class HellaCache(implicit p: Parameters) extends LazyModule {
   private val cfg = p(TileKey).dcache.get
-  val node = TLClientNode(cfg.scratch.map { _ =>
-    TLClientParameters(sourceId = IdRange(0, cfg.nMMIOs))
-  } getOrElse {
+  val firstMMIO = max(1, cfg.nMSHRs)
+
+  val node = TLClientNode(Seq(TLClientPortParameters(
+    clients = cfg.scratch.map { _ => Seq(
       TLClientParameters(
-      sourceId = IdRange(0, cfg.nMSHRs+cfg.nMMIOs),
-      supportsProbe = TransferSizes(1, cfg.blockBytes))
-  })
+        sourceId = IdRange(0, cfg.nMMIOs),
+        requestFifo = true))
+    } getOrElse { Seq(
+      TLClientParameters(
+        sourceId = IdRange(0, firstMMIO),
+        supportsProbe = TransferSizes(1, cfg.blockBytes)),
+      TLClientParameters(
+        sourceId = IdRange(firstMMIO, firstMMIO+cfg.nMMIOs),
+        requestFifo = true))
+    },
+    minLatency = 1)))
   val module: HellaCacheModule
 }
@@ -158,6 +168,9 @@ class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer)
   implicit val edge = outer.node.edgesOut(0)
   val io = new HellaCacheBundle(outer)
   val tl_out = io.mem(0)
+
+  // IOMSHRs must be FIFO
+  edge.manager.requireFifo()
 }
 
 object HellaCache {
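Splitting the node into two client parameter sets gives cached refills and MMIO traffic disjoint source ranges, so only the MMIO client carries requestFifo. A sketch of the resulting ID layout for the non-scratchpad case (nMSHRs = 1 and nMMIOs = 4 are illustrative values, not from any particular config):

object HellaCacheIdLayoutSketch extends App {
  val nMSHRs = 1 // illustrative
  val nMMIOs = 4 // illustrative
  val firstMMIO = math.max(1, nMSHRs)
  println(s"cached client: sourceId [0, $firstMMIO), supportsProbe, unordered")
  println(s"MMIO client:   sourceId [$firstMMIO, ${firstMMIO + nMMIOs}), requestFifo = true")
}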
diff --git a/src/main/scala/uncore/tilelink2/FIFOFixer.scala b/src/main/scala/uncore/tilelink2/FIFOFixer.scala
new file mode 100644
index 00000000..88498d02
--- /dev/null
+++ b/src/main/scala/uncore/tilelink2/FIFOFixer.scala
@@ -0,0 +1,78 @@
+// See LICENSE.SiFive for license details.
+
+package uncore.tilelink2
+
+import Chisel._
+import chisel3.internal.sourceinfo.SourceInfo
+import config._
+import diplomacy._
+import scala.math.max
+
+class TLFIFOFixer(implicit p: Parameters) extends LazyModule
+{
+  // We request downstream FIFO so we can use the existing fifoId
+  val node = TLAdapterNode(
+    clientFn  = { cp => cp.copy(clients  = cp.clients .map(c => c.copy(requestFifo = !c.supportsProbe))) },
+    managerFn = { mp => mp.copy(managers = mp.managers.map(m => m.copy(fifoId = Some(0)))) })
+
+  lazy val module = new LazyModuleImp(this) {
+    val io = new Bundle {
+      val in  = node.bundleIn
+      val out = node.bundleOut
+    }
+
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
+      val maxId = edgeOut.manager.managers.flatMap(_.fifoId).foldLeft(0)(max)
+      val a_id = edgeOut.manager.findFifoIdFast(in.a.bits.address)
+      val a_nid = a_id === UInt(0) // no id = not FIFO
+
+      val a_first = edgeIn.first(in.a)
+      val d_first = edgeOut.first(out.d) && out.d.bits.opcode =/= TLMessages.ReleaseAck
+
+      val stalls = edgeIn.client.clients.filter(c => c.requestFifo && c.sourceId.size > 1).map { c =>
+        val a_sel = c.sourceId.contains(in.a.bits.source)
+        val d_sel = c.sourceId.contains(in.d.bits.source)
+        val id    = RegInit(UInt(0, width = log2Ceil(maxId+1)))
+        val count = RegInit(UInt(0, width = log2Ceil(c.sourceId.size+1)))
+
+        val a_inc = in.a.fire() && a_first && a_sel
+        val d_dec = in.d.fire() && d_first && d_sel
+        count := count + a_inc.asUInt - d_dec.asUInt
+        when (in.a.fire() && a_sel) { id := a_id }
+
+        a_sel && a_first && count =/= UInt(0) && (a_nid || id =/= a_id)
+      }
+
+      val stall = stalls.foldLeft(Bool(false))(_||_)
+
+      out.a <> in.a
+      in.d <> out.d
+      out.a.valid := in.a.valid && !stall
+      in.a.ready := out.a.ready && !stall
+
+      if (edgeOut.manager.anySupportAcquireB && edgeOut.client.anySupportProbe) {
+        in .b <> out.b
+        out.c <> in .c
+        out.e <> in .e
+      } else {
+        in.b.valid := Bool(false)
+        in.c.ready := Bool(true)
+        in.e.ready := Bool(true)
+        out.b.ready := Bool(true)
+        out.c.valid := Bool(false)
+        out.e.valid := Bool(false)
+      }
+    }
+  }
+}
+
+object TLFIFOFixer
+{
+  // applied to the TL source node; y.node := TLFIFOFixer()(x.node)
+  def apply()(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = {
+    val fixer = LazyModule(new TLFIFOFixer)
+    fixer.node := x
+    fixer.node
+  }
+}
+
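The per-client stall term above (a_sel && a_first && count =/= UInt(0) && (a_nid || id =/= a_id)) is the whole algorithm: a client may keep pipelining requests into the FIFO domain it is already using, but must drain all outstanding responses before switching domains or before touching a manager with no fifoId. A runnable single-client model of that rule, with illustrative method names that are not from the patch:

// count/domain mirror the fixer's per-client `count` and `id` registers;
// a domain of 0 plays the role of a_nid (address maps to no FIFO domain).
object FifoFixerStallModel extends App {
  var count = 0
  var domain = 0

  def stall(aDomain: Int): Boolean =
    count != 0 && (aDomain == 0 || aDomain != domain)

  def issue(aDomain: Int): Boolean = {
    val accepted = !stall(aDomain)
    if (accepted) { count += 1; domain = aDomain }
    accepted
  }
  def retire(): Unit = count -= 1

  println(issue(1)) // true:  nothing outstanding
  println(issue(1)) // true:  same domain may keep pipelining
  println(issue(2)) // false: must drain domain 1 first
  retire(); retire()
  println(issue(2)) // true:  drained, switching is safe
}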
diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala
index 26e531ed..2220ca52 100644
--- a/src/main/scala/uncore/tilelink2/Parameters.scala
+++ b/src/main/scala/uncore/tilelink2/Parameters.scala
@@ -23,7 +23,8 @@ case class TLManagerParameters(
   supportsPutPartial: TransferSizes = TransferSizes.none,
   supportsHint:       TransferSizes = TransferSizes.none,
   // If fifoId=Some, all accesses sent to the same fifoId are executed and ACK'd in FIFO order
-  fifoId: Option[Int] = None)
+  // Note: you can only rely on this FIFO behaviour if your TLClientParameters include requestFifo
+  fifoId: Option[Int] = None)
 {
   require (!address.isEmpty)
   address.foreach { a => require (a.finite) }
@@ -76,6 +77,8 @@ case class TLManagerPortParameters(
   require (endSinkId > 0)
   require (minLatency >= 0)
 
+  def requireFifo() = managers.foreach { m => require (m.fifoId == Some(0)) }
+
   // Bounds on required sizes
   def maxAddress = managers.map(_.maxAddress).max
   def maxTransfer = managers.map(_.maxTransfer).max
@@ -157,6 +160,7 @@ case class TLClientParameters(
   sourceId:            IdRange       = IdRange(0,1),
   nodePath:            Seq[BaseNode] = Seq(),
+  requestFifo:         Boolean       = false, // only a request, not a requirement
   // Supports both Probe+Grant of these sizes
   supportsProbe:       TransferSizes = TransferSizes.none,
   supportsArithmetic:  TransferSizes = TransferSizes.none,
@@ -174,6 +178,8 @@ case class TLClientParameters(
   require (supportsProbe.contains(supportsPutFull))
   require (supportsProbe.contains(supportsPutPartial))
   require (supportsProbe.contains(supportsHint))
+  // If you need FIFO, you must not be TL-C (due to independent A vs. C order)
+  require (!requestFifo || !supportsProbe)
 
   val maxTransfer = List(
     supportsProbe.max,
@@ -228,6 +234,8 @@ case class TLClientPortParameters(
   def find(id: UInt) = Vec(clients.map(_.sourceId.contains(id)))
   def contains(id: UInt) = find(id).reduce(_ || _)
 
+  def requestFifo(id: UInt) = Mux1H(find(id), clients.map(c => Bool(c.requestFifo)))
+
   private def safety_helper(member: TLClientParameters => TransferSizes)(id: UInt, lgSize: UInt) = {
     val allSame = clients.map(member(_) == member(clients(0))).reduce(_ && _)
     if (allSame) member(clients(0)).containsLg(lgSize) else {
diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala
index 0cfe05f6..fb4e4dfd 100644
--- a/src/main/scala/uncore/tilelink2/RAMModel.scala
+++ b/src/main/scala/uncore/tilelink2/RAMModel.scala
@@ -106,7 +106,7 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
     val a_addr_hi = edge.addr_hi(a_address)
     val a_base = edge.address(a)
     val a_mask = edge.mask(a_base, a_size)
-    val a_fifo = edge.manager.hasFifoIdFast(a_base)
+    val a_fifo = edge.manager.hasFifoIdFast(a_base) && edge.client.requestFifo(a.source)
 
     // Grab the concurrency state we need
     val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi))
@@ -192,7 +192,7 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
     val d_address = d_base | d_address_inc
     val d_addr_hi = edge.addr_hi(d_address)
     val d_mask = edge.mask(d_base, d_size)
-    val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
+    val d_fifo = edge.manager.hasFifoIdFast(d_flight.base) && edge.client.requestFifo(d.source)
 
     // Grab the concurrency state we need
     val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
diff --git a/src/main/scala/uncore/tilelink2/SourceShrinker.scala b/src/main/scala/uncore/tilelink2/SourceShrinker.scala
index 0851a2af..a1938da5 100644
--- a/src/main/scala/uncore/tilelink2/SourceShrinker.scala
+++ b/src/main/scala/uncore/tilelink2/SourceShrinker.scala
@@ -13,11 +13,12 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
 {
   require (maxInFlight > 0)
 
+  // The SourceShrinker completely destroys all FIFO property guarantees
   private val client = TLClientParameters(sourceId = IdRange(0, maxInFlight))
   val node = TLAdapterNode(
     // We erase all client information since we crush the source Ids
     clientFn  = { _ => TLClientPortParameters(clients = Seq(client)) },
-    managerFn = { mp => mp })
+    managerFn = { mp => mp.copy(managers = mp.managers.map(_.copy(fifoId = None))) })
 
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {
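Taken together, the parameter changes make FIFO ordering an explicit two-sided contract: a manager advertises it with fifoId, a client opts in with requestFifo, requireFifo() lets an endpoint like the DCache assert it, and an adapter that destroys the property (TLSourceShrinker) must now erase fifoId. A sketch of the two halves, with illustrative address and ID values that are not taken from this patch:

import diplomacy.{AddressSet, IdRange}
import uncore.tilelink2.{TLClientParameters, TLManagerParameters}

object FifoContractSketch {
  val orderedSlave = TLManagerParameters(
    address = Seq(AddressSet(0x60000000L, 0xfff)), // illustrative region
    fifoId  = Some(0)) // promises execution and ACKs in request order

  val orderedMaster = TLClientParameters(
    sourceId    = IdRange(0, 4), // illustrative
    requestFifo = true)          // asks a TLFIFOFixer to preserve that order
}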