From 9f08c484bdc868f16138bc188cd79261620960cc Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 25 Apr 2017 17:55:06 -0700 Subject: [PATCH] tilelink2: ToAXI4 provide FIFO order semantics --- src/main/scala/uncore/tilelink2/ToAXI4.scala | 122 ++++++------------- 1 file changed, 36 insertions(+), 86 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index 8cfb311e..0229bc0f 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -10,14 +10,19 @@ import util.PositionalMultiQueue import uncore.axi4._ import scala.math.{min, max} -case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( - dFn = { _ => - // We must erase all client information, because we crush their source Ids - val masters = Seq( +case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( + dFn = { p => + val idSize = p.clients.map { c => if (c.requestFifo) 1 else c.sourceId.size } + val idStart = idSize.scanLeft(0)(_+_).init + val masters = ((idStart zip idSize) zip p.clients) map { case ((start, size), c) => AXI4MasterParameters( - id = IdRange(0, 1 << idBits), - aligned = true)) - AXI4MasterPortParameters(masters) + id = IdRange(start, start+size), + aligned = true, + nodePath = c.nodePath) + } + AXI4MasterPortParameters( + masters = masters, + userBits = log2Ceil(p.endSourceId) + 4 + log2Ceil(beatBytes)) }, uFn = { p => TLManagerPortParameters( managers = p.slaves.map { case s => @@ -29,15 +34,15 @@ case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( nodePath = s.nodePath, supportsGet = s.supportsRead, supportsPutFull = s.supportsWrite, - supportsPutPartial = s.supportsWrite)}, - // AXI4 is NEVER fifo in TL sense (R+W are independent) + supportsPutPartial = s.supportsWrite, + fifoId = Some(0))}, beatBytes = p.beatBytes, minLatency = p.minLatency) }) -class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: Parameters) extends LazyModule +class TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule { - val node = TLToAXI4Node(idBits) + val node = TLToAXI4Node(beatBytes) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -52,24 +57,23 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P require (slaves(0).interleavedId.isDefined) slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) } - // We need to ensure that a slave does not stall trying to send B while we need to receive R - // Since R&W have independent flow control, it is possible for a W to cut in-line and get into - // a slave's buffers, preventing us from getting all the R responses we need to release D for B. - // This risk is compounded by an AXI fragmentation. Even a slave which responds completely to - // AR before working on AW might have an AW slipped between two AR fragments. - val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational) + // Construct the source=>ID mapping table + val idTable = Wire(Vec(edgeIn.client.endSourceId, out.aw.bits.id)) + (edgeIn.client.clients zip edgeOut.master.masters) foreach { case (c, m) => + for (i <- 0 until c.sourceId.size) { + idTable(c.sourceId.start + i) := UInt(m.id.start + (if (c.requestFifo) 0 else i)) + } + } // We need to keep the following state from A => D: (addr_lo, size, source) // All of those fields could potentially require 0 bits (argh. Chisel.) - // We will pack as many of the lowest bits of state as fit into the AXI ID. - // Any bits left-over must be put into a bank of Queues. - // The Queues are indexed by as many of the source bits as fit into the AXI ID. - // The Queues are deep enough that every source has guaranteed space in its Queue. + // We will pack all of that extra information into the user bits. val sourceBits = log2Ceil(edgeIn.client.endSourceId) val sizeBits = log2Ceil(edgeIn.maxLgSize+1) val addrBits = log2Ceil(edgeIn.manager.beatBytes) val stateBits = addrBits + sizeBits + sourceBits // could be 0 + require (stateBits <= out.aw.bits.params.userBits) val a_address = edgeIn.address(in.a.bits) val a_addr_lo = edgeIn.addr_lo(a_address) @@ -91,73 +95,17 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P require (addrEnd == stateBits) val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff) - val a_id = if (idBits == 0) UInt(0) else a_state - val r_state = Wire(UInt(width = stateBits)) + val r_state = out.r.bits.user.getOrElse(UInt(0)) val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0) val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0) val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0) - val b_state = Wire(UInt(width = stateBits)) + val b_state = out.b.bits.user.getOrElse(UInt(0)) val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0) val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0) val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0) - val r_last = out.r.bits.last - val r_id = out.r.bits.id - val b_id = out_b.bits.id - - if (stateBits <= idBits) { // No need for any state tracking - r_state := r_id - b_state := b_id - } else { - val bankIndexBits = min(sourceBits, idBits) - val posBits = max(0, sourceBits - idBits) - val implicitBits = max(idBits, sourceBits) - val bankBits = stateBits - implicitBits - val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId) - def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks - - val banks = Seq.tabulate(numBanks) { i => - // We know there can only be as many outstanding requests as TL sources - // However, AXI read and write queues are not mutually FIFO. - // Therefore, we want to pop them individually, but share the storage. - val bypass = combinational && edgeOut.slave.minLatency == 0 - PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass) - } - - val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits) - val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0) - val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0) - val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0) - val a_bankSelect = UIntToOH(a_bankIndex, numBanks) - val r_bankSelect = UIntToOH(r_bankIndex, numBanks) - val b_bankSelect = UIntToOH(b_bankIndex, numBanks) - - banks.zipWithIndex.foreach { case (q, i) => - // Push a_state into the banks - q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i) - q.io.enq.bits.pos := a_bankPosition - q.io.enq.bits.data := a_state >> implicitBits - q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1)) - // Pop the bank's ways - q.io.deq(0).ready := out_b.fire() && b_bankSelect(i) - q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last - // The FIFOs must be valid when we're ready to pop them... - assert (q.io.deq(0).valid || !q.io.deq(0).ready) - assert (q.io.deq(1).valid || !q.io.deq(1).ready) - } - - val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex) - val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex) - val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex) - val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex) - - def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) } - b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id)) - r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id)) - } - // We need these Queues because AXI4 queues are irrevocable val depth = if (combinational) 1 else 2 val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params))) @@ -179,7 +127,7 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P val arw = out_arw.bits arw.wen := a_isPut - arw.id := a_id // truncated + arw.id := idTable(a_source) arw.addr := a_address arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes) arw.size := Mux(a_size >= maxSize, maxSize, a_size) @@ -188,7 +136,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P arw.cache := UInt(0) // do not allow AXI to modify our transactions arw.prot := AXI4Parameters.PROT_PRIVILEDGED arw.qos := UInt(0) // no QoS + arw.user.foreach { _ := a_state } + // !!! Mix R-W stall here in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready) out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true)) @@ -204,11 +154,11 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P val r_wins = out.r.valid || r_holds_d out.r.ready := in.d.ready - out_b.ready := in.d.ready && !r_wins - in.d.valid := Mux(r_wins, out.r.valid, out_b.valid) + out.b.ready := in.d.ready && !r_wins + in.d.valid := Mux(r_wins, out.r.valid, out.b.valid) val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY - val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY + val b_error = out.b.bits.resp =/= AXI4Parameters.RESP_OKAY val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error) val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error) @@ -226,9 +176,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P object TLToAXI4 { - // applied to the TL source node; y.node := TLToAXI4(idBits)(x.node) - def apply(idBits: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { - val axi4 = LazyModule(new TLToAXI4(idBits, combinational)) + // applied to the TL source node; y.node := TLToAXI4(beatBytes)(x.node) + def apply(beatBytes: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val axi4 = LazyModule(new TLToAXI4(beatBytes, combinational)) axi4.node := x axi4.node }