From feecfb53edb4065c8648edcdda9fc45d3fcf04e5 Mon Sep 17 00:00:00 2001
From: "Wesley W. Terpstra"
Date: Fri, 23 Jun 2017 16:01:31 -0700
Subject: [PATCH 1/2] axi4: Deinterleaver need not make a Q for an unused AXI id

---
Reviewer notes (below the '---' separator, so not part of the commit message):
- Every AXI id below endId still gets a slot in `qs`, so one-hot and Vec
  indexing are unchanged; ids with no master, or whose master has no positive
  maxFlight, get a bare QueueIO wire instead of a Queue module and contribute
  a constant false to `pending`.
- Line breaks and indentation inside the hunks were rebuilt from a
  whitespace-collapsed copy of this patch. If a hunk fails on whitespace,
  apply with `git am --ignore-whitespace`.

 .../scala/uncore/axi4/Deinterleaver.scala     | 58 +++++++++++-------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/src/main/scala/uncore/axi4/Deinterleaver.scala b/src/main/scala/uncore/axi4/Deinterleaver.scala
index dcdf68b4..3778c91d 100644
--- a/src/main/scala/uncore/axi4/Deinterleaver.scala
+++ b/src/main/scala/uncore/axi4/Deinterleaver.scala
@@ -28,7 +28,7 @@ class AXI4Deinterleaver(maxReadBytes: Int)(implicit p: Parameters) extends LazyM
     }
 
     ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-      val queues = edgeOut.master.endId
+      val endId = edgeOut.master.endId
       val beatBytes = edgeOut.slave.beatBytes
       val beats = (maxReadBytes+beatBytes-1) / beatBytes
 
@@ -42,31 +42,43 @@ class AXI4Deinterleaver(maxReadBytes: Int)(implicit p: Parameters) extends LazyM
         // Nothing to do if only single-beat R
         in.r <> out.r
       } else {
-        // Buffer R response
-        val count = RegInit(Vec.fill(queues) { UInt(0, width=log2Ceil(beats+1)) })
-        val qs = Seq.fill(queues) { Module(new Queue(out.r.bits, beats)) }
+        // Queues to buffer R responses
+        val qs = Seq.tabulate(endId) { i =>
+          val depth = edgeOut.master.masters.find(_.id.contains(i)).flatMap(_.maxFlight).getOrElse(0)
+          if (depth > 0) {
+            Module(new Queue(out.r.bits, beats)).io
+          } else {
+            Wire(new QueueIO(out.r.bits, beats))
+          }
+        }
 
         // Which ID is being enqueued and dequeued?
         val locked = RegInit(Bool(false))
-        val deq_id = Reg(UInt(width=log2Up(queues)))
+        val deq_id = Reg(UInt(width=log2Up(endId)))
         val enq_id = out.r.bits.id
-        val deq_OH = UIntToOH(deq_id, queues)
-        val enq_OH = UIntToOH(enq_id, queues)
+        val deq_OH = UIntToOH(deq_id, endId)
+        val enq_OH = UIntToOH(enq_id, endId)
 
         // Track the number of completely received bursts per FIFO id
-        val next_count = Wire(count)
-        ((count zip next_count) zip (enq_OH.toBools zip deq_OH.toBools)) foreach { case ((p, n), (i, d)) =>
-          val inc = i && out.r.fire() && out.r.bits.last
-          val dec = d && in.r.fire() && in.r.bits.last
-          n := p + inc.asUInt - dec.asUInt
-          // Bounds checking
-          assert (!dec || p =/= UInt(0))
-          assert (!inc || p =/= UInt(beats))
-        }
-        count := next_count
+        val pending = Cat(Seq.tabulate(endId) { i =>
+          val depth = edgeOut.master.masters.find(_.id.contains(i)).flatMap(_.maxFlight).getOrElse(0)
+          if (depth == 0) {
+            Bool(false)
+          } else {
+            val count = RegInit(UInt(0, width=log2Ceil(beats+1)))
+            val next = Wire(count)
+            val inc = enq_OH(i) && out.r.fire() && out.r.bits.last
+            val dec = deq_OH(i) && in.r.fire() && in.r.bits.last
+            next := count + inc.asUInt - dec.asUInt
+            count := next
+            // Bounds checking
+            assert (!dec || count =/= UInt(0))
+            assert (!inc || count =/= UInt(beats))
+            next =/= UInt(0)
+          }
+        }.reverse)
 
         // Select which Q will we start sending next cycle
-        val pending = Cat(next_count.map(_ =/= UInt(0)).reverse)
         val winner  = pending & ~(leftOR(pending) << 1)
         when (!locked || (in.r.fire() && in.r.bits.last)) {
           locked := pending.orR
@@ -75,16 +87,16 @@ class AXI4Deinterleaver(maxReadBytes: Int)(implicit p: Parameters) extends LazyM
 
         // Transmit the selected burst to inner
         in.r.valid := locked
-        in.r.bits  := Vec(qs.map(_.io.deq.bits))(deq_id)
+        in.r.bits  := Vec(qs.map(_.deq.bits))(deq_id)
         (deq_OH.toBools zip qs) foreach { case (s, q) =>
-          q.io.deq.ready := s && in.r.fire()
+          q.deq.ready := s && in.r.fire()
         }
 
         // Feed response into matching Q
-        out.r.ready := Vec(qs.map(_.io.enq.ready))(enq_id)
+        out.r.ready := Vec(qs.map(_.enq.ready))(enq_id)
         (enq_OH.toBools zip qs) foreach { case (s, q) =>
-          q.io.enq.valid := s && out.r.valid
-          q.io.enq.bits  := out.r.bits
+          q.enq.valid := s && out.r.valid
+          q.enq.bits  := out.r.bits
         }
       }
     }

From 8ca6c109940bf51558e9dece3c3e945a58477bbf Mon Sep 17 00:00:00 2001
From: "Wesley W. Terpstra"
Date: Fri, 23 Jun 2017 16:59:55 -0700
Subject: [PATCH 2/2] tilelink2: ToAXI4 can strip off low source ID bits

Some TL converters place extra meta data in the low bits of source.
Examples include the TLFragmenter and CacheCork. This new argument makes
it possible to save AXI4 ID space by reclaiming those bits upon
conversion.
---
Reviewer notes (below the '---' separator, so not part of the commit message):
- The require message now carries the `s` interpolator, so ${c.name} and
  ${c.sourceId} are substituted instead of being printed literally. It also
  says "unaligned": the check fails exactly when sourceId.start or
  sourceId.end is not a multiple of (1 << stripBits).
- stripBits defaults to 0 everywhere it is introduced, so existing callers of
  TLToAXI4Node, TLToAXI4 and TLToAXI4.apply are unaffected.
- Line breaks and indentation inside the hunks were rebuilt from a
  whitespace-collapsed copy of this patch. If a hunk fails on whitespace,
  apply with `git am --ignore-whitespace`.

 src/main/scala/uncore/tilelink2/ToAXI4.scala | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala
index e90e8623..a3c36445 100644
--- a/src/main/scala/uncore/tilelink2/ToAXI4.scala
+++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala
@@ -10,17 +10,22 @@ import util.ElaborationArtefacts
 import uncore.axi4._
 import scala.math.{min, max}
 
-case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
+case class TLToAXI4Node(beatBytes: Int, stripBits: Int = 0) extends MixedAdapterNode(TLImp, AXI4Imp)(
   dFn = { p =>
+    p.clients.foreach { c =>
+      require (c.sourceId.start % (1 << stripBits) == 0 &&
+               c.sourceId.end   % (1 << stripBits) == 0,
+               s"Cannot strip bits of unaligned client ${c.name}: ${c.sourceId}")
+    }
     val clients = p.clients.sortWith(TLToAXI4.sortByType _)
-    val idSize = clients.map { c => if (c.requestFifo) 1 else c.sourceId.size }
+    val idSize = clients.map { c => if (c.requestFifo) 1 else (c.sourceId.size >> stripBits) }
     val idStart = idSize.scanLeft(0)(_+_).init
     val masters = ((idStart zip idSize) zip clients) map { case ((start, size), c) =>
       AXI4MasterParameters(
         name      = c.name,
         id        = IdRange(start, start+size),
         aligned   = true,
-        maxFlight = Some(if (c.requestFifo) c.sourceId.size else 1),
+        maxFlight = Some(if (c.requestFifo) c.sourceId.size else (1 << stripBits)),
         nodePath  = c.nodePath)
     }
     AXI4MasterPortParameters(
@@ -43,9 +48,9 @@ case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp)
       minLatency = p.minLatency)
   })
 
-class TLToAXI4(beatBytes: Int, combinational: Boolean = true, adapterName: Option[String] = None)(implicit p: Parameters) extends LazyModule
+class TLToAXI4(beatBytes: Int, combinational: Boolean = true, adapterName: Option[String] = None, stripBits: Int = 0)(implicit p: Parameters) extends LazyModule
 {
-  val node = TLToAXI4Node(beatBytes)
+  val node = TLToAXI4Node(beatBytes, stripBits)
 
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {
@@ -71,7 +76,7 @@ class TLToAXI4(beatBytes: Int, combinational: Boolean = true, adapterName: Optio
       var idCount = Array.fill(edgeOut.master.endId) { 0 }
       val maps = (edgeIn.client.clients.sortWith(TLToAXI4.sortByType) zip edgeOut.master.masters) flatMap { case (c, m) =>
         for (i <- 0 until c.sourceId.size) {
-          val id = m.id.start + (if (c.requestFifo) 0 else i)
+          val id = m.id.start + (if (c.requestFifo) 0 else (i >> stripBits))
           sourceStall(c.sourceId.start + i) := idStall(id)
           sourceTable(c.sourceId.start + i) := UInt(id)
           idCount(id) = idCount(id) + 1
@@ -221,8 +226,8 @@ class TLToAXI4(beatBytes: Int, combinational: Boolean = true, adapterName: Optio
 object TLToAXI4
 {
   // applied to the TL source node; y.node := TLToAXI4(beatBytes)(x.node)
-  def apply(beatBytes: Int, combinational: Boolean = true, adapterName: Option[String] = None)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
-    val axi4 = LazyModule(new TLToAXI4(beatBytes, combinational, adapterName))
+  def apply(beatBytes: Int, combinational: Boolean = true, adapterName: Option[String] = None, stripBits: Int = 0)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
+    val axi4 = LazyModule(new TLToAXI4(beatBytes, combinational, adapterName, stripBits))
     axi4.node := x
     axi4.node
   }