From 0fe625c52f120a060b00b08ce004d3a663c240a5 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 1 Jun 2017 14:59:53 -0700 Subject: [PATCH 1/4] diplomacy: improve PMA circuit QoR --- src/main/scala/rocket/TLB.scala | 2 +- src/main/scala/uncore/apb/Parameters.scala | 4 ---- src/main/scala/uncore/axi4/Fragmenter.scala | 10 ++++---- src/main/scala/uncore/axi4/Parameters.scala | 4 ---- .../uncore/tilelink2/AtomicAutomata.scala | 15 ++++-------- .../scala/uncore/tilelink2/Parameters.scala | 24 +++++++++++++------ 6 files changed, 29 insertions(+), 30 deletions(-) diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index 51903d48..43d64a80 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -110,7 +110,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters pmp.io.prv := Mux(Bool(usingVM) && (do_refill || io.req.bits.passthrough /* PTW */), PRV.S, priv) val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_) def fastCheck(member: TLManagerParameters => Boolean) = - legal_address && Mux1H(edge.manager.findFast(mpu_physaddr), edge.manager.managers.map(m => Bool(member(m)))) + legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => Bool(b)) val cacheable = fastCheck(_.supportsAcquireB) val prot_r = fastCheck(_.supportsGet) && pmp.io.r val prot_w = fastCheck(_.supportsPutFull) && pmp.io.w diff --git a/src/main/scala/uncore/apb/Parameters.scala b/src/main/scala/uncore/apb/Parameters.scala index a3cb4d72..9cde7576 100644 --- a/src/main/scala/uncore/apb/Parameters.scala +++ b/src/main/scala/uncore/apb/Parameters.scala @@ -33,10 +33,6 @@ case class APBSlavePortParameters( val maxAddress = slaves.map(_.maxAddress).max - lazy val routingMask = AddressDecoder(slaves.map(_.address)) - def findSafe(address: UInt) = Vec(slaves.map(_.address.map(_.contains(address)).reduce(_ || _))) - def findFast(address: UInt) = Vec(slaves.map(_.address.map(_.widen(~routingMask)).distinct.map(_.contains(address)).reduce(_ || _))) - // Require disjoint ranges for addresses slaves.combinations(2).foreach { case Seq(x,y) => x.address.foreach { a => y.address.foreach { b => diff --git a/src/main/scala/uncore/axi4/Fragmenter.scala b/src/main/scala/uncore/axi4/Fragmenter.scala index d29f553d..43aa38a9 100644 --- a/src/main/scala/uncore/axi4/Fragmenter.scala +++ b/src/main/scala/uncore/axi4/Fragmenter.scala @@ -72,9 +72,12 @@ class AXI4Fragmenter()(implicit p: Parameters) extends LazyModule val hi = addr >> lgBytes val alignment = hi(AXI4Parameters.lenBits-1,0) - val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1 - val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s)))) - val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0)) + // We don't care about illegal addresses; bursts or no bursts... whatever circuit is simpler (AXI4ToTL will fix it) + val sizes1 = (supportedSizes1 zip slave.slaves.map(_.address)).filter(_._1 >= 0).groupBy(_._1).mapValues(_.flatMap(_._2)) + val reductionMask = AddressDecoder(sizes1.values.toList) + val support1 = Mux1H(sizes1.toList.map { case (v, a) => // maximum supported size-1 based on target address + (AddressSet.unify(a.map(_.widen(~reductionMask)).distinct).map(_.contains(addr)).reduce(_||_), UInt(v)) + }) /* We need to compute the largest transfer allowed by the AXI len. * len+1 is the number of beats to execute. @@ -86,7 +89,6 @@ class AXI4Fragmenter()(implicit p: Parameters) extends LazyModule val wipeHigh = ~leftOR(~len) // clear all bits in position >= a cleared bit val remain1 = fillLow | wipeHigh // MSB(a.len+1)-1 val align1 = ~leftOR(alignment) // transfer size limited by address alignment - val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits // Things that cause us to degenerate to a single beat diff --git a/src/main/scala/uncore/axi4/Parameters.scala b/src/main/scala/uncore/axi4/Parameters.scala index 6b4a0c2e..d913f878 100644 --- a/src/main/scala/uncore/axi4/Parameters.scala +++ b/src/main/scala/uncore/axi4/Parameters.scala @@ -49,10 +49,6 @@ case class AXI4SlavePortParameters( require (maxTransfer <= limit, s"maxTransfer ($maxTransfer) cannot be larger than $limit on a $beatBytes*8 width bus") - lazy val routingMask = AddressDecoder(slaves.map(_.address)) - def findSafe(address: UInt) = Vec(slaves.map(_.address.map(_.contains(address)).reduce(_ || _))) - def findFast(address: UInt) = Vec(slaves.map(_.address.map(_.widen(~routingMask)).distinct.map(_.contains(address)).reduce(_ || _))) - // Require disjoint ranges for addresses slaves.combinations(2).foreach { case Seq(x,y) => x.address.foreach { a => y.address.foreach { b => diff --git a/src/main/scala/uncore/tilelink2/AtomicAutomata.scala b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala index 4c4dbd78..02c3b1a5 100644 --- a/src/main/scala/uncore/tilelink2/AtomicAutomata.scala +++ b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala @@ -57,7 +57,7 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc // Don't overprovision the CAM val camSize = min(domainsNeedingHelp.size, concurrency) // Compact the fifoIds to only those we care about - val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0))) + def camFifoId(m: TLManagerParameters) = m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0) // CAM entry state machine val FREE = UInt(0) // unused waiting on Atomic from A @@ -65,11 +65,6 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc val AMO = UInt(2) // AccessDataAck sent up D waiting for A availability val ACK = UInt(1) // Put sent down A waiting for PutAck from D - def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) = - if (!passthrough) Bool(false) else - if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else - Mux1H(select, x.map(_.containsLg(lgSize))) - val params = TLAtomicAutomata.CAMParams(out.a.bits.params, domainsNeedingHelp.size) // Do we need to do anything at all? if (camSize > 0) { @@ -85,10 +80,10 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries // Can the manager already handle this message? + val a_address = edgeIn.address(in.a.bits) val a_size = edgeIn.size(in.a.bits) - val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits)) - val a_canLogical = helper(a_select, managers.map(_.supportsLogical), a_size) - val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size) + val a_canLogical = Bool(passthrough) && edgeOut.manager.supportsLogicalFast (a_address, a_size) + val a_canArithmetic = Bool(passthrough) && edgeOut.manager.supportsArithmeticFast(a_address, a_size) val a_isLogical = in.a.bits.opcode === TLMessages.LogicalData val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true))) @@ -103,7 +98,7 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc val a_d = a_cam_d.data // Does the A request conflict with an inflight AMO? - val a_fifoId = Mux1H(a_select, camFifoIds) + val a_fifoId = edgeOut.manager.fastProperty(a_address, camFifoId _, (i:Int) => UInt(i)) val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_) // (Where) are we are allocating in the CAM? diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala index aec47951..dae24e06 100644 --- a/src/main/scala/uncore/tilelink2/Parameters.scala +++ b/src/main/scala/uncore/tilelink2/Parameters.scala @@ -101,20 +101,30 @@ case class TLManagerPortParameters( val anySupportPutPartial = managers.map(!_.supportsPutPartial.none).reduce(_ || _) val anySupportHint = managers.map(!_.supportsHint.none) .reduce(_ || _) - // Which bits suffice to distinguish between all managers - lazy val routingMask = AddressDecoder(managers.map(_.address)) - // These return Option[TLManagerParameters] for your convenience def find(address: BigInt) = managers.find(_.address.exists(_.contains(address))) // The safe version will check the entire address def findSafe(address: UInt) = Vec(managers.map(_.address.map(_.contains(address)).reduce(_ || _))) - // The fast version assumes the address is valid - def findFast(address: UInt) = Vec(managers.map(_.address.map(_.widen(~routingMask)).distinct.map(_.contains(address)).reduce(_ || _))) + // The fast version assumes the address is valid (you probably want fastProperty instead of this function) + def findFast(address: UInt) = { + val routingMask = AddressDecoder(managers.map(_.address)) + Vec(managers.map(_.address.map(_.widen(~routingMask)).distinct.map(_.contains(address)).reduce(_ || _))) + } + + // Compute the simplest AddressSets that decide a key + def fastPropertyGroup[K](p: TLManagerParameters => K): Map[K, Seq[AddressSet]] = { + val groups = managers.map(m => (p(m), m.address)).groupBy(_._1).mapValues(_.flatMap(_._2)) + val reductionMask = AddressDecoder(groups.values.toList) + groups.mapValues(seq => AddressSet.unify(seq.map(_.widen(~reductionMask)).distinct)) + } + // Select a property + def fastProperty[K, D <: Data](address: UInt, p: TLManagerParameters => K, d: K => D): D = + Mux1H(fastPropertyGroup(p).map { case (v, a) => (a.map(_.contains(address)).reduce(_||_), d(v)) }) // Note: returns the actual fifoId + 1 or 0 if None - def findFifoIdFast(address: UInt) = Mux1H(findFast(address), managers.map(m => UInt(m.fifoId.map(_+1).getOrElse(0)))) - def hasFifoIdFast(address: UInt) = Mux1H(findFast(address), managers.map(m => Bool(m.fifoId.isDefined))) + def findFifoIdFast(address: UInt) = fastProperty(address, _.fifoId.map(_+1).getOrElse(0), (i:Int) => UInt(i)) + def hasFifoIdFast(address: UInt) = fastProperty(address, _.fifoId.isDefined, (b:Boolean) => Bool(b)) // Does this Port manage this ID/address? def containsSafe(address: UInt) = findSafe(address).reduce(_ || _) From 5994714970c7c0ee5687e6c4c6701b5ad4372bd7 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 1 Jun 2017 15:16:01 -0700 Subject: [PATCH 2/4] diplomacy: move manager unification to meta-data only Now that PMA circuits already perform address unification, there is no QoR gained by throwing away the true and complete diplomatic address+node information. Defer the unification to pretty printing the DTS address map only. --- src/main/scala/coreplex/CoreplexNetwork.scala | 6 ++++-- src/main/scala/uncore/tilelink2/Xbar.scala | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/scala/coreplex/CoreplexNetwork.scala b/src/main/scala/coreplex/CoreplexNetwork.scala index 1da55214..e14858ae 100644 --- a/src/main/scala/coreplex/CoreplexNetwork.scala +++ b/src/main/scala/coreplex/CoreplexNetwork.scala @@ -82,8 +82,10 @@ trait CoreplexNetwork extends HasCoreplexParameters { } } + // Make topManagers an Option[] so as to avoid LM name reflection evaluating it... + lazy val topManagers = Some(ManagerUnification(l1tol2.node.edgesIn.headOption.map(_.manager.managers).getOrElse(Nil))) ResourceBinding { - val managers = l1tol2.node.edgesIn.headOption.map(_.manager.managers).getOrElse(Nil) + val managers = topManagers.get val max = managers.flatMap(_.address).map(_.max).max val width = ResourceInt((log2Ceil(max)+31) / 32) Resource(root, "width").bind(width) @@ -113,7 +115,7 @@ trait CoreplexNetworkModule extends HasCoreplexParameters { val io: CoreplexNetworkBundle println("Generated Address Map") - val ranges = outer.l1tol2.node.edgesIn(0).manager.managers.flatMap { manager => + val ranges = outer.topManagers.get.flatMap { manager => val prot = (if (manager.supportsGet) "R" else "") + (if (manager.supportsPutFull) "W" else "") + (if (manager.executable) "X" else "") + diff --git a/src/main/scala/uncore/tilelink2/Xbar.scala b/src/main/scala/uncore/tilelink2/Xbar.scala index f59c6ba5..baa3a969 100644 --- a/src/main/scala/uncore/tilelink2/Xbar.scala +++ b/src/main/scala/uncore/tilelink2/Xbar.scala @@ -57,14 +57,14 @@ class TLXbar(policy: TLArbiter.Policy = TLArbiter.lowestIndexFirst)(implicit p: seq(0).copy( minLatency = seq.map(_.minLatency).min, endSinkId = outputIdRanges.map(_.map(_.end).getOrElse(0)).max, - managers = ManagerUnification(seq.flatMap { port => + managers = seq.flatMap { port => require (port.beatBytes == seq(0).beatBytes, s"Xbar data widths don't match: ${port.managers.map(_.name)} has ${port.beatBytes}B vs ${seq(0).managers.map(_.name)} has ${seq(0).beatBytes}B") val fifoIdMapper = fifoIdFactory() port.managers map { manager => manager.copy( fifoId = manager.fifoId.map(fifoIdMapper(_)) )} - }) + } ) }) From 1f531b1593a4eefc86bc3682a8aaddadefe81390 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 1 Jun 2017 15:26:04 -0700 Subject: [PATCH 3/4] tilelink2: improve round robin arbiter QoR --- src/main/scala/uncore/tilelink2/Arbiter.scala | 4 ++-- src/main/scala/uncore/tilelink2/package.scala | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/Arbiter.scala b/src/main/scala/uncore/tilelink2/Arbiter.scala index 0c020937..a9a56009 100644 --- a/src/main/scala/uncore/tilelink2/Arbiter.scala +++ b/src/main/scala/uncore/tilelink2/Arbiter.scala @@ -13,12 +13,12 @@ object TLArbiter val lowestIndexFirst: Policy = (width, valids, select) => ~(leftOR(valids) << 1)(width-1, 0) - val roundRobin: Policy = (width, valids, select) => { + val roundRobin: Policy = (width, valids, select) => if (width == 1) UInt(1, width=1) else { val valid = valids(width-1, 0) assert (valid === valids) val mask = RegInit(~UInt(0, width=width)) val filter = Cat(valid & ~mask, valid) - val unready = (rightOR(filter, width*2) >> 1) | (mask << width) // last right shift unneeded + val unready = (rightOR(filter, width*2, width) >> 1) | (mask << width) val readys = ~((unready >> width) & unready(width-1, 0)) when (select && valid.orR) { mask := leftOR(readys & valid, width) diff --git a/src/main/scala/uncore/tilelink2/package.scala b/src/main/scala/uncore/tilelink2/package.scala index 7370b463..3a6f65ca 100644 --- a/src/main/scala/uncore/tilelink2/package.scala +++ b/src/main/scala/uncore/tilelink2/package.scala @@ -5,6 +5,7 @@ package uncore import Chisel._ import diplomacy._ import util._ +import scala.math.min package object tilelink2 { @@ -19,18 +20,20 @@ package object tilelink2 def UIntToOH1(x: UInt, width: Int) = ~(SInt(-1, width=width).asUInt << x)(width-1, 0) def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None // Fill 1s from low bits to high bits - def leftOR(x: UInt): UInt = leftOR(x, x.getWidth) - def leftOR(x: UInt, w: Integer): UInt = { + def leftOR(x: UInt): UInt = leftOR(x, x.getWidth, x.getWidth) + def leftOR(x: UInt, width: Integer, cap: Integer = 999999): UInt = { + val stop = min(width, cap) def helper(s: Int, x: UInt): UInt = - if (s >= w) x else helper(s+s, x | (x << s)(w-1,0)) - helper(1, x)(w-1, 0) + if (s >= stop) x else helper(s+s, x | (x << s)(width-1,0)) + helper(1, x)(width-1, 0) } // Fill 1s form high bits to low bits - def rightOR(x: UInt): UInt = rightOR(x, x.getWidth) - def rightOR(x: UInt, w: Integer): UInt = { + def rightOR(x: UInt): UInt = rightOR(x, x.getWidth, x.getWidth) + def rightOR(x: UInt, width: Integer, cap: Integer = 999999): UInt = { + val stop = min(width, cap) def helper(s: Int, x: UInt): UInt = - if (s >= w) x else helper(s+s, x | (x >> s)) - helper(1, x)(w-1, 0) + if (s >= stop) x else helper(s+s, x | (x >> s)) + helper(1, x)(width-1, 0) } // This gets used everywhere, so make the smallest circuit possible ... // Given an address and size, create a mask of beatBytes size From eb14329c63deedfb13adc56c9dd16a62ea6921ff Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 1 Jun 2017 15:29:45 -0700 Subject: [PATCH 4/4] tilelink2: only combine managers of the same resources --- src/main/scala/uncore/tilelink2/Parameters.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala index dae24e06..f10f098e 100644 --- a/src/main/scala/uncore/tilelink2/Parameters.scala +++ b/src/main/scala/uncore/tilelink2/Parameters.scala @@ -355,9 +355,9 @@ object ManagerUnification def apply(managers: Seq[TLManagerParameters]) = { // To be unified, devices must agree on all of these terms case class TLManagerKey( + resources: Seq[Resource], regionType: RegionType.T, executable: Boolean, - lastNode: BaseNode, supportsAcquireT: TransferSizes, supportsAcquireB: TransferSizes, supportsArithmetic: TransferSizes, @@ -367,9 +367,9 @@ object ManagerUnification supportsPutPartial: TransferSizes, supportsHint: TransferSizes) def key(x: TLManagerParameters) = TLManagerKey( + resources = x.resources, regionType = x.regionType, executable = x.executable, - lastNode = x.nodePath.last, supportsAcquireT = x.supportsAcquireT, supportsAcquireB = x.supportsAcquireB, supportsArithmetic = x.supportsArithmetic,