Merge pull request #536 from ucb-bar/diplomacy-star-nodes

diplomacy: add :*= and :=* to support flexible # of edges
2017-01-30 11:19:33 -08:00
parent 03f2fe02ac f7f52cc722
commit b567a2a356
42 changed files with 1948 additions and 1895 deletions
--- a/src/main/scala/coreplex/Coreplex.scala
+++ b/src/main/scala/coreplex/Coreplex.scala
@ -11,17 +11,14 @@ import util._
 class DefaultCoreplex(implicit p: Parameters) extends BaseCoreplex
    with CoreplexRISCVPlatform
    with HasL2MasterPort
    with HasRocketTiles {
  override lazy val module = new DefaultCoreplexModule(this, () => new DefaultCoreplexBundle(this))
 }
 class DefaultCoreplexBundle[+L <: DefaultCoreplex](_outer: L) extends BaseCoreplexBundle(_outer)
    with CoreplexRISCVPlatformBundle
    with HasL2MasterPortBundle
    with HasRocketTilesBundle
 class DefaultCoreplexModule[+L <: DefaultCoreplex, +B <: DefaultCoreplexBundle[L]](_outer: L, _io: () => B) extends BaseCoreplexModule(_outer, _io)
    with CoreplexRISCVPlatformModule
    with HasL2MasterPortModule
    with HasRocketTilesModule
--- a/src/main/scala/coreplex/CoreplexNetwork.scala
+++ b/src/main/scala/coreplex/CoreplexNetwork.scala
@ -24,9 +24,13 @@ trait CoreplexNetwork extends HasCoreplexParameters {
  val mmio = TLOutputNode()
  val mmioInt = IntInputNode()
  val l2in = TLInputNode()
  intBar.intnode := mmioInt
  // Allows a variable number of inputs from outside to the Xbar
  l1tol2.node :=* l2in
  cbus.node :=
    TLBuffer()(
    TLAtomicAutomata(arithmetic = true)( // disable once TLB uses TL2 metadata
@ -43,6 +47,7 @@ trait CoreplexNetworkBundle extends HasCoreplexParameters {
  val mmio = outer.mmio.bundleOut
  val interrupts = outer.mmioInt.bundleIn
  val l2in = outer.l2in.bundleIn
 }
 trait CoreplexNetworkModule extends HasCoreplexParameters {
@ -93,21 +98,3 @@ trait BankedL2CoherenceManagersModule extends CoreplexNetworkModule {
  val outer: BankedL2CoherenceManagers
  val io: BankedL2CoherenceManagersBundle
 }
 /////
 trait HasL2MasterPort extends CoreplexNetwork {
  val module: HasL2MasterPortModule
  val l2in = TLInputNode()
  l1tol2.node := TLBuffer()(l2in)
 }
 trait HasL2MasterPortBundle extends CoreplexNetworkBundle {
  val outer: HasL2MasterPort
  val l2in = outer.l2in.bundleIn
 }
 trait HasL2MasterPortModule extends CoreplexNetworkModule {
  val outer: HasL2MasterPort
  val io: HasL2MasterPortBundle
 }
--- a/src/main/scala/diplomacy/Nodes.scala
+++ b/src/main/scala/diplomacy/Nodes.scala
@ -16,7 +16,9 @@ trait InwardNodeImp[DI, UI, EI, BI <: Data]
  def edgeI(pd: DI, pu: UI): EI
  def bundleI(ei: Seq[EI]): Vec[BI]
  def colour: String
-  def connect(bo: => BI, bi: => BI, e: => EI)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit)
+  def connect(bindings: () => Seq[(EI, BI, BI)])(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
    (None, () => bindings().foreach { case (_, i, o) => i <> o })
  }
  // optional methods to track node graph
  def mixI(pu: UI, node: InwardNode[DI, UI, BI]): UI = pu // insert node into parameters
@ -71,8 +73,17 @@ trait InwardNodeHandle[DI, UI, BI <: Data]
  val inward: InwardNode[DI, UI, BI]
  def := (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] =
    inward.:=(h)(p, sourceInfo)
  def :*= (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] =
    inward.:*=(h)(p, sourceInfo)
  def :=* (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] =
    inward.:=*(h)(p, sourceInfo)
 }
 // Tag stored with every pushed edge; it decides how many ports that edge stands for
 // when a node's port mappings are computed.
 sealed trait NodeBinding
 // The edge always stands for exactly one port.
 case object BIND_ONCE  extends NodeBinding
 // The edge stands for as many ports as the node on the other end resolved for its star
 // (read from that node's iStar / oStar).
 case object BIND_QUERY extends NodeBinding
 // The edge stands for as many ports as this node itself resolves via resolveStarO / resolveStarI.
 case object BIND_STAR  extends NodeBinding
 trait InwardNode[DI, UI, BI <: Data] extends BaseNode with InwardNodeHandle[DI, UI, BI]
 {
  val inward = this
@ -81,21 +92,22 @@ trait InwardNode[DI, UI, BI <: Data] extends BaseNode with InwardNodeHandle[DI,
  require (!numPI.isEmpty, s"No number of inputs would be acceptable to ${name}${lazyModule.line}")
  require (numPI.start >= 0, s"${name} accepts a negative number of inputs${lazyModule.line}")
-  private val accPI = ListBuffer[(Int, OutwardNode[DI, UI, BI])]()
+  private val accPI = ListBuffer[(Int, OutwardNode[DI, UI, BI], NodeBinding)]()
  private var iRealized = false
  protected[diplomacy] def iPushed = accPI.size
-  protected[diplomacy] def iPush(index: Int, node: OutwardNode[DI, UI, BI])(implicit sourceInfo: SourceInfo) {
+  protected[diplomacy] def iPush(index: Int, node: OutwardNode[DI, UI, BI], binding: NodeBinding)(implicit sourceInfo: SourceInfo) {
    val info = sourceLine(sourceInfo, " at ", "")
    val noIs = numPI.size == 1 && numPI.contains(0)
    require (!noIs, s"${name}${lazyModule.line} was incorrectly connected as a sink" + info)
    require (!iRealized, s"${name}${lazyModule.line} was incorrectly connected as a sink after it's .module was used" + info)
-    accPI += ((index, node))
+    accPI += ((index, node, binding))
  }
-  private def reqI() = require(numPI.contains(accPI.size), s"${name} has ${accPI.size} inputs, expected ${numPI}${lazyModule.line}")
+  protected[diplomacy] lazy val iBindings = { iRealized = true; accPI.result() }
  protected[diplomacy] lazy val iPorts = { iRealized = true; reqI(); accPI.result() }
  protected[diplomacy] val iStar: Int
  protected[diplomacy] val iPortMapping: Seq[(Int, Int)]
  protected[diplomacy] val iParams: Seq[UI]
  val bundleIn: Vec[BI]
 }
@ -113,48 +125,84 @@ trait OutwardNode[DO, UO, BO <: Data] extends BaseNode with OutwardNodeHandle[DO
  require (!numPO.isEmpty, s"No number of outputs would be acceptable to ${name}${lazyModule.line}")
  require (numPO.start >= 0, s"${name} accepts a negative number of outputs${lazyModule.line}")
-  private val accPO = ListBuffer[(Int, InwardNode [DO, UO, BO])]()
+  private val accPO = ListBuffer[(Int, InwardNode [DO, UO, BO], NodeBinding)]()
  private var oRealized = false
  protected[diplomacy] def oPushed = accPO.size
-  protected[diplomacy] def oPush(index: Int, node: InwardNode [DO, UO, BO])(implicit sourceInfo: SourceInfo) {
+  protected[diplomacy] def oPush(index: Int, node: InwardNode [DO, UO, BO], binding: NodeBinding)(implicit sourceInfo: SourceInfo) {
    val info = sourceLine(sourceInfo, " at ", "")
    val noOs = numPO.size == 1 && numPO.contains(0)
    require (!noOs, s"${name}${lazyModule.line} was incorrectly connected as a source" + info)
    require (!oRealized, s"${name}${lazyModule.line} was incorrectly connected as a source after it's .module was used" + info)
-    accPO += ((index, node))
+    accPO += ((index, node, binding))
  }
-  private def reqO() = require(numPO.contains(accPO.size), s"${name} has ${accPO.size} outputs, expected ${numPO}${lazyModule.line}")
+  protected[diplomacy] lazy val oBindings = { oRealized = true; accPO.result() }
  protected[diplomacy] lazy val oPorts = { oRealized = true; reqO(); accPO.result() }
  protected[diplomacy] val oStar: Int
  protected[diplomacy] val oPortMapping: Seq[(Int, Int)]
  protected[diplomacy] val oParams: Seq[DO]
  val bundleOut: Vec[BO]
 }
-class MixedNode[DI, UI, EI, BI <: Data, DO, UO, EO, BO <: Data](
+abstract class MixedNode[DI, UI, EI, BI <: Data, DO, UO, EO, BO <: Data](
  inner: InwardNodeImp [DI, UI, EI, BI],
  outer: OutwardNodeImp[DO, UO, EO, BO])(
  private val dFn: (Int, Seq[DI]) => Seq[DO],
  private val uFn: (Int, Seq[UO]) => Seq[UI],
  protected[diplomacy] val numPO: Range.Inclusive,
  protected[diplomacy] val numPI: Range.Inclusive)
  extends BaseNode with InwardNode[DI, UI, BI] with OutwardNode[DO, UO, BO]
 {
-  // meta-data for printing the node graph
+  protected[diplomacy] def resolveStarO(i: Int, o: Int): Int
-  protected[diplomacy] def colour  = inner.colour
+  protected[diplomacy] def resolveStarI(i: Int, o: Int): Int
-  protected[diplomacy] def outputs = oPorts.map(_._2) zip edgesOut.map(e => outer.labelO(e))
+  protected[diplomacy] def mapParamsD(n: Int, p: Seq[DI]): Seq[DO]
-  protected[diplomacy] def inputs  = iPorts.map(_._2) zip edgesIn .map(e => inner.labelI(e))
+  protected[diplomacy] def mapParamsU(n: Int, p: Seq[UO]): Seq[UI]
  // Resolves how many ports every pushed binding occupies on each side of this node.
  // Yields, per binding, a (start, end) pair taken from consecutive running totals, plus the
  // star counts oStar and iStar that were resolved along the way.
  protected[diplomacy] lazy val (oPortMapping, iPortMapping, oStar, iStar) = {
    // Number of bindings on each side whose size this node must decide itself.
    val oStars = oBindings.filter { case (_,_,b) => b == BIND_STAR }.size
    val iStars = iBindings.filter { case (_,_,b) => b == BIND_STAR }.size
    // At most one star binding in total, so at most one of oStar / iStar below is resolved.
    require (oStars + iStars <= 1, s"${name} appears beside a :*= ${iStars} times and a :=* ${oStars} times; at most once is allowed${lazyModule.line}")
    // Outward ports already fixed: a star binding counts as 0 for now, while a BIND_QUERY
    // binding takes whatever the connected node resolved as its iStar.
    val oKnown = oBindings.map { case (_, n, b) => b match {
      case BIND_ONCE  => 1
      case BIND_QUERY => n.iStar
      case BIND_STAR  => 0 }}.foldLeft(0)(_+_)
    // Same tally for the inward side, reading the connected node's oStar.
    val iKnown = iBindings.map { case (_, n, b) => b match {
      case BIND_ONCE  => 1
      case BIND_QUERY => n.oStar
      case BIND_STAR  => 0 }}.foldLeft(0)(_+_)
    // The subclass resolver is only consulted for a side that actually has a star binding.
    val oStar = if (oStars > 0) resolveStarO(iKnown, oKnown) else 0
    val iStar = if (iStars > 0) resolveStarI(iKnown, oKnown) else 0
    // Running totals with the star size filled in; scanLeft seeds with 0, so each sequence
    // has one more entry than there are bindings.
    val oSum = oBindings.map { case (_, n, b) => b match {
      case BIND_ONCE  => 1
      case BIND_QUERY => n.iStar
      case BIND_STAR  => oStar }}.scanLeft(0)(_+_)
    val iSum = iBindings.map { case (_, n, b) => b match {
      case BIND_ONCE  => 1
      case BIND_QUERY => n.oStar
      case BIND_STAR  => iStar }}.scanLeft(0)(_+_)
    // The last running total is the overall port count on that side.
    val oTotal = oSum.lastOption.getOrElse(0)
    val iTotal = iSum.lastOption.getOrElse(0)
    require(numPO.contains(oTotal), s"${name} has ${oTotal} outputs, expected ${numPO}${lazyModule.line}")
    require(numPI.contains(iTotal), s"${name} has ${iTotal} inputs, expected ${numPI}${lazyModule.line}")
    // Pair each running total with its successor to get one (start, end) range per binding.
    (oSum.init zip oSum.tail, iSum.init zip iSum.tail, oStar, iStar)
  }
  // Expands every outward binding into one (port index, node) pair per port it occupies.
  // The range comes from the connected node's inward mapping, looked up with the index i
  // that was stored alongside the binding; end is exclusive.
  lazy val oPorts = oBindings.flatMap { case (i, n, _) =>
    val (start, end) = n.iPortMapping(i)
    (start until end) map { j => (j, n) }
  }
  // Expands every inward binding into one (port index, node) pair per port it occupies.
  // The range comes from the connected node's outward mapping, looked up with the index i
  // that was stored alongside the binding; end is exclusive.
  lazy val iPorts = iBindings.flatMap { case (i, n, _) =>
    val (start, end) = n.oPortMapping(i)
    (start until end) map { j => (j, n) }
  }
  private def reqE(o: Int, i: Int) = require(i == o, s"${name} has ${i} inputs and ${o} outputs; they must match${lazyModule.line}")
  protected[diplomacy] lazy val oParams: Seq[DO] = {
-    val o = dFn(oPorts.size, iPorts.map { case (i, n) => n.oParams(i) })
+    val o = mapParamsD(oPorts.size, iPorts.map { case (i, n) => n.oParams(i) })
-    reqE(oPorts.size, o.size)
+    require (o.size == oPorts.size, s"Bug in diplomacy; ${name} has ${o.size} != ${oPorts.size} down/up outer parameters${lazyModule.line}")
    o.map(outer.mixO(_, this))
  }
  protected[diplomacy] lazy val iParams: Seq[UI] = {
-    val i = uFn(iPorts.size, oPorts.map { case (o, n) => n.iParams(o) })
+    val i = mapParamsU(iPorts.size, oPorts.map { case (o, n) => n.iParams(o) })
-    reqE(i.size, iPorts.size)
+    require (i.size == iPorts.size, s"Bug in diplomacy; ${name} has ${i.size} != ${iPorts.size} up/down inner parameters${lazyModule.line}")
    i.map(inner.mixI(_, this))
  }
@ -175,87 +223,174 @@ class MixedNode[DI, UI, EI, BI <: Data, DO, UO, EO, BO <: Data](
  lazy val bundleIn  = wireI(flipI(inner.bundleI(edgesIn)))
  // connects the outward part of a node with the inward part of this node
-  override def := (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] = {
+  private def bind(h: OutwardNodeHandle[DI, UI, BI], binding: NodeBinding)(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] = {
    val x = this // x := y
    val y = h.outward
    val info = sourceLine(sourceInfo, " at ", "")
    require (!LazyModule.stack.isEmpty, s"${y.name} cannot be connected to ${x.name} outside of LazyModule scope" + info)
    val i = x.iPushed
    val o = y.oPushed
-    y.oPush(i, x)
+    y.oPush(i, x, binding match {
-    x.iPush(o, y)
+      case BIND_ONCE  => BIND_ONCE
-    val (out, binding) = inner.connect(y.bundleOut(o), x.bundleIn(i), x.edgesIn(i))
+      case BIND_STAR  => BIND_QUERY
-    LazyModule.stack.head.bindings = binding :: LazyModule.stack.head.bindings
+      case BIND_QUERY => BIND_STAR })
    x.iPush(o, y, binding)
    def bindings() = {
      val (iStart, iEnd) = x.iPortMapping(i)
      val (oStart, oEnd) = y.oPortMapping(o)
      require (iEnd - iStart == oEnd - oStart, s"Bug in diplomacy; ${iEnd-iStart} != ${oEnd-oStart} means port resolution failed")
      Seq.tabulate(iEnd - iStart) { j =>
        (x.edgesIn(iStart+j), x.bundleIn(iStart+j), y.bundleOut(oStart+j))
      }
    }
    val (out, newbinding) = inner.connect(bindings _)
    LazyModule.stack.head.bindings = newbinding :: LazyModule.stack.head.bindings
    out
  }
  // The three connection operators differ only in the NodeBinding tag they hand to bind.
  // x := y   uses BIND_ONCE.
  override def :=  (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] = bind(h, BIND_ONCE)
  // x :*= y  uses BIND_STAR.
  override def :*= (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] = bind(h, BIND_STAR)
  // x :=* y  uses BIND_QUERY.
  override def :=* (h: OutwardNodeHandle[DI, UI, BI])(implicit p: Parameters, sourceInfo: SourceInfo): Option[LazyModule] = bind(h, BIND_QUERY)
  // meta-data for printing the node graph
  protected[diplomacy] def colour  = inner.colour
  protected[diplomacy] def outputs = oPorts.map(_._2) zip edgesOut.map(e => outer.labelO(e))
  protected[diplomacy] def inputs  = iPorts.map(_._2) zip edgesIn .map(e => inner.labelI(e))
 }
 // Node that maps parameters one-to-one: dFn is applied to each downward parameter and uFn
 // to each upward parameter, so the number of inputs and outputs must be equal.
 // The same range `num` is passed to MixedNode for both port-count limits.
 class MixedAdapterNode[DI, UI, EI, BI <: Data, DO, UO, EO, BO <: Data](
  inner: InwardNodeImp [DI, UI, EI, BI],
  outer: OutwardNodeImp[DO, UO, EO, BO])(
  dFn: DI => DO,
  uFn: UO => UI,
  num: Range.Inclusive = 0 to 999)
  extends MixedNode(inner, outer)(num, num)
 {
  // Size of an outward star binding: the outputs still missing to match the i known inputs.
  // Fails if there are already more known outputs (o) than inputs.
  protected[diplomacy] def resolveStarO(i: Int, o: Int): Int = {
    require (i >= o, s"${name} has ${o} outputs and ${i} inputs; cannot assign ${i-o} edges to resolve :=*${lazyModule.line}")
    i - o
  }
  // Size of an inward star binding: the inputs still missing to match the o known outputs.
  // Fails if there are already more known inputs (i) than outputs.
  protected[diplomacy] def resolveStarI(i: Int, o: Int): Int = {
    require (o >= i, s"${name} has ${o} outputs and ${i} inputs; cannot assign ${o-i} edges to resolve :*=${lazyModule.line}")
    o - i
  }
  // Downward parameters: n is the requested number of results and must equal the number of
  // parameters received, since each one is mapped individually through dFn.
  protected[diplomacy] def mapParamsD(n: Int, p: Seq[DI]): Seq[DO] = {
    require(n == p.size, s"${name} has ${p.size} inputs and ${n} outputs; they must match${lazyModule.line}")
    p.map(dFn)
  }
  // Upward parameters: same one-to-one rule in the opposite direction, using uFn.
  protected[diplomacy] def mapParamsU(n: Int, p: Seq[UO]): Seq[UI] = {
    require(n == p.size, s"${name} has ${n} inputs and ${p.size} outputs; they must match${lazyModule.line}")
    p.map(uFn)
  }
 }
-class SimpleNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(
+class MixedNexusNode[DI, UI, EI, BI <: Data, DO, UO, EO, BO <: Data](
-  oFn: (Int, Seq[D]) => Seq[D],
+  inner: InwardNodeImp [DI, UI, EI, BI],
-  iFn: (Int, Seq[U]) => Seq[U],
+  outer: OutwardNodeImp[DO, UO, EO, BO])(
-  numPO: Range.Inclusive,
+  dFn: Seq[DI] => DO,
-  numPI: Range.Inclusive)
+  uFn: Seq[UO] => UI,
-    extends MixedNode[D, U, EI, B, D, U, EO, B](imp, imp)(oFn, iFn, numPO, numPI)
+  numPO: Range.Inclusive = 1 to 999,
  numPI: Range.Inclusive = 1 to 999)
  extends MixedNode(inner, outer)(numPO, numPI)
 {
  require (numPO.end >= 1, s"${name} does not accept outputs${lazyModule.line}")
  require (numPI.end >= 1, s"${name} does not accept inputs${lazyModule.line}")
  // This node cannot size an outward star binding: being asked to do so always fails with
  // a requirement error. The trailing 0 only satisfies the Int return type.
  protected[diplomacy] def resolveStarO(i: Int, o: Int): Int = {
    // The s-interpolator is needed here; without it the message would contain the literal
    // text "${name}" and "${lazyModule.line}" instead of the node name and source line.
    require (false, s"${name} cannot resolve :=*${lazyModule.line}")
    0
  }
  // This node cannot size an inward star binding: being asked to do so always fails with
  // a requirement error. The trailing 0 only satisfies the Int return type.
  protected[diplomacy] def resolveStarI(i: Int, o: Int): Int = {
    require (false, s"${name} cannot resolve :*=${lazyModule.line}")
    0
  }
  // Every one of the n results is produced by applying dFn to the complete parameter sequence;
  // Seq.fill takes its element by name, so dFn is evaluated once per result.
  protected[diplomacy] def mapParamsD(n: Int, p: Seq[DI]): Seq[DO] = Seq.fill(n) { dFn(p) }
  // Same scheme in the upward direction, applying uFn to the complete parameter sequence.
  protected[diplomacy] def mapParamsU(n: Int, p: Seq[UO]): Seq[UI] = Seq.fill(n) { uFn(p) }
 }
-class IdentityNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])
+class AdapterNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(
-  extends SimpleNode(imp)({case (_, s) => s}, {case (_, s) => s}, 0 to 999, 0 to 999)
+  dFn: D => D,
  uFn: U => U,
  num: Range.Inclusive = 0 to 999)
    extends MixedAdapterNode[D, U, EI, B, D, U, EO, B](imp, imp)(dFn, uFn, num)
-class OutputNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B]) extends IdentityNode(imp)
+class NexusNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(
  dFn: Seq[D] => D,
  uFn: Seq[U] => U,
  numPO: Range.Inclusive = 1 to 999,
  numPI: Range.Inclusive = 1 to 999)
    extends MixedNexusNode[D, U, EI, B, D, U, EO, B](imp, imp)(dFn, uFn, numPO, numPI)
 // Adapter node whose downward and upward parameter functions both return their argument unchanged.
 class IdentityNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])
  extends AdapterNode(imp)({s => s}, {s => s})
 // Identity node whose bundleIn is defined to be the same value as its bundleOut.
 class OutputNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B]) extends IdentityNode(imp)
 {
  override lazy val bundleIn = bundleOut
 }
-class InputNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B]) extends IdentityNode(imp)
+class InputNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B]) extends IdentityNode(imp)
 {
  override lazy val bundleOut = bundleIn
 }
-class SourceNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])(po: PO, num: Range.Inclusive = 1 to 1)
+class SourceNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(po: Seq[D])
-  extends SimpleNode(imp)({case (n, Seq()) => Seq.fill(n)(po)}, {case (0, _) => Seq()}, num, 0 to 0)
+  extends MixedNode(imp, imp)(po.size to po.size, 0 to 0)
 {
-  require (num.end >= 1, s"${name} is a source which does not accept outputs${lazyModule.line}")
+  protected[diplomacy] def resolveStarO(i: Int, o: Int): Int = {
    require (po.size >= o, s"${name} has ${o} outputs out of ${po.size}; cannot assign ${po.size - o} edges to resolve :=*${lazyModule.line}")
    po.size - o
  }
  // This node cannot size an inward star binding: being asked to do so always fails with
  // a requirement error. The trailing 0 only satisfies the Int return type.
  protected[diplomacy] def resolveStarI(i: Int, o: Int): Int = {
    require (false, s"${name} cannot resolve :*=${lazyModule.line}")
    0
  }
  protected[diplomacy] def mapParamsD(n: Int, p: Seq[D]): Seq[D] = po
  protected[diplomacy] def mapParamsU(n: Int, p: Seq[U]): Seq[U] = Seq()
  override lazy val bundleIn = { require(false, s"${name} has no bundleIn; try bundleOut?"); bundleOut }
 }
-class SinkNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])(pi: PI, num: Range.Inclusive = 1 to 1)
+class SinkNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(pi: Seq[U])
-  extends SimpleNode(imp)({case (0, _) => Seq()}, {case (n, Seq()) => Seq.fill(n)(pi)}, 0 to 0, num)
+  extends MixedNode(imp, imp)(0 to 0, pi.size to pi.size)
 {
-  require (num.end >= 1, s"${name} is a sink which does not accept inputs${lazyModule.line}")
+  protected[diplomacy] def resolveStarO(i: Int, o: Int): Int = {
    require (false, s"${name} cannot resolve :=*${lazyModule.line}")
    0
  }
  // Size of an inward star binding: whatever is left of the pi.size parameter entries after
  // the i inputs that are already known. Fails if more inputs are known than entries exist.
  protected[diplomacy] def resolveStarI(i: Int, o: Int): Int = {
    require (pi.size >= i, s"${name} has ${i} inputs out of ${pi.size}; cannot assign ${pi.size - i} edges to resolve :*=${lazyModule.line}")
    pi.size - i
  }
  protected[diplomacy] def mapParamsD(n: Int, p: Seq[D]): Seq[D] = Seq()
  protected[diplomacy] def mapParamsU(n: Int, p: Seq[U]): Seq[U] = pi
  override lazy val bundleOut = { require(false, s"${name} has no bundleOut; try bundleIn?"); bundleIn }
 }
-class BlindOutputNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])(pi: Seq[PI])
+class BlindOutputNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(pi: Seq[U])
-  extends SimpleNode(imp)({case (0, _) => Seq()}, {case (_, Seq()) => pi}, 0 to 0, pi.size to pi.size)
+  extends SinkNode(imp)(pi)
 {
  override val flip = true
  override lazy val bundleOut = bundleIn
 }
-class BlindInputNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])(po: Seq[PO])
+class BlindInputNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(po: Seq[D])
-  extends SimpleNode(imp)({case (_, Seq()) => po}, {case (0, _) => Seq()}, po.size to po.size, 0 to 0)
+  extends SourceNode(imp)(po)
 {
  override val flip = true
  override lazy val bundleIn = bundleOut
 }
-class InternalOutputNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])(pi: Seq[PI])
+class InternalOutputNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(pi: Seq[U])
-  extends SimpleNode(imp)({case (0, _) => Seq()}, {case (_, Seq()) => pi}, 0 to 0, pi.size to pi.size)
+  extends SinkNode(imp)(pi)
 {
  override val wire = true
  override lazy val bundleOut = bundleIn
 }
-class InternalInputNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])(po: Seq[PO])
+class InternalInputNode[D, U, EO, EI, B <: Data](imp: NodeImp[D, U, EO, EI, B])(po: Seq[D])
-  extends SimpleNode(imp)({case (_, Seq()) => po}, {case (0, _) => Seq()}, po.size to po.size, 0 to 0)
+  extends SourceNode(imp)(po)
 {
  override val wire = true
  override lazy val bundleIn = bundleOut
 }
 class InteriorNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])
  (oFn: Seq[PO] => PO, iFn: Seq[PI] => PI, numPO: Range.Inclusive, numPI: Range.Inclusive)
  extends SimpleNode(imp)({case (n,s) => Seq.fill(n)(oFn(s))}, {case (n,s) => Seq.fill(n)(iFn(s))}, numPO, numPI)
 {
  require (numPO.end >= 1, s"${name} is an adapter which does not accept outputs${lazyModule.line}")
  require (numPI.end >= 1, s"${name} is an adapter which does not accept inputs${lazyModule.line}")
 }
--- a/src/main/scala/rocket/ScratchpadSlavePort.scala
+++ b/src/main/scala/rocket/ScratchpadSlavePort.scala
@ -13,7 +13,7 @@ import uncore.util._
 class ScratchpadSlavePort(implicit p: Parameters) extends LazyModule {
  val coreDataBytes = p(XLen)/8
-  val node = TLManagerNode(TLManagerPortParameters(
+  val node = TLManagerNode(Seq(TLManagerPortParameters(
    Seq(TLManagerParameters(
      address            = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))),
      regionType         = RegionType.UNCACHED,
@ -25,7 +25,7 @@ class ScratchpadSlavePort(implicit p: Parameters) extends LazyModule {
      supportsGet        = TransferSizes(1, coreDataBytes),
      fifoId             = Some(0))), // requests handled in FIFO order
    beatBytes = coreDataBytes,
-    minLatency = 1))
+    minLatency = 1)))
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
--- a/src/main/scala/rocketchip/BaseTop.scala
+++ b/src/main/scala/rocketchip/BaseTop.scala
@ -34,6 +34,7 @@ trait TopNetwork extends HasPeripheryParameters {
  val socBus = LazyModule(new TLXbar)
  val peripheryBus = LazyModule(new TLXbar)
  val intBus = LazyModule(new IntXbar)
  val l2 = LazyModule(new TLBuffer)
  peripheryBus.node :=
    TLBuffer()(
@ -62,13 +63,3 @@ class BaseTopBundle[+L <: BaseTop](_outer: L) extends BareTopBundle(_outer)
 class BaseTopModule[+L <: BaseTop, +B <: BaseTopBundle[L]](_outer: L, _io: () => B) extends BareTopModule(_outer, _io)
    with TopNetworkModule
 trait L2Crossbar extends TopNetwork {
  val l2 = LazyModule(new TLXbar)
 }
 trait L2CrossbarBundle extends TopNetworkBundle {
 }
 trait L2CrossbarModule extends TopNetworkModule {
 }
--- a/src/main/scala/rocketchip/Periphery.scala
+++ b/src/main/scala/rocketchip/Periphery.scala
@ -165,7 +165,7 @@ trait PeripheryMasterAXI4MMIOModule {
 /////
 // PeripherySlaveAXI4 is an example, make your own cake pattern like this one.
-trait PeripherySlaveAXI4 extends L2Crossbar {
+trait PeripherySlaveAXI4 extends TopNetwork {
  private val config = p(ExtIn)
  val l2_axi4 = AXI4BlindInputNode(Seq(AXI4MasterPortParameters(
    masters = Seq(AXI4MasterParameters(
@ -179,12 +179,12 @@ trait PeripherySlaveAXI4 extends L2Crossbar {
    l2_axi4))))
 }
-trait PeripherySlaveAXI4Bundle extends L2CrossbarBundle {
+trait PeripherySlaveAXI4Bundle extends TopNetworkBundle {
  val outer: PeripherySlaveAXI4
  val l2_axi4 = outer.l2_axi4.bundleIn
 }
-trait PeripherySlaveAXI4Module extends L2CrossbarModule {
+trait PeripherySlaveAXI4Module extends TopNetworkModule {
  val outer: PeripherySlaveAXI4
  val io: PeripherySlaveAXI4Bundle
  // nothing to do
@ -231,7 +231,7 @@ trait PeripheryMasterTLMMIOModule {
 /////
 // NOTE: this port is NOT allowed to issue Acquires
-trait PeripherySlaveTL extends L2Crossbar {
+trait PeripherySlaveTL extends TopNetwork {
  private val config = p(ExtIn)
  val l2_tl = TLBlindInputNode(Seq(TLClientPortParameters(
    clients = Seq(TLClientParameters(
@ -243,12 +243,12 @@ trait PeripherySlaveTL extends L2Crossbar {
    l2_tl))
 }
-trait PeripherySlaveTLBundle extends L2CrossbarBundle {
+trait PeripherySlaveTLBundle extends TopNetworkBundle {
  val outer: PeripherySlaveTL
  val l2_tl = outer.l2_tl.bundleIn
 }
-trait PeripherySlaveTLModule extends L2CrossbarModule {
+trait PeripherySlaveTLModule extends TopNetworkModule {
  val outer: PeripherySlaveTL
  val io: PeripherySlaveTLBundle
  // nothing to do
--- a/src/main/scala/rocketchip/RocketPlexMaster.scala
+++ b/src/main/scala/rocketchip/RocketPlexMaster.scala
@ -10,23 +10,23 @@ import uncore.devices._
 import util._
 import coreplex._
-trait RocketPlexMaster extends L2Crossbar {
+trait RocketPlexMaster extends TopNetwork {
  val module: RocketPlexMasterModule
  val mem: Seq[TLInwardNode]
  val coreplex = LazyModule(new DefaultCoreplex)
-  coreplex.l2in := l2.node
+  coreplex.l2in :=* l2.node
  socBus.node := coreplex.mmio
  coreplex.mmioInt := intBus.intnode
  mem.foreach { _ := coreplex.mem }
 }
-trait RocketPlexMasterBundle extends L2CrossbarBundle {
+trait RocketPlexMasterBundle extends TopNetworkBundle {
  val outer: RocketPlexMaster
 }
-trait RocketPlexMasterModule extends L2CrossbarModule {
+trait RocketPlexMasterModule extends TopNetworkModule {
  val outer: RocketPlexMaster
  val io: RocketPlexMasterBundle
  val clock: Clock
--- a/src/main/scala/uncore/ahb/Nodes.scala
+++ b/src/main/scala/uncore/ahb/Nodes.scala
@ -19,10 +19,6 @@ object AHBImp extends NodeImp[AHBMasterPortParameters, AHBSlavePortParameters, A
  override def labelI(ei: AHBEdgeParameters) = (ei.slave.beatBytes * 8).toString
  override def labelO(eo: AHBEdgeParameters) = (eo.slave.beatBytes * 8).toString
  def connect(bo: => AHBBundle, bi: => AHBBundle, ei: => AHBEdgeParameters)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
    (None, () => { bi <> bo })
  }
  override def mixO(pd: AHBMasterPortParameters, node: OutwardNode[AHBMasterPortParameters, AHBSlavePortParameters, AHBBundle]): AHBMasterPortParameters  =
   pd.copy(masters = pd.masters.map  { c => c.copy (nodePath = node +: c.nodePath) })
  override def mixI(pu: AHBSlavePortParameters, node: InwardNode[AHBMasterPortParameters, AHBSlavePortParameters, AHBBundle]): AHBSlavePortParameters =
@ -31,16 +27,14 @@ object AHBImp extends NodeImp[AHBMasterPortParameters, AHBSlavePortParameters, A
 // Nodes implemented inside modules
 case class AHBIdentityNode() extends IdentityNode(AHBImp)
-case class AHBMasterNode(portParams: AHBMasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
+case class AHBMasterNode(portParams: Seq[AHBMasterPortParameters]) extends SourceNode(AHBImp)(portParams)
-  extends SourceNode(AHBImp)(portParams, numPorts)
+case class AHBSlaveNode(portParams: Seq[AHBSlavePortParameters]) extends SinkNode(AHBImp)(portParams)
-case class AHBSlaveNode(portParams: AHBSlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
+case class AHBNexusNode(
-  extends SinkNode(AHBImp)(portParams, numPorts)
+  masterFn:       Seq[AHBMasterPortParameters] => AHBMasterPortParameters,
-case class AHBAdapterNode(
+  slaveFn:        Seq[AHBSlavePortParameters]  => AHBSlavePortParameters,
-  masterFn:       Seq[AHBMasterPortParameters]  => AHBMasterPortParameters,
+  numMasterPorts: Range.Inclusive = 1 to 999,
-  slaveFn:        Seq[AHBSlavePortParameters] => AHBSlavePortParameters,
+  numSlavePorts:  Range.Inclusive = 1 to 999)
-  numMasterPorts: Range.Inclusive = 1 to 1,
+  extends NexusNode(AHBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
  numSlavePorts:  Range.Inclusive = 1 to 1)
  extends InteriorNode(AHBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
 // Nodes passed from an inner module
 case class AHBOutputNode() extends OutputNode(AHBImp)
--- a/src/main/scala/uncore/ahb/RegisterRouter.scala
+++ b/src/main/scala/uncore/ahb/RegisterRouter.scala
@ -9,13 +9,13 @@ import regmapper._
 import scala.math.{min,max}
 class AHBRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends AHBSlaveNode(AHBSlavePortParameters(
+  extends AHBSlaveNode(Seq(AHBSlavePortParameters(
    Seq(AHBSlaveParameters(
      address       = Seq(address),
      executable    = executable,
      supportsWrite = TransferSizes(1, min(address.alignment.toInt, beatBytes * AHBParameters.maxTransfer)),
      supportsRead  = TransferSizes(1, min(address.alignment.toInt, beatBytes * AHBParameters.maxTransfer)))),
-    beatBytes  = beatBytes))
+    beatBytes  = beatBytes)))
 {
  require (address.contiguous)
--- a/src/main/scala/uncore/ahb/SRAM.scala
+++ b/src/main/scala/uncore/ahb/SRAM.scala
@ -8,14 +8,14 @@ import diplomacy._
 class AHBRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = AHBSlaveNode(AHBSlavePortParameters(
+  val node = AHBSlaveNode(Seq(AHBSlavePortParameters(
    Seq(AHBSlaveParameters(
      address       = List(address),
      regionType    = RegionType.UNCACHED,
      executable    = executable,
      supportsRead  = TransferSizes(1, beatBytes * AHBParameters.maxTransfer),
      supportsWrite = TransferSizes(1, beatBytes * AHBParameters.maxTransfer))),
-    beatBytes  = beatBytes))
+    beatBytes  = beatBytes)))
  // We require the address range to include an entire beat (for the write mask)
  require ((address.mask & (beatBytes-1)) == beatBytes-1)
--- a/src/main/scala/uncore/ahb/Xbar.scala
+++ b/src/main/scala/uncore/ahb/Xbar.scala
@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class AHBFanout()(implicit p: Parameters) extends LazyModule {
-  val node = AHBAdapterNode(
+  val node = AHBNexusNode(
    numSlavePorts  = 1 to 1,
    numMasterPorts = 1 to 32,
    masterFn = { case Seq(m) => m },
--- a/src/main/scala/uncore/apb/Nodes.scala
+++ b/src/main/scala/uncore/apb/Nodes.scala
@ -19,10 +19,6 @@ object APBImp extends NodeImp[APBMasterPortParameters, APBSlavePortParameters, A
  override def labelI(ei: APBEdgeParameters) = (ei.slave.beatBytes * 8).toString
  override def labelO(eo: APBEdgeParameters) = (eo.slave.beatBytes * 8).toString
  def connect(bo: => APBBundle, bi: => APBBundle, ei: => APBEdgeParameters)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
    (None, () => { bi <> bo })
  }
  override def mixO(pd: APBMasterPortParameters, node: OutwardNode[APBMasterPortParameters, APBSlavePortParameters, APBBundle]): APBMasterPortParameters  =
   pd.copy(masters = pd.masters.map  { c => c.copy (nodePath = node +: c.nodePath) })
  override def mixI(pu: APBSlavePortParameters, node: InwardNode[APBMasterPortParameters, APBSlavePortParameters, APBBundle]): APBSlavePortParameters =
@ -31,16 +27,14 @@ object APBImp extends NodeImp[APBMasterPortParameters, APBSlavePortParameters, A
 // Nodes implemented inside modules
 case class APBIdentityNode() extends IdentityNode(APBImp)
-case class APBMasterNode(portParams: APBMasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
+case class APBMasterNode(portParams: Seq[APBMasterPortParameters]) extends SourceNode(APBImp)(portParams)
-  extends SourceNode(APBImp)(portParams, numPorts)
+case class APBSlaveNode(portParams: Seq[APBSlavePortParameters]) extends SinkNode(APBImp)(portParams)
-case class APBSlaveNode(portParams: APBSlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
+case class APBNexusNode(
-  extends SinkNode(APBImp)(portParams, numPorts)
+  masterFn:       Seq[APBMasterPortParameters] => APBMasterPortParameters,
-case class APBAdapterNode(
+  slaveFn:        Seq[APBSlavePortParameters]  => APBSlavePortParameters,
  masterFn:       Seq[APBMasterPortParameters]  => APBMasterPortParameters,
  slaveFn:        Seq[APBSlavePortParameters] => APBSlavePortParameters,
  numMasterPorts: Range.Inclusive = 1 to 1,
  numSlavePorts:  Range.Inclusive = 1 to 1)
-  extends InteriorNode(APBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
+  extends NexusNode(APBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
 // Nodes passed from an inner module
 case class APBOutputNode() extends OutputNode(APBImp)
--- a/src/main/scala/uncore/apb/RegisterRouter.scala
+++ b/src/main/scala/uncore/apb/RegisterRouter.scala
@ -9,13 +9,13 @@ import regmapper._
 import scala.math.{min,max}
 class APBRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends APBSlaveNode(APBSlavePortParameters(
+  extends APBSlaveNode(Seq(APBSlavePortParameters(
    Seq(APBSlaveParameters(
      address       = Seq(address),
      executable    = executable,
      supportsWrite = true,
      supportsRead  = true)),
-    beatBytes  = beatBytes))
+    beatBytes  = beatBytes)))
 {
  require (address.contiguous)
--- a/src/main/scala/uncore/apb/SRAM.scala
+++ b/src/main/scala/uncore/apb/SRAM.scala
@ -8,14 +8,14 @@ import diplomacy._
 class APBRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = APBSlaveNode(APBSlavePortParameters(
+  val node = APBSlaveNode(Seq(APBSlavePortParameters(
    Seq(APBSlaveParameters(
      address       = List(address),
      regionType    = RegionType.UNCACHED,
      executable    = executable,
      supportsRead  = true,
      supportsWrite = true)),
-    beatBytes  = beatBytes))
+    beatBytes  = beatBytes)))
  // We require the address range to include an entire beat (for the write mask)
  require ((address.mask & (beatBytes-1)) == beatBytes-1)
--- a/src/main/scala/uncore/apb/Xbar.scala
+++ b/src/main/scala/uncore/apb/Xbar.scala
@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class APBFanout()(implicit p: Parameters) extends LazyModule {
-  val node = APBAdapterNode(
+  val node = APBNexusNode(
    numSlavePorts  = 1 to 1,
    numMasterPorts = 1 to 32,
    masterFn = { case Seq(m) => m },
--- a/src/main/scala/uncore/axi4/Buffer.scala
+++ b/src/main/scala/uncore/axi4/Buffer.scala
@ -18,8 +18,8 @@ class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, p
  require (r  >= 0)
  val node = AXI4AdapterNode(
-    masterFn = { case Seq(p) => p },
+    masterFn = { p => p },
-    slaveFn  = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
+    slaveFn  = { p => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
--- a/src/main/scala/uncore/axi4/Fragmenter.scala
+++ b/src/main/scala/uncore/axi4/Fragmenter.scala
@ -23,8 +23,8 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
  def mapMaster(m: AXI4MasterParameters) = m.copy(aligned = true)
  val node = AXI4AdapterNode(
-    masterFn = { case Seq(mp) => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
+    masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
-    slaveFn  = { case Seq(sp) => sp.copy(slaves  = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
+    slaveFn  = { sp => sp.copy(slaves  = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
@ -32,256 +32,253 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
      val out = node.bundleOut
    }
-    val edgeOut   = node.edgesOut(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val edgeIn    = node.edgesIn(0)
+      val slave     = edgeOut.slave
-    val slave     = edgeOut.slave
+      val slaves    = slave.slaves
-    val slaves    = slave.slaves
+      val beatBytes = slave.beatBytes
-    val beatBytes = slave.beatBytes
+      val lgBytes   = log2Ceil(beatBytes)
-    val lgBytes   = log2Ceil(beatBytes)
+      val master    = edgeIn.master
-    val master    = edgeIn.master
+      val masters   = master.masters
    val masters   = master.masters
-    // If the user claimed this was a lite interface, then there must be only one Id
+      // If the user claimed this was a lite interface, then there must be only one Id
-    require (!lite || master.endId == 1)
+      require (!lite || master.endId == 1)
-    // We don't support fragmenting to sub-beat accesses
+      // We don't support fragmenting to sub-beat accesses
-    slaves.foreach { s =>
+      slaves.foreach { s =>
-      require (!s.supportsRead  || s.supportsRead.contains(beatBytes))
+        require (!s.supportsRead  || s.supportsRead.contains(beatBytes))
-      require (!s.supportsWrite || s.supportsWrite.contains(beatBytes))
+        require (!s.supportsWrite || s.supportsWrite.contains(beatBytes))
-    }
+      }
-    /* We need to decompose a request into 
+      /* We need to decompose a request into 
-     *   FIXED => each beat is a new request
+       *   FIXED => each beat is a new request
-     *   WRAP/INCR => take xfr up to next power of two, capped by max size of target
+       *   WRAP/INCR => take xfr up to next power of two, capped by max size of target
-     *
+       *
-     * On AR and AW, we fragment one request into many
+       * On AR and AW, we fragment one request into many
-     * On W we set 'last' on beats which are fragment boundaries
+       * On W we set 'last' on beats which are fragment boundaries
-     * On R we clear 'last' on the fragments being reassembled
+       * On R we clear 'last' on the fragments being reassembled
-     * On B we clear 'valid' on the responses for the injected fragments
+       * On B we clear 'valid' on the responses for the injected fragments
-     *
+       *
-     * AR=>R and AW+W=>B are completely independent state machines.
+       * AR=>R and AW+W=>B are completely independent state machines.
     */
    /* Returns the number of beats to execute and the new address */
    def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
      val out = Wire(a)
      val busy   = RegInit(Bool(false))
      val r_addr = Reg(UInt(width = a.bits.params.addrBits))
      val r_len  = Reg(UInt(width = AXI4Parameters.lenBits))
      val len  = Mux(busy, r_len,  a.bits.len)
      val addr = Mux(busy, r_addr, a.bits.addr)
      val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
      val hi = addr >> lgBytes
      val alignment = hi(AXI4Parameters.lenBits-1,0)
      val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1
      val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
      val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))
      /* We need to compute the largest transfer allowed by the AXI len.
       * len+1 is the number of beats to execute.
       * We want the MSB(len+1)-1; one less than the largest power of two we could execute.
       * There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
       *   fill the bits from highest to lowest, and shift right by one bit.
       */
      val fillLow  = rightOR(len) >> 1   // set   all bits in positions <  a set     bit
      val wipeHigh = ~leftOR(~len)       // clear all bits in position  >= a cleared bit
      val remain1  = fillLow | wipeHigh  // MSB(a.len+1)-1
      val align1   = ~leftOR(alignment)  // transfer size limited by address alignment
      val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
      val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits
-      // Things that cause us to degenerate to a single beat
+      /* Returns the number of beats to execute and the new address */
-      val fixed = a.bits.burst === AXI4Parameters.BURST_FIXED
+      def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
-      val narrow = a.bits.size =/= UInt(lgBytes)
+        val out = Wire(a)
      val bad = fixed || narrow
-      // The number of beats-1 to execute
+        val busy   = RegInit(Bool(false))
-      val beats1 = Mux(bad, UInt(0), maxSupported1)
+        val r_addr = Reg(UInt(width = a.bits.params.addrBits))
-      val beats = OH1ToOH(beats1) // beats1 + 1
+        val r_len  = Reg(UInt(width = AXI4Parameters.lenBits))
-      val inc_addr = addr + (beats << a.bits.size) // address after adding transfer
+        val len  = Mux(busy, r_len,  a.bits.len)
-      val wrapMask = a.bits.bytes1() // only these bits may change, if wrapping
+        val addr = Mux(busy, r_addr, a.bits.addr)
-      val mux_addr = Wire(init = inc_addr)
+
-      when (a.bits.burst === AXI4Parameters.BURST_WRAP) {
+        val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
-        mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask)
+        val hi = addr >> lgBytes
-      }
+        val alignment = hi(AXI4Parameters.lenBits-1,0)
-      when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
+
-        mux_addr := a.bits.addr
+        val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1
        val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
        val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))
        /* We need to compute the largest transfer allowed by the AXI len.
         * len+1 is the number of beats to execute.
         * We want the MSB(len+1)-1; one less than the largest power of two we could execute.
         * There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
         *   fill the bits from highest to lowest, and shift right by one bit.
         */
        val fillLow  = rightOR(len) >> 1   // set   all bits in positions <  a set     bit
        val wipeHigh = ~leftOR(~len)       // clear all bits in position  >= a cleared bit
        val remain1  = fillLow | wipeHigh  // MSB(a.len+1)-1
        val align1   = ~leftOR(alignment)  // transfer size limited by address alignment
        val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
        val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits
        // Things that cause us to degenerate to a single beat
        val fixed = a.bits.burst === AXI4Parameters.BURST_FIXED
        val narrow = a.bits.size =/= UInt(lgBytes)
        val bad = fixed || narrow
        // The number of beats-1 to execute
        val beats1 = Mux(bad, UInt(0), maxSupported1)
        val beats = OH1ToOH(beats1) // beats1 + 1
        val inc_addr = addr + (beats << a.bits.size) // address after adding transfer
        val wrapMask = a.bits.bytes1() // only these bits may change, if wrapping
        val mux_addr = Wire(init = inc_addr)
        when (a.bits.burst === AXI4Parameters.BURST_WRAP) {
          mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask)
        }
        when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
          mux_addr := a.bits.addr
        }
        val last = beats1 === len
        a.ready := out.ready && last
        out.valid := a.valid
        out.bits := a.bits
        out.bits.len := beats1
        // We forcibly align every access. If the first beat was misaligned, the strb bits
        // for the lower addresses must not have been set. Therefore, rounding the address
        // down is harmless. We can do this after the address update algorithm, because the
        // incremented values will be rounded down the same way. Furthermore, a subword
        // offset cannot cause a premature wrap-around.
        out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes))
        when (out.fire()) {
          busy := !last
          r_addr := mux_addr
          r_len  := len - beats
        }
        (out, last, beats)
      }
-      val last = beats1 === len
+      // The size to which we will fragment the access
-      a.ready := out.ready && last
+      val readSizes1  = slaves.map(s => s.supportsRead .max/beatBytes-1)
-      out.valid := a.valid
+      val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)
-      out.bits := a.bits
+      // Indirection variables for inputs and outputs; makes transformation application easier
-      out.bits.len := beats1
+      val (in_ar, ar_last, _)       = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1)
      val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1)
      val in_w = in.w
      val in_r = in.r
      val in_b = in.b
      val out_ar = Wire(out.ar)
      val out_aw = out.aw
      val out_w = out.w
      val out_r = Wire(out.r)
      val out_b = Wire(out.b)
-      // We forcibly align every access. If the first beat was misaligned, the strb bits
+      val depth = if (combinational) 1 else 2
-      // for the lower addresses must not have been set. Therefore, rounding the address
+      // In case a slave ties arready := rready, we need a queue to break the combinational loop
-      // down is harmless. We can do this after the address update algorithm, because the
+      // between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
-      // incremented values will be rounded down the same way. Furthermore, a subword
+      if (in.ar.bits.getWidth < in.r.bits.getWidth) {
-      // offset cannot cause a premature wrap-around.
+        out.ar <> Queue(out_ar, depth, flow=combinational)
-      out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes))
+        out_r <> out.r
-
+      } else {
-      when (out.fire()) {
+        out.ar <> out_ar
-        busy := !last
+        out_r <> Queue(out.r, depth, flow=combinational)
        r_addr := mux_addr
        r_len  := len - beats
      }
      // In case a slave ties awready := bready or wready := bready, we need this queue
      out_b <> Queue(out.b, depth, flow=combinational)
-      (out, last, beats)
+      // Sideband to track which transfers were the last fragment
-    }
+      def sideband() = if (lite) {
        Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
      } else {
        Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
      }
      val sideband_ar_r = sideband()
      val sideband_aw_b = sideband()
-    val in = io.in(0)
+      // AR flow control
-    val out = io.out(0)
+      out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
      in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
      sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
      out_ar.bits := in_ar.bits
      sideband_ar_r.enq.bits := ar_last
-    // The size to which we will fragment the access
+      // When does W channel start counting a new transfer
-    val readSizes1  = slaves.map(s => s.supportsRead .max/beatBytes-1)
+      val wbeats_latched = RegInit(Bool(false))
-    val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)
+      val wbeats_ready = Wire(Bool())
      val wbeats_valid = Wire(Bool())
      when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
      when (out_aw.fire()) { wbeats_latched := Bool(false) }
-    // Indirection variables for inputs and outputs; makes transformation application easier
+      // AW flow control
-    val (in_ar, ar_last, _)       = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1)
+      out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
-    val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1)
+      in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
-    val in_w = in.w
+      sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
-    val in_r = in.r
+      wbeats_valid := in_aw.valid && !wbeats_latched
-    val in_b = in.b
+      out_aw.bits := in_aw.bits
-    val out_ar = Wire(out.ar)
+      sideband_aw_b.enq.bits := aw_last
    val out_aw = out.aw
    val out_w = out.w
    val out_r = Wire(out.r)
    val out_b = Wire(out.b)
-    val depth = if (combinational) 1 else 2
+      // We need to inject 'last' into the W channel fragments, count!
-    // In case a slave ties arready := rready, we need a queue to break the combinational loop
+      val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
-    // between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
+      val w_idle = w_counter === UInt(0)
-    if (in.ar.bits.getWidth < in.r.bits.getWidth) {
+      val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
-      out.ar <> Queue(out_ar, depth, flow=combinational)
+      val w_last = w_todo === UInt(1)
-      out_r <> out.r
+      w_counter := w_todo - out_w.fire()
-    } else {
+      assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
      out.ar <> out_ar
      out_r <> Queue(out.r, depth, flow=combinational)
    }
    // In case a slave ties awready := bready or wready := bready, we need this queue
    out_b <> Queue(out.b, depth, flow=combinational)
-    // Sideband to track which transfers were the last fragment
+      // W flow control
-    def sideband() = if (lite) {
+      wbeats_ready := w_idle
-      Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
+      out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
-    } else {
+      in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
-      Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
+      out_w.bits := in_w.bits
-    }
+      out_w.bits.last := w_last
-    val sideband_ar_r = sideband()
+      // We should also recreate the last last
-    val sideband_aw_b = sideband()
+      assert (!out_w.valid || !in_w.bits.last || w_last)
-    // AR flow control
+      // R flow control
-    out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
+      val r_last = out_r.bits.last
-    in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
+      in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
-    sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
+      out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
-    out_ar.bits := in_ar.bits
+      sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
-    sideband_ar_r.enq.bits := ar_last
+      in_r.bits := out_r.bits
      in_r.bits.last := r_last && sideband_ar_r.deq.bits
-    // When does W channel start counting a new transfer
+      // B flow control
-    val wbeats_latched = RegInit(Bool(false))
+      val b_last = sideband_aw_b.deq.bits
-    val wbeats_ready = Wire(Bool())
+      in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
-    val wbeats_valid = Wire(Bool())
+      out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
-    when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
+      sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
-    when (out_aw.fire()) { wbeats_latched := Bool(false) }
+      in_b.bits := out_b.bits
-    // AW flow control
+      // Merge errors from dropped B responses
-    out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
+      val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
-    in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
+      val resp = out_b.bits.resp | r_resp
-    sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
+      when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
-    wbeats_valid := in_aw.valid && !wbeats_latched
+      in_b.bits.resp := resp
    out_aw.bits := in_aw.bits
    sideband_aw_b.enq.bits := aw_last
    // We need to inject 'last' into the W channel fragments, count!
    val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
    val w_idle = w_counter === UInt(0)
    val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
    val w_last = w_todo === UInt(1)
    w_counter := w_todo - out_w.fire()
    assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
    // W flow control
    wbeats_ready := w_idle
    out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
    in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
    out_w.bits := in_w.bits
    out_w.bits.last := w_last
    // We should also recreate the last last
    assert (!out_w.valid || !in_w.bits.last || w_last)
    // R flow control
    val r_last = out_r.bits.last
    in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
    out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
    sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
    in_r.bits := out_r.bits
    in_r.bits.last := r_last && sideband_ar_r.deq.bits
    // B flow control
    val b_last = sideband_aw_b.deq.bits
    in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
    out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
    sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
    in_b.bits := out_b.bits
    // Merge errors from dropped B responses
    val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
    val resp = out_b.bits.resp | r_resp
    when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
    in_b.bits.resp := resp
  }
 }
 /* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
 * This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
 * Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
 * This guarantees that no other R responses might get mixed between fragments, ensuring that the
 * interleavedId for the slaves remains unaffected by the fragmentation transformation.
 * Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
 * However, this design allows full concurrency in the common no-fragmentation-needed scenario.
 */
 class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
 {
  val io = new QueueIO(Bool(), maxInFlight)
  io.count := UInt(0)
  val PASS = UInt(2, width = 2) // allow 'last=1' bits to enque, on 'last=0' if count>0 block else accept+FIND
  val FIND = UInt(0, width = 2) // allow 'last=0' bits to enque, accept 'last=1' and switch to WAIT
  val WAIT = UInt(1, width = 2) // block all access till count=0
  val state = RegInit(PASS)
  val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
  val full  = count === UInt(maxInFlight-1)
  val empty = count === UInt(0)
  val last  = count === UInt(1)
  io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
  io.deq.valid := !empty
  io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))
  // WAIT => count > 0
  assert (state =/= WAIT || count =/= UInt(0))
  if (flow) {
    when (io.enq.valid) {
      io.deq.valid := Bool(true)
      when (empty) { io.deq.bits := io.enq.bits }
    }
  }
-  count := count + io.enq.fire() - io.deq.fire()
+  /* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
-  switch (state) {
+   * This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
-    is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
+   * Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
-    is(FIND) { when (io.enq.valid &&  io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
+   * This guarantees that no other R responses might get mixed between fragments, ensuring that the
-    is(WAIT) { when (last && io.deq.ready)                  { state := PASS } }
+   * interleavedId for the slaves remains unaffected by the fragmentation transformation.
   * Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
   * However, this design allows full concurrency in the common no-fragmentation-needed scenario.
   */
  /* Data-free replacement for a Queue of 'last' flags.
   *
   * The module exposes a QueueIO(Bool) interface but stores no payload. It keeps only an
   * occupancy counter and a 2-bit state, and reconstructs the dequeued bit from those.
   *
   * maxInFlight: sizes the QueueIO and the occupancy counter; enqueue is refused once the
   *              counter reaches maxInFlight-1.
   * flow:        when true, a valid enqueue is presented on deq in the same cycle.
   */
  class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
  {
    val io = new QueueIO(Bool(), maxInFlight)
    // The occupancy output of the QueueIO is not implemented; it is tied to zero.
    io.count := UInt(0)

    // State encoding: bit 1 is set only for PASS and bit 0 only for WAIT; io.deq.bits relies on this.
    val PASS = UInt(2, width = 2) // accept 'last=1' enqueues; a 'last=0' enqueue is blocked while count>0, else accepted and we go to FIND
    val FIND = UInt(0, width = 2) // accept any enqueue; a 'last=1' enqueue moves to PASS if empty, otherwise to WAIT
    val WAIT = UInt(1, width = 2) // block enqueues until count drains; return to PASS when the final entry is dequeued

    val state = RegInit(PASS)
    val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
    val full  = count === UInt(maxInFlight-1)
    val empty = count === UInt(0)
    val last  = count === UInt(1)

    // Dequeued flag: always 1 in PASS, 1 in WAIT only for the final outstanding entry, 0 in FIND.
    io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
    io.deq.valid := !empty
    // Enqueue is allowed when not full and either nothing is outstanding, we are in FIND,
    // or we are in PASS and the incoming flag is 'last=1'.
    io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))

    // WAIT => count > 0
    assert (state =/= WAIT || count =/= UInt(0))

    // Flow-through mode. These connections come after the defaults above, so they take
    // precedence whenever their conditions hold.
    if (flow) {
      when (io.enq.valid) {
        io.deq.valid := Bool(true)
        // With nothing outstanding, the incoming flag is forwarded directly to the output.
        when (empty) { io.deq.bits := io.enq.bits }
      }
    }

    // Occupancy goes up by one on an accepted enqueue and down by one on an accepted dequeue.
    count := count + io.enq.fire() - io.deq.fire()
    switch (state) {
      // A 'last=0' flag arriving while empty starts a run of fragments.
      is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
      // A 'last=1' flag ends the run; wait for outstanding entries to drain unless there are none.
      is(FIND) { when (io.enq.valid &&  io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
      // Leave WAIT when count is 1 and the consumer is ready to take that final entry.
      is(WAIT) { when (last && io.deq.ready)                  { state := PASS } }
    }
  }
 }
--- a/src/main/scala/uncore/axi4/Nodes.scala
+++ b/src/main/scala/uncore/axi4/Nodes.scala
@ -19,10 +19,6 @@ object AXI4Imp extends NodeImp[AXI4MasterPortParameters, AXI4SlavePortParameters
  // Label for an inward edge: the slave's beatBytes multiplied by 8, rendered as a string.
  override def labelI(ei: AXI4EdgeParameters) = {
    val bits = ei.slave.beatBytes * 8
    bits.toString
  }
  // Label for an outward edge: the slave's beatBytes multiplied by 8, rendered as a string.
  override def labelO(eo: AXI4EdgeParameters) = {
    val bits = eo.slave.beatBytes * 8
    bits.toString
  }
  // Connects one AXI4 edge. No extra LazyModule is created (first tuple element is None);
  // the second element is a deferred action that bulk-connects bi to bo when invoked.
  def connect(bo: => AXI4Bundle, bi: => AXI4Bundle, ei: => AXI4EdgeParameters)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
    // The by-name bundles are only evaluated when the thunk runs.
    val wireUp: () => Unit = () => { bi <> bo }
    (None, wireUp)
  }
  // Prepends `node` to the nodePath of every master in the outward-flowing port parameters.
  override def mixO(pd: AXI4MasterPortParameters, node: OutwardNode[AXI4MasterPortParameters, AXI4SlavePortParameters, AXI4Bundle]): AXI4MasterPortParameters  = {
    val tagged = pd.masters.map(m => m.copy(nodePath = node +: m.nodePath))
    pd.copy(masters = tagged)
  }
  override def mixI(pu: AXI4SlavePortParameters, node: InwardNode[AXI4MasterPortParameters, AXI4SlavePortParameters, AXI4Bundle]): AXI4SlavePortParameters =
@ -31,16 +27,13 @@ object AXI4Imp extends NodeImp[AXI4MasterPortParameters, AXI4SlavePortParameters
 // Nodes implemented inside modules
 // AXI4 flavour of the generic IdentityNode, i.e. IdentityNode instantiated with AXI4Imp.
 case class AXI4IdentityNode() extends IdentityNode(AXI4Imp)
-case class AXI4MasterNode(portParams: AXI4MasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
+case class AXI4MasterNode(portParams: Seq[AXI4MasterPortParameters]) extends SourceNode(AXI4Imp)(portParams)
-  extends SourceNode(AXI4Imp)(portParams, numPorts)
+case class AXI4SlaveNode(portParams: Seq[AXI4SlavePortParameters]) extends SinkNode(AXI4Imp)(portParams)
 case class AXI4SlaveNode(portParams: AXI4SlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
  extends SinkNode(AXI4Imp)(portParams, numPorts)
 case class AXI4AdapterNode(
-  masterFn:       Seq[AXI4MasterPortParameters]  => AXI4MasterPortParameters,
+  masterFn:  AXI4MasterPortParameters => AXI4MasterPortParameters,
-  slaveFn:        Seq[AXI4SlavePortParameters] => AXI4SlavePortParameters,
+  slaveFn:   AXI4SlavePortParameters  => AXI4SlavePortParameters,
-  numMasterPorts: Range.Inclusive = 1 to 1,
+  numPorts:  Range.Inclusive = 0 to 999)
-  numSlavePorts:  Range.Inclusive = 1 to 1)
+  extends AdapterNode(AXI4Imp)(masterFn, slaveFn, numPorts)
  extends InteriorNode(AXI4Imp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
 // Nodes passed from an inner module
 // AXI4 flavour of the generic OutputNode, i.e. OutputNode instantiated with AXI4Imp.
 case class AXI4OutputNode() extends OutputNode(AXI4Imp)
--- a/src/main/scala/uncore/axi4/RegisterRouter.scala
+++ b/src/main/scala/uncore/axi4/RegisterRouter.scala
@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends AXI4SlaveNode(AXI4SlavePortParameters(
+  extends AXI4SlaveNode(Seq(AXI4SlavePortParameters(
    Seq(AXI4SlaveParameters(
      address       = Seq(address),
      executable    = executable,
@ -17,7 +17,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
      supportsRead  = TransferSizes(1, beatBytes),
      interleavedId = Some(0))),
    beatBytes  = beatBytes,
-    minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
+    minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
 {
  require (address.contiguous)
--- a/src/main/scala/uncore/axi4/SRAM.scala
+++ b/src/main/scala/uncore/axi4/SRAM.scala
@ -8,7 +8,7 @@ import diplomacy._
 class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = AXI4SlaveNode(AXI4SlavePortParameters(
+  val node = AXI4SlaveNode(Seq(AXI4SlavePortParameters(
    Seq(AXI4SlaveParameters(
      address       = List(address),
      regionType    = RegionType.UNCACHED,
@ -17,7 +17,7 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
      supportsWrite = TransferSizes(1, beatBytes),
      interleavedId = Some(0))),
    beatBytes  = beatBytes,
-    minLatency = 0)) // B responds on same cycle
+    minLatency = 0))) // B responds on same cycle
  // We require the address range to include an entire beat (for the write mask)
  require ((address.mask & (beatBytes-1)) == beatBytes-1)
--- a/src/main/scala/uncore/axi4/ToTL.scala
+++ b/src/main/scala/uncore/axi4/ToTL.scala
@ -8,15 +8,15 @@ import config._
 import diplomacy._
 import uncore.tilelink2._
-case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
+case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)(
-  dFn = { case (1, Seq(AXI4MasterPortParameters(masters))) =>
+  dFn = { case AXI4MasterPortParameters(masters) =>
-    Seq(TLClientPortParameters(clients = masters.map { m =>
+    TLClientPortParameters(clients = masters.map { m =>
      TLClientParameters(
        sourceId = IdRange(m.id.start << 1, m.id.end << 1), // R+W ids are distinct
        nodePath = m.nodePath)
-    }))
+    })
  },
-  uFn = { case (1, Seq(mp)) => Seq(AXI4SlavePortParameters(
+  uFn = { mp => AXI4SlavePortParameters(
    slaves = mp.managers.map { m =>
      AXI4SlaveParameters(
        address       = m.address,
@ -27,10 +27,8 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
        supportsRead  = m.supportsGet,
        interleavedId = Some(0))}, // TL2 never interleaves D beats
    beatBytes = mp.beatBytes,
-    minLatency = mp.minLatency))
+    minLatency = mp.minLatency)
-  },
+  })
  numPO = 1 to 1,
  numPI = 1 to 1)
 class AXI4ToTL()(implicit p: Parameters) extends LazyModule
 {
@ -42,131 +40,129 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule
      val out = node.bundleOut
    }
-    val in = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      val numIds = edgeIn.master.endId
-    val edgeIn = node.edgesIn(0)
+      val beatBytes = edgeOut.manager.beatBytes
-    val edgeOut = node.edgesOut(0)
+      val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1
    val numIds = edgeIn.master.endId
    val beatBytes = edgeOut.manager.beatBytes
    val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1
-    require (edgeIn.master.masters(0).aligned)
+      require (edgeIn.master.masters(0).aligned)
-    val r_out = Wire(out.a)
+      val r_out = Wire(out.a)
-    val r_inflight = RegInit(UInt(0, width = numIds))
+      val r_inflight = RegInit(UInt(0, width = numIds))
-    val r_block = r_inflight(in.ar.bits.id)
+      val r_block = r_inflight(in.ar.bits.id)
-    val r_size1 = in.ar.bits.bytes1()
+      val r_size1 = in.ar.bits.bytes1()
-    val r_size = OH1ToUInt(r_size1)
+      val r_size = OH1ToUInt(r_size1)
-    val r_addr = in.ar.bits.addr
+      val r_addr = in.ar.bits.addr
-    val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size)
+      val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size)
-    val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params)))
+      val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params)))
-    val r_err_out = Queue(r_err_in, 2)
+      val r_err_out = Queue(r_err_in, 2)
-    val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits))
+      val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits))
-    val r_last = r_count === in.ar.bits.len
+      val r_last = r_count === in.ar.bits.len
-    assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned
+      assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned
-    in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block
+      in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block
-    r_out.valid := in.ar.valid && !r_block && r_ok
+      r_out.valid := in.ar.valid && !r_block && r_ok
-    r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2
+      r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2
-    r_err_in.valid := in.ar.valid && !r_block && !r_ok
+      r_err_in.valid := in.ar.valid && !r_block && !r_ok
-    r_err_in.bits.last := r_last
+      r_err_in.bits.last := r_last
-    r_err_in.bits.id := in.ar.bits.id
+      r_err_in.bits.id := in.ar.bits.id
-    when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) }
+      when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) }
-    val w_out = Wire(out.a)
+      val w_out = Wire(out.a)
-    val w_inflight = RegInit(UInt(0, width = numIds))
+      val w_inflight = RegInit(UInt(0, width = numIds))
-    val w_block = w_inflight(in.aw.bits.id)
+      val w_block = w_inflight(in.aw.bits.id)
-    val w_size1 = in.aw.bits.bytes1()
+      val w_size1 = in.aw.bits.bytes1()
-    val w_size = OH1ToUInt(w_size1)
+      val w_size = OH1ToUInt(w_size1)
-    val w_addr = in.aw.bits.addr
+      val w_addr = in.aw.bits.addr
-    val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size)
+      val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size)
-    val w_err_in = Wire(Decoupled(in.aw.bits.id))
+      val w_err_in = Wire(Decoupled(in.aw.bits.id))
-    val w_err_out = Queue(w_err_in, 2)
+      val w_err_out = Queue(w_err_in, 2)
-    assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned
+      assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned
-    assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned
+      assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned
-    in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block
+      in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block
-    in.w.ready  := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block
+      in.w.ready  := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block
-    w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok
+      w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok
-    w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
+      w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
-    w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last
+      w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last
-    w_err_in.bits := in.aw.bits.id
+      w_err_in.bits := in.aw.bits.id
-    TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))
+      TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))
-    val ok_b  = Wire(in.b)
+      val ok_b  = Wire(in.b)
-    val err_b = Wire(in.b)
+      val err_b = Wire(in.b)
-    val mux_b = Wire(in.b)
+      val mux_b = Wire(in.b)
-    val ok_r  = Wire(in.r)
+      val ok_r  = Wire(in.r)
-    val err_r = Wire(in.r)
+      val err_r = Wire(in.r)
-    val mux_r = Wire(in.r)
+      val mux_r = Wire(in.r)
-    val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY)
+      val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY)
-    val d_hasData = edgeOut.hasData(out.d.bits)
+      val d_hasData = edgeOut.hasData(out.d.bits)
-    val d_last = edgeOut.last(out.d)
+      val d_last = edgeOut.last(out.d)
-    out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready)
+      out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready)
-    ok_r.valid := out.d.valid && d_hasData
+      ok_r.valid := out.d.valid && d_hasData
-    ok_b.valid := out.d.valid && !d_hasData
+      ok_b.valid := out.d.valid && !d_hasData
-    ok_r.bits.id   := out.d.bits.source >> 1
+      ok_r.bits.id   := out.d.bits.source >> 1
-    ok_r.bits.data := out.d.bits.data
+      ok_r.bits.data := out.d.bits.data
-    ok_r.bits.resp := d_resp
+      ok_r.bits.resp := d_resp
-    ok_r.bits.last := d_last
+      ok_r.bits.last := d_last
-    r_err_out.ready := err_r.ready
+      r_err_out.ready := err_r.ready
-    err_r.valid := r_err_out.valid
+      err_r.valid := r_err_out.valid
-    err_r.bits.id   := r_err_out.bits.id
+      err_r.bits.id   := r_err_out.bits.id
-    err_r.bits.data := out.d.bits.data // don't care
+      err_r.bits.data := out.d.bits.data // don't care
-    err_r.bits.resp := AXI4Parameters.RESP_DECERR
+      err_r.bits.resp := AXI4Parameters.RESP_DECERR
-    err_r.bits.last := r_err_out.bits.last
+      err_r.bits.last := r_err_out.bits.last
-    // AXI4 must hold R to one source until last
+      // AXI4 must hold R to one source until last
-    val mux_lock_ok  = RegInit(Bool(false))
+      val mux_lock_ok  = RegInit(Bool(false))
-    val mux_lock_err = RegInit(Bool(false))
+      val mux_lock_err = RegInit(Bool(false))
-    when (ok_r .fire()) { mux_lock_ok  := !ok_r .bits.last }
+      when (ok_r .fire()) { mux_lock_ok  := !ok_r .bits.last }
-    when (err_r.fire()) { mux_lock_err := !err_r.bits.last }
+      when (err_r.fire()) { mux_lock_err := !err_r.bits.last }
-    assert (!mux_lock_ok || !mux_lock_err)
+      assert (!mux_lock_ok || !mux_lock_err)
-    // Prioritize err over ok (b/c err_r.valid comes from a register)
+      // Prioritize err over ok (b/c err_r.valid comes from a register)
-    mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid)
+      mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid)
-    mux_r.bits  := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits)
+      mux_r.bits  := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits)
-    ok_r.ready  := mux_r.ready && (mux_lock_ok || !err_r.valid)
+      ok_r.ready  := mux_r.ready && (mux_lock_ok || !err_r.valid)
-    err_r.ready := mux_r.ready && !mux_lock_ok
+      err_r.ready := mux_r.ready && !mux_lock_ok
-    // AXI4 needs irrevocable behaviour
+      // AXI4 needs irrevocable behaviour
-    in.r <> Queue.irrevocable(mux_r, 1, flow=true)
+      in.r <> Queue.irrevocable(mux_r, 1, flow=true)
-    ok_b.bits.id   := out.d.bits.source >> 1
+      ok_b.bits.id   := out.d.bits.source >> 1
-    ok_b.bits.resp := d_resp
+      ok_b.bits.resp := d_resp
-    w_err_out.ready := err_b.ready
+      w_err_out.ready := err_b.ready
-    err_b.valid := w_err_out.valid
+      err_b.valid := w_err_out.valid
-    err_b.bits.id   := w_err_out.bits
+      err_b.bits.id   := w_err_out.bits
-    err_b.bits.resp := AXI4Parameters.RESP_DECERR
+      err_b.bits.resp := AXI4Parameters.RESP_DECERR
-    // Prioritize err over ok (b/c err_b.valid comes from a register)
+      // Prioritize err over ok (b/c err_b.valid comes from a register)
-    mux_b.valid := ok_b.valid || err_b.valid
+      mux_b.valid := ok_b.valid || err_b.valid
-    mux_b.bits  := Mux(err_b.valid, err_b.bits, ok_b.bits)
+      mux_b.bits  := Mux(err_b.valid, err_b.bits, ok_b.bits)
-    ok_b.ready  := mux_b.ready && !err_b.valid
+      ok_b.ready  := mux_b.ready && !err_b.valid
-    err_b.ready := mux_b.ready
+      err_b.ready := mux_b.ready
-    // AXI4 needs irrevocable behaviour
+      // AXI4 needs irrevocable behaviour
-    in.b <> Queue.irrevocable(mux_b, 1, flow=true)
+      in.b <> Queue.irrevocable(mux_b, 1, flow=true)
-    // Update flight trackers
+      // Update flight trackers
-    val r_set = in.ar.fire().asUInt << in.ar.bits.id
+      val r_set = in.ar.fire().asUInt << in.ar.bits.id
-    val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id
+      val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id
-    r_inflight := (r_inflight | r_set) & ~r_clr
+      r_inflight := (r_inflight | r_set) & ~r_clr
-    val w_set = in.aw.fire().asUInt << in.aw.bits.id
+      val w_set = in.aw.fire().asUInt << in.aw.bits.id
-    val w_clr = in.b.fire().asUInt << in.b.bits.id
+      val w_clr = in.b.fire().asUInt << in.b.bits.id
-    w_inflight := (w_inflight | w_set) & ~w_clr
+      w_inflight := (w_inflight | w_set) & ~w_clr
-    // Unused channels
+      // Unused channels
-    out.b.ready := Bool(true)
+      out.b.ready := Bool(true)
-    out.c.valid := Bool(false)
+      out.c.valid := Bool(false)
-    out.e.valid := Bool(false)
+      out.e.valid := Bool(false)
    }
  }
 }
--- a/src/main/scala/uncore/devices/Plic.scala
+++ b/src/main/scala/uncore/devices/Plic.scala
@ -62,7 +62,7 @@ class TLPLIC(supervisor: Boolean, maxPriorities: Int, address: BigInt = 0xC00000
    beatBytes = p(rocket.XLen)/8,
    undefZero = false)
-  val intnode = IntAdapterNode(
+  val intnode = IntNexusNode(
    numSourcePorts = 0 to 1024,
    numSinkPorts   = 0 to 1024,
    sourceFn       = { _ => IntSourcePortParameters(Seq(IntSourceParameters(contextsPerHart))) },
--- a/src/main/scala/uncore/tilelink2/AtomicAutomata.scala
+++ b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala
@ -6,6 +6,7 @@ import Chisel._
 import chisel3.internal.sourceinfo.SourceInfo
 import config._
 import diplomacy._
 import util.GenericParameterizedBundle
 import scala.math.{min,max}
 // Ensures that all downstream RW managers support Atomic operations.
@ -15,8 +16,8 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
  require (concurrency >= 1)
  val node = TLAdapterNode(
-    clientFn  = { case Seq(cp) => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) },
+    clientFn  = { case cp => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) },
-    managerFn = { case Seq(mp) => mp.copy(managers = mp.managers.map { m =>
+    managerFn = { case mp => mp.copy(managers = mp.managers.map { m =>
      val ourSupport = TransferSizes(1, mp.beatBytes)
      def widen(x: TransferSizes) = if (passthrough && x.min <= 2*mp.beatBytes) TransferSizes(1, max(mp.beatBytes, x.max)) else ourSupport
      val canDoit = m.supportsPutFull.contains(ourSupport) && m.supportsGet.contains(ourSupport)
@ -33,245 +34,232 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
      val out = node.bundleOut
    }
-    val in  = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      val managers = edgeOut.manager.managers
-    val edgeIn  = node.edgesIn(0)
+      val beatBytes = edgeOut.manager.beatBytes
    val edgeOut = node.edgesOut(0)
    val managers = edgeOut.manager.managers
    val beatBytes = edgeOut.manager.beatBytes
-    // To which managers are we adding atomic support?
+      // To which managers are we adding atomic support?
-    val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes)
+      val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes)
-    val managersNeedingHelp = managers.filter { m =>
+      val managersNeedingHelp = managers.filter { m =>
-      m.supportsPutFull.contains(ourSupport) &&
+        m.supportsPutFull.contains(ourSupport) &&
-      m.supportsGet.contains(ourSupport) &&
+        m.supportsGet.contains(ourSupport) &&
-      ((logical    && !m.supportsLogical   .contains(ourSupport)) ||
+        ((logical    && !m.supportsLogical   .contains(ourSupport)) ||
-       (arithmetic && !m.supportsArithmetic.contains(ourSupport)) ||
+         (arithmetic && !m.supportsArithmetic.contains(ourSupport)) ||
-       !passthrough) // we will do atomics for everyone we can
+         !passthrough) // we will do atomics for everyone we can
    }
    // We cannot add atomics to a non-FIFO manager
    managersNeedingHelp foreach { m => require (m.fifoId.isDefined) }
    // We need to preserve FIFO semantics across FIFO domains, not managers
    // Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
    // If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
    // Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
    val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct
    // Don't overprovision the CAM
    val camSize = min(domainsNeedingHelp.size, concurrency)
    // Compact the fifoIds to only those we care about
    val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))
    // CAM entry state machine
    val FREE = UInt(0) // unused                   waiting on Atomic from A
    val GET  = UInt(3) // Get sent down A          waiting on AccessDataAck from D
    val AMO  = UInt(2) // AccessDataAck sent up D  waiting for A availability
    val ACK  = UInt(1) // Put sent down A          waiting for PutAck from D
    def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
      if (!passthrough) Bool(false) else
      if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else
      Mux1H(select, x.map(_.containsLg(lgSize))) 
    // Do we need to do anything at all?
    if (camSize > 0) {
      class CAM_S extends Bundle {
        val state = UInt(width = 2)
      }
      class CAM_A extends Bundle {
        val bits    = new TLBundleA(out.a.bits.params)
        val fifoId  = UInt(width = log2Up(domainsNeedingHelp.size))
        val lut     = UInt(width = 4)
      }
      class CAM_D extends Bundle {
        val data = UInt(width = out.a.bits.params.dataBits)
      }
      // We cannot add atomics to a non-FIFO manager
      managersNeedingHelp foreach { m => require (m.fifoId.isDefined) }
      // We need to preserve FIFO semantics across FIFO domains, not managers
      // Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
      // If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
      // Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
      val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct
      // Don't overprovision the CAM
      val camSize = min(domainsNeedingHelp.size, concurrency)
      // Compact the fifoIds to only those we care about
      val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))
-      val initval = Wire(new CAM_S)
+      // CAM entry state machine
-      initval.state := FREE
+      val FREE = UInt(0) // unused                   waiting on Atomic from A
-      val cam_s = RegInit(Vec.fill(camSize)(initval))
+      val GET  = UInt(3) // Get sent down A          waiting on AccessDataAck from D
-      val cam_a = Reg(Vec(camSize, new CAM_A))
+      val AMO  = UInt(2) // AccessDataAck sent up D  waiting for A availability
-      val cam_d = Reg(Vec(camSize, new CAM_D))
+      val ACK  = UInt(1) // Put sent down A          waiting for PutAck from D
-      val cam_free   = cam_s.map(_.state === FREE)
+      def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
-      val cam_amo    = cam_s.map(_.state === AMO)
+        if (!passthrough) Bool(false) else
-      val cam_abusy  = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked
+        if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else
-      val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries
+        Mux1H(select, x.map(_.containsLg(lgSize))) 
-      // Can the manager already handle this message?
+      val params = TLAtomicAutomata.CAMParams(out.a.bits.params, domainsNeedingHelp.size)
-      val a_size = edgeIn.size(in.a.bits)
+      // Do we need to do anything at all?
-      val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits))
+      if (camSize > 0) {
-      val a_canLogical    = helper(a_select, managers.map(_.supportsLogical),    a_size)
+        val initval = Wire(new TLAtomicAutomata.CAM_S(params))
-      val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size)
+        initval.state := FREE
-      val a_isLogical    = in.a.bits.opcode === TLMessages.LogicalData
+        val cam_s = RegInit(Vec.fill(camSize)(initval))
-      val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData
+        val cam_a = Reg(Vec(camSize, new TLAtomicAutomata.CAM_A(params)))
-      val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true)))
+        val cam_d = Reg(Vec(camSize, new TLAtomicAutomata.CAM_D(params)))
-      // Must we do a Put?
+        val cam_free   = cam_s.map(_.state === FREE)
-      val a_cam_any_put = cam_amo.reduce(_ || _)
+        val cam_amo    = cam_s.map(_.state === AMO)
-      val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init
+        val cam_abusy  = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked
-      val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b }
+        val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries
      val a_cam_a = PriorityMux(cam_amo, cam_a)
      val a_cam_d = PriorityMux(cam_amo, cam_d)
      val a_a = a_cam_a.bits.data
      val a_d = a_cam_d.data
-      // Does the A request conflict with an inflight AMO?
+        // Can the manager already handle this message?
-      val a_fifoId  = Mux1H(a_select, camFifoIds)
+        val a_size = edgeIn.size(in.a.bits)
-      val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_)
+        val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits))
        val a_canLogical    = helper(a_select, managers.map(_.supportsLogical),    a_size)
        val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size)
        val a_isLogical    = in.a.bits.opcode === TLMessages.LogicalData
        val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData
        val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true)))
-      // (Where) are we are allocating in the CAM?
+        // Must we do a Put?
-      val a_cam_any_free = cam_free.reduce(_ || _)
+        val a_cam_any_put = cam_amo.reduce(_ || _)
-      val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init
+        val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init
-      val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b }
+        val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b }
        val a_cam_a = PriorityMux(cam_amo, cam_a)
        val a_cam_d = PriorityMux(cam_amo, cam_d)
        val a_a = a_cam_a.bits.data
        val a_d = a_cam_d.data
-      // Logical AMO
+        // Does the A request conflict with an inflight AMO?
-      val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) }
+        val a_fifoId  = Mux1H(a_select, camFifoIds)
-      val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse)
+        val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_)
-      // Arithmetic AMO
+        // (Where) are we are allocating in the CAM?
-      val unsigned = a_cam_a.bits.param(1)
+        val a_cam_any_free = cam_free.reduce(_ || _)
-      val take_max = a_cam_a.bits.param(0)
+        val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init
-      val adder = a_cam_a.bits.param(2)
+        val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b }
      val mask = a_cam_a.bits.mask
      val signSel = ~(~mask | (mask >> 1))
      val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
      val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
      // Move the selected sign bit into the first byte position it will extend
      val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
      val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
      val signext_a = FillInterleaved(8, leftOR(signbit_a))
      val signext_d = FillInterleaved(8, leftOR(signbit_d))
      // NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
      val wide_mask = FillInterleaved(8, mask)
      val a_a_ext = (a_a & wide_mask) | signext_a
      val a_d_ext = (a_d & wide_mask) | signext_d
      val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
      val adder_out = a_a_ext + a_d_inv
      val h = 8*beatBytes-1 // now sign-extended; use biggest bit
      val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
      val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
      val pick_a = take_max === a_bigger
      val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))
-      // AMO result data
+        // Logical AMO
-      val amo_data =
+        val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) }
-        if (!logical)    arith_out else
+        val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse)
        if (!arithmetic) logic_out else
        Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)
-      // Potentially mutate the message from inner
+        // Arithmetic AMO
-      val source_i = Wire(in.a)
+        val unsigned = a_cam_a.bits.param(1)
-      val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free)
+        val take_max = a_cam_a.bits.param(0)
-      in.a.ready := source_i.ready && a_allow
+        val adder = a_cam_a.bits.param(2)
-      source_i.valid := in.a.valid && a_allow
+        val mask = a_cam_a.bits.mask
-      source_i.bits  := in.a.bits
+        val signSel = ~(~mask | (mask >> 1))
-      when (!a_isSupported) { // minimal mux difference
+        val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
-        source_i.bits.opcode := TLMessages.Get
+        val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
-        source_i.bits.param  := UInt(0)
+        // Move the selected sign bit into the first byte position it will extend
-      }
+        val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
        val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
        val signext_a = FillInterleaved(8, leftOR(signbit_a))
        val signext_d = FillInterleaved(8, leftOR(signbit_d))
        // NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
        val wide_mask = FillInterleaved(8, mask)
        val a_a_ext = (a_a & wide_mask) | signext_a
        val a_d_ext = (a_d & wide_mask) | signext_d
        val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
        val adder_out = a_a_ext + a_d_inv
        val h = 8*beatBytes-1 // now sign-extended; use biggest bit
        val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
        val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
        val pick_a = take_max === a_bigger
        val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))
-      // Potentially take the message from the CAM
+        // AMO result data
-      val source_c = Wire(in.a)
+        val amo_data =
-      source_c.valid := a_cam_any_put
+          if (!logical)    arith_out else
-      source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2
+          if (!arithmetic) logic_out else
          Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)
-      // Finishing an AMO from the CAM has highest priority
+        // Potentially mutate the message from inner
-      TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), source_c), (edgeOut.numBeats1(in.a.bits), source_i))
+        val source_i = Wire(in.a)
        val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free)
        in.a.ready := source_i.ready && a_allow
        source_i.valid := in.a.valid && a_allow
        source_i.bits  := in.a.bits
        when (!a_isSupported) { // minimal mux difference
          source_i.bits.opcode := TLMessages.Get
          source_i.bits.param  := UInt(0)
        }
-      // Capture the A state into the CAM
+        // Potentially take the message from the CAM
-      when (source_i.fire() && !a_isSupported) {
+        val source_c = Wire(in.a)
-        (a_cam_sel_free zip cam_a) foreach { case (en, r) =>
+        source_c.valid := a_cam_any_put
-          when (en) {
+        source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2
-            r.fifoId := a_fifoId
+
-            r.bits   := in.a.bits
+        // Finishing an AMO from the CAM has highest priority
-            r.lut    := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array(
+        TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), source_c), (edgeOut.numBeats1(in.a.bits), source_i))
-              TLAtomics.AND  -> UInt(0x8),
+
-              TLAtomics.OR   -> UInt(0xe),
+        // Capture the A state into the CAM
-              TLAtomics.XOR  -> UInt(0x6),
+        when (source_i.fire() && !a_isSupported) {
-              TLAtomics.SWAP -> UInt(0xc)))
+          (a_cam_sel_free zip cam_a) foreach { case (en, r) =>
            when (en) {
              r.fifoId := a_fifoId
              r.bits   := in.a.bits
              r.lut    := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array(
                TLAtomics.AND  -> UInt(0x8),
                TLAtomics.OR   -> UInt(0xe),
                TLAtomics.XOR  -> UInt(0x6),
                TLAtomics.SWAP -> UInt(0xc)))
            }
          }
          (a_cam_sel_free zip cam_s) foreach { case (en, r) =>
            when (en) {
              r.state := GET
            }
          }
        }
-        (a_cam_sel_free zip cam_s) foreach { case (en, r) =>
+
-          when (en) {
+        // Advance the put state
-            r.state := GET
+        when (source_c.fire()) {
          (a_cam_sel_put zip cam_s) foreach { case (en, r) =>
            when (en) {
              r.state := ACK
            }
          }
        }
      }
-      // Advance the put state
+        // We need to deal with a potential D response in the same cycle as the A request
-      when (source_c.fire()) {
+        val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source)
-        (a_cam_sel_put zip cam_s) foreach { case (en, r) =>
+        val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b }
-          when (en) {
+        val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data))
-            r.state := ACK
+        val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else
                               out.d.bits.source === in.a.bits.source && in.a.valid && !a_isSupported
        val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) }
        val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _)
        val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData
        val d_ack  = out.d.bits.opcode === TLMessages.AccessAck
        when (out.d.fire()) {
          (d_cam_sel zip cam_d) foreach { case (en, r) =>
            when (en && d_ackd) {
              r.data := out.d.bits.data
            }
          }
          (d_cam_sel zip cam_s) foreach { case (en, r) =>
            when (en) {
              // Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle
              r.state := Mux(d_ackd, AMO, FREE)
            }
          }
        }
      }
-      // We need to deal with a potential D response in the same cycle as the A request
+        val d_drop = d_ackd && d_cam_sel_any
-      val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source)
+        val d_replace = d_ack && d_cam_sel_match.reduce(_ || _)
      val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b }
      val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data))
      val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else
                             out.d.bits.source === in.a.bits.source && in.a.valid && !a_isSupported
      val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) }
      val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _)
      val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData
      val d_ack  = out.d.bits.opcode === TLMessages.AccessAck
-      when (out.d.fire()) {
+        in.d.valid := out.d.valid && !d_drop
-        (d_cam_sel zip cam_d) foreach { case (en, r) =>
+        out.d.ready := in.d.ready || d_drop
-          when (en && d_ackd) {
+
-            r.data := out.d.bits.data
+        in.d.bits := out.d.bits
-          }
+        when (d_replace) { // minimal muxes
-        }
+          in.d.bits.opcode := TLMessages.AccessAckData
-        (d_cam_sel zip cam_s) foreach { case (en, r) =>
+          in.d.bits.data := d_cam_data
          when (en) {
            // Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle
            r.state := Mux(d_ackd, AMO, FREE)
          }
        }
      } else {
        out.a.valid := in.a.valid
        in.a.ready := out.a.ready
        out.a.bits := in.a.bits
        in.d.valid := out.d.valid
        out.d.ready := in.d.ready
        in.d.bits := out.d.bits
      }
-      val d_drop = d_ackd && d_cam_sel_any
+      if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
-      val d_replace = d_ack && d_cam_sel_match.reduce(_ || _)
+        in.b.valid := out.b.valid
        out.b.ready := in.b.ready
        in.b.bits := out.b.bits
-      in.d.valid := out.d.valid && !d_drop
+        out.c.valid := in.c.valid
-      out.d.ready := in.d.ready || d_drop
+        in.c.ready := out.c.ready
        out.c.bits := in.c.bits
-      in.d.bits := out.d.bits
+        out.e.valid := in.e.valid
-      when (d_replace) { // minimal muxes
+        in.e.ready := out.e.ready
-        in.d.bits.opcode := TLMessages.AccessAckData
+        out.e.bits := in.e.bits
-        in.d.bits.data := d_cam_data
+      } else {
        in.b.valid := Bool(false)
        in.c.ready := Bool(true)
        in.e.ready := Bool(true)
        out.b.ready := Bool(true)
        out.c.valid := Bool(false)
        out.e.valid := Bool(false)
      }
    } else {
      out.a.valid := in.a.valid
      in.a.ready := out.a.ready
      out.a.bits := in.a.bits
      in.d.valid := out.d.valid
      out.d.ready := in.d.ready
      in.d.bits := out.d.bits
    }
    if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
      in.b.valid := out.b.valid
      out.b.ready := in.b.ready
      in.b.bits := out.b.bits
      out.c.valid := in.c.valid
      in.c.ready := out.c.ready
      out.c.bits := in.c.bits
      out.e.valid := in.e.valid
      in.e.ready := out.e.ready
      out.e.bits := in.e.bits
    } else {
      in.b.valid := Bool(false)
      in.c.ready := Bool(true)
      in.e.ready := Bool(true)
      out.b.ready := Bool(true)
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
  }
 }
@ -284,6 +272,20 @@ object TLAtomicAutomata
    atomics.node := x
    atomics.node
  }
  // Parameters used to size the CAM_* bundles declared in this object.
  //   a                  -- TileLink bundle parameters; used to build the stored A-channel
  //                         message (CAM_A.bits) and to size the data field (CAM_D.data)
  //   domainsNeedingHelp -- count used to size CAM_A.fifoId (log2Up of this value)
  // NOTE(review): presumably the number of FIFO domains whose managers need atomics
  // emulated by this adapter -- confirm against the code that constructs CAMParams.
  case class CAMParams(a: TLBundleParameters, domainsNeedingHelp: Int)
  // Bundle holding a single 2-bit `state` field.
  // NOTE(review): presumably the per-entry state of the atomics CAM; the state encoding
  // is defined outside this bundle -- confirm against the users of CAM_S.
  class CAM_S(params: CAMParams) extends GenericParameterizedBundle(params) {
    val state = UInt(width = 2)
  }
  // Bundle holding a complete A-channel message plus two side fields.
  //   bits   -- a TLBundleA built from params.a
  //   fifoId -- log2Up(params.domainsNeedingHelp) bits wide
  //   lut    -- 4 bits wide
  // NOTE(review): `lut` looks like a 4-entry truth table for logical atomic operations,
  // and `fifoId` like the index of the FIFO domain the request targets -- confirm
  // against the code that fills in and reads these fields.
  class CAM_A(params: CAMParams) extends GenericParameterizedBundle(params) {
    val bits    = new TLBundleA(params.a)
    val fifoId  = UInt(width = log2Up(params.domainsNeedingHelp))
    val lut     = UInt(width = 4)
  }
  // Bundle holding a single `data` field that is params.a.dataBits wide.
  // NOTE(review): presumably the data captured from a D-channel response for later
  // replay -- confirm against the users of CAM_D.
  class CAM_D(params: CAMParams) extends GenericParameterizedBundle(params) {
    val data = UInt(width = params.a.dataBits)
  }
 }
 /** Synthesizable unit tests */
--- a/src/main/scala/uncore/tilelink2/Broadcast.scala
+++ b/src/main/scala/uncore/tilelink2/Broadcast.scala
@ -13,11 +13,11 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa
  require (numTrackers > 0)
  val node = TLAdapterNode(
-    clientFn  = { case Seq(cp) =>
+    clientFn  = { cp =>
      cp.copy(clients = Seq(TLClientParameters(
        sourceId = IdRange(0, 1 << log2Ceil(cp.endSourceId*4)))))
    },
-    managerFn = { case Seq(mp) =>
+    managerFn = { mp =>
      mp.copy(
        endSinkId  = numTrackers,
        managers   = mp.managers.map { m =>
@ -56,154 +56,152 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa
      val out = node.bundleOut
    }
-    val in = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      val clients = edgeIn.client.clients
-    val edgeIn = node.edgesIn(0)
+      val managers = edgeOut.manager.managers
-    val edgeOut = node.edgesOut(0)
+      val lineShift = log2Ceil(lineBytes)
    val clients = edgeIn.client.clients
    val managers = edgeOut.manager.managers
    val lineShift = log2Ceil(lineBytes)
-    import TLBroadcastConstants._
+      import TLBroadcastConstants._
-    require (lineBytes >= edgeOut.manager.beatBytes)
+      require (lineBytes >= edgeOut.manager.beatBytes)
-    // For the probe walker, we need to identify all the caches
+      // For the probe walker, we need to identify all the caches
-    val caches = clients.filter(_.supportsProbe).map(_.sourceId)
+      val caches = clients.filter(_.supportsProbe).map(_.sourceId)
-    val cache_targets = caches.map(c => UInt(c.start))
+      val cache_targets = caches.map(c => UInt(c.start))
-    // Create the request tracker queues
+      // Create the request tracker queues
-    val trackers = Seq.tabulate(numTrackers) { id =>
+      val trackers = Seq.tabulate(numTrackers) { id =>
-      Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io
+        Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io
      }
      // We always accept E
      in.e.ready := Bool(true)
      (trackers zip UIntToOH(in.e.bits.sink).toBools) foreach { case (tracker, select) =>
        tracker.e_last := select && in.e.fire()
      }
      // Depending on the high source bits, we might transform D
      val d_high = log2Ceil(edgeIn.client.endSourceId)
      val d_what = out.d.bits.source(d_high+1, d_high)
      val d_drop = d_what === DROP
      val d_hasData = edgeOut.hasData(out.d.bits)
      val d_normal = Wire(in.d)
      val d_trackerOH = Vec(trackers.map { t => !t.idle && t.source === d_normal.bits.source }).asUInt
      assert (!out.d.valid || !d_drop || out.d.bits.opcode === TLMessages.AccessAck)
      out.d.ready := d_normal.ready || d_drop
      d_normal.valid := out.d.valid && !d_drop
      d_normal.bits := out.d.bits // truncates source
      when (d_what(1)) { // TRANSFORM_*
        d_normal.bits.opcode := Mux(d_hasData, TLMessages.GrantData, TLMessages.ReleaseAck)
        d_normal.bits.param  := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0))
      }
      d_normal.bits.sink := OHToUInt(d_trackerOH)
      assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck))
      // A tracker response is anything neither dropped nor a ReleaseAck
      val d_response = d_hasData || !d_what(1)
      val d_last = edgeIn.last(d_normal)
      (trackers zip d_trackerOH.toBools) foreach { case (tracker, select) =>
        tracker.d_last := select && d_normal.fire() && d_response && d_last
        tracker.probedack := select && out.d.fire() && d_drop
      }
      // Incoming C can be:
      // ProbeAck     => decrement tracker, drop 
      // ProbeAckData => decrement tracker, send out A as PutFull(DROP)
      // ReleaseData  =>                    send out A as PutFull(TRANSFORM)
      // Release      => send out D as ReleaseAck
      val c_probeack     = in.c.bits.opcode === TLMessages.ProbeAck
      val c_probeackdata = in.c.bits.opcode === TLMessages.ProbeAckData
      val c_releasedata  = in.c.bits.opcode === TLMessages.ReleaseData
      val c_release      = in.c.bits.opcode === TLMessages.Release
      val c_trackerOH    = trackers.map { t => t.line === (in.c.bits.address >> lineShift) }
      val c_trackerSrc   = Mux1H(c_trackerOH, trackers.map { _.source })
      // Decrement the tracker's outstanding probe counter
      (trackers zip c_trackerOH) foreach { case (tracker, select) =>
        tracker.probenack := in.c.fire() && c_probeack && select
      }
      val releaseack = Wire(in.d)
      val putfull = Wire(out.a)
      in.c.ready := c_probeack || Mux(c_release, releaseack.ready, putfull.ready)
      releaseack.valid := in.c.valid && c_release
      releaseack.bits  := edgeIn.ReleaseAck(in.c.bits.address, UInt(0), in.c.bits.source, in.c.bits.size)
      val put_what = Mux(c_releasedata, TRANSFORM_B, DROP)
      val put_who  = Mux(c_releasedata, in.c.bits.source, c_trackerSrc)
      putfull.valid := in.c.valid && (c_probeackdata || c_releasedata)
      putfull.bits := edgeOut.Put(Cat(put_what, put_who), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2
      // Combine ReleaseAck or the modified D
      TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
      // Combine the PutFull with the trackers
      TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))
      // The Probe FSM walks all caches and probes them
      val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
      val probe_line = Reg(UInt())
      val probe_perms = Reg(UInt(width = 2))
      val probe_next = probe_todo & ~(leftOR(probe_todo) << 1)
      val probe_busy = probe_todo.orR()
      val probe_target = if (caches.size == 0) UInt(0) else Mux1H(probe_next, cache_targets)
      // Probe whatever the FSM wants to do next
      in.b.valid := probe_busy
      if (caches.size != 0) {
        in.b.bits := edgeIn.Probe(probe_line << lineShift, probe_target, UInt(lineShift), probe_perms)._2
      }
      when (in.b.fire()) { probe_todo := probe_todo & ~probe_next }
      // Which cache does a request come from?
      val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt
      val a_first = edgeIn.first(in.a)
      // To accept a request from A, the probe FSM must be idle and there must be a matching tracker
      val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt
      val freeTracker = freeTrackers.orR()
      val matchTrackers = Vec(trackers.map { t => t.line === in.a.bits.address >> lineShift }).asUInt
      val matchTracker = matchTrackers.orR()
      val allocTracker = freeTrackers & ~(leftOR(freeTrackers) << 1)
      val selectTracker = Mux(matchTracker, matchTrackers, allocTracker)
      val trackerReady = Vec(trackers.map(_.in_a.ready)).asUInt
      in.a.ready := (!a_first || !probe_busy) && (selectTracker & trackerReady).orR()
      (trackers zip selectTracker.toBools) foreach { case (t, select) =>
        t.in_a.valid := in.a.valid && select && (!a_first || !probe_busy)
        t.in_a.bits := in.a.bits
        t.in_a_first := a_first
        t.probe := (if (caches.size == 0) UInt(0) else Mux(a_cache.orR(), UInt(caches.size-1), UInt(caches.size)))
      }
      when (in.a.fire() && a_first) {
        probe_todo  := ~a_cache // probe all but the cache who poked us
        probe_line  := in.a.bits.address >> lineShift
        probe_perms := MuxLookup(in.a.bits.opcode, Wire(UInt(width = 2)), Array(
          TLMessages.PutFullData    -> TLPermissions.toN,
          TLMessages.PutPartialData -> TLPermissions.toN,
          TLMessages.ArithmeticData -> TLPermissions.toN,
          TLMessages.LogicalData    -> TLPermissions.toN,
          TLMessages.Get            -> TLPermissions.toB,
          TLMessages.Hint           -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
            TLHints.PREFETCH_READ   -> TLPermissions.toB,
            TLHints.PREFETCH_WRITE  -> TLPermissions.toN)),
          TLMessages.Acquire        -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
            TLPermissions.NtoB      -> TLPermissions.toB,
            TLPermissions.NtoT      -> TLPermissions.toN,
            TLPermissions.BtoT      -> TLPermissions.toN))))
      }
      // The outer TL connections may not be cached
      out.b.ready := Bool(true)
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
    // We always accept E
    in.e.ready := Bool(true)
    (trackers zip UIntToOH(in.e.bits.sink).toBools) foreach { case (tracker, select) =>
      tracker.e_last := select && in.e.fire()
    }
    // Depending on the high source bits, we might transform D
    val d_high = log2Ceil(edgeIn.client.endSourceId)
    val d_what = out.d.bits.source(d_high+1, d_high)
    val d_drop = d_what === DROP
    val d_hasData = edgeOut.hasData(out.d.bits)
    val d_normal = Wire(in.d)
    val d_trackerOH = Vec(trackers.map { t => !t.idle && t.source === d_normal.bits.source }).asUInt
    assert (!out.d.valid || !d_drop || out.d.bits.opcode === TLMessages.AccessAck)
    out.d.ready := d_normal.ready || d_drop
    d_normal.valid := out.d.valid && !d_drop
    d_normal.bits := out.d.bits // truncates source
    when (d_what(1)) { // TRANSFORM_*
      d_normal.bits.opcode := Mux(d_hasData, TLMessages.GrantData, TLMessages.ReleaseAck)
      d_normal.bits.param  := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0))
    }
    d_normal.bits.sink := OHToUInt(d_trackerOH)
    assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck))
    // A tracker response is anything neither dropped nor a ReleaseAck
    val d_response = d_hasData || !d_what(1)
    val d_last = edgeIn.last(d_normal)
    (trackers zip d_trackerOH.toBools) foreach { case (tracker, select) =>
      tracker.d_last := select && d_normal.fire() && d_response && d_last
      tracker.probedack := select && out.d.fire() && d_drop
    }
    // Incoming C can be:
    // ProbeAck     => decrement tracker, drop 
    // ProbeAckData => decrement tracker, send out A as PutFull(DROP)
    // ReleaseData  =>                    send out A as PutFull(TRANSFORM)
    // Release      => send out D as ReleaseAck
    val c_probeack     = in.c.bits.opcode === TLMessages.ProbeAck
    val c_probeackdata = in.c.bits.opcode === TLMessages.ProbeAckData
    val c_releasedata  = in.c.bits.opcode === TLMessages.ReleaseData
    val c_release      = in.c.bits.opcode === TLMessages.Release
    val c_trackerOH    = trackers.map { t => t.line === (in.c.bits.address >> lineShift) }
    val c_trackerSrc   = Mux1H(c_trackerOH, trackers.map { _.source })
    // Decrement the tracker's outstanding probe counter
    (trackers zip c_trackerOH) foreach { case (tracker, select) =>
      tracker.probenack := in.c.fire() && c_probeack && select
    }
    val releaseack = Wire(in.d)
    val putfull = Wire(out.a)
    in.c.ready := c_probeack || Mux(c_release, releaseack.ready, putfull.ready)
    releaseack.valid := in.c.valid && c_release
    releaseack.bits  := edgeIn.ReleaseAck(in.c.bits.address, UInt(0), in.c.bits.source, in.c.bits.size)
    val put_what = Mux(c_releasedata, TRANSFORM_B, DROP)
    val put_who  = Mux(c_releasedata, in.c.bits.source, c_trackerSrc)
    putfull.valid := in.c.valid && (c_probeackdata || c_releasedata)
    putfull.bits := edgeOut.Put(Cat(put_what, put_who), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2
    // Combine ReleaseAck or the modified D
    TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
    // Combine the PutFull with the trackers
    TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))
    // The Probe FSM walks all caches and probes them
    val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
    val probe_line = Reg(UInt())
    val probe_perms = Reg(UInt(width = 2))
    val probe_next = probe_todo & ~(leftOR(probe_todo) << 1)
    val probe_busy = probe_todo.orR()
    val probe_target = if (caches.size == 0) UInt(0) else Mux1H(probe_next, cache_targets)
    // Probe whatever the FSM wants to do next
    in.b.valid := probe_busy
    if (caches.size != 0) {
      in.b.bits := edgeIn.Probe(probe_line << lineShift, probe_target, UInt(lineShift), probe_perms)._2
    }
    when (in.b.fire()) { probe_todo := probe_todo & ~probe_next }
    // Which cache does a request come from?
    val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt
    val a_first = edgeIn.first(in.a)
    // To accept a request from A, the probe FSM must be idle and there must be a matching tracker
    val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt
    val freeTracker = freeTrackers.orR()
    val matchTrackers = Vec(trackers.map { t => t.line === in.a.bits.address >> lineShift }).asUInt
    val matchTracker = matchTrackers.orR()
    val allocTracker = freeTrackers & ~(leftOR(freeTrackers) << 1)
    val selectTracker = Mux(matchTracker, matchTrackers, allocTracker)
    val trackerReady = Vec(trackers.map(_.in_a.ready)).asUInt
    in.a.ready := (!a_first || !probe_busy) && (selectTracker & trackerReady).orR()
    (trackers zip selectTracker.toBools) foreach { case (t, select) =>
      t.in_a.valid := in.a.valid && select && (!a_first || !probe_busy)
      t.in_a.bits := in.a.bits
      t.in_a_first := a_first
      t.probe := (if (caches.size == 0) UInt(0) else Mux(a_cache.orR(), UInt(caches.size-1), UInt(caches.size)))
    }
    when (in.a.fire() && a_first) {
      probe_todo  := ~a_cache // probe all but the cache who poked us
      probe_line  := in.a.bits.address >> lineShift
      probe_perms := MuxLookup(in.a.bits.opcode, Wire(UInt(width = 2)), Array(
        TLMessages.PutFullData    -> TLPermissions.toN,
        TLMessages.PutPartialData -> TLPermissions.toN,
        TLMessages.ArithmeticData -> TLPermissions.toN,
        TLMessages.LogicalData    -> TLPermissions.toN,
        TLMessages.Get            -> TLPermissions.toB,
        TLMessages.Hint           -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
          TLHints.PREFETCH_READ   -> TLPermissions.toB,
          TLHints.PREFETCH_WRITE  -> TLPermissions.toN)),
        TLMessages.Acquire        -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
          TLPermissions.NtoB      -> TLPermissions.toB,
          TLPermissions.NtoT      -> TLPermissions.toN,
          TLPermissions.BtoT      -> TLPermissions.toN))))
    }
    // The outer TL connections may not be cached
    out.b.ready := Bool(true)
    out.c.valid := Bool(false)
    out.e.valid := Bool(false)
  }
 }
--- a/src/main/scala/uncore/tilelink2/Buffer.scala
+++ b/src/main/scala/uncore/tilelink2/Buffer.scala
@ -18,8 +18,8 @@ class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe:
  require (e >= 0)
  val node = TLAdapterNode(
-    clientFn  = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
+    clientFn  = { p => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
-    managerFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
+    managerFn = { p => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
--- a/src/main/scala/uncore/tilelink2/Bundles.scala
+++ b/src/main/scala/uncore/tilelink2/Bundles.scala
@ -210,11 +210,11 @@ final class DecoupledSnoop[+T <: Data](gen: T) extends Bundle
 object DecoupledSnoop
 {
-  def apply[T <: Data](i: DecoupledIO[T]) = {
+  def apply[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T]) = {
-    val out = Wire(new DecoupledSnoop(i.bits))
+    val out = Wire(new DecoupledSnoop(sink.bits))
-    out.ready := i.ready
+    out.ready := sink.ready
-    out.valid := i.valid
+    out.valid := source.valid
-    out.bits  := i.bits
+    out.bits  := source.bits
    out
  }
 }
@ -230,13 +230,13 @@ class TLBundleSnoop(params: TLBundleParameters) extends TLBundleBase(params)
 object TLBundleSnoop
 {
-  def apply(x: TLBundle) = {
+  def apply(source: TLBundle, sink: TLBundle) = {
-    val out = Wire(new TLBundleSnoop(x.params))
+    val out = Wire(new TLBundleSnoop(sink.params))
-    out.a <> DecoupledSnoop(x.a)
+    out.a := DecoupledSnoop(source.a, sink.a)
-    out.b <> DecoupledSnoop(x.b)
+    out.b := DecoupledSnoop(sink.b, source.b)
-    out.c <> DecoupledSnoop(x.c)
+    out.c := DecoupledSnoop(source.c, sink.c)
-    out.d <> DecoupledSnoop(x.d)
+    out.d := DecoupledSnoop(sink.d, source.d)
-    out.e <> DecoupledSnoop(x.e)
+    out.e := DecoupledSnoop(source.e, sink.e)
    out
  }
 }
--- a/src/main/scala/uncore/tilelink2/CacheCork.scala
+++ b/src/main/scala/uncore/tilelink2/CacheCork.scala
@ -12,10 +12,10 @@ import TLMessages._
 class TLCacheCork(unsafe: Boolean = false)(implicit p: Parameters) extends LazyModule
 {
  val node = TLAdapterNode(
-    clientFn  = { case Seq(cp) =>
+    clientFn  = { case cp =>
      cp.copy(clients = cp.clients.map { c => c.copy(
        sourceId = IdRange(c.sourceId.start*2, c.sourceId.end*2))})},
-    managerFn = { case Seq(mp) =>
+    managerFn = { case mp =>
      mp.copy(managers = mp.managers.map { m => m.copy(
        regionType         = if (m.regionType == RegionType.UNCACHED) RegionType.TRACKED else m.regionType,
        supportsAcquireB   = m.supportsGet,
@ -27,93 +27,89 @@ class TLCacheCork(unsafe: Boolean = false)(implicit p: Parameters) extends LazyM
      val out = node.bundleOut
    }
-    val edgeIn = node.edgesIn(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val edgeOut = node.edgesOut(0)
+      require (edgeIn.client.clients.size == 1 || unsafe, "Only one client can safely use a TLCacheCork")
      require (edgeIn.client.clients.filter(_.supportsProbe).size == 1, "Only one caching client allowed")
      edgeOut.manager.managers.foreach { case m =>
        require (!m.supportsAcquireB, "Cannot support caches beyond the Cork")
      }
-    require (edgeIn.client.clients.size == 1 || unsafe, "Only one client can safely use a TLCacheCork")
+      // The Cork turns [Acquire=>Get] => [AccessAckData=>GrantData]
-    require (edgeIn.client.clients.filter(_.supportsProbe).size == 1, "Only one caching client allowed")
+      //            and [ReleaseData=>PutFullData] => [AccessAck=>ReleaseAck]
-    edgeOut.manager.managers.foreach { case m =>
+      // We need to encode information sufficient to reverse the transformation in output.
-      require (!m.supportsAcquireB, "Cannot support caches beyond the Cork")
+      // A caveat is that we get Acquire+Release with the same source and must keep the
      // source unique after transformation onto the A channel.
      // The coding scheme is:
      //   Put: 1, Release: 0 => AccessAck
      //   *: 0, Acquire: 1 => AccessAckData
      // Take requests from A to A
      val isPut = in.a.bits.opcode === PutFullData || in.a.bits.opcode === PutPartialData
      val a_a = Wire(out.a)
      a_a <> in.a
      a_a.bits.source := in.a.bits.source << 1 | Mux(isPut, UInt(1), UInt(0))
      // Transform Acquire into Get
      when (in.a.bits.opcode === Acquire) {
        a_a.bits.opcode := Get
        a_a.bits.param  := UInt(0)
        a_a.bits.source := in.a.bits.source << 1 | UInt(1)
      }
      // Take ReleaseData from C to A; Release from C to D
      val c_a = Wire(out.a)
      c_a.valid := in.c.valid && in.c.bits.opcode === ReleaseData
      c_a.bits.opcode  := PutFullData
      c_a.bits.param   := UInt(0)
      c_a.bits.size    := in.c.bits.size
      c_a.bits.source  := in.c.bits.source << 1
      c_a.bits.address := in.c.bits.address
      c_a.bits.mask    := edgeOut.mask(in.c.bits.address, in.c.bits.size)
      c_a.bits.data    := in.c.bits.data
      val c_d = Wire(in.d)
      c_d.valid := in.c.valid && in.c.bits.opcode === Release
      c_d.bits.opcode  := ReleaseAck
      c_d.bits.param   := UInt(0)
      c_d.bits.size    := in.c.bits.size
      c_d.bits.source  := in.c.bits.source
      c_d.bits.sink    := UInt(0)
      c_d.bits.addr_lo := in.c.bits.address
      c_d.bits.data    := UInt(0)
      c_d.bits.error   := Bool(false)
      assert (!in.c.valid || in.c.bits.opcode === Release || in.c.bits.opcode === ReleaseData)
      in.c.ready := Mux(in.c.bits.opcode === Release, c_d.ready, c_a.ready)
      // Discard E
      in.e.ready := Bool(true)
      // Block B; should never happen
      out.b.ready := Bool(false)
      assert (!out.b.valid)
      // Take responses from D and transform them
      val d_d = Wire(in.d)
      d_d <> out.d
      d_d.bits.source := out.d.bits.source >> 1
      when (out.d.bits.opcode === AccessAckData && out.d.bits.source(0)) {
        d_d.bits.opcode := GrantData
        d_d.bits.param  := TLPermissions.toT
      }
      when (out.d.bits.opcode === AccessAck && !out.d.bits.source(0)) {
        d_d.bits.opcode := ReleaseAck
      }
      // Combine the sources of messages into the channels
      TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (edgeOut.numBeats1(c_a.bits), c_a), (edgeOut.numBeats1(a_a.bits), a_a))
      TLArbiter(TLArbiter.lowestIndexFirst)(in.d,  (edgeIn .numBeats1(d_d.bits), d_d), (UInt(0), Queue(c_d, 2)))
      // Tie off unused ports
      in.b.valid := Bool(false)
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
    val out = io.out(0)
    val in = io.in(0)
    // The Cork turns [Acquire=>Get] => [AccessAckData=>GrantData]
    //            and [ReleaseData=>PutFullData] => [AccessAck=>ReleaseAck]
    // We need to encode information sufficient to reverse the transformation in output.
    // A caveat is that we get Acquire+Release with the same source and must keep the
    // source unique after transformation onto the A channel.
    // The coding scheme is:
    //   Put: 1, Release: 0 => AccessAck
    //   *: 0, Acquire: 1 => AccessAckData
    // Take requests from A to A
    val isPut = in.a.bits.opcode === PutFullData || in.a.bits.opcode === PutPartialData
    val a_a = Wire(out.a)
    a_a <> in.a
    a_a.bits.source := in.a.bits.source << 1 | Mux(isPut, UInt(1), UInt(0))
    // Transform Acquire into Get
    when (in.a.bits.opcode === Acquire) {
      a_a.bits.opcode := Get
      a_a.bits.param  := UInt(0)
      a_a.bits.source := in.a.bits.source << 1 | UInt(1)
    }
    // Take ReleaseData from C to A; Release from C to D
    val c_a = Wire(out.a)
    c_a.valid := in.c.valid && in.c.bits.opcode === ReleaseData
    c_a.bits.opcode  := PutFullData
    c_a.bits.param   := UInt(0)
    c_a.bits.size    := in.c.bits.size
    c_a.bits.source  := in.c.bits.source << 1
    c_a.bits.address := in.c.bits.address
    c_a.bits.mask    := edgeOut.mask(in.c.bits.address, in.c.bits.size)
    c_a.bits.data    := in.c.bits.data
    val c_d = Wire(in.d)
    c_d.valid := in.c.valid && in.c.bits.opcode === Release
    c_d.bits.opcode  := ReleaseAck
    c_d.bits.param   := UInt(0)
    c_d.bits.size    := in.c.bits.size
    c_d.bits.source  := in.c.bits.source
    c_d.bits.sink    := UInt(0)
    c_d.bits.addr_lo := in.c.bits.address
    c_d.bits.data    := UInt(0)
    c_d.bits.error   := Bool(false)
    assert (!in.c.valid || in.c.bits.opcode === Release || in.c.bits.opcode === ReleaseData)
    in.c.ready := Mux(in.c.bits.opcode === Release, c_d.ready, c_a.ready)
    // Discard E
    in.e.ready := Bool(true)
    // Block B; should never happen
    out.b.ready := Bool(false)
    assert (!out.b.valid)
    // Take responses from D and transform them
    val d_d = Wire(in.d)
    d_d <> out.d
    d_d.bits.source := out.d.bits.source >> 1
    when (out.d.bits.opcode === AccessAckData && out.d.bits.source(0)) {
      d_d.bits.opcode := GrantData
      d_d.bits.param  := TLPermissions.toT
    }
    when (out.d.bits.opcode === AccessAck && !out.d.bits.source(0)) {
      d_d.bits.opcode := ReleaseAck
    }
    // Combine the sources of messages into the channels
    TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (edgeOut.numBeats1(c_a.bits), c_a), (edgeOut.numBeats1(a_a.bits), a_a))
    TLArbiter(TLArbiter.lowestIndexFirst)(in.d,  (edgeIn .numBeats1(d_d.bits), d_d), (UInt(0), Queue(c_d, 2)))
    // Tie off unused ports
    in.b.valid := Bool(false)
    out.c.valid := Bool(false)
    out.e.valid := Bool(false)
  }
 }
--- a/src/main/scala/uncore/tilelink2/Filter.scala
+++ b/src/main/scala/uncore/tilelink2/Filter.scala
@ -11,8 +11,8 @@ import scala.math.{min,max}
 class TLFilter(select: AddressSet)(implicit p: Parameters) extends LazyModule
 {
  val node = TLAdapterNode(
-    clientFn  = { case Seq(cp) => cp },
+    clientFn  = { cp => cp },
-    managerFn = { case Seq(mp) =>
+    managerFn = { mp =>
      mp.copy(managers = mp.managers.map { m =>
        val filtered = m.address.map(_.intersect(select)).flatten
        val alignment = select.alignment /* alignment 0 means 'select' selected everything */
--- a/src/main/scala/uncore/tilelink2/Fragmenter.scala
+++ b/src/main/scala/uncore/tilelink2/Fragmenter.scala
@ -41,8 +41,8 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
    sourceId = IdRange(c.sourceId.start << fragmentBits, c.sourceId.end << fragmentBits))
  val node = TLAdapterNode(
-    clientFn  = { case Seq(c) => c.copy(clients = c.clients.map(mapClient)) },
+    clientFn  = { c => c.copy(clients = c.clients.map(mapClient)) },
-    managerFn = { case Seq(m) => m.copy(managers = m.managers.map(mapManager)) })
+    managerFn = { m => m.copy(managers = m.managers.map(mapManager)) })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
@ -50,204 +50,201 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
      val out = node.bundleOut
    }
-    // All managers must share a common FIFO domain (responses might end up interleaved)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val edgeOut   = node.edgesOut(0)
+      // All managers must share a common FIFO domain (responses might end up interleaved)
-    val edgeIn    = node.edgesIn(0)
+      val manager   = edgeOut.manager
-    val manager   = edgeOut.manager
+      val managers  = manager.managers
-    val managers  = manager.managers
+      val beatBytes = manager.beatBytes
-    val beatBytes = manager.beatBytes
+      val fifoId = managers(0).fifoId
-    val fifoId = managers(0).fifoId
+      require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _))
    require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _))
-    // We don't support fragmenting to sub-beat accesses
+      // We don't support fragmenting to sub-beat accesses
-    require (minSize >= beatBytes)
+      require (minSize >= beatBytes)
-    // We can't support devices which are cached on both sides of us
+      // We can't support devices which are cached on both sides of us
-    require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe)
+      require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe)
-    /* The Fragmenter is a bit tricky, because there are 5 sizes in play:
+      /* The Fragmenter is a bit tricky, because there are 5 sizes in play:
-     *   max  size -- the maximum transfer size possible
+       *   max  size -- the maximum transfer size possible
-     *   orig size -- the original pre-fragmenter size
+       *   orig size -- the original pre-fragmenter size
-     *   frag size -- the modified post-fragmenter size
+       *   frag size -- the modified post-fragmenter size
-     *   min  size -- the threshold below which frag=orig
+       *   min  size -- the threshold below which frag=orig
-     *   beat size -- the amount transfered on any given beat
+       *   beat size -- the amount transferred on any given beat
-     *
+       *
-     * The relationships are as follows:
+       * The relationships are as follows:
-     *   max >= orig >= frag
+       *   max >= orig >= frag
-     *   max >  min  >= beat
+       *   max >  min  >= beat
-     * It IS possible that orig <= min (then frag=orig; ie: no fragmentation)
+       * It IS possible that orig <= min (then frag=orig; ie: no fragmentation)
-     *
+       *
-     * The fragment# (sent via TL.source) is measured in multiples of min size.
+       * The fragment# (sent via TL.source) is measured in multiples of min size.
-     * Meanwhile, to track the progress, counters measure in multiples of beat size.
+       * Meanwhile, to track the progress, counters measure in multiples of beat size.
-     *
+       *
-     * Here is an example of a bus with max=256, min=8, beat=4 and a device supporting 16.
+       * Here is an example of a bus with max=256, min=8, beat=4 and a device supporting 16.
-     *
+       *
-     * in.A    out.A (frag#)  out.D (frag#)  in.D     gen# ack#
+       * in.A    out.A (frag#)  out.D (frag#)  in.D     gen# ack#
-     * get64   get16  6       ackD16  6      ackD64    12   15
+       * get64   get16  6       ackD16  6      ackD64    12   15
-     *                        ackD16  6      ackD64         14
+       *                        ackD16  6      ackD64         14
-     *                        ackD16  6      ackD64         13
+       *                        ackD16  6      ackD64         13
-     *                        ackD16  6      ackD64         12
+       *                        ackD16  6      ackD64         12
-     *         get16  4       ackD16  4      ackD64    8    11
+       *         get16  4       ackD16  4      ackD64    8    11
-     *                        ackD16  4      ackD64         10
+       *                        ackD16  4      ackD64         10
-     *                        ackD16  4      ackD64         9
+       *                        ackD16  4      ackD64         9
-     *                        ackD16  4      ackD64         8
+       *                        ackD16  4      ackD64         8
-     *         get16  2       ackD16  2      ackD64    4    7
+       *         get16  2       ackD16  2      ackD64    4    7
-     *                        ackD16  2      ackD64         6
+       *                        ackD16  2      ackD64         6
-     *                        ackD16  2      ackD64         5
+       *                        ackD16  2      ackD64         5
-     *                        ackD16  2      ackD64         4
+       *                        ackD16  2      ackD64         4
-     *         get16  0       ackD16  0      ackD64    0    3
+       *         get16  0       ackD16  0      ackD64    0    3
-     *                        ackD16  0      ackD64         2
+       *                        ackD16  0      ackD64         2
-     *                        ackD16  0      ackD64         1
+       *                        ackD16  0      ackD64         1
-     *                        ackD16  0      ackD64         0
+       *                        ackD16  0      ackD64         0
-     *
+       *
-     * get8    get8   0       ackD8   0      ackD8     0    1
+       * get8    get8   0       ackD8   0      ackD8     0    1
-     *                        ackD8   0      ackD8          0
+       *                        ackD8   0      ackD8          0
-     *
+       *
-     * get4    get4   0       ackD4   0      ackD4     0    0
+       * get4    get4   0       ackD4   0      ackD4     0    0
-     * get1    get1   0       ackD1   0      ackD1     0    0
+       * get1    get1   0       ackD1   0      ackD1     0    0
-     *
+       *
-     * put64   put16  6                                15   
+       * put64   put16  6                                15   
-     * put64   put16  6                                14
+       * put64   put16  6                                14
-     * put64   put16  6                                13
+       * put64   put16  6                                13
-     * put64   put16  6       ack16   6                12    12
+       * put64   put16  6       ack16   6                12    12
-     * put64   put16  4                                11
+       * put64   put16  4                                11
-     * put64   put16  4                                10
+       * put64   put16  4                                10
-     * put64   put16  4                                9
+       * put64   put16  4                                9
-     * put64   put16  4       ack16   4                8     8
+       * put64   put16  4       ack16   4                8     8
-     * put64   put16  2                                7
+       * put64   put16  2                                7
-     * put64   put16  2                                6
+       * put64   put16  2                                6
-     * put64   put16  2                                5
+       * put64   put16  2                                5
-     * put64   put16  2       ack16   2                4     4
+       * put64   put16  2       ack16   2                4     4
-     * put64   put16  0                                3
+       * put64   put16  0                                3
-     * put64   put16  0                                2
+       * put64   put16  0                                2
-     * put64   put16  0                                1
+       * put64   put16  0                                1
-     * put64   put16  0       ack16   0      ack64     0     0
+       * put64   put16  0       ack16   0      ack64     0     0
-     *
+       *
-     * put8    put8   0                                1
+       * put8    put8   0                                1
-     * put8    put8   0       ack8    0      ack8      0     0
+       * put8    put8   0       ack8    0      ack8      0     0
-     *
+       *
-     * put4    put4   0       ack4    0      ack4      0     0
+       * put4    put4   0       ack4    0      ack4      0     0
-     * put1    put1   0       ack1    0      ack1      0     0
+       * put1    put1   0       ack1    0      ack1      0     0
-     */
+       */
-    val in = io.in(0)
+      val counterBits = log2Up(maxSize/beatBytes)
-    val out = io.out(0)
+      val maxDownSize = if (alwaysMin) minSize else manager.maxTransfer
-    val counterBits = log2Up(maxSize/beatBytes)
+      // First, handle the return path
-    val maxDownSize = if (alwaysMin) minSize else manager.maxTransfer
+      val acknum = RegInit(UInt(0, width = counterBits))
      val dOrig = Reg(UInt())
      val dFragnum = out.d.bits.source(fragmentBits-1, 0)
      val dFirst = acknum === UInt(0)
      val dsizeOH  = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1)
      val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize))
      val dHasData = edgeOut.hasData(out.d.bits)
-    // First, handle the return path
+      // calculate new acknum
-    val acknum = RegInit(UInt(0, width = counterBits))
+      val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes)
-    val dOrig = Reg(UInt())
+      val acknum_size = dsizeOH1 >> log2Ceil(beatBytes)
-    val dFragnum = out.d.bits.source(fragmentBits-1, 0)
+      assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0))
-    val dFirst = acknum === UInt(0)
+      val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0))
-    val dsizeOH  = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1)
+      val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes))
-    val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize))
+      // calculate the original size
-    val dHasData = edgeOut.hasData(out.d.bits)
+      val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1)
-    // calculate new acknum
+      when (out.d.fire()) {
-    val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes)
+        acknum := Mux(dFirst, dFirst_acknum, acknum - ack_decrement)
-    val acknum_size = dsizeOH1 >> log2Ceil(beatBytes)
+        when (dFirst) { dOrig := dFirst_size }
-    assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0))
+      }
    val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0))
    val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes))
    // calculate the original size
    val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1)
-    when (out.d.fire()) {
+      // Swallow up non-data ack fragments
-      acknum := Mux(dFirst, dFirst_acknum, acknum - ack_decrement)
+      val drop = !dHasData && (dFragnum =/= UInt(0))
-      when (dFirst) { dOrig := dFirst_size }
+      out.d.ready := in.d.ready || drop
      in.d.valid  := out.d.valid && !drop
      in.d.bits   := out.d.bits // pass most stuff unchanged
      in.d.bits.addr_lo := out.d.bits.addr_lo & ~dsizeOH1
      in.d.bits.source := out.d.bits.source >> fragmentBits
      in.d.bits.size   := Mux(dFirst, dFirst_size, dOrig)
      // Combine the error flag
      val r_error = RegInit(Bool(false))
      val d_error = r_error | out.d.bits.error
      when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
      in.d.bits.error := d_error
      // What maximum transfer sizes do downstream devices support?
      val maxArithmetics = managers.map(_.supportsArithmetic.max)
      val maxLogicals    = managers.map(_.supportsLogical.max)
      val maxGets        = managers.map(_.supportsGet.max)
      val maxPutFulls    = managers.map(_.supportsPutFull.max)
      val maxPutPartials = managers.map(_.supportsPutPartial.max)
      val maxHints       = managers.map(m => if (m.supportsHint) maxDownSize else 0)
      // We assume that the request is valid => size 0 is impossible
      val lgMinSize = UInt(log2Ceil(minSize))
      val maxLgArithmetics = maxArithmetics.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
      val maxLgLogicals    = maxLogicals   .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
      val maxLgGets        = maxGets       .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
      val maxLgPutFulls    = maxPutFulls   .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
      val maxLgPutPartials = maxPutPartials.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
      val maxLgHints       = maxHints      .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
      // Make the request repeatable
      val repeater = Module(new Repeater(in.a.bits))
      repeater.io.enq <> in.a
      val in_a = repeater.io.deq
      // If this is in front of a single manager, these become constants
      val find = manager.findFast(edgeIn.address(in_a.bits))
      val maxLgArithmetic  = Mux1H(find, maxLgArithmetics)
      val maxLgLogical     = Mux1H(find, maxLgLogicals)
      val maxLgGet         = Mux1H(find, maxLgGets)
      val maxLgPutFull     = Mux1H(find, maxLgPutFulls)
      val maxLgPutPartial  = Mux1H(find, maxLgPutPartials)
      val maxLgHint        = Mux1H(find, maxLgHints)
      val limit = if (alwaysMin) lgMinSize else 
        MuxLookup(in_a.bits.opcode, lgMinSize, Array(
          TLMessages.PutFullData    -> maxLgPutFull,
          TLMessages.PutPartialData -> maxLgPutPartial,
          TLMessages.ArithmeticData -> maxLgArithmetic,
          TLMessages.LogicalData    -> maxLgLogical,
          TLMessages.Get            -> maxLgGet,
          TLMessages.Hint           -> maxLgHint))
      val aOrig = in_a.bits.size
      val aFrag = Mux(aOrig > limit, limit, aOrig)
      val aOrigOH1 = UIntToOH1(aOrig, log2Ceil(maxSize))
      val aFragOH1 = UIntToOH1(aFrag, log2Up(maxDownSize))
      // Use the inward edge bound by the enclosing per-port foreach rather than
      // node.edgesIn(0), so each port is checked against its own edge.
      val aHasData = edgeIn.hasData(in_a.bits)
      val aMask = Mux(aHasData, UInt(0), aFragOH1)
      val gennum = RegInit(UInt(0, width = counterBits))
      val aFirst = gennum === UInt(0)
      val old_gennum1 = Mux(aFirst, aOrigOH1 >> log2Ceil(beatBytes), gennum - UInt(1))
      val new_gennum = ~(~old_gennum1 | (aMask >> log2Ceil(beatBytes))) // ~(~x|y) is width safe
      val aFragnum = ~(~(old_gennum1 >> log2Ceil(minSize/beatBytes)) | (aFragOH1 >> log2Ceil(minSize)))
      when (out.a.fire()) { gennum := new_gennum }
      repeater.io.repeat := !aHasData && aFragnum =/= UInt(0)
      out.a <> in_a
      out.a.bits.address := in_a.bits.address | (~aFragnum << log2Ceil(minSize) & aOrigOH1)
      out.a.bits.source := Cat(in_a.bits.source, aFragnum)
      out.a.bits.size := aFrag
      // Optimize away some of the Repeater's registers
      assert (!repeater.io.full || !aHasData)
      out.a.bits.data := in.a.bits.data
      val fullMask = UInt((BigInt(1) << beatBytes) - 1)
      assert (!repeater.io.full || in_a.bits.mask === fullMask)
      out.a.bits.mask := Mux(repeater.io.full, fullMask, in.a.bits.mask)
      // Tie off unused channels
      in.b.valid := Bool(false)
      in.c.ready := Bool(true)
      in.e.ready := Bool(true)
      out.b.ready := Bool(true)
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
    // Swallow up non-data ack fragments
    val drop = !dHasData && (dFragnum =/= UInt(0))
    out.d.ready := in.d.ready || drop
    in.d.valid  := out.d.valid && !drop
    in.d.bits   := out.d.bits // pass most stuff unchanged
    in.d.bits.addr_lo := out.d.bits.addr_lo & ~dsizeOH1
    in.d.bits.source := out.d.bits.source >> fragmentBits
    in.d.bits.size   := Mux(dFirst, dFirst_size, dOrig)
    // Combine the error flag
    val r_error = RegInit(Bool(false))
    val d_error = r_error | out.d.bits.error
    when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
    in.d.bits.error := d_error
    // What maximum transfer sizes do downstream devices support?
    val maxArithmetics = managers.map(_.supportsArithmetic.max)
    val maxLogicals    = managers.map(_.supportsLogical.max)
    val maxGets        = managers.map(_.supportsGet.max)
    val maxPutFulls    = managers.map(_.supportsPutFull.max)
    val maxPutPartials = managers.map(_.supportsPutPartial.max)
    val maxHints       = managers.map(m => if (m.supportsHint) maxDownSize else 0)
    // We assume that the request is valid => size 0 is impossible
    val lgMinSize = UInt(log2Ceil(minSize))
    val maxLgArithmetics = maxArithmetics.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
    val maxLgLogicals    = maxLogicals   .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
    val maxLgGets        = maxGets       .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
    val maxLgPutFulls    = maxPutFulls   .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
    val maxLgPutPartials = maxPutPartials.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
    val maxLgHints       = maxHints      .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
    // Make the request repeatable
    val repeater = Module(new Repeater(in.a.bits))
    repeater.io.enq <> in.a
    val in_a = repeater.io.deq
    // If this is infront of a single manager, these become constants
    val find = manager.findFast(edgeIn.address(in_a.bits))
    val maxLgArithmetic  = Mux1H(find, maxLgArithmetics)
    val maxLgLogical     = Mux1H(find, maxLgLogicals)
    val maxLgGet         = Mux1H(find, maxLgGets)
    val maxLgPutFull     = Mux1H(find, maxLgPutFulls)
    val maxLgPutPartial  = Mux1H(find, maxLgPutPartials)
    val maxLgHint        = Mux1H(find, maxLgHints)
    val limit = if (alwaysMin) lgMinSize else 
      MuxLookup(in_a.bits.opcode, lgMinSize, Array(
        TLMessages.PutFullData    -> maxLgPutFull,
        TLMessages.PutPartialData -> maxLgPutPartial,
        TLMessages.ArithmeticData -> maxLgArithmetic,
        TLMessages.LogicalData    -> maxLgLogical,
        TLMessages.Get            -> maxLgGet,
        TLMessages.Hint           -> maxLgHint))
    val aOrig = in_a.bits.size
    val aFrag = Mux(aOrig > limit, limit, aOrig)
    val aOrigOH1 = UIntToOH1(aOrig, log2Ceil(maxSize))
    val aFragOH1 = UIntToOH1(aFrag, log2Up(maxDownSize))
    val aHasData = node.edgesIn(0).hasData(in_a.bits)
    val aMask = Mux(aHasData, UInt(0), aFragOH1)
    val gennum = RegInit(UInt(0, width = counterBits))
    val aFirst = gennum === UInt(0)
    val old_gennum1 = Mux(aFirst, aOrigOH1 >> log2Ceil(beatBytes), gennum - UInt(1))
    val new_gennum = ~(~old_gennum1 | (aMask >> log2Ceil(beatBytes))) // ~(~x|y) is width safe
    val aFragnum = ~(~(old_gennum1 >> log2Ceil(minSize/beatBytes)) | (aFragOH1 >> log2Ceil(minSize)))
    when (out.a.fire()) { gennum := new_gennum }
    repeater.io.repeat := !aHasData && aFragnum =/= UInt(0)
    out.a <> in_a
    out.a.bits.address := in_a.bits.address | (~aFragnum << log2Ceil(minSize) & aOrigOH1)
    out.a.bits.source := Cat(in_a.bits.source, aFragnum)
    out.a.bits.size := aFrag
    // Optimize away some of the Repeater's registers
    assert (!repeater.io.full || !aHasData)
    out.a.bits.data := in.a.bits.data
    val fullMask = UInt((BigInt(1) << beatBytes) - 1)
    assert (!repeater.io.full || in_a.bits.mask === fullMask)
    out.a.bits.mask := Mux(repeater.io.full, fullMask, in.a.bits.mask)
    // Tie off unused channels
    in.b.valid := Bool(false)
    in.c.ready := Bool(true)
    in.e.ready := Bool(true)
    out.b.ready := Bool(true)
    out.c.valid := Bool(false)
    out.e.valid := Bool(false)
  }
 }
--- a/src/main/scala/uncore/tilelink2/HintHandler.scala
+++ b/src/main/scala/uncore/tilelink2/HintHandler.scala
@ -12,8 +12,8 @@ import diplomacy._
 class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = false, passthrough: Boolean = true)(implicit p: Parameters) extends LazyModule
 {
  val node = TLAdapterNode(
-    clientFn  = { case Seq(c) => if (!supportClients)  c else c.copy(minLatency = min(1, c.minLatency), clients  = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) },
+    clientFn  = { c => if (!supportClients)  c else c.copy(minLatency = min(1, c.minLatency), clients  = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) },
-    managerFn = { case Seq(m) => if (!supportManagers) m else m.copy(minLatency = min(1, m.minLatency), managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) })
+    managerFn = { m => if (!supportManagers) m else m.copy(minLatency = min(1, m.minLatency), managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
@ -21,79 +21,76 @@ class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = f
      val out = node.bundleOut
    }
-    val in  = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      // Don't add support for clients if there is no BCE channel
-    val edgeIn  = node.edgesIn(0)
+      val bce = edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe
-    val edgeOut = node.edgesOut(0)
+      require (!supportClients || bce)
-    // Don't add support for clients if there is no BCE channel
+      // Does it even make sense to add the HintHandler?
-    val bce = edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe
+      val smartClients = edgeIn.client.clients.map(_.supportsHint.max == edgeIn.client.maxTransfer).reduce(_&&_)
-    require (!supportClients || bce)
+      val smartManagers = edgeOut.manager.managers.map(_.supportsHint.max == edgeOut.manager.maxTransfer).reduce(_&&_)
-    // Does it even make sense to add the HintHandler?
+      if (supportManagers && !(passthrough && smartManagers)) {
-    val smartClients = edgeIn.client.clients.map(_.supportsHint.max == edgeIn.client.maxTransfer).reduce(_&&_)
+        val address = edgeIn.address(in.a.bits)
-    val smartManagers = edgeOut.manager.managers.map(_.supportsHint.max == edgeOut.manager.maxTransfer).reduce(_&&_)
+        val handleA = if (passthrough) !edgeOut.manager.supportsHintFast(address, edgeIn.size(in.a.bits)) else Bool(true)
        val hintBitsAtA = handleA && in.a.bits.opcode === TLMessages.Hint
        val hint = Wire(out.d)
-    if (supportManagers && !(passthrough && smartManagers)) {
+        hint.valid  := in.a.valid &&  hintBitsAtA
-      val address = edgeIn.address(in.a.bits)
+        out.a.valid := in.a.valid && !hintBitsAtA
-      val handleA = if (passthrough) !edgeOut.manager.supportsHintFast(address, edgeIn.size(in.a.bits)) else Bool(true)
+        in.a.ready := Mux(hintBitsAtA, hint.ready, out.a.ready)
      val hintBitsAtA = handleA && in.a.bits.opcode === TLMessages.Hint
      val hint = Wire(out.d)
-      hint.valid  := in.a.valid &&  hintBitsAtA
+        hint.bits := edgeIn.HintAck(in.a.bits, UInt(0))
-      out.a.valid := in.a.valid && !hintBitsAtA
+        out.a.bits := in.a.bits
      in.a.ready := Mux(hintBitsAtA, hint.ready, out.a.ready)
-      hint.bits := edgeIn.HintAck(in.a.bits, UInt(0))
+        TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeOut.numBeats1(out.d.bits), out.d), (UInt(0), Queue(hint, 1)))
-      out.a.bits := in.a.bits
+      } else {
        out.a.valid := in.a.valid
        in.a.ready := out.a.ready
        out.a.bits := in.a.bits
-      TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeOut.numBeats1(out.d.bits), out.d), (UInt(0), Queue(hint, 1)))
+        in.d.valid := out.d.valid
-    } else {
+        out.d.ready := in.d.ready
-      out.a.valid := in.a.valid
+        in.d.bits := out.d.bits
-      in.a.ready := out.a.ready
+      }
      out.a.bits := in.a.bits
-      in.d.valid := out.d.valid
+      if (supportClients && !(passthrough && smartClients)) {
-      out.d.ready := in.d.ready
+        val handleB = if (passthrough) !edgeIn.client.supportsHint(out.b.bits.source, edgeOut.size(out.b.bits)) else Bool(true)
-      in.d.bits := out.d.bits
+        val hintBitsAtB = handleB && out.b.bits.opcode === TLMessages.Hint
-    }
+        val hint = Wire(in.c)
-    if (supportClients && !(passthrough && smartClients)) {
+        hint.valid := out.b.valid &&  hintBitsAtB
-      val handleB = if (passthrough) !edgeIn.client.supportsHint(out.b.bits.source, edgeOut.size(out.b.bits)) else Bool(true)
+        in.b.valid := out.b.valid && !hintBitsAtB
-      val hintBitsAtB = handleB && out.b.bits.opcode === TLMessages.Hint
+        out.b.ready := Mux(hintBitsAtB, hint.ready, in.b.ready)
      val hint = Wire(in.c)
-      hint.valid := out.b.valid &&  hintBitsAtB
+        hint.bits := edgeOut.HintAck(out.b.bits)
-      in.b.valid := out.b.valid && !hintBitsAtB
+        in.b.bits := out.b.bits
      out.b.ready := Mux(hintBitsAtB, hint.ready, in.b.ready)
-      hint.bits := edgeOut.HintAck(out.b.bits)
+        TLArbiter(TLArbiter.lowestIndexFirst)(out.c, (edgeIn.numBeats1(in.c.bits), in.c), (UInt(0), Queue(hint, 1)))
-      in.b.bits := out.b.bits
+      } else if (bce) {
        in.b.valid := out.b.valid
        out.b.ready := in.b.ready
        in.b.bits := out.b.bits
-      TLArbiter(TLArbiter.lowestIndexFirst)(out.c, (edgeIn.numBeats1(in.c.bits), in.c), (UInt(0), Queue(hint, 1)))
+        out.c.valid := in.c.valid
-    } else if (bce) {
+        in.c.ready := out.c.ready
-      in.b.valid := out.b.valid
+        out.c.bits := in.c.bits
-      out.b.ready := in.b.ready
+      } else {
-      in.b.bits := out.b.bits
+        in.b.valid := Bool(false)
        in.c.ready := Bool(true)
        out.b.ready := Bool(true)
        out.c.valid := Bool(false)
      }
-      out.c.valid := in.c.valid
+      if (bce) {
-      in.c.ready := out.c.ready
+        // Pass E through unchanged
-      out.c.bits := in.c.bits
+        out.e.valid := in.e.valid
-    } else {
+        in.e.ready := out.e.ready
-      in.b.valid := Bool(false)
+        out.e.bits := in.e.bits
-      in.c.ready := Bool(true)
+      } else {
-      out.b.ready := Bool(true)
+        in.e.ready := Bool(true)
-      out.c.valid := Bool(false)
+        out.e.valid := Bool(false)
-    }
+      }
    if (bce) {
      // Pass E through unchanged
      out.e.valid := in.e.valid
      in.e.ready := out.e.ready
      out.e.bits := in.e.bits
    } else {
      in.e.ready := Bool(true)
      out.e.valid := Bool(false)
    }
  }
 }
--- a/src/main/scala/uncore/tilelink2/IntNodes.scala
+++ b/src/main/scala/uncore/tilelink2/IntNodes.scala
@ -81,16 +81,16 @@ object IntImp extends NodeImp[IntSourcePortParameters, IntSinkPortParameters, In
 case class IntIdentityNode() extends IdentityNode(IntImp)
 case class IntSourceNode(num: Int) extends SourceNode(IntImp)(
-  IntSourcePortParameters(Seq(IntSourceParameters(num))), (if (num == 0) 0 else 1) to 1)
+  if (num == 0) Seq() else Seq(IntSourcePortParameters(Seq(IntSourceParameters(num)))))
 case class IntSinkNode() extends SinkNode(IntImp)(
-  IntSinkPortParameters(Seq(IntSinkParameters())))
+  Seq(IntSinkPortParameters(Seq(IntSinkParameters()))))
-case class IntAdapterNode(
+case class IntNexusNode(
  sourceFn:       Seq[IntSourcePortParameters] => IntSourcePortParameters,
  sinkFn:         Seq[IntSinkPortParameters]   => IntSinkPortParameters,
-  numSourcePorts: Range.Inclusive = 1 to 1,
+  numSourcePorts: Range.Inclusive = 0 to 128,
-  numSinkPorts:   Range.Inclusive = 1 to 1)
+  numSinkPorts:   Range.Inclusive = 0 to 128)
-  extends InteriorNode(IntImp)(sourceFn, sinkFn, numSourcePorts, numSinkPorts)
+  extends NexusNode(IntImp)(sourceFn, sinkFn, numSourcePorts, numSinkPorts)
 case class IntOutputNode() extends OutputNode(IntImp)
 case class IntInputNode() extends InputNode(IntImp)
@ -103,9 +103,7 @@ case class IntInternalInputNode(num: Int) extends InternalInputNode(IntImp)(Seq(
 class IntXbar()(implicit p: Parameters) extends LazyModule
 {
-  val intnode = IntAdapterNode(
+  val intnode = IntNexusNode(
    numSourcePorts = 0 to 128,
    numSinkPorts   = 0 to 128,
    sinkFn         = { _ => IntSinkPortParameters(Seq(IntSinkParameters())) },
    sourceFn       = { seq =>
      IntSourcePortParameters((seq zip seq.map(_.num).scanLeft(0)(_+_).init).map {
--- a/src/main/scala/uncore/tilelink2/Monitor.scala
+++ b/src/main/scala/uncore/tilelink2/Monitor.scala
@ -7,7 +7,7 @@ import chisel3.internal.sourceinfo.{SourceInfo, SourceLine}
 import config._
 import diplomacy._
-case class TLMonitorArgs(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: SourceInfo, p: Parameters)
+case class TLMonitorArgs(edge: () => Seq[TLEdge], sourceInfo: SourceInfo, p: Parameters)
 abstract class TLMonitorBase(args: TLMonitorArgs) extends LazyModule()(args.p)
 {
@ -16,11 +16,12 @@ abstract class TLMonitorBase(args: TLMonitorArgs) extends LazyModule()(args.p)
  def legalize(bundle: TLBundleSnoop, edge: TLEdge, reset: Bool): Unit
  lazy val module = new LazyModuleImp(this) {
    val edges = args.edge()
    val io = new Bundle {
-      val in = args.gen().asInput
+      val in = Vec(edges.size, new TLBundleSnoop(TLBundleParameters.union(edges.map(_.bundle)))).flip
    }
-    legalize(io.in, args.edge(), reset)
+    (edges zip io.in).foreach { case (e, in) => legalize(in, e, reset) }
  }
 }
--- a/src/main/scala/uncore/tilelink2/Nodes.scala
+++ b/src/main/scala/uncore/tilelink2/Nodes.scala
@ -24,44 +24,47 @@ object TLImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TL
  override def labelI(ei: TLEdgeIn)  = (ei.manager.beatBytes * 8).toString
  override def labelO(eo: TLEdgeOut) = (eo.manager.beatBytes * 8).toString
-  def connect(bo: => TLBundle, bi: => TLBundle, ei: => TLEdgeIn)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
+  override def connect(bindings: () => Seq[(TLEdgeIn, TLBundle, TLBundle)])(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
-    val monitor = p(TLMonitorBuilder)(TLMonitorArgs(() => new TLBundleSnoop(bo.params), () => ei, sourceInfo, p))
+    val monitor = p(TLMonitorBuilder)(TLMonitorArgs(() => bindings().map(_._1), sourceInfo, p))
    (monitor, () => {
-      bi <> bo
+      val eval = bindings ()
-      monitor.foreach { _.module.io.in := TLBundleSnoop(bo) }
+      monitor.foreach { m => (eval zip m.module.io.in) foreach { case ((_,i,o), m) => m := TLBundleSnoop(o,i) } }
-      if (p(TLCombinationalCheck)) {
+      eval.foreach { case (_, bi, bo) =>
-        // It is forbidden for valid to depend on ready in TL2
+        bi <> bo
-        // If someone did that, then this will create a detectable combinational loop
+        if (p(TLCombinationalCheck)) {
-        bo.a.ready := bi.a.ready && bo.a.valid
+          // It is forbidden for valid to depend on ready in TL2
-        bi.b.ready := bo.b.ready && bi.b.valid
+          // If someone did that, then this will create a detectable combinational loop
-        bo.c.ready := bi.c.ready && bo.c.valid
+          bo.a.ready := bi.a.ready && bo.a.valid
-        bi.d.ready := bo.d.ready && bi.d.valid
+          bi.b.ready := bo.b.ready && bi.b.valid
-        bo.e.ready := bi.e.ready && bo.e.valid
+          bo.c.ready := bi.c.ready && bo.c.valid
-      }
+          bi.d.ready := bo.d.ready && bi.d.valid
-      if (p(TLCombinationalCheck)) {
+          bo.e.ready := bi.e.ready && bo.e.valid
-        // Randomly stall the transfers
+        }
-        val allow = LFSRNoiseMaker(5)
+        if (p(TLCombinationalCheck)) {
-        bi.a.valid := bo.a.valid && allow(0)
+          // Randomly stall the transfers
-        bo.a.ready := bi.a.ready && allow(0)
+          val allow = LFSRNoiseMaker(5)
-        bo.b.valid := bi.b.valid && allow(1)
+          bi.a.valid := bo.a.valid && allow(0)
-        bi.b.ready := bo.b.ready && allow(1)
+          bo.a.ready := bi.a.ready && allow(0)
-        bi.c.valid := bo.c.valid && allow(2)
+          bo.b.valid := bi.b.valid && allow(1)
-        bo.c.ready := bi.c.ready && allow(2)
+          bi.b.ready := bo.b.ready && allow(1)
-        bo.d.valid := bi.d.valid && allow(3)
+          bi.c.valid := bo.c.valid && allow(2)
-        bi.d.ready := bo.d.ready && allow(3)
+          bo.c.ready := bi.c.ready && allow(2)
-        bi.e.valid := bo.e.valid && allow(4)
+          bo.d.valid := bi.d.valid && allow(3)
-        bo.e.ready := bi.e.ready && allow(4)
+          bi.d.ready := bo.d.ready && allow(3)
-        // Inject garbage whenever not valid
+          bi.e.valid := bo.e.valid && allow(4)
-        val bits_a = bo.a.bits.fromBits(LFSRNoiseMaker(bo.a.bits.asUInt.getWidth))
+          bo.e.ready := bi.e.ready && allow(4)
-        val bits_b = bi.b.bits.fromBits(LFSRNoiseMaker(bi.b.bits.asUInt.getWidth))
+          // Inject garbage whenever not valid
-        val bits_c = bo.c.bits.fromBits(LFSRNoiseMaker(bo.c.bits.asUInt.getWidth))
+          val bits_a = bo.a.bits.fromBits(LFSRNoiseMaker(bo.a.bits.asUInt.getWidth))
-        val bits_d = bi.d.bits.fromBits(LFSRNoiseMaker(bi.d.bits.asUInt.getWidth))
+          val bits_b = bi.b.bits.fromBits(LFSRNoiseMaker(bi.b.bits.asUInt.getWidth))
-        val bits_e = bo.e.bits.fromBits(LFSRNoiseMaker(bo.e.bits.asUInt.getWidth))
+          val bits_c = bo.c.bits.fromBits(LFSRNoiseMaker(bo.c.bits.asUInt.getWidth))
-        when (!bi.a.valid) { bi.a.bits := bits_a }
+          val bits_d = bi.d.bits.fromBits(LFSRNoiseMaker(bi.d.bits.asUInt.getWidth))
-        when (!bo.b.valid) { bo.b.bits := bits_b }
+          val bits_e = bo.e.bits.fromBits(LFSRNoiseMaker(bo.e.bits.asUInt.getWidth))
-        when (!bi.c.valid) { bi.c.bits := bits_c }
+          when (!bi.a.valid) { bi.a.bits := bits_a }
-        when (!bo.d.valid) { bo.d.bits := bits_d }
+          when (!bo.b.valid) { bo.b.bits := bits_b }
-        when (!bi.e.valid) { bi.e.bits := bits_e }
+          when (!bi.c.valid) { bi.c.bits := bits_c }
          when (!bo.d.valid) { bo.d.bits := bits_d }
          when (!bi.e.valid) { bi.e.bits := bits_e }
        }
      }
    })
  }
@ -86,29 +89,33 @@ object TLImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TL
 // Nodes implemented inside modules
 case class TLIdentityNode() extends IdentityNode(TLImp)
-case class TLClientNode(portParams: TLClientPortParameters, numPorts: Range.Inclusive = 1 to 1)
+case class TLClientNode(portParams: Seq[TLClientPortParameters]) extends SourceNode(TLImp)(portParams)
-  extends SourceNode(TLImp)(portParams, numPorts)
+case class TLManagerNode(portParams: Seq[TLManagerPortParameters]) extends SinkNode(TLImp)(portParams)
-case class TLManagerNode(portParams: TLManagerPortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SinkNode(TLImp)(portParams, numPorts)
 // Companion factory for TLClientNode.
 object TLClientNode
 {
  // Builds a node from a single client: the client is wrapped in one
  // TLClientPortParameters. The removed form passed that port together with a
  // fixed `1 to 1` port range; the added form passes a one-element Seq of ports.
  def apply(params: TLClientParameters) =
-    new TLClientNode(TLClientPortParameters(Seq(params)), 1 to 1)
+    new TLClientNode(Seq(TLClientPortParameters(Seq(params))))
 }
 // Companion factory for TLManagerNode.
 object TLManagerNode
 {
  // Builds a node from a single manager: the manager is wrapped in one
  // TLManagerPortParameters with the given beatBytes and minLatency = 0.
  // The removed form passed that port together with a fixed `1 to 1` port
  // range; the added form passes a one-element Seq of ports.
  def apply(beatBytes: Int, params: TLManagerParameters) =
-    new TLManagerNode(TLManagerPortParameters(Seq(params), beatBytes, minLatency = 0), 1 to 1)
+    new TLManagerNode(Seq(TLManagerPortParameters(Seq(params), beatBytes, minLatency = 0)))
 }
 // TileLink specialisation of AdapterNode (bound to TLImp).
 //   clientFn  - maps one TLClientPortParameters to one TLClientPortParameters
 //   managerFn - maps one TLManagerPortParameters to one TLManagerPortParameters
 //   num       - range forwarded unchanged to AdapterNode; defaults to 0 to 999
 //               (presumably the permitted number of ports -- confirm against AdapterNode)
 case class TLAdapterNode(
  clientFn:  TLClientPortParameters  => TLClientPortParameters,
  managerFn: TLManagerPortParameters => TLManagerPortParameters,
  num:       Range.Inclusive = 0 to 999)
  extends AdapterNode(TLImp)(clientFn, managerFn, num)
 // TileLink node whose parameter functions each reduce a Seq of port
 // parameters to a single port-parameter object:
 //   clientFn  - Seq[TLClientPortParameters]  => TLClientPortParameters
 //   managerFn - Seq[TLManagerPortParameters] => TLManagerPortParameters
 // This change widens both default port ranges from `1 to 1` to `1 to 999`
 // and switches the base class from InteriorNode to NexusNode; all four
 // arguments are forwarded to the base class unchanged.
 case class TLNexusNode(
  clientFn:        Seq[TLClientPortParameters]  => TLClientPortParameters,
  managerFn:       Seq[TLManagerPortParameters] => TLManagerPortParameters,
-  numClientPorts:  Range.Inclusive = 1 to 1,
+  numClientPorts:  Range.Inclusive = 1 to 999,
-  numManagerPorts: Range.Inclusive = 1 to 1)
+  numManagerPorts: Range.Inclusive = 1 to 999)
-  extends InteriorNode(TLImp)(clientFn, managerFn, numClientPorts, numManagerPorts)
+  extends NexusNode(TLImp)(clientFn, managerFn, numClientPorts, numManagerPorts)
 // Nodes passed from an inner module
 case class TLOutputNode() extends OutputNode(TLImp)
@ -155,10 +162,6 @@ object TLAsyncImp extends NodeImp[TLAsyncClientPortParameters, TLAsyncManagerPor
  override def labelI(ei: TLAsyncEdgeParameters) = ei.manager.depth.toString
  override def labelO(eo: TLAsyncEdgeParameters) = eo.manager.depth.toString
  def connect(bo: => TLAsyncBundle, bi: => TLAsyncBundle, ei: => TLAsyncEdgeParameters)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
    (None, () => { bi <> bo })
  }
  override def mixO(pd: TLAsyncClientPortParameters, node: OutwardNode[TLAsyncClientPortParameters, TLAsyncManagerPortParameters, TLAsyncBundle]): TLAsyncClientPortParameters  =
   pd.copy(base = pd.base.copy(clients  = pd.base.clients.map  { c => c.copy (nodePath = node +: c.nodePath) }))
  override def mixI(pu: TLAsyncManagerPortParameters, node: InwardNode[TLAsyncClientPortParameters, TLAsyncManagerPortParameters, TLAsyncBundle]): TLAsyncManagerPortParameters =
@ -169,17 +172,15 @@ case class TLAsyncIdentityNode() extends IdentityNode(TLAsyncImp)
 case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp)
 case class TLAsyncInputNode() extends InputNode(TLAsyncImp)
-case class TLAsyncSourceNode(sync: Int) extends MixedNode(TLImp, TLAsyncImp)(
+case class TLAsyncSourceNode(sync: Int)
-  dFn = { case (1, Seq(p)) => Seq(TLAsyncClientPortParameters(p)) },
+  extends MixedAdapterNode(TLImp, TLAsyncImp)(
-  uFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, // discard cycles in other clock domain
+    dFn = { p => TLAsyncClientPortParameters(p) },
-  numPO = 1 to 1,
+    uFn = { p => p.base.copy(minLatency = sync+1) }) // discard cycles in other clock domain
-  numPI = 1 to 1)
-case class TLAsyncSinkNode(depth: Int, sync: Int) extends MixedNode(TLAsyncImp, TLImp)(
+case class TLAsyncSinkNode(depth: Int, sync: Int)
-  dFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) },
+  extends MixedAdapterNode(TLAsyncImp, TLImp)(
-  uFn = { case (1, Seq(p)) => Seq(TLAsyncManagerPortParameters(depth, p)) },
+    dFn = { p => p.base.copy(minLatency = sync+1) },
-  numPO = 1 to 1,
+    uFn = { p => TLAsyncManagerPortParameters(depth, p) })
-  numPI = 1 to 1)
 object TLRationalImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TLEdgeParameters, TLEdgeParameters, TLRationalBundle]
 {
@ -191,10 +192,6 @@ object TLRationalImp extends NodeImp[TLClientPortParameters, TLManagerPortParame
  def colour = "#00ff00" // green
  def connect(bo: => TLRationalBundle, bi: => TLRationalBundle, ei: => TLEdgeParameters)(implicit p: Parameters, sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
    (None, () => { bi <> bo })
  }
  override def mixO(pd: TLClientPortParameters, node: OutwardNode[TLClientPortParameters, TLManagerPortParameters, TLRationalBundle]): TLClientPortParameters  =
   pd.copy(clients  = pd.clients.map  { c => c.copy (nodePath = node +: c.nodePath) })
  override def mixI(pu: TLManagerPortParameters, node: InwardNode[TLClientPortParameters, TLManagerPortParameters, TLRationalBundle]): TLManagerPortParameters =
@ -205,14 +202,12 @@ case class TLRationalIdentityNode() extends IdentityNode(TLRationalImp)
 case class TLRationalOutputNode() extends OutputNode(TLRationalImp)
 case class TLRationalInputNode() extends InputNode(TLRationalImp)
-case class TLRationalSourceNode() extends MixedNode(TLImp, TLRationalImp)(
+case class TLRationalSourceNode()
-  dFn = { case (_, s) => s },
+  extends MixedAdapterNode(TLImp, TLRationalImp)(
-  uFn = { case (_, s) => s.map(p => p.copy(minLatency = 1)) }, // discard cycles from other clock domain
+    dFn = { p => p },
-  numPO = 0 to 999,
+    uFn = { p => p.copy(minLatency = 1) }) // discard cycles from other clock domain
-  numPI = 0 to 999)
-case class TLRationalSinkNode() extends MixedNode(TLRationalImp, TLImp)(
+case class TLRationalSinkNode()
-  dFn = { case (_, s) => s.map(p => p.copy(minLatency = 1)) },
+  extends MixedAdapterNode(TLRationalImp, TLImp)(
-  uFn = { case (_, s) => s },
+    dFn = { p => p.copy(minLatency = 1) },
-  numPO = 0 to 999,
+    uFn = { p => p })
-  numPI = 0 to 999)
--- a/src/main/scala/uncore/tilelink2/RAMModel.scala
+++ b/src/main/scala/uncore/tilelink2/RAMModel.scala
@ -5,6 +5,7 @@ package uncore.tilelink2
 import Chisel._
 import config._
 import diplomacy._
 import util.GenericParameterizedBundle
 // We detect concurrent puts that put memory into an undefined state.
 // put0, put0Ack, put1, put1Ack => ok: defined
@ -31,268 +32,271 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
      val out = node.bundleOut
    }
-    // !!! support multiple clients via clock division
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    require (io.out.size == 1)
+      val edge         = edgeIn
      val endAddress   = edge.manager.maxAddress + 1
      val endSourceId  = edge.client.endSourceId
      val maxTransfer  = edge.manager.maxTransfer
      val beatBytes    = edge.manager.beatBytes
      val endAddressHi = (endAddress / beatBytes).intValue
      val maxLgBeats   = log2Up(maxTransfer/beatBytes)
      val shift        = log2Ceil(beatBytes)
      val decTrees     = log2Up(maxTransfer/beatBytes)
      val addressBits  = log2Up(endAddress)
      val countBits    = log2Up(endSourceId)
      val sizeBits     = edge.bundle.sizeBits
-    val in = io.in(0)
+      // Reset control logic
-    val out = io.out(0)
+      val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1))
      val wipe = !wipeIndex(log2Ceil(endAddressHi))
      wipeIndex := wipeIndex + wipe.asUInt
-    val edge         = node.edgesIn(0)
+      // Block traffic while wiping Mems
-    val endAddress   = edge.manager.maxAddress + 1
+      in.a.ready := out.a.ready && !wipe
-    val endSourceId  = edge.client.endSourceId
+      out.a.valid := in.a.valid && !wipe
-    val maxTransfer  = edge.manager.maxTransfer
+      out.a.bits  := in.a.bits
-    val beatBytes    = edge.manager.beatBytes
+      out.d.ready := in.d.ready && !wipe
-    val endAddressHi = (endAddress / beatBytes).intValue
+      in.d.valid := out.d.valid && !wipe
-    val maxLgBeats   = log2Up(maxTransfer/beatBytes)
+      in.d.bits  := out.d.bits
    val shift        = log2Ceil(beatBytes)
    val decTrees     = log2Up(maxTransfer/beatBytes)
    val addressBits  = log2Up(endAddress)
    val countBits    = log2Up(endSourceId)
    val sizeBits     = edge.bundle.sizeBits
-    // Reset control logic
+      // BCE unsupported
-    val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1))
+      in.b.valid := Bool(false)
-    val wipe = !wipeIndex(log2Ceil(endAddressHi))
+      out.c.valid := Bool(false)
-    wipeIndex := wipeIndex + wipe.asUInt
+      out.e.valid := Bool(false)
      out.b.ready := Bool(true)
      in.c.ready := Bool(true)
      in.e.ready := Bool(true)
-    // Block traffic while wiping Mems
+      val params = TLRAMModel.MonitorParameters(addressBits, sizeBits)
    in.a.ready := out.a.ready && !wipe
    out.a.valid := in.a.valid && !wipe
    out.a.bits  := in.a.bits
    out.d.ready := in.d.ready && !wipe
    in.d.valid := out.d.valid && !wipe
    in.d.bits  := out.d.bits
-    // BCE unsupported
+      // Infer as simple dual port BRAM/M10k with write-first/new-data semantics (bypass needed)
-    in.b.valid := Bool(false)
+      val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new TLRAMModel.ByteMonitor(params)) }
-    out.c.valid := Bool(false)
+      val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
-    out.e.valid := Bool(false)
+      val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
-    out.b.ready := Bool(true)
+      val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
-    in.c.ready := Bool(true)
+      val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
    in.e.ready := Bool(true)
-    class ByteMonitor extends Bundle {
+      val shadow_wen = Wire(init = Fill(beatBytes, wipe))
-      val valid = Bool()
+      val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe))
-      val value = UInt(width = 8)
+      val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe))
-    }
+      val inc_trees_wen = Wire(init = Fill(decTrees, wipe))
-    class FlightMonitor extends Bundle {
+      val dec_trees_wen = Wire(init = Fill(decTrees, wipe))
      val base    = UInt(width = addressBits)
      val size    = UInt(width = sizeBits)
      val opcode  = UInt(width = 3)
    }
-    // Infer as simple dual port BRAM/M10k with write-first/new-data semantics (bypass needed)
+      // This must be registers b/c we build a CAM from it
-    val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new ByteMonitor) }
+      val flight = Reg(Vec(endSourceId, new TLRAMModel.FlightMonitor(params)))
-    val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
+      val valid = Reg(Vec(endSourceId, Bool()))
    val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
    val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
    val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
-    val shadow_wen = Wire(init = Fill(beatBytes, wipe))
+      // We want to cross flight data from A to D in the same cycle (for combinational TL2 devices)
-    val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe))
+      val a_flight = Wire(new TLRAMModel.FlightMonitor(params))
-    val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe))
+      a_flight.base   := edge.address(in.a.bits)
-    val inc_trees_wen = Wire(init = Fill(decTrees, wipe))
+      a_flight.size   := edge.size(in.a.bits)
-    val dec_trees_wen = Wire(init = Fill(decTrees, wipe))
+      a_flight.opcode := in.a.bits.opcode
-    // This must be registers b/c we build a CAM from it
+      when (in.a.fire()) { flight(in.a.bits.source) := a_flight }
-    val flight = Reg(Vec(endSourceId, new FlightMonitor))
+      val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source
-    val valid = Reg(Vec(endSourceId, Bool()))
+      val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source)))
-    // We want to cross flight data from A to D in the same cycle (for combinational TL2 devices)
+      // Process A access requests
-    val a_flight = Wire(new FlightMonitor)
+      val a = Reg(next = in.a.bits)
-    a_flight.base   := edge.address(in.a.bits)
+      val a_fire = Reg(next = in.a.fire(), init = Bool(false))
-    a_flight.size   := edge.size(in.a.bits)
+      val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire)
-    a_flight.opcode := in.a.bits.opcode
+      val a_size = edge.size(a)
      val a_sizeOH = UIntToOH(a_size)
      val a_address = a.address | a_address_inc
      val a_addr_hi = edge.addr_hi(a_address)
      val a_base = edge.address(a)
      val a_mask = edge.mask(a_base, a_size)
      val a_fifo = edge.manager.hasFifoIdFast(a_base)
-    when (in.a.fire()) { flight(in.a.bits.source) := a_flight }
+      // Grab the concurrency state we need
-    val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source
+      val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi))
-    val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source)))
+      val a_dec_bytes = dec_bytes.map(_.read(a_addr_hi))
      val a_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
      val a_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
      val a_inc_tree = a_inc_trees.fold(UInt(0))(_ + _)
      val a_dec_tree = a_dec_trees.fold(UInt(0))(_ + _)
      val a_inc = a_inc_bytes.map(_ + a_inc_tree)
      val a_dec = a_dec_bytes.map(_ + a_dec_tree)
-    // Process A access requests
+      when (a_fire) {
-    val a = Reg(next = in.a.bits)
+        // Record the request so we can handle its response
-    val a_fire = Reg(next = in.a.fire(), init = Bool(false))
+        assert (a.opcode =/= TLMessages.Acquire)
    val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire)
    val a_size = edge.size(a)
    val a_sizeOH = UIntToOH(a_size)
    val a_address = a.address | a_address_inc
    val a_addr_hi = edge.addr_hi(a_address)
    val a_base = edge.address(a)
    val a_mask = edge.mask(a_base, a_size)
    val a_fifo = edge.manager.hasFifoIdFast(a_base)
-    // Grab the concurrency state we need
+        // Mark the operation as valid
-    val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi))
+        valid(a.source) := Bool(true)
    val a_dec_bytes = dec_bytes.map(_.read(a_addr_hi))
    val a_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
    val a_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
    val a_inc_tree = a_inc_trees.fold(UInt(0))(_ + _)
    val a_dec_tree = a_dec_trees.fold(UInt(0))(_ + _)
    val a_inc = a_inc_bytes.map(_ + a_inc_tree)
    val a_dec = a_dec_bytes.map(_ + a_dec_tree)
-    when (a_fire) {
+        // Increase the per-byte flight counter for the whole transaction
-      // Record the request so we can handle it's response
+        when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) {
-      assert (a.opcode =/= TLMessages.Acquire)
+          when (a_size <= UInt(shift)) {
-
+            inc_bytes_wen := a_mask
-      // Mark the operation as valid
+          }
-      valid(a.source) := Bool(true)
+          inc_trees_wen := a_sizeOH >> (shift+1)
      // Increase the per-byte flight counter for the whole transaction
      when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) {
        when (a_size <= UInt(shift)) {
          inc_bytes_wen := a_mask
        }
        inc_trees_wen := a_sizeOH >> (shift+1)
      }
-      when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData ||
+        when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData ||
-            a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) {
+              a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) {
-        shadow_wen := a.mask
+          shadow_wen := a.mask
-        for (i <- 0 until beatBytes) {
+          for (i <- 0 until beatBytes) {
-          val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt
+            val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt
-          val byte = a.data(8*(i+1)-1, 8*i)
+            val byte = a.data(8*(i+1)-1, 8*i)
-          when (a.mask(i)) {
+            when (a.mask(i)) {
-            printf(log + " ")
+              printf(log + " ")
-            when (a.opcode === TLMessages.PutFullData) { printf("PF") }
+              when (a.opcode === TLMessages.PutFullData) { printf("PF") }
-            when (a.opcode === TLMessages.PutPartialData) { printf("PP") }
+              when (a.opcode === TLMessages.PutPartialData) { printf("PP") }
-            when (a.opcode === TLMessages.ArithmeticData) { printf("A ") }
+              when (a.opcode === TLMessages.ArithmeticData) { printf("A ") }
-            when (a.opcode === TLMessages.LogicalData) { printf("L ") }
+              when (a.opcode === TLMessages.LogicalData) { printf("L ") }
-            printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param)
+              printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param)
            }
          }
        }
      }
-      when (a.opcode === TLMessages.Get) {
+        when (a.opcode === TLMessages.Get) {
-        printf(log + " G  0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
+          printf(log + " G  0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
      }
    }
    val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
    for (i <- 0 until beatBytes) {
      val data = Wire(new ByteMonitor)
      val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
      val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
      data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
      // !!! calculate the AMO?
      data.value := a.data(8*(i+1)-1, 8*i)
      when (shadow_wen(i)) {
        shadow(i).write(a_waddr, data)
      }
    }
    for (i <- 0 until beatBytes) {
      val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1))
      when (inc_bytes_wen(i)) {
        inc_bytes(i).write(a_waddr, data)
      }
    }
    for (i <- 0 until inc_trees.size) {
      val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1))
      when (inc_trees_wen(i)) {
        inc_trees(i).write(a_waddr >> (i+1), data)
      }
    }
    // Process D access responses
    val d = RegNext(out.d.bits)
    val d_fire = Reg(next = out.d.fire(), init = Bool(false))
    val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire)
    val d_size = edge.size(d)
    val d_sizeOH = UIntToOH(d_size)
    val d_base = d_flight.base
    val d_address = d_base | d_address_inc
    val d_addr_hi = edge.addr_hi(d_address)
    val d_mask = edge.mask(d_base, d_size)
    val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
    // Grab the concurrency state we need
    val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
    val d_dec_bytes = dec_bytes.map(_.read(d_addr_hi))
    val d_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
    val d_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
    val d_inc_tree = d_inc_trees.fold(UInt(0))(_ + _)
    val d_dec_tree = d_dec_trees.fold(UInt(0))(_ + _)
    val d_inc = d_inc_bytes.map(_ + d_inc_tree)
    val d_dec = d_dec_bytes.map(_ + d_dec_tree)
    val d_shadow = shadow.map(_.read(d_addr_hi))
    val d_valid = valid(d.source)
    when (d_fire) {
      // Check the response is correct
      assert (d_size === d_flight.size)
      // addr_lo is allowed to differ
      when (d_flight.opcode === TLMessages.Hint) {
        assert (d.opcode === TLMessages.HintAck)
      }
      // Decrease the per-byte flight counter for the whole transaction
      when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) {
        when (d_size <= UInt(shift)) {
          dec_bytes_wen := d_mask
        }
        dec_trees_wen := d_sizeOH >> (shift+1)
        // NOTE: D channel carries uninterrupted multibeast op, so updating on last is fine
        for (i <- 0 until endSourceId) {
          // Does this modification overlap a Get? => wipe it's valid
          val f_base = flight(i).base
          val f_size = flight(i).size
          val f_bits = UIntToOH1(f_size, addressBits)
          val d_bits = UIntToOH1(d_size, addressBits)
          val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0)
          when (overlap) { valid(i) := Bool(false) }
        }
      }
-      when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) {
+      val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
-        assert (d.opcode === TLMessages.AccessAck)
+      for (i <- 0 until beatBytes) {
-        printf(log + " ")
+        val data = Wire(new TLRAMModel.ByteMonitor(params))
-        when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") }
+        val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
-        when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") }
+        val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
-        printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits))
+        data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
        // !!! calculate the AMO?
        data.value := a.data(8*(i+1)-1, 8*i)
        when (shadow_wen(i)) {
          shadow(i).write(a_waddr, data)
        }
      }
-      when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) {
+      for (i <- 0 until beatBytes) {
-        assert (d.opcode === TLMessages.AccessAckData)
+        val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1))
-        for (i <- 0 until beatBytes) {
+        when (inc_bytes_wen(i)) {
-          val got = d.data(8*(i+1)-1, 8*i)
+          inc_bytes(i).write(a_waddr, data)
-          val shadow = Wire(init = d_shadow(i))
+        }
-          when (d_mask(i)) {
+      }
-            val d_addr = d_addr_hi << shift | UInt(i)
+
-            printf(log + " ")
+      for (i <- 0 until inc_trees.size) {
-            when (d_flight.opcode === TLMessages.Get) { printf("g ") }
+        val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1))
-            when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") }
+        when (inc_trees_wen(i)) {
-            when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") }
+          inc_trees(i).write(a_waddr >> (i+1), data)
-            printf(" 0x%x := 0x%x", d_addr, got)
+        }
-            when (!shadow.valid) {
+      }
-              printf(", undefined (uninitialized or prior overlapping puts)\n")
+
-            } .elsewhen (d_inc(i) =/= d_dec(i)) {
+      // Process D access responses
-              printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i))
+      val d = RegNext(out.d.bits)
-            } .elsewhen (!d_fifo && !d_valid) {
+      val d_fire = Reg(next = out.d.fire(), init = Bool(false))
-              printf(", undefined (concurrent completed put)\n")
+      val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire)
-            } .otherwise {
+      val d_size = edge.size(d)
-              printf("\n")
+      val d_sizeOH = UIntToOH(d_size)
-              assert (shadow.value === got)
+      val d_base = d_flight.base
      val d_address = d_base | d_address_inc
      val d_addr_hi = edge.addr_hi(d_address)
      val d_mask = edge.mask(d_base, d_size)
      val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
      // Grab the concurrency state we need
      val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
      val d_dec_bytes = dec_bytes.map(_.read(d_addr_hi))
      val d_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
      val d_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
      val d_inc_tree = d_inc_trees.fold(UInt(0))(_ + _)
      val d_dec_tree = d_dec_trees.fold(UInt(0))(_ + _)
      val d_inc = d_inc_bytes.map(_ + d_inc_tree)
      val d_dec = d_dec_bytes.map(_ + d_dec_tree)
      val d_shadow = shadow.map(_.read(d_addr_hi))
      val d_valid = valid(d.source)
      when (d_fire) {
        // Check the response is correct
        assert (d_size === d_flight.size)
        // addr_lo is allowed to differ
        when (d_flight.opcode === TLMessages.Hint) {
          assert (d.opcode === TLMessages.HintAck)
        }
        // Decrease the per-byte flight counter for the whole transaction
        when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) {
          when (d_size <= UInt(shift)) {
            dec_bytes_wen := d_mask
          }
          dec_trees_wen := d_sizeOH >> (shift+1)
          // NOTE: D channel carries uninterrupted multibeat op, so updating on last is fine
          for (i <- 0 until endSourceId) {
            // Does this modification overlap a Get? => wipe its valid bit
            val f_base = flight(i).base
            val f_size = flight(i).size
            val f_bits = UIntToOH1(f_size, addressBits)
            val d_bits = UIntToOH1(d_size, addressBits)
            val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0)
            when (overlap) { valid(i) := Bool(false) }
          }
        }
        when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) {
          assert (d.opcode === TLMessages.AccessAck)
          printf(log + " ")
          when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") }
          when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") }
          printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits))
        }
        when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) {
          assert (d.opcode === TLMessages.AccessAckData)
          for (i <- 0 until beatBytes) {
            val got = d.data(8*(i+1)-1, 8*i)
            val shadow = Wire(init = d_shadow(i))
            when (d_mask(i)) {
              val d_addr = d_addr_hi << shift | UInt(i)
              printf(log + " ")
              when (d_flight.opcode === TLMessages.Get) { printf("g ") }
              when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") }
              when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") }
              printf(" 0x%x := 0x%x", d_addr, got)
              when (!shadow.valid) {
                printf(", undefined (uninitialized or prior overlapping puts)\n")
              } .elsewhen (d_inc(i) =/= d_dec(i)) {
                printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i))
              } .elsewhen (!d_fifo && !d_valid) {
                printf(", undefined (concurrent completed put)\n")
              } .otherwise {
                printf("\n")
                assert (shadow.value === got)
              }
            }
          }
        }
      }
    }
-    val d_waddr = Mux(wipe, wipeIndex, d_addr_hi)
+      val d_waddr = Mux(wipe, wipeIndex, d_addr_hi)
-    for (i <- 0 until beatBytes) {
+      for (i <- 0 until beatBytes) {
-      val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1))
+        val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1))
-      when (dec_bytes_wen(i)) {
+        when (dec_bytes_wen(i)) {
-        dec_bytes(i).write(d_waddr, data)
+          dec_bytes(i).write(d_waddr, data)
        }
      }
    }
-    for (i <- 0 until dec_trees.size) {
+      for (i <- 0 until dec_trees.size) {
-      val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1))
+        val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1))
-      when (dec_trees_wen(i)) {
+        when (dec_trees_wen(i)) {
-        dec_trees(i).write(d_waddr >> (i+1), data)
+          dec_trees(i).write(d_waddr >> (i+1), data)
        }
      }
    }
  }
 }
 // Companion object holding the bundle types used by the TLRAMModel module.
 // The bundles take their widths from an explicit MonitorParameters value and
 // extend GenericParameterizedBundle, rather than being declared as inner
 // classes of the module body.
 object TLRAMModel
 {
  // Widths needed to size a FlightMonitor: address width and size-field width.
  case class MonitorParameters(addressBits: Int, sizeBits: Int)
  // One entry of the per-byte shadow memories kept by the model.
  class ByteMonitor(params: MonitorParameters) extends GenericParameterizedBundle(params) {
    // Written as false while the memories are being wiped; otherwise set by
    // the A-channel write path of the model.
    val valid = Bool()
    // The data byte recorded from the A channel.
    val value = UInt(width = 8)
  }
  // Per-source-id record of a request, captured when it is issued on channel A
  // and consulted when the matching D response arrives.
  class FlightMonitor(params: MonitorParameters) extends GenericParameterizedBundle(params) {
    // Address of the request (params.addressBits wide).
    val base    = UInt(width = params.addressBits)
    // Size field of the request (params.sizeBits wide).
    val size    = UInt(width = params.sizeBits)
    // A-channel opcode of the request (3 bits).
    val opcode  = UInt(width = 3)
  }
 }
--- a/src/main/scala/uncore/tilelink2/RegisterRouter.scala
+++ b/src/main/scala/uncore/tilelink2/RegisterRouter.scala
@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends TLManagerNode(TLManagerPortParameters(
+  extends TLManagerNode(Seq(TLManagerPortParameters(
    Seq(TLManagerParameters(
      address            = Seq(address),
      executable         = executable,
@ -18,7 +18,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int =
      supportsPutFull    = TransferSizes(1, beatBytes),
      fifoId             = Some(0))), // requests are handled in order
    beatBytes  = beatBytes,
-    minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
+    minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
 {
  require (address.contiguous)
--- a/src/main/scala/uncore/tilelink2/SRAM.scala
+++ b/src/main/scala/uncore/tilelink2/SRAM.scala
@ -8,7 +8,7 @@ import diplomacy._
 class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = TLManagerNode(TLManagerPortParameters(
+  val node = TLManagerNode(Seq(TLManagerPortParameters(
    Seq(TLManagerParameters(
      address            = List(address),
      regionType         = RegionType.UNCACHED,
@ -18,7 +18,7 @@ class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)
      supportsPutFull    = TransferSizes(1, beatBytes),
      fifoId             = Some(0))), // requests are handled in order
    beatBytes  = beatBytes,
-    minLatency = 1)) // no bypass needed for this device
+    minLatency = 1))) // no bypass needed for this device
  // We require the address range to include an entire beat (for the write mask)
  require ((address.mask & (beatBytes-1)) == beatBytes-1)
--- a/src/main/scala/uncore/tilelink2/SourceShrinker.scala
+++ b/src/main/scala/uncore/tilelink2/SourceShrinker.scala
@ -15,8 +15,8 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
  private val client = TLClientParameters(sourceId = IdRange(0, maxInFlight))
  val node = TLAdapterNode(
    // We erase all client information since we crush the source Ids
-    clientFn  = { case _ => TLClientPortParameters(clients = Seq(client)) },
+    clientFn  = { _ => TLClientPortParameters(clients = Seq(client)) },
-    managerFn = { case Seq(mp) => mp })
+    managerFn = { mp => mp })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
@ -24,54 +24,51 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
      val out = node.bundleOut
    }
-    val edgeIn = node.edgesIn(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val edgeOut = node.edgesOut(0)
+      // Acquires cannot pass this adapter; it makes Probes impossible
-    val in = io.in(0)
+      require (!edgeIn.client.anySupportProbe || 
-    val out = io.out(0)
+               !edgeOut.manager.anySupportAcquireB)
-    // Acquires cannot pass this adapter; it makes Probes impossible
+      out.b.ready := Bool(true)
-    require (!edgeIn.client.anySupportProbe || 
+      out.c.valid := Bool(false)
-             !edgeOut.manager.anySupportAcquireB)
+      out.e.valid := Bool(false)
      in.b.valid := Bool(false)
      in.c.ready := Bool(true)
      in.e.ready := Bool(true)
-    out.b.ready := Bool(true)
+      if (maxInFlight >= edgeIn.client.endSourceId) {
-    out.c.valid := Bool(false)
+        out.a <> in.a
-    out.e.valid := Bool(false)
+        in.d <> out.d
-    in.b.valid := Bool(false)
+      } else {
-    in.c.ready := Bool(true)
+        // State tracking
-    in.e.ready := Bool(true)
+        val sourceIdMap = Mem(maxInFlight, in.a.bits.source)
        val allocated = RegInit(UInt(0, width = maxInFlight))
        val nextFreeOH = ~(leftOR(~allocated) << 1) & ~allocated
        val nextFree = OHToUInt(nextFreeOH)
        val full = allocated.andR()
-    if (maxInFlight >= edgeIn.client.endSourceId) {
+        val a_first = edgeIn.first(in.a)
-      out.a <> in.a
+        val d_last  = edgeIn.last(in.d)
      in.d <> out.d
    } else {
      // State tracking
      val sourceIdMap = Mem(maxInFlight, in.a.bits.source)
      val allocated = RegInit(UInt(0, width = maxInFlight))
      val nextFreeOH = ~(leftOR(~allocated) << 1) & ~allocated
      val nextFree = OHToUInt(nextFreeOH)
      val full = allocated.andR()
-      val a_first = edgeIn.first(in.a)
+        val block = a_first && full
-      val d_last  = edgeIn.last(in.d)
+        in.a.ready := out.a.ready && !block
        out.a.valid := in.a.valid && !block
        out.a.bits := in.a.bits
        out.a.bits.source := holdUnless(nextFree, a_first)
-      val block = a_first && full
+        in.d <> out.d
-      in.a.ready := out.a.ready && !block
+        in.d.bits.source := sourceIdMap(out.d.bits.source)
      out.a.valid := in.a.valid && !block
      out.a.bits := in.a.bits
      out.a.bits.source := holdUnless(nextFree, a_first)
-      in.d <> out.d
+        when (a_first && in.a.fire()) {
-      in.d.bits.source := sourceIdMap(out.d.bits.source)
+          sourceIdMap(nextFree) := in.a.bits.source
        }
-      when (a_first && in.a.fire()) {
+        val alloc = a_first && in.a.fire()
-        sourceIdMap(nextFree) := in.a.bits.source
+        val free = d_last && in.d.fire()
        val alloc_id = Mux(alloc, nextFreeOH, UInt(0))
        val free_id = Mux(free, UIntToOH(out.d.bits.source), UInt(0))
        allocated := (allocated | alloc_id) & ~free_id
      }
      val alloc = a_first && in.a.fire()
      val free = d_last && in.d.fire()
      val alloc_id = Mux(alloc, nextFreeOH, UInt(0))
      val free_id = Mux(free, UIntToOH(out.d.bits.source), UInt(0))
      allocated := (allocated | alloc_id) & ~free_id
    }
  }
 }
--- a/src/main/scala/uncore/tilelink2/ToAHB.scala
+++ b/src/main/scala/uncore/tilelink2/ToAHB.scala
@ -10,12 +10,12 @@ import uncore.ahb._
 import scala.math.{min, max}
 import AHBParameters._
-case class TLToAHBNode() extends MixedNode(TLImp, AHBImp)(
+case class TLToAHBNode() extends MixedAdapterNode(TLImp, AHBImp)(
-  dFn = { case (1, Seq(TLClientPortParameters(clients, unsafeAtomics, minLatency))) =>
+  dFn = { case TLClientPortParameters(clients, unsafeAtomics, minLatency) =>
    val masters = clients.map { case c => AHBMasterParameters(nodePath = c.nodePath) }
-    Seq(AHBMasterPortParameters(masters))
+    AHBMasterPortParameters(masters)
  },
-  uFn = { case (1, Seq(AHBSlavePortParameters(slaves, beatBytes))) =>
+  uFn = { case AHBSlavePortParameters(slaves, beatBytes) =>
    val managers = slaves.map { case s =>
      TLManagerParameters(
        address            = s.address,
@ -26,10 +26,8 @@ case class TLToAHBNode() extends MixedNode(TLImp, AHBImp)(
        supportsPutFull    = s.supportsWrite, // but not PutPartial
        fifoId             = Some(0)) // a common FIFO domain
    }
-    Seq(TLManagerPortParameters(managers, beatBytes, 1, 1))
+    TLManagerPortParameters(managers, beatBytes, 1, 1)
-  },
+  })
  numPO = 1 to 1,
  numPI = 1 to 1)
 class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
 {
@ -41,91 +39,89 @@ class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends Laz
      val out = node.bundleOut
    }
-    val in = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      val beatBytes = edgeOut.slave.beatBytes
-    val edgeIn  = node.edgesIn(0)
+      val maxTransfer = edgeOut.slave.maxTransfer
-    val edgeOut = node.edgesOut(0)
+      val lgMax = log2Ceil(maxTransfer)
-    val beatBytes = edgeOut.slave.beatBytes
+      val lgBytes = log2Ceil(beatBytes)
    val maxTransfer = edgeOut.slave.maxTransfer
    val lgMax = log2Ceil(maxTransfer)
    val lgBytes = log2Ceil(beatBytes)
-    // AHB has no cache coherence
+      // AHB has no cache coherence
-    in.b.valid := Bool(false)
+      in.b.valid := Bool(false)
-    in.c.ready := Bool(true)
+      in.c.ready := Bool(true)
-    in.e.ready := Bool(true)
+      in.e.ready := Bool(true)
-    // We need a skidpad to capture D output:
+      // We need a skidpad to capture D output:
-    // We cannot know if the D response will be accepted until we have
+      // We cannot know if the D response will be accepted until we have
-    // presented it on D as valid.  We also can't back-pressure AHB in the
+      // presented it on D as valid.  We also can't back-pressure AHB in the
-    // data phase.  Therefore, we must have enough space to save the data
+      // data phase.  Therefore, we must have enough space to save the data
-    // phase result.  Whenever we have a queued response, we can not allow
+      // phase result.  Whenever we have a queued response, we can not allow
-    // AHB to present new responses, so we must quash the address phase.
+      // AHB to present new responses, so we must quash the address phase.
-    val d = Wire(in.d)
+      val d = Wire(in.d)
-    in.d <> Queue(d, 1, flow = true)
+      in.d <> Queue(d, 1, flow = true)
-    val a_quash = in.d.valid && !in.d.ready
+      val a_quash = in.d.valid && !in.d.ready
-    // Record what is coming out in d_phase
+      // Record what is coming out in d_phase
-    val d_valid   = RegInit(Bool(false))
+      val d_valid   = RegInit(Bool(false))
-    val d_hasData = Reg(Bool())
+      val d_hasData = Reg(Bool())
-    val d_error   = Reg(Bool())
+      val d_error   = Reg(Bool())
-    val d_addr_lo = Reg(UInt(width = lgBytes))
+      val d_addr_lo = Reg(UInt(width = lgBytes))
-    val d_source  = Reg(UInt())
+      val d_source  = Reg(UInt())
-    val d_size    = Reg(UInt())
+      val d_size    = Reg(UInt())
-    when (out.hreadyout) { d_error := d_error || out.hresp }
+      when (out.hreadyout) { d_error := d_error || out.hresp }
-    when (d.fire()) { d_valid := Bool(false) }
+      when (d.fire()) { d_valid := Bool(false) }
-    d.valid := d_valid && out.hreadyout
+      d.valid := d_valid && out.hreadyout
-    d.bits  := edgeIn.AccessAck(d_addr_lo, UInt(0), d_source, d_size, out.hrdata, out.hresp || d_error)
+      d.bits  := edgeIn.AccessAck(d_addr_lo, UInt(0), d_source, d_size, out.hrdata, out.hresp || d_error)
-    d.bits.opcode := Mux(d_hasData, TLMessages.AccessAckData, TLMessages.AccessAck)
+      d.bits.opcode := Mux(d_hasData, TLMessages.AccessAckData, TLMessages.AccessAck)
-    // We need an irrevocable input for AHB to stall on read bursts
+      // We need an irrevocable input for AHB to stall on read bursts
-    // We also need the values to NOT change when valid goes low => 1 entry only
+      // We also need the values to NOT change when valid goes low => 1 entry only
-    val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
+      val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
-    val a_valid = a.valid && !a_quash
+      val a_valid = a.valid && !a_quash
-    // This is lot like TLEdge.firstlast, but counts beats also for single-beat TL types
+      // This is a lot like TLEdge.firstlast, but counts beats also for single-beat TL types
-    val a_size = edgeIn.size(a.bits)
+      val a_size = edgeIn.size(a.bits)
-    val a_beats1 = UIntToOH1(a_size, lgMax) >> lgBytes
+      val a_beats1 = UIntToOH1(a_size, lgMax) >> lgBytes
-    val a_counter = RegInit(UInt(0, width = log2Up(maxTransfer/beatBytes)))
+      val a_counter = RegInit(UInt(0, width = log2Up(maxTransfer/beatBytes)))
-    val a_counter1 = a_counter - UInt(1)
+      val a_counter1 = a_counter - UInt(1)
-    val a_first = a_counter === UInt(0)
+      val a_first = a_counter === UInt(0)
-    val a_last = a_counter === UInt(1) || a_beats1 === UInt(0)
+      val a_last = a_counter === UInt(1) || a_beats1 === UInt(0)
-    val a_offset = (a_beats1 & ~a_counter1) << lgBytes
+      val a_offset = (a_beats1 & ~a_counter1) << lgBytes
-    val a_hasData = edgeIn.hasData(a.bits)
+      val a_hasData = edgeIn.hasData(a.bits)
-    // Expand no-data A-channel requests into multiple beats
+      // Expand no-data A-channel requests into multiple beats
-    a.ready := (a_hasData || a_last) && out.hreadyout && !a_quash
+      a.ready := (a_hasData || a_last) && out.hreadyout && !a_quash
-    when (a_valid && out.hreadyout) {
+      when (a_valid && out.hreadyout) {
-      a_counter := Mux(a_first, a_beats1, a_counter1)
+        a_counter := Mux(a_first, a_beats1, a_counter1)
-      d_valid := !a_hasData || a_last
+        d_valid := !a_hasData || a_last
-      // Record what will be in the data phase
+        // Record what will be in the data phase
-      when (a_first) {
+        when (a_first) {
-        d_hasData := !a_hasData
+          d_hasData := !a_hasData
-        d_error   := Bool(false)
+          d_error   := Bool(false)
-        d_addr_lo := a.bits.address
+          d_addr_lo := a.bits.address
-        d_source  := a.bits.source
+          d_source  := a.bits.source
-        d_size    := a.bits.size
+          d_size    := a.bits.size
        }
      }
      // Transform TL size into AHB hsize+hburst
      val a_size_bits = a_size.getWidth
      val a_sizeDelta = Cat(UInt(0, width = 1), a_size) - UInt(lgBytes+1)
      val a_singleBeat = a_sizeDelta(a_size_bits)
      val a_logBeats1 = a_sizeDelta(a_size_bits-1, 0)
      out.hmastlock := Bool(false) // for now
      out.htrans    := Mux(a_valid, Mux(a_first, TRANS_NONSEQ, TRANS_SEQ), Mux(a_first, TRANS_IDLE, TRANS_BUSY))
      out.hsel      := a_valid || !a_first
      out.hready    := out.hreadyout
      out.hwrite    := a_hasData
      out.haddr     := a.bits.address | a_offset
      out.hsize     := Mux(a_singleBeat, a.bits.size, UInt(lgBytes))
      out.hburst    := Mux(a_singleBeat, BURST_SINGLE, (a_logBeats1<<1) | UInt(1))
      out.hprot     := PROT_DEFAULT
      out.hwdata    := RegEnable(a.bits.data, a.fire())
    }
    // Transform TL size into AHB hsize+hburst
    val a_size_bits = a_size.getWidth
    val a_sizeDelta = Cat(UInt(0, width = 1), a_size) - UInt(lgBytes+1)
    val a_singleBeat = a_sizeDelta(a_size_bits)
    val a_logBeats1 = a_sizeDelta(a_size_bits-1, 0)
    out.hmastlock := Bool(false) // for now
    out.htrans    := Mux(a_valid, Mux(a_first, TRANS_NONSEQ, TRANS_SEQ), Mux(a_first, TRANS_IDLE, TRANS_BUSY))
    out.hsel      := a_valid || !a_first
    out.hready    := out.hreadyout
    out.hwrite    := a_hasData
    out.haddr     := a.bits.address | a_offset
    out.hsize     := Mux(a_singleBeat, a.bits.size, UInt(lgBytes))
    out.hburst    := Mux(a_singleBeat, BURST_SINGLE, (a_logBeats1<<1) | UInt(1))
    out.hprot     := PROT_DEFAULT
    out.hwdata    := RegEnable(a.bits.data, a.fire())
  }
 }
--- a/src/main/scala/uncore/tilelink2/ToAPB.scala
+++ b/src/main/scala/uncore/tilelink2/ToAPB.scala
@ -10,12 +10,12 @@ import uncore.apb._
 import scala.math.{min, max}
 import APBParameters._
-case class TLToAPBNode() extends MixedNode(TLImp, APBImp)(
+case class TLToAPBNode() extends MixedAdapterNode(TLImp, APBImp)(
-  dFn = { case (1, Seq(TLClientPortParameters(clients, unsafeAtomics, minLatency))) =>
+  dFn = { case TLClientPortParameters(clients, unsafeAtomics, minLatency) =>
    val masters = clients.map { case c => APBMasterParameters(nodePath = c.nodePath) }
-    Seq(APBMasterPortParameters(masters))
+    APBMasterPortParameters(masters)
  },
-  uFn = { case (1, Seq(APBSlavePortParameters(slaves, beatBytes))) =>
+  uFn = { case APBSlavePortParameters(slaves, beatBytes) =>
    val managers = slaves.map { case s =>
      TLManagerParameters(
        address            = s.address,
@ -27,10 +27,8 @@ case class TLToAPBNode() extends MixedNode(TLImp, APBImp)(
        supportsPutFull    = if (s.supportsWrite) TransferSizes(1, beatBytes) else TransferSizes.none,
        fifoId             = Some(0)) // a common FIFO domain
    }
-    Seq(TLManagerPortParameters(managers, beatBytes, 1, 0))
+    TLManagerPortParameters(managers, beatBytes, 1, 0)
-  },
+  })
  numPO = 1 to 1,
  numPI = 1 to 1)
 class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
 {
@ -42,51 +40,49 @@ class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends Laz
      val out = node.bundleOut
    }
-    val in = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      val beatBytes = edgeOut.slave.beatBytes
-    val edgeIn  = node.edgesIn(0)
+      val lgBytes = log2Ceil(beatBytes)
    val edgeOut = node.edgesOut(0)
    val beatBytes = edgeOut.slave.beatBytes
    val lgBytes = log2Ceil(beatBytes)
-    // APB has no cache coherence
+      // APB has no cache coherence
-    in.b.valid := Bool(false)
+      in.b.valid := Bool(false)
-    in.c.ready := Bool(true)
+      in.c.ready := Bool(true)
-    in.e.ready := Bool(true)
+      in.e.ready := Bool(true)
-    // We need a skidpad to capture D output:
+      // We need a skidpad to capture D output:
-    // We cannot know if the D response will be accepted until we have
+      // We cannot know if the D response will be accepted until we have
-    // presented it on D as valid.  We also can't back-pressure APB in the
+      // presented it on D as valid.  We also can't back-pressure APB in the
-    // data phase.  Therefore, we must have enough space to save the data
+      // data phase.  Therefore, we must have enough space to save the data
-    // phase result.  Whenever we have a queued response, we can not allow
+      // phase result.  Whenever we have a queued response, we can not allow
-    // APB to present new responses, so we must quash the address phase.
+      // APB to present new responses, so we must quash the address phase.
-    val d = Wire(in.d)
+      val d = Wire(in.d)
-    in.d <> Queue(d, 1, flow = true)
+      in.d <> Queue(d, 1, flow = true)
-    // We need an irrevocable input for APB to stall
+      // We need an irrevocable input for APB to stall
-    val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
+      val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
-    val a_enable = RegInit(Bool(false))
+      val a_enable = RegInit(Bool(false))
-    val a_sel    = a.valid && RegNext(!in.d.valid || in.d.ready)
+      val a_sel    = a.valid && RegNext(!in.d.valid || in.d.ready)
-    val a_write  = edgeIn.hasData(a.bits)
+      val a_write  = edgeIn.hasData(a.bits)
-    when (a_sel)    { a_enable := Bool(true) }
+      when (a_sel)    { a_enable := Bool(true) }
-    when (d.fire()) { a_enable := Bool(false) }
+      when (d.fire()) { a_enable := Bool(false) }
-    out.psel    := a_sel
+      out.psel    := a_sel
-    out.penable := a_enable
+      out.penable := a_enable
-    out.pwrite  := a_write
+      out.pwrite  := a_write
-    out.paddr   := a.bits.address
+      out.paddr   := a.bits.address
-    out.pprot   := PROT_DEFAULT
+      out.pprot   := PROT_DEFAULT
-    out.pwdata  := a.bits.data
+      out.pwdata  := a.bits.data
-    out.pstrb   := Mux(a_write, a.bits.mask, UInt(0))
+      out.pstrb   := Mux(a_write, a.bits.mask, UInt(0))
-    a.ready := a_enable && out.pready
+      a.ready := a_enable && out.pready
-    d.valid := a_enable && out.pready
+      d.valid := a_enable && out.pready
-    assert (!d.valid || d.ready)
+      assert (!d.valid || d.ready)
-    d.bits := edgeIn.AccessAck(a.bits, UInt(0), out.prdata, out.pslverr)
+      d.bits := edgeIn.AccessAck(a.bits, UInt(0), out.prdata, out.pslverr)
-    d.bits.opcode := Mux(a_write, TLMessages.AccessAck, TLMessages.AccessAckData)
+      d.bits.opcode := Mux(a_write, TLMessages.AccessAck, TLMessages.AccessAckData)
    }
  }
 }
--- a/src/main/scala/uncore/tilelink2/ToAXI4.scala
+++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala
@ -10,16 +10,16 @@ import util.PositionalMultiQueue
 import uncore.axi4._
 import scala.math.{min, max}
-case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
+case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
-  dFn = { case (1, _) =>
+  dFn = { _ =>
    // We must erase all client information, because we crush their source Ids
    val masters = Seq(
      AXI4MasterParameters(
        id      = IdRange(0, 1 << idBits),
        aligned = true))
-    Seq(AXI4MasterPortParameters(masters))
+    AXI4MasterPortParameters(masters)
  },
-  uFn = { case (1, Seq(p)) => Seq(TLManagerPortParameters(
+  uFn = { p => TLManagerPortParameters(
    managers = p.slaves.map { case s =>
      TLManagerParameters(
        address            = s.address,
@ -31,10 +31,8 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
        supportsPutPartial = s.supportsWrite)},
        // AXI4 is NEVER fifo in TL sense (R+W are independent)
      beatBytes = p.beatBytes,
-      minLatency = p.minLatency))
+      minLatency = p.minLatency)
-  },
+  })
  numPO = 1 to 1,
  numPI = 1 to 1)
 class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
 {
@ -46,185 +44,182 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameter
      val out = node.bundleOut
    }
-    val in = io.in(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val out = io.out(0)
+      val slaves  = edgeOut.slave.slaves
-    val edgeIn  = node.edgesIn(0)
+      // All pairs of slaves must promise that they will never interleave data
-    val edgeOut = node.edgesOut(0)
+      require (slaves(0).interleavedId.isDefined)
-    val slaves  = edgeOut.slave.slaves
+      slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
-    // All pairs of slaves must promise that they will never interleave data
+      // We need to ensure that a slave does not stall trying to send B while we need to receive R
-    require (slaves(0).interleavedId.isDefined)
+      // Since R&W have independent flow control, it is possible for a W to cut in-line and get into
-    slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
+      // a slave's buffers, preventing us from getting all the R responses we need to release D for B.
      // This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
      // AR before working on AW might have an AW slipped between two AR fragments.
      val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
-    // We need to ensure that a slave does not stall trying to send B while we need to receive R
+      // We need to keep the following state from A => D: (addr_lo, size, source)
-    // Since R&W have independent flow control, it is possible for a W to cut in-line and get into
+      // All of those fields could potentially require 0 bits (argh. Chisel.)
-    // a slave's buffers, preventing us from getting all the R responses we need to release D for B.
+      // We will pack as many of the lowest bits of state as fit into the AXI ID.
-    // This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
+      // Any bits left-over must be put into a bank of Queues.
-    // AR before working on AW might have an AW slipped between two AR fragments.
+      // The Queues are indexed by as many of the source bits as fit into the AXI ID.
-    val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
+      // The Queues are deep enough that every source has guaranteed space in its Queue.
-    // We need to keep the following state from A => D: (addr_lo, size, source)
+      val sourceBits = log2Ceil(edgeIn.client.endSourceId)
-    // All of those fields could potentially require 0 bits (argh. Chisel.)
+      val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
-    // We will pack as many of the lowest bits of state as fit into the AXI ID.
+      val addrBits = log2Ceil(edgeIn.manager.beatBytes)
-    // Any bits left-over must be put into a bank of Queues.
+      val stateBits = addrBits + sizeBits + sourceBits // could be 0
    // The Queues are indexed by as many of the source bits as fit into the AXI ID.
    // The Queues are deep enough that every source has guaranteed space in its Queue.
-    val sourceBits = log2Ceil(edgeIn.client.endSourceId)
+      val a_address = edgeIn.address(in.a.bits)
-    val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
+      val a_addr_lo = edgeIn.addr_lo(a_address)
-    val addrBits = log2Ceil(edgeIn.manager.beatBytes)
+      val a_source  = in.a.bits.source
-    val stateBits = addrBits + sizeBits + sourceBits // could be 0
+      val a_size    = edgeIn.size(in.a.bits)
      val a_isPut   = edgeIn.hasData(in.a.bits)
      val a_last    = edgeIn.last(in.a)
-    val a_address = edgeIn.address(in.a.bits)
+      // Make sure the fields are within the bounds we assumed
-    val a_addr_lo = edgeIn.addr_lo(a_address)
+      assert (a_source  < UInt(BigInt(1) << sourceBits))
-    val a_source  = in.a.bits.source
+      assert (a_size    < UInt(BigInt(1) << sizeBits))
-    val a_size    = edgeIn.size(in.a.bits)
+      assert (a_addr_lo < UInt(BigInt(1) << addrBits))
    val a_isPut   = edgeIn.hasData(in.a.bits)
    val a_last    = edgeIn.last(in.a)
-    // Make sure the fields are within the bounds we assumed
+      // Carefully pack/unpack fields into the state we send
-    assert (a_source  < UInt(BigInt(1) << sourceBits))
+      val baseEnd = 0
-    assert (a_size    < UInt(BigInt(1) << sizeBits))
+      val (sourceEnd, sourceOff) = (sourceBits + baseEnd,   baseEnd)
-    assert (a_addr_lo < UInt(BigInt(1) << addrBits))
+      val (sizeEnd,   sizeOff)   = (sizeBits   + sourceEnd, sourceEnd)
      val (addrEnd,   addrOff)   = (addrBits   + sizeEnd,   sizeEnd)
      require (addrEnd == stateBits)
-    // Carefully pack/unpack fields into the state we send
+      val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
-    val baseEnd = 0
+      val a_id = if (idBits == 0) UInt(0) else a_state
    val (sourceEnd, sourceOff) = (sourceBits + baseEnd,   baseEnd)
    val (sizeEnd,   sizeOff)   = (sizeBits   + sourceEnd, sourceEnd)
    val (addrEnd,   addrOff)   = (addrBits   + sizeEnd,   sizeEnd)
    require (addrEnd == stateBits)
-    val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
+      val r_state = Wire(UInt(width = stateBits))
-    val a_id = if (idBits == 0) UInt(0) else a_state
+      val r_source  = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
      val r_size    = if (sizeBits   > 0) r_state(sizeEnd  -1, sizeOff)   else UInt(0)
      val r_addr_lo = if (addrBits   > 0) r_state(addrEnd  -1, addrOff)   else UInt(0)
-    val r_state = Wire(UInt(width = stateBits))
+      val b_state = Wire(UInt(width = stateBits))
-    val r_source  = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
+      val b_source  = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
-    val r_size    = if (sizeBits   > 0) r_state(sizeEnd  -1, sizeOff)   else UInt(0)
+      val b_size    = if (sizeBits   > 0) b_state(sizeEnd  -1, sizeOff)   else UInt(0)
-    val r_addr_lo = if (addrBits   > 0) r_state(addrEnd  -1, addrOff)   else UInt(0)
+      val b_addr_lo = if (addrBits   > 0) b_state(addrEnd  -1, addrOff)   else UInt(0)
-    val b_state = Wire(UInt(width = stateBits))
+      val r_last = out.r.bits.last
-    val b_source  = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
+      val r_id = out.r.bits.id
-    val b_size    = if (sizeBits   > 0) b_state(sizeEnd  -1, sizeOff)   else UInt(0)
+      val b_id = out_b.bits.id
    val b_addr_lo = if (addrBits   > 0) b_state(addrEnd  -1, addrOff)   else UInt(0)
-    val r_last = out.r.bits.last
+      if (stateBits <= idBits) { // No need for any state tracking
-    val r_id = out.r.bits.id
+        r_state := r_id
-    val b_id = out_b.bits.id
+        b_state := b_id
      } else {
        val bankIndexBits = min(sourceBits, idBits)
        val posBits = max(0, sourceBits - idBits)
        val implicitBits = max(idBits, sourceBits)
        val bankBits = stateBits - implicitBits
        val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
        def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
-    if (stateBits <= idBits) { // No need for any state tracking
+        val banks = Seq.tabulate(numBanks) { i =>
-      r_state := r_id
+          // We know there can only be as many outstanding requests as TL sources
-      b_state := b_id
+          // However, AXI read and write queues are not mutually FIFO.
-    } else {
+          // Therefore, we want to pop them individually, but share the storage.
-      val bankIndexBits = min(sourceBits, idBits)
+          val bypass = combinational && edgeOut.slave.minLatency == 0
-      val posBits = max(0, sourceBits - idBits)
+          PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
-      val implicitBits = max(idBits, sourceBits)
+        }
      val bankBits = stateBits - implicitBits
      val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
      def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
-      val banks = Seq.tabulate(numBanks) { i =>
+        val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
-        // We know there can only be as many outstanding requests as TL sources
+        val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
-        // However, AXI read and write queues are not mutually FIFO.
+        val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
-        // Therefore, we want to pop them individually, but share the storage.
+        val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
-        val bypass = combinational && edgeOut.slave.minLatency == 0
+        val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
-        PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
+        val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
        val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
        banks.zipWithIndex.foreach { case (q, i) =>
          // Push a_state into the banks
          q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
          q.io.enq.bits.pos  := a_bankPosition
          q.io.enq.bits.data := a_state >> implicitBits
          q.io.enq.bits.way  := Mux(a_isPut, UInt(0), UInt(1))
          // Pop the bank's ways
          q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
          q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
          // The FIFOs must be valid when we're ready to pop them...
          assert (q.io.deq(0).valid || !q.io.deq(0).ready)
          assert (q.io.deq(1).valid || !q.io.deq(1).ready)
        }
        val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
        val b_bankPos  = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
        val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
        val r_bankPos  = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
        def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
        b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
        r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
      }
-      val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
+      // We need these Queues because AXI4 queues are irrevocable
-      val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
+      val depth = if (combinational) 1 else 2
-      val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
+      val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
-      val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
+      val out_w = Wire(out.w)
-      val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
+      out.w <> Queue.irrevocable(out_w, entries=depth, flow=combinational)
-      val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
+      val queue_arw = Queue.irrevocable(out_arw, entries=depth, flow=combinational)
      val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
-      banks.zipWithIndex.foreach { case (q, i) =>
+      // Fan out the ARW channel to AR and AW
-        // Push a_state into the banks
+      out.ar.bits := queue_arw.bits
-        q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
+      out.aw.bits := queue_arw.bits
-        q.io.enq.bits.pos  := a_bankPosition
+      out.ar.valid := queue_arw.valid && !queue_arw.bits.wen
-        q.io.enq.bits.data := a_state >> implicitBits
+      out.aw.valid := queue_arw.valid &&  queue_arw.bits.wen
-        q.io.enq.bits.way  := Mux(a_isPut, UInt(0), UInt(1))
+      queue_arw.ready := Mux(queue_arw.bits.wen, out.aw.ready, out.ar.ready)
        // Pop the bank's ways
        q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
        q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
        // The FIFOs must be valid when we're ready to pop them...
        assert (q.io.deq(0).valid || !q.io.deq(0).ready)
        assert (q.io.deq(1).valid || !q.io.deq(1).ready)
      }
-      val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
+      val beatBytes = edgeIn.manager.beatBytes
-      val b_bankPos  = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
+      val maxSize   = UInt(log2Ceil(beatBytes))
-      val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
+      val doneAW    = RegInit(Bool(false))
-      val r_bankPos  = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
+      when (in.a.fire()) { doneAW := !a_last }
-      def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
+      val arw = out_arw.bits
-      b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
+      arw.wen   := a_isPut
-      r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
+      arw.id    := a_id // truncated
      arw.addr  := a_address
      arw.len   := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
      arw.size  := Mux(a_size >= maxSize, maxSize, a_size)
      arw.burst := AXI4Parameters.BURST_INCR
      arw.lock  := UInt(0) // not exclusive (LR/SC unsupported b/c no forward progress guarantee)
      arw.cache := UInt(0) // do not allow AXI to modify our transactions
      arw.prot  := AXI4Parameters.PROT_PRIVILEDGED
      arw.qos   := UInt(0) // no QoS
      in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
      out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
      out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
      out_w.bits.data := in.a.bits.data
      out_w.bits.strb := in.a.bits.mask
      out_w.bits.last := a_last
      // Arbitrate the AXI4 R and B response channels onto the single D channel
      val r_holds_d = RegInit(Bool(false))
      when (out.r.fire()) { r_holds_d := !out.r.bits.last }
      // Give R higher priority than B, and keep D locked to R until the last beat of a read burst
      val r_wins = out.r.valid || r_holds_d
      out.r.ready := in.d.ready
      out_b.ready := in.d.ready && !r_wins
      in.d.valid := Mux(r_wins, out.r.valid, out_b.valid)
      val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
      val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY
      val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
      val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
      in.d.bits := Mux(r_wins, r_d, b_d)
      in.d.bits.data := out.r.bits.data // avoid a costly Mux
      // Tie off unused channels
      in.b.valid := Bool(false)
      in.c.ready := Bool(true)
      in.e.ready := Bool(true)
    }
    // We need these Queues because AXI4 channels are irrevocable (a request, once valid, may not be withdrawn)
    val depth = if (combinational) 1 else 2
    val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
    val out_w = Wire(out.w)
    out.w <> Queue.irrevocable(out_w, entries=depth, flow=combinational)
    val queue_arw = Queue.irrevocable(out_arw, entries=depth, flow=combinational)
    // Fan out the ARW channel to AR and AW
    out.ar.bits := queue_arw.bits
    out.aw.bits := queue_arw.bits
    out.ar.valid := queue_arw.valid && !queue_arw.bits.wen
    out.aw.valid := queue_arw.valid &&  queue_arw.bits.wen
    queue_arw.ready := Mux(queue_arw.bits.wen, out.aw.ready, out.ar.ready)
    val beatBytes = edgeIn.manager.beatBytes
    val maxSize   = UInt(log2Ceil(beatBytes))
    val doneAW    = RegInit(Bool(false))
    when (in.a.fire()) { doneAW := !a_last }
    val arw = out_arw.bits
    arw.wen   := a_isPut
    arw.id    := a_id // truncated
    arw.addr  := a_address
    arw.len   := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
    arw.size  := Mux(a_size >= maxSize, maxSize, a_size)
    arw.burst := AXI4Parameters.BURST_INCR
    arw.lock  := UInt(0) // not exclusive (LR/SC unsupported b/c no forward progress guarantee)
    arw.cache := UInt(0) // do not allow AXI to modify our transactions
    arw.prot  := AXI4Parameters.PROT_PRIVILEDGED
    arw.qos   := UInt(0) // no QoS
    in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
    out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
    out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
    out_w.bits.data := in.a.bits.data
    out_w.bits.strb := in.a.bits.mask
    out_w.bits.last := a_last
    // Arbitrate the AXI4 R and B response channels onto the single D channel
    val r_holds_d = RegInit(Bool(false))
    when (out.r.fire()) { r_holds_d := !out.r.bits.last }
    // Give R higher priority than B, and keep D locked to R until the last beat of a read burst
    val r_wins = out.r.valid || r_holds_d
    out.r.ready := in.d.ready
    out_b.ready := in.d.ready && !r_wins
    in.d.valid := Mux(r_wins, out.r.valid, out_b.valid)
    val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
    val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY
    val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
    val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
    in.d.bits := Mux(r_wins, r_d, b_d)
    in.d.bits.data := out.r.bits.data // avoid a costly Mux
    // Tie off unused channels
    in.b.valid := Bool(false)
    in.c.ready := Bool(true)
    in.e.ready := Bool(true)
  }
 }
--- a/src/main/scala/uncore/tilelink2/WidthWidget.scala
+++ b/src/main/scala/uncore/tilelink2/WidthWidget.scala
@ -12,8 +12,8 @@ import scala.math.{min,max}
 class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyModule
 {
  val node = TLAdapterNode(
-    clientFn  = { case Seq(c) => c },
+    clientFn  = { case c => c },
-    managerFn = { case Seq(m) => m.copy(beatBytes = innerBeatBytes) })
+    managerFn = { case m => m.copy(beatBytes = innerBeatBytes) })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
@ -139,27 +139,24 @@ class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyMod
      }
    }
-    val edgeOut = node.edgesOut(0)
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
-    val edgeIn = node.edgesIn(0)
+      splice(edgeIn,  in.a,  edgeOut, out.a)
-    val in = io.in(0)
+      splice(edgeOut, out.d, edgeIn,  in.d)
    val out = io.out(0)
-    splice(edgeIn,  in.a,  edgeOut, out.a)
+      if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
-    splice(edgeOut, out.d, edgeIn,  in.d)
+        splice(edgeOut, out.b, edgeIn,  in.b)
-
+        splice(edgeIn,  in.c,  edgeOut, out.c)
-    if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
+        in.e.ready := out.e.ready
-      splice(edgeOut, out.b, edgeIn,  in.b)
+        out.e.valid := in.e.valid
-      splice(edgeIn,  in.c,  edgeOut, out.c)
+        out.e.bits := in.e.bits
-      in.e.ready := out.e.ready
+      } else {
-      out.e.valid := in.e.valid
+        in.b.valid := Bool(false)
-      out.e.bits := in.e.bits
+        in.c.ready := Bool(true)
-    } else {
+        in.e.ready := Bool(true)
-      in.b.valid := Bool(false)
+        out.b.ready := Bool(true)
-      in.c.ready := Bool(true)
+        out.c.valid := Bool(false)
-      in.e.ready := Bool(true)
+        out.e.valid := Bool(false)
-      out.b.ready := Bool(true)
+      }
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
  }
 }
--- a/src/main/scala/uncore/tilelink2/Xbar.scala
+++ b/src/main/scala/uncore/tilelink2/Xbar.scala
@ -34,7 +34,7 @@ class TLXbar(policy: TLArbiter.Policy = TLArbiter.lowestIndexFirst)(implicit p:
    }
  }
-  val node = TLAdapterNode(
+  val node = TLNexusNode(
    numClientPorts  = 1 to 32,
    numManagerPorts = 1 to 32,
    clientFn  = { seq =>