
uncore: switch to new diplomacy Node API

Most adapters should work on multiple ports.
This patch changes them all.
Wesley W. Terpstra 2017-01-29 15:17:52 -08:00
parent 4d646939b0
commit 972953868c
34 changed files with 1681 additions and 1722 deletions
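For orientation, a minimal sketch (not taken from the diffs below; the address and beatBytes values are hypothetical) of what the change looks like at a call site: source and sink nodes now take a Seq of per-port parameters, and the old InteriorNode is split into AdapterNode (one transform function applied to each port) and NexusNode (functions over the whole Seq of ports).

// Old API: one port's parameters plus a port-count range
val oldNode = AHBSlaveNode(AHBSlavePortParameters(
  Seq(AHBSlaveParameters(address = Seq(AddressSet(0x0, 0xfff)))), // hypothetical address
  beatBytes = 4), numPorts = 1 to 1)

// New API: a Seq of per-port parameters, one entry per port
val newNode = AHBSlaveNode(Seq(AHBSlavePortParameters(
  Seq(AHBSlaveParameters(address = Seq(AddressSet(0x0, 0xfff)))), // hypothetical address
  beatBytes = 4)))

// Adapters now take per-port functions; nexuses keep the Seq-in, single-out form
val adapter = AXI4AdapterNode(
  masterFn = { p => p }, // applied once per connected port
  slaveFn  = { p => p })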

View File

@@ -13,7 +13,7 @@ import uncore.util._
class ScratchpadSlavePort(implicit p: Parameters) extends LazyModule {
val coreDataBytes = p(XLen)/8
val node = TLManagerNode(TLManagerPortParameters(
val node = TLManagerNode(Seq(TLManagerPortParameters(
Seq(TLManagerParameters(
address = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))),
regionType = RegionType.UNCACHED,
@@ -25,7 +25,7 @@ class ScratchpadSlavePort(implicit p: Parameters) extends LazyModule {
supportsGet = TransferSizes(1, coreDataBytes),
fifoId = Some(0))), // requests handled in FIFO order
beatBytes = coreDataBytes,
minLatency = 1))
minLatency = 1)))
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {

View File

@@ -31,16 +31,14 @@ object AHBImp extends NodeImp[AHBMasterPortParameters, AHBSlavePortParameters, A
// Nodes implemented inside modules
case class AHBIdentityNode() extends IdentityNode(AHBImp)
case class AHBMasterNode(portParams: AHBMasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SourceNode(AHBImp)(portParams, numPorts)
case class AHBSlaveNode(portParams: AHBSlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SinkNode(AHBImp)(portParams, numPorts)
case class AHBAdapterNode(
masterFn: Seq[AHBMasterPortParameters] => AHBMasterPortParameters,
slaveFn: Seq[AHBSlavePortParameters] => AHBSlavePortParameters,
numMasterPorts: Range.Inclusive = 1 to 1,
numSlavePorts: Range.Inclusive = 1 to 1)
extends InteriorNode(AHBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
case class AHBMasterNode(portParams: Seq[AHBMasterPortParameters]) extends SourceNode(AHBImp)(portParams)
case class AHBSlaveNode(portParams: Seq[AHBSlavePortParameters]) extends SinkNode(AHBImp)(portParams)
case class AHBNexusNode(
masterFn: Seq[AHBMasterPortParameters] => AHBMasterPortParameters,
slaveFn: Seq[AHBSlavePortParameters] => AHBSlavePortParameters,
numMasterPorts: Range.Inclusive = 1 to 999,
numSlavePorts: Range.Inclusive = 1 to 999)
extends NexusNode(AHBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
// Nodes passed from an inner module
case class AHBOutputNode() extends OutputNode(AHBImp)

View File

@@ -9,13 +9,13 @@ import regmapper._
import scala.math.{min,max}
class AHBRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
extends AHBSlaveNode(AHBSlavePortParameters(
extends AHBSlaveNode(Seq(AHBSlavePortParameters(
Seq(AHBSlaveParameters(
address = Seq(address),
executable = executable,
supportsWrite = TransferSizes(1, min(address.alignment.toInt, beatBytes * AHBParameters.maxTransfer)),
supportsRead = TransferSizes(1, min(address.alignment.toInt, beatBytes * AHBParameters.maxTransfer)))),
beatBytes = beatBytes))
beatBytes = beatBytes)))
{
require (address.contiguous)

View File

@@ -8,14 +8,14 @@ import diplomacy._
class AHBRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
{
val node = AHBSlaveNode(AHBSlavePortParameters(
val node = AHBSlaveNode(Seq(AHBSlavePortParameters(
Seq(AHBSlaveParameters(
address = List(address),
regionType = RegionType.UNCACHED,
executable = executable,
supportsRead = TransferSizes(1, beatBytes * AHBParameters.maxTransfer),
supportsWrite = TransferSizes(1, beatBytes * AHBParameters.maxTransfer))),
beatBytes = beatBytes))
beatBytes = beatBytes)))
// We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1)

View File

@@ -9,7 +9,7 @@ import regmapper._
import scala.math.{min,max}
class AHBFanout()(implicit p: Parameters) extends LazyModule {
val node = AHBAdapterNode(
val node = AHBNexusNode(
numSlavePorts = 1 to 1,
numMasterPorts = 1 to 32,
masterFn = { case Seq(m) => m },

View File

@@ -31,16 +31,14 @@ object APBImp extends NodeImp[APBMasterPortParameters, APBSlavePortParameters, A
// Nodes implemented inside modules
case class APBIdentityNode() extends IdentityNode(APBImp)
case class APBMasterNode(portParams: APBMasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SourceNode(APBImp)(portParams, numPorts)
case class APBSlaveNode(portParams: APBSlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SinkNode(APBImp)(portParams, numPorts)
case class APBAdapterNode(
masterFn: Seq[APBMasterPortParameters] => APBMasterPortParameters,
slaveFn: Seq[APBSlavePortParameters] => APBSlavePortParameters,
case class APBMasterNode(portParams: Seq[APBMasterPortParameters]) extends SourceNode(APBImp)(portParams)
case class APBSlaveNode(portParams: Seq[APBSlavePortParameters]) extends SinkNode(APBImp)(portParams)
case class APBNexusNode(
masterFn: Seq[APBMasterPortParameters] => APBMasterPortParameters,
slaveFn: Seq[APBSlavePortParameters] => APBSlavePortParameters,
numMasterPorts: Range.Inclusive = 1 to 1,
numSlavePorts: Range.Inclusive = 1 to 1)
extends InteriorNode(APBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
extends NexusNode(APBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
// Nodes passed from an inner module
case class APBOutputNode() extends OutputNode(APBImp)

View File

@@ -9,13 +9,13 @@ import regmapper._
import scala.math.{min,max}
class APBRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
extends APBSlaveNode(APBSlavePortParameters(
extends APBSlaveNode(Seq(APBSlavePortParameters(
Seq(APBSlaveParameters(
address = Seq(address),
executable = executable,
supportsWrite = true,
supportsRead = true)),
beatBytes = beatBytes))
beatBytes = beatBytes)))
{
require (address.contiguous)

View File

@@ -8,14 +8,14 @@ import diplomacy._
class APBRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
{
val node = APBSlaveNode(APBSlavePortParameters(
val node = APBSlaveNode(Seq(APBSlavePortParameters(
Seq(APBSlaveParameters(
address = List(address),
regionType = RegionType.UNCACHED,
executable = executable,
supportsRead = true,
supportsWrite = true)),
beatBytes = beatBytes))
beatBytes = beatBytes)))
// We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1)

View File

@@ -9,7 +9,7 @@ import regmapper._
import scala.math.{min,max}
class APBFanout()(implicit p: Parameters) extends LazyModule {
val node = APBAdapterNode(
val node = APBNexusNode(
numSlavePorts = 1 to 1,
numMasterPorts = 1 to 32,
masterFn = { case Seq(m) => m },

View File

@@ -18,8 +18,8 @@ class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, p
require (r >= 0)
val node = AXI4AdapterNode(
masterFn = { case Seq(p) => p },
slaveFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
masterFn = { p => p },
slaveFn = { p => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {

View File

@@ -23,8 +23,8 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
def mapMaster(m: AXI4MasterParameters) = m.copy(aligned = true)
val node = AXI4AdapterNode(
masterFn = { case Seq(mp) => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
slaveFn = { case Seq(sp) => sp.copy(slaves = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
slaveFn = { sp => sp.copy(slaves = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@@ -32,256 +32,253 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
val out = node.bundleOut
}
val edgeOut = node.edgesOut(0)
val edgeIn = node.edgesIn(0)
val slave = edgeOut.slave
val slaves = slave.slaves
val beatBytes = slave.beatBytes
val lgBytes = log2Ceil(beatBytes)
val master = edgeIn.master
val masters = master.masters
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val slave = edgeOut.slave
val slaves = slave.slaves
val beatBytes = slave.beatBytes
val lgBytes = log2Ceil(beatBytes)
val master = edgeIn.master
val masters = master.masters
// If the user claimed this was a lite interface, then there must be only one Id
require (!lite || master.endId == 1)
// If the user claimed this was a lite interface, then there must be only one Id
require (!lite || master.endId == 1)
// We don't support fragmenting to sub-beat accesses
slaves.foreach { s =>
require (!s.supportsRead || s.supportsRead.contains(beatBytes))
require (!s.supportsWrite || s.supportsWrite.contains(beatBytes))
}
// We don't support fragmenting to sub-beat accesses
slaves.foreach { s =>
require (!s.supportsRead || s.supportsRead.contains(beatBytes))
require (!s.supportsWrite || s.supportsWrite.contains(beatBytes))
}
/* We need to decompose a request into
* FIXED => each beat is a new request
* WRAP/INCR => take xfr up to next power of two, capped by max size of target
*
* On AR and AW, we fragment one request into many
* On W we set 'last' on beats which are fragment boundaries
* On R we clear 'last' on the fragments being reassembled
* On B we clear 'valid' on the responses for the injected fragments
*
* AR=>R and AW+W=>B are completely independent state machines.
*/
/* Returns the number of beats to execute and the new address */
def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
val out = Wire(a)
val busy = RegInit(Bool(false))
val r_addr = Reg(UInt(width = a.bits.params.addrBits))
val r_len = Reg(UInt(width = AXI4Parameters.lenBits))
val len = Mux(busy, r_len, a.bits.len)
val addr = Mux(busy, r_addr, a.bits.addr)
val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
val hi = addr >> lgBytes
val alignment = hi(AXI4Parameters.lenBits-1,0)
val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1
val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))
/* We need to compute the largest transfer allowed by the AXI len.
* len+1 is the number of beats to execute.
* We want the MSB(len+1)-1; one less than the largest power of two we could execute.
* There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
* fill the bits from highest to lowest, and shift right by one bit.
/* We need to decompose a request into
* FIXED => each beat is a new request
* WRAP/INCR => take xfr up to next power of two, capped by max size of target
*
* On AR and AW, we fragment one request into many
* On W we set 'last' on beats which are fragment boundaries
* On R we clear 'last' on the fragments being reassembled
* On B we clear 'valid' on the responses for the injected fragments
*
* AR=>R and AW+W=>B are completely independent state machines.
*/
val fillLow = rightOR(len) >> 1 // set all bits in positions < a set bit
val wipeHigh = ~leftOR(~len) // clear all bits in position >= a cleared bit
val remain1 = fillLow | wipeHigh // MSB(a.len+1)-1
val align1 = ~leftOR(alignment) // transfer size limited by address alignment
val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits
// Things that cause us to degenerate to a single beat
val fixed = a.bits.burst === AXI4Parameters.BURST_FIXED
val narrow = a.bits.size =/= UInt(lgBytes)
val bad = fixed || narrow
/* Returns the number of beats to execute and the new address */
def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
val out = Wire(a)
// The number of beats-1 to execute
val beats1 = Mux(bad, UInt(0), maxSupported1)
val beats = OH1ToOH(beats1) // beats1 + 1
val busy = RegInit(Bool(false))
val r_addr = Reg(UInt(width = a.bits.params.addrBits))
val r_len = Reg(UInt(width = AXI4Parameters.lenBits))
val inc_addr = addr + (beats << a.bits.size) // address after adding transfer
val wrapMask = a.bits.bytes1() // only these bits may change, if wrapping
val mux_addr = Wire(init = inc_addr)
when (a.bits.burst === AXI4Parameters.BURST_WRAP) {
mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask)
}
when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
mux_addr := a.bits.addr
val len = Mux(busy, r_len, a.bits.len)
val addr = Mux(busy, r_addr, a.bits.addr)
val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
val hi = addr >> lgBytes
val alignment = hi(AXI4Parameters.lenBits-1,0)
val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1
val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))
/* We need to compute the largest transfer allowed by the AXI len.
* len+1 is the number of beats to execute.
* We want the MSB(len+1)-1; one less than the largest power of two we could execute.
* There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
* fill the bits from highest to lowest, and shift right by one bit.
*/
val fillLow = rightOR(len) >> 1 // set all bits in positions < a set bit
val wipeHigh = ~leftOR(~len) // clear all bits in position >= a cleared bit
val remain1 = fillLow | wipeHigh // MSB(a.len+1)-1
val align1 = ~leftOR(alignment) // transfer size limited by address alignment
val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits
// Things that cause us to degenerate to a single beat
val fixed = a.bits.burst === AXI4Parameters.BURST_FIXED
val narrow = a.bits.size =/= UInt(lgBytes)
val bad = fixed || narrow
// The number of beats-1 to execute
val beats1 = Mux(bad, UInt(0), maxSupported1)
val beats = OH1ToOH(beats1) // beats1 + 1
val inc_addr = addr + (beats << a.bits.size) // address after adding transfer
val wrapMask = a.bits.bytes1() // only these bits may change, if wrapping
val mux_addr = Wire(init = inc_addr)
when (a.bits.burst === AXI4Parameters.BURST_WRAP) {
mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask)
}
when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
mux_addr := a.bits.addr
}
val last = beats1 === len
a.ready := out.ready && last
out.valid := a.valid
out.bits := a.bits
out.bits.len := beats1
// We forcibly align every access. If the first beat was misaligned, the strb bits
// for the lower addresses must not have been set. Therefore, rounding the address
// down is harmless. We can do this after the address update algorithm, because the
// incremented values will be rounded down the same way. Furthermore, a subword
// offset cannot cause a premature wrap-around.
out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes))
when (out.fire()) {
busy := !last
r_addr := mux_addr
r_len := len - beats
}
(out, last, beats)
}
val last = beats1 === len
a.ready := out.ready && last
out.valid := a.valid
// The size to which we will fragment the access
val readSizes1 = slaves.map(s => s.supportsRead .max/beatBytes-1)
val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)
out.bits := a.bits
out.bits.len := beats1
// Indirection variables for inputs and outputs; makes transformation application easier
val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1)
val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1)
val in_w = in.w
val in_r = in.r
val in_b = in.b
val out_ar = Wire(out.ar)
val out_aw = out.aw
val out_w = out.w
val out_r = Wire(out.r)
val out_b = Wire(out.b)
// We forcibly align every access. If the first beat was misaligned, the strb bits
// for the lower addresses must not have been set. Therefore, rounding the address
// down is harmless. We can do this after the address update algorithm, because the
// incremented values will be rounded down the same way. Furthermore, a subword
// offset cannot cause a premature wrap-around.
out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes))
when (out.fire()) {
busy := !last
r_addr := mux_addr
r_len := len - beats
val depth = if (combinational) 1 else 2
// In case a slave ties arready := rready, we need a queue to break the combinational loop
// between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
if (in.ar.bits.getWidth < in.r.bits.getWidth) {
out.ar <> Queue(out_ar, depth, flow=combinational)
out_r <> out.r
} else {
out.ar <> out_ar
out_r <> Queue(out.r, depth, flow=combinational)
}
// In case a slave ties awready := bready or wready := bready, we need this queue
out_b <> Queue(out.b, depth, flow=combinational)
(out, last, beats)
}
// Sideband to track which transfers were the last fragment
def sideband() = if (lite) {
Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
} else {
Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
}
val sideband_ar_r = sideband()
val sideband_aw_b = sideband()
val in = io.in(0)
val out = io.out(0)
// AR flow control
out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
out_ar.bits := in_ar.bits
sideband_ar_r.enq.bits := ar_last
// The size to which we will fragment the access
val readSizes1 = slaves.map(s => s.supportsRead .max/beatBytes-1)
val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)
// When does W channel start counting a new transfer
val wbeats_latched = RegInit(Bool(false))
val wbeats_ready = Wire(Bool())
val wbeats_valid = Wire(Bool())
when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
when (out_aw.fire()) { wbeats_latched := Bool(false) }
// Indirection variables for inputs and outputs; makes transformation application easier
val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1)
val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1)
val in_w = in.w
val in_r = in.r
val in_b = in.b
val out_ar = Wire(out.ar)
val out_aw = out.aw
val out_w = out.w
val out_r = Wire(out.r)
val out_b = Wire(out.b)
// AW flow control
out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
wbeats_valid := in_aw.valid && !wbeats_latched
out_aw.bits := in_aw.bits
sideband_aw_b.enq.bits := aw_last
val depth = if (combinational) 1 else 2
// In case a slave ties arready := rready, we need a queue to break the combinational loop
// between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
if (in.ar.bits.getWidth < in.r.bits.getWidth) {
out.ar <> Queue(out_ar, depth, flow=combinational)
out_r <> out.r
} else {
out.ar <> out_ar
out_r <> Queue(out.r, depth, flow=combinational)
}
// In case a slave ties awready := bready or wready := bready, we need this queue
out_b <> Queue(out.b, depth, flow=combinational)
// We need to inject 'last' into the W channel fragments, count!
val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
val w_idle = w_counter === UInt(0)
val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
val w_last = w_todo === UInt(1)
w_counter := w_todo - out_w.fire()
assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
// Sideband to track which transfers were the last fragment
def sideband() = if (lite) {
Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
} else {
Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
}
val sideband_ar_r = sideband()
val sideband_aw_b = sideband()
// W flow control
wbeats_ready := w_idle
out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
out_w.bits := in_w.bits
out_w.bits.last := w_last
// We should also recreate the last last
assert (!out_w.valid || !in_w.bits.last || w_last)
// AR flow control
out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
out_ar.bits := in_ar.bits
sideband_ar_r.enq.bits := ar_last
// R flow control
val r_last = out_r.bits.last
in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
in_r.bits := out_r.bits
in_r.bits.last := r_last && sideband_ar_r.deq.bits
// When does W channel start counting a new transfer
val wbeats_latched = RegInit(Bool(false))
val wbeats_ready = Wire(Bool())
val wbeats_valid = Wire(Bool())
when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
when (out_aw.fire()) { wbeats_latched := Bool(false) }
// B flow control
val b_last = sideband_aw_b.deq.bits
in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
in_b.bits := out_b.bits
// AW flow control
out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
wbeats_valid := in_aw.valid && !wbeats_latched
out_aw.bits := in_aw.bits
sideband_aw_b.enq.bits := aw_last
// We need to inject 'last' into the W channel fragments, count!
val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
val w_idle = w_counter === UInt(0)
val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
val w_last = w_todo === UInt(1)
w_counter := w_todo - out_w.fire()
assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
// W flow control
wbeats_ready := w_idle
out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
out_w.bits := in_w.bits
out_w.bits.last := w_last
// We should also recreate the last last
assert (!out_w.valid || !in_w.bits.last || w_last)
// R flow control
val r_last = out_r.bits.last
in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
in_r.bits := out_r.bits
in_r.bits.last := r_last && sideband_ar_r.deq.bits
// B flow control
val b_last = sideband_aw_b.deq.bits
in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
in_b.bits := out_b.bits
// Merge errors from dropped B responses
val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
val resp = out_b.bits.resp | r_resp
when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
in_b.bits.resp := resp
}
}
/* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
* This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
* Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
* This guarantees that no other R responses might get mixed between fragments, ensuring that the
* interleavedId for the slaves remains unaffected by the fragmentation transformation.
* Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
* However, this design allows full concurrency in the common no-fragmentation-needed scenario.
*/
class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
{
val io = new QueueIO(Bool(), maxInFlight)
io.count := UInt(0)
val PASS = UInt(2, width = 2) // allow 'last=1' bits to enqueue, on 'last=0' if count>0 block else accept+FIND
val FIND = UInt(0, width = 2) // allow 'last=0' bits to enqueue, accept 'last=1' and switch to WAIT
val WAIT = UInt(1, width = 2) // block all access till count=0
val state = RegInit(PASS)
val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
val full = count === UInt(maxInFlight-1)
val empty = count === UInt(0)
val last = count === UInt(1)
io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
io.deq.valid := !empty
io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))
// WAIT => count > 0
assert (state =/= WAIT || count =/= UInt(0))
if (flow) {
when (io.enq.valid) {
io.deq.valid := Bool(true)
when (empty) { io.deq.bits := io.enq.bits }
// Merge errors from dropped B responses
val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
val resp = out_b.bits.resp | r_resp
when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
in_b.bits.resp := resp
}
}
count := count + io.enq.fire() - io.deq.fire()
switch (state) {
is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
is(FIND) { when (io.enq.valid && io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
is(WAIT) { when (last && io.deq.ready) { state := PASS } }
/* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
* This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
* Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
* This guarantees that no other R responses might get mixed between fragments, ensuring that the
* interleavedId for the slaves remains unaffected by the fragmentation transformation.
* Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
* However, this design allows full concurrency in the common no-fragmentation-needed scenario.
*/
class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
{
val io = new QueueIO(Bool(), maxInFlight)
io.count := UInt(0)
val PASS = UInt(2, width = 2) // allow 'last=1' bits to enqueue, on 'last=0' if count>0 block else accept+FIND
val FIND = UInt(0, width = 2) // allow 'last=0' bits to enqueue, accept 'last=1' and switch to WAIT
val WAIT = UInt(1, width = 2) // block all access till count=0
val state = RegInit(PASS)
val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
val full = count === UInt(maxInFlight-1)
val empty = count === UInt(0)
val last = count === UInt(1)
io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
io.deq.valid := !empty
io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))
// WAIT => count > 0
assert (state =/= WAIT || count =/= UInt(0))
if (flow) {
when (io.enq.valid) {
io.deq.valid := Bool(true)
when (empty) { io.deq.bits := io.enq.bits }
}
}
count := count + io.enq.fire() - io.deq.fire()
switch (state) {
is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
is(FIND) { when (io.enq.valid && io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
is(WAIT) { when (last && io.deq.ready) { state := PASS } }
}
}
}
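The recurring structural change in these adapters (seen above in AXI4Fragmenter and again below in AXI4ToTL and TLAtomicAutomata) is that per-edge logic moves from edgesIn(0)/edgesOut(0) into a loop over all connected ports. An illustrative sketch only, not part of the commit:

((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
  // edgeIn/edgeOut carry this port's negotiated parameters, so anything the adapter
  // previously computed from edgesIn(0)/edgesOut(0) now lives inside this block
  out <> in // hypothetical pass-through standing in for the real per-port adapter logic
}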

View File

@@ -31,16 +31,13 @@ object AXI4Imp extends NodeImp[AXI4MasterPortParameters, AXI4SlavePortParameters
// Nodes implemented inside modules
case class AXI4IdentityNode() extends IdentityNode(AXI4Imp)
case class AXI4MasterNode(portParams: AXI4MasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SourceNode(AXI4Imp)(portParams, numPorts)
case class AXI4SlaveNode(portParams: AXI4SlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SinkNode(AXI4Imp)(portParams, numPorts)
case class AXI4MasterNode(portParams: Seq[AXI4MasterPortParameters]) extends SourceNode(AXI4Imp)(portParams)
case class AXI4SlaveNode(portParams: Seq[AXI4SlavePortParameters]) extends SinkNode(AXI4Imp)(portParams)
case class AXI4AdapterNode(
masterFn: Seq[AXI4MasterPortParameters] => AXI4MasterPortParameters,
slaveFn: Seq[AXI4SlavePortParameters] => AXI4SlavePortParameters,
numMasterPorts: Range.Inclusive = 1 to 1,
numSlavePorts: Range.Inclusive = 1 to 1)
extends InteriorNode(AXI4Imp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
masterFn: AXI4MasterPortParameters => AXI4MasterPortParameters,
slaveFn: AXI4SlavePortParameters => AXI4SlavePortParameters,
numPorts: Range.Inclusive = 0 to 999)
extends AdapterNode(AXI4Imp)(masterFn, slaveFn, numPorts)
// Nodes passed from an inner module
case class AXI4OutputNode() extends OutputNode(AXI4Imp)

View File

@@ -9,7 +9,7 @@ import regmapper._
import scala.math.{min,max}
class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
extends AXI4SlaveNode(AXI4SlavePortParameters(
extends AXI4SlaveNode(Seq(AXI4SlavePortParameters(
Seq(AXI4SlaveParameters(
address = Seq(address),
executable = executable,
@@ -17,7 +17,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
supportsRead = TransferSizes(1, beatBytes),
interleavedId = Some(0))),
beatBytes = beatBytes,
minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
{
require (address.contiguous)

View File

@@ -8,7 +8,7 @@ import diplomacy._
class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
{
val node = AXI4SlaveNode(AXI4SlavePortParameters(
val node = AXI4SlaveNode(Seq(AXI4SlavePortParameters(
Seq(AXI4SlaveParameters(
address = List(address),
regionType = RegionType.UNCACHED,
@@ -17,7 +17,7 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
supportsWrite = TransferSizes(1, beatBytes),
interleavedId = Some(0))),
beatBytes = beatBytes,
minLatency = 0)) // B responds on same cycle
minLatency = 0))) // B responds on same cycle
// We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1)

View File

@@ -8,15 +8,15 @@ import config._
import diplomacy._
import uncore.tilelink2._
case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
dFn = { case (1, Seq(AXI4MasterPortParameters(masters))) =>
Seq(TLClientPortParameters(clients = masters.map { m =>
case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)(
dFn = { case AXI4MasterPortParameters(masters) =>
TLClientPortParameters(clients = masters.map { m =>
TLClientParameters(
sourceId = IdRange(m.id.start << 1, m.id.end << 1), // R+W ids are distinct
nodePath = m.nodePath)
}))
})
},
uFn = { case (1, Seq(mp)) => Seq(AXI4SlavePortParameters(
uFn = { mp => AXI4SlavePortParameters(
slaves = mp.managers.map { m =>
AXI4SlaveParameters(
address = m.address,
@@ -27,10 +27,8 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
supportsRead = m.supportsGet,
interleavedId = Some(0))}, // TL2 never interleaves D beats
beatBytes = mp.beatBytes,
minLatency = mp.minLatency))
},
numPO = 1 to 1,
numPI = 1 to 1)
minLatency = mp.minLatency)
})
class AXI4ToTL()(implicit p: Parameters) extends LazyModule
{
@@ -42,131 +40,129 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val numIds = edgeIn.master.endId
val beatBytes = edgeOut.manager.beatBytes
val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val numIds = edgeIn.master.endId
val beatBytes = edgeOut.manager.beatBytes
val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1
require (edgeIn.master.masters(0).aligned)
require (edgeIn.master.masters(0).aligned)
val r_out = Wire(out.a)
val r_inflight = RegInit(UInt(0, width = numIds))
val r_block = r_inflight(in.ar.bits.id)
val r_size1 = in.ar.bits.bytes1()
val r_size = OH1ToUInt(r_size1)
val r_addr = in.ar.bits.addr
val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size)
val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params)))
val r_err_out = Queue(r_err_in, 2)
val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits))
val r_last = r_count === in.ar.bits.len
val r_out = Wire(out.a)
val r_inflight = RegInit(UInt(0, width = numIds))
val r_block = r_inflight(in.ar.bits.id)
val r_size1 = in.ar.bits.bytes1()
val r_size = OH1ToUInt(r_size1)
val r_addr = in.ar.bits.addr
val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size)
val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params)))
val r_err_out = Queue(r_err_in, 2)
val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits))
val r_last = r_count === in.ar.bits.len
assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned
in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block
r_out.valid := in.ar.valid && !r_block && r_ok
r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2
r_err_in.valid := in.ar.valid && !r_block && !r_ok
r_err_in.bits.last := r_last
r_err_in.bits.id := in.ar.bits.id
assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned
in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block
r_out.valid := in.ar.valid && !r_block && r_ok
r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2
r_err_in.valid := in.ar.valid && !r_block && !r_ok
r_err_in.bits.last := r_last
r_err_in.bits.id := in.ar.bits.id
when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) }
when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) }
val w_out = Wire(out.a)
val w_inflight = RegInit(UInt(0, width = numIds))
val w_block = w_inflight(in.aw.bits.id)
val w_size1 = in.aw.bits.bytes1()
val w_size = OH1ToUInt(w_size1)
val w_addr = in.aw.bits.addr
val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size)
val w_err_in = Wire(Decoupled(in.aw.bits.id))
val w_err_out = Queue(w_err_in, 2)
val w_out = Wire(out.a)
val w_inflight = RegInit(UInt(0, width = numIds))
val w_block = w_inflight(in.aw.bits.id)
val w_size1 = in.aw.bits.bytes1()
val w_size = OH1ToUInt(w_size1)
val w_addr = in.aw.bits.addr
val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size)
val w_err_in = Wire(Decoupled(in.aw.bits.id))
val w_err_out = Queue(w_err_in, 2)
assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned
assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned
in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block
in.w.ready := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block
w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok
w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last
w_err_in.bits := in.aw.bits.id
assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned
assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned
in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block
in.w.ready := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block
w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok
w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last
w_err_in.bits := in.aw.bits.id
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))
val ok_b = Wire(in.b)
val err_b = Wire(in.b)
val mux_b = Wire(in.b)
val ok_r = Wire(in.r)
val err_r = Wire(in.r)
val mux_r = Wire(in.r)
val ok_b = Wire(in.b)
val err_b = Wire(in.b)
val mux_b = Wire(in.b)
val ok_r = Wire(in.r)
val err_r = Wire(in.r)
val mux_r = Wire(in.r)
val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY)
val d_hasData = edgeOut.hasData(out.d.bits)
val d_last = edgeOut.last(out.d)
val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY)
val d_hasData = edgeOut.hasData(out.d.bits)
val d_last = edgeOut.last(out.d)
out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready)
ok_r.valid := out.d.valid && d_hasData
ok_b.valid := out.d.valid && !d_hasData
out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready)
ok_r.valid := out.d.valid && d_hasData
ok_b.valid := out.d.valid && !d_hasData
ok_r.bits.id := out.d.bits.source >> 1
ok_r.bits.data := out.d.bits.data
ok_r.bits.resp := d_resp
ok_r.bits.last := d_last
ok_r.bits.id := out.d.bits.source >> 1
ok_r.bits.data := out.d.bits.data
ok_r.bits.resp := d_resp
ok_r.bits.last := d_last
r_err_out.ready := err_r.ready
err_r.valid := r_err_out.valid
err_r.bits.id := r_err_out.bits.id
err_r.bits.data := out.d.bits.data // don't care
err_r.bits.resp := AXI4Parameters.RESP_DECERR
err_r.bits.last := r_err_out.bits.last
r_err_out.ready := err_r.ready
err_r.valid := r_err_out.valid
err_r.bits.id := r_err_out.bits.id
err_r.bits.data := out.d.bits.data // don't care
err_r.bits.resp := AXI4Parameters.RESP_DECERR
err_r.bits.last := r_err_out.bits.last
// AXI4 must hold R to one source until last
val mux_lock_ok = RegInit(Bool(false))
val mux_lock_err = RegInit(Bool(false))
when (ok_r .fire()) { mux_lock_ok := !ok_r .bits.last }
when (err_r.fire()) { mux_lock_err := !err_r.bits.last }
assert (!mux_lock_ok || !mux_lock_err)
// AXI4 must hold R to one source until last
val mux_lock_ok = RegInit(Bool(false))
val mux_lock_err = RegInit(Bool(false))
when (ok_r .fire()) { mux_lock_ok := !ok_r .bits.last }
when (err_r.fire()) { mux_lock_err := !err_r.bits.last }
assert (!mux_lock_ok || !mux_lock_err)
// Prioritize err over ok (b/c err_r.valid comes from a register)
mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid)
mux_r.bits := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits)
ok_r.ready := mux_r.ready && (mux_lock_ok || !err_r.valid)
err_r.ready := mux_r.ready && !mux_lock_ok
// Prioritize err over ok (b/c err_r.valid comes from a register)
mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid)
mux_r.bits := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits)
ok_r.ready := mux_r.ready && (mux_lock_ok || !err_r.valid)
err_r.ready := mux_r.ready && !mux_lock_ok
// AXI4 needs irrevocable behaviour
in.r <> Queue.irrevocable(mux_r, 1, flow=true)
// AXI4 needs irrevocable behaviour
in.r <> Queue.irrevocable(mux_r, 1, flow=true)
ok_b.bits.id := out.d.bits.source >> 1
ok_b.bits.resp := d_resp
ok_b.bits.id := out.d.bits.source >> 1
ok_b.bits.resp := d_resp
w_err_out.ready := err_b.ready
err_b.valid := w_err_out.valid
err_b.bits.id := w_err_out.bits
err_b.bits.resp := AXI4Parameters.RESP_DECERR
w_err_out.ready := err_b.ready
err_b.valid := w_err_out.valid
err_b.bits.id := w_err_out.bits
err_b.bits.resp := AXI4Parameters.RESP_DECERR
// Prioritize err over ok (b/c err_b.valid comes from a register)
mux_b.valid := ok_b.valid || err_b.valid
mux_b.bits := Mux(err_b.valid, err_b.bits, ok_b.bits)
ok_b.ready := mux_b.ready && !err_b.valid
err_b.ready := mux_b.ready
// Prioritize err over ok (b/c err_b.valid comes from a register)
mux_b.valid := ok_b.valid || err_b.valid
mux_b.bits := Mux(err_b.valid, err_b.bits, ok_b.bits)
ok_b.ready := mux_b.ready && !err_b.valid
err_b.ready := mux_b.ready
// AXI4 needs irrevocable behaviour
in.b <> Queue.irrevocable(mux_b, 1, flow=true)
// AXI4 needs irrevocable behaviour
in.b <> Queue.irrevocable(mux_b, 1, flow=true)
// Update flight trackers
val r_set = in.ar.fire().asUInt << in.ar.bits.id
val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id
r_inflight := (r_inflight | r_set) & ~r_clr
val w_set = in.aw.fire().asUInt << in.aw.bits.id
val w_clr = in.b.fire().asUInt << in.b.bits.id
w_inflight := (w_inflight | w_set) & ~w_clr
// Update flight trackers
val r_set = in.ar.fire().asUInt << in.ar.bits.id
val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id
r_inflight := (r_inflight | r_set) & ~r_clr
val w_set = in.aw.fire().asUInt << in.aw.bits.id
val w_clr = in.b.fire().asUInt << in.b.bits.id
w_inflight := (w_inflight | w_set) & ~w_clr
// Unused channels
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
// Unused channels
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
}
}
}

View File

@@ -62,7 +62,7 @@ class TLPLIC(supervisor: Boolean, maxPriorities: Int, address: BigInt = 0xC00000
beatBytes = p(rocket.XLen)/8,
undefZero = false)
val intnode = IntAdapterNode(
val intnode = IntNexusNode(
numSourcePorts = 0 to 1024,
numSinkPorts = 0 to 1024,
sourceFn = { _ => IntSourcePortParameters(Seq(IntSourceParameters(contextsPerHart))) },

View File

@@ -6,6 +6,7 @@ import Chisel._
import chisel3.internal.sourceinfo.SourceInfo
import config._
import diplomacy._
import util.GenericParameterizedBundle
import scala.math.{min,max}
// Ensures that all downstream RW managers support Atomic operations.
@@ -15,8 +16,8 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
require (concurrency >= 1)
val node = TLAdapterNode(
clientFn = { case Seq(cp) => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) },
managerFn = { case Seq(mp) => mp.copy(managers = mp.managers.map { m =>
clientFn = { case cp => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) },
managerFn = { case mp => mp.copy(managers = mp.managers.map { m =>
val ourSupport = TransferSizes(1, mp.beatBytes)
def widen(x: TransferSizes) = if (passthrough && x.min <= 2*mp.beatBytes) TransferSizes(1, max(mp.beatBytes, x.max)) else ourSupport
val canDoit = m.supportsPutFull.contains(ourSupport) && m.supportsGet.contains(ourSupport)
@@ -33,245 +34,232 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val managers = edgeOut.manager.managers
val beatBytes = edgeOut.manager.beatBytes
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val managers = edgeOut.manager.managers
val beatBytes = edgeOut.manager.beatBytes
// To which managers are we adding atomic support?
val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes)
val managersNeedingHelp = managers.filter { m =>
m.supportsPutFull.contains(ourSupport) &&
m.supportsGet.contains(ourSupport) &&
((logical && !m.supportsLogical .contains(ourSupport)) ||
(arithmetic && !m.supportsArithmetic.contains(ourSupport)) ||
!passthrough) // we will do atomics for everyone we can
}
// We cannot add atomics to a non-FIFO manager
managersNeedingHelp foreach { m => require (m.fifoId.isDefined) }
// We need to preserve FIFO semantics across FIFO domains, not managers
// Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
// If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
// Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct
// Don't overprovision the CAM
val camSize = min(domainsNeedingHelp.size, concurrency)
// Compact the fifoIds to only those we care about
val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))
// CAM entry state machine
val FREE = UInt(0) // unused waiting on Atomic from A
val GET = UInt(3) // Get sent down A waiting on AccessDataAck from D
val AMO = UInt(2) // AccessDataAck sent up D waiting for A availability
val ACK = UInt(1) // Put sent down A waiting for PutAck from D
def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
if (!passthrough) Bool(false) else
if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else
Mux1H(select, x.map(_.containsLg(lgSize)))
// Do we need to do anything at all?
if (camSize > 0) {
class CAM_S extends Bundle {
val state = UInt(width = 2)
}
class CAM_A extends Bundle {
val bits = new TLBundleA(out.a.bits.params)
val fifoId = UInt(width = log2Up(domainsNeedingHelp.size))
val lut = UInt(width = 4)
}
class CAM_D extends Bundle {
val data = UInt(width = out.a.bits.params.dataBits)
// To which managers are we adding atomic support?
val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes)
val managersNeedingHelp = managers.filter { m =>
m.supportsPutFull.contains(ourSupport) &&
m.supportsGet.contains(ourSupport) &&
((logical && !m.supportsLogical .contains(ourSupport)) ||
(arithmetic && !m.supportsArithmetic.contains(ourSupport)) ||
!passthrough) // we will do atomics for everyone we can
}
// We cannot add atomics to a non-FIFO manager
managersNeedingHelp foreach { m => require (m.fifoId.isDefined) }
// We need to preserve FIFO semantics across FIFO domains, not managers
// Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
// If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
// Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct
// Don't overprovision the CAM
val camSize = min(domainsNeedingHelp.size, concurrency)
// Compact the fifoIds to only those we care about
val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))
val initval = Wire(new CAM_S)
initval.state := FREE
val cam_s = RegInit(Vec.fill(camSize)(initval))
val cam_a = Reg(Vec(camSize, new CAM_A))
val cam_d = Reg(Vec(camSize, new CAM_D))
// CAM entry state machine
val FREE = UInt(0) // unused waiting on Atomic from A
val GET = UInt(3) // Get sent down A waiting on AccessDataAck from D
val AMO = UInt(2) // AccessDataAck sent up D waiting for A availability
val ACK = UInt(1) // Put sent down A waiting for PutAck from D
val cam_free = cam_s.map(_.state === FREE)
val cam_amo = cam_s.map(_.state === AMO)
val cam_abusy = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked
val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries
def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
if (!passthrough) Bool(false) else
if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else
Mux1H(select, x.map(_.containsLg(lgSize)))
// Can the manager already handle this message?
val a_size = edgeIn.size(in.a.bits)
val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits))
val a_canLogical = helper(a_select, managers.map(_.supportsLogical), a_size)
val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size)
val a_isLogical = in.a.bits.opcode === TLMessages.LogicalData
val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData
val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true)))
val params = TLAtomicAutomata.CAMParams(out.a.bits.params, domainsNeedingHelp.size)
// Do we need to do anything at all?
if (camSize > 0) {
val initval = Wire(new TLAtomicAutomata.CAM_S(params))
initval.state := FREE
val cam_s = RegInit(Vec.fill(camSize)(initval))
val cam_a = Reg(Vec(camSize, new TLAtomicAutomata.CAM_A(params)))
val cam_d = Reg(Vec(camSize, new TLAtomicAutomata.CAM_D(params)))
// Must we do a Put?
val a_cam_any_put = cam_amo.reduce(_ || _)
val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init
val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b }
val a_cam_a = PriorityMux(cam_amo, cam_a)
val a_cam_d = PriorityMux(cam_amo, cam_d)
val a_a = a_cam_a.bits.data
val a_d = a_cam_d.data
val cam_free = cam_s.map(_.state === FREE)
val cam_amo = cam_s.map(_.state === AMO)
val cam_abusy = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked
val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries
// Does the A request conflict with an inflight AMO?
val a_fifoId = Mux1H(a_select, camFifoIds)
val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_)
// Can the manager already handle this message?
val a_size = edgeIn.size(in.a.bits)
val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits))
val a_canLogical = helper(a_select, managers.map(_.supportsLogical), a_size)
val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size)
val a_isLogical = in.a.bits.opcode === TLMessages.LogicalData
val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData
val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true)))
// (Where) are we allocating in the CAM?
val a_cam_any_free = cam_free.reduce(_ || _)
val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init
val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b }
// Must we do a Put?
val a_cam_any_put = cam_amo.reduce(_ || _)
val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init
val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b }
val a_cam_a = PriorityMux(cam_amo, cam_a)
val a_cam_d = PriorityMux(cam_amo, cam_d)
val a_a = a_cam_a.bits.data
val a_d = a_cam_d.data
// Logical AMO
val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) }
val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse)
// Does the A request conflict with an inflight AMO?
val a_fifoId = Mux1H(a_select, camFifoIds)
val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_)
// Arithmetic AMO
val unsigned = a_cam_a.bits.param(1)
val take_max = a_cam_a.bits.param(0)
val adder = a_cam_a.bits.param(2)
val mask = a_cam_a.bits.mask
val signSel = ~(~mask | (mask >> 1))
val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
// Move the selected sign bit into the first byte position it will extend
val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
val signext_a = FillInterleaved(8, leftOR(signbit_a))
val signext_d = FillInterleaved(8, leftOR(signbit_d))
// NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
val wide_mask = FillInterleaved(8, mask)
val a_a_ext = (a_a & wide_mask) | signext_a
val a_d_ext = (a_d & wide_mask) | signext_d
val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
val adder_out = a_a_ext + a_d_inv
val h = 8*beatBytes-1 // now sign-extended; use biggest bit
val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
val pick_a = take_max === a_bigger
val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))
// (Where) are we allocating in the CAM?
val a_cam_any_free = cam_free.reduce(_ || _)
val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init
val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b }
// AMO result data
val amo_data =
if (!logical) arith_out else
if (!arithmetic) logic_out else
Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)
// Logical AMO
val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) }
val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse)
// Potentially mutate the message from inner
val source_i = Wire(in.a)
val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free)
in.a.ready := source_i.ready && a_allow
source_i.valid := in.a.valid && a_allow
source_i.bits := in.a.bits
when (!a_isSupported) { // minimal mux difference
source_i.bits.opcode := TLMessages.Get
source_i.bits.param := UInt(0)
}
// Arithmetic AMO
val unsigned = a_cam_a.bits.param(1)
val take_max = a_cam_a.bits.param(0)
val adder = a_cam_a.bits.param(2)
val mask = a_cam_a.bits.mask
val signSel = ~(~mask | (mask >> 1))
val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
// Move the selected sign bit into the first byte position it will extend
val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
val signext_a = FillInterleaved(8, leftOR(signbit_a))
val signext_d = FillInterleaved(8, leftOR(signbit_d))
// NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
val wide_mask = FillInterleaved(8, mask)
val a_a_ext = (a_a & wide_mask) | signext_a
val a_d_ext = (a_d & wide_mask) | signext_d
val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
val adder_out = a_a_ext + a_d_inv
val h = 8*beatBytes-1 // now sign-extended; use biggest bit
val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
val pick_a = take_max === a_bigger
val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))
// AMO result data
val amo_data =
if (!logical)    arith_out else
if (!arithmetic) logic_out else
Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)

// Potentially mutate the message from inner
val source_i = Wire(in.a)
val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free)
in.a.ready := source_i.ready && a_allow
source_i.valid := in.a.valid && a_allow
source_i.bits := in.a.bits
when (!a_isSupported) { // minimal mux difference
source_i.bits.opcode := TLMessages.Get
source_i.bits.param := UInt(0)
}

// Potentially take the message from the CAM
val source_c = Wire(in.a)
source_c.valid := a_cam_any_put
source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2

// Finishing an AMO from the CAM has highest priority
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), source_c), (edgeOut.numBeats1(in.a.bits), source_i))

// Capture the A state into the CAM
when (source_i.fire() && !a_isSupported) {
(a_cam_sel_free zip cam_a) foreach { case (en, r) =>
when (en) {
r.fifoId := a_fifoId
r.bits := in.a.bits
r.lut := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array(
TLAtomics.AND  -> UInt(0x8),
TLAtomics.OR   -> UInt(0xe),
TLAtomics.XOR  -> UInt(0x6),
TLAtomics.SWAP -> UInt(0xc)))
}
}
(a_cam_sel_free zip cam_s) foreach { case (en, r) =>
when (en) {
r.state := GET
}
}
}

// Advance the put state
when (source_c.fire()) {
(a_cam_sel_put zip cam_s) foreach { case (en, r) =>
when (en) {
r.state := ACK
}
}
}

// We need to deal with a potential D response in the same cycle as the A request
val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source)
val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b }
val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data))
val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else
out.d.bits.source === in.a.bits.source && in.a.valid && !a_isSupported
val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) }
val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _)
val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData
val d_ack = out.d.bits.opcode === TLMessages.AccessAck

when (out.d.fire()) {
(d_cam_sel zip cam_d) foreach { case (en, r) =>
when (en && d_ackd) {
r.data := out.d.bits.data
}
}
(d_cam_sel zip cam_s) foreach { case (en, r) =>
when (en) {
// Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle
r.state := Mux(d_ackd, AMO, FREE)
}
}
}

val d_drop = d_ackd && d_cam_sel_any
val d_replace = d_ack && d_cam_sel_match.reduce(_ || _)

in.d.valid := out.d.valid && !d_drop
out.d.ready := in.d.ready || d_drop

in.d.bits := out.d.bits
when (d_replace) { // minimal muxes
in.d.bits.opcode := TLMessages.AccessAckData
in.d.bits.data := d_cam_data
}
} else {
out.a.valid := in.a.valid
in.a.ready := out.a.ready
out.a.bits := in.a.bits

in.d.valid := out.d.valid
out.d.ready := in.d.ready
in.d.bits := out.d.bits
}

if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
in.b.valid := out.b.valid
out.b.ready := in.b.ready
in.b.bits := out.b.bits

out.c.valid := in.c.valid
in.c.ready := out.c.ready
out.c.bits := in.c.bits

out.e.valid := in.e.valid
in.e.ready := out.e.ready
out.e.bits := in.e.bits
} else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)

out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
}
}
}
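The four r.lut constants above are per-bit truth tables for the logical AMOs; in the upstream source (not shown in this hunk) each result bit is assumed to index the table with Cat(a, d). A purely illustrative Scala check of those encodings:

  // Illustration only: rebuild the 4-bit LUT values, MSB = (a=1, d=1).
  def lut(op: (Boolean, Boolean) => Boolean): Int =
    Seq((true, true), (true, false), (false, true), (false, false))
      .map { case (a, d) => if (op(a, d)) 1 else 0 }
      .foldLeft(0)((acc, bit) => (acc << 1) | bit)

  lut(_ && _)       // 0x8: AND
  lut(_ || _)       // 0xe: OR
  lut(_ ^ _)        // 0x6: XOR
  lut((a, _) => a)  // 0xc: SWAP keeps the incoming (A-channel) operand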
@ -284,6 +272,20 @@ object TLAtomicAutomata
atomics.node := x
atomics.node
}
case class CAMParams(a: TLBundleParameters, domainsNeedingHelp: Int)
class CAM_S(params: CAMParams) extends GenericParameterizedBundle(params) {
val state = UInt(width = 2)
}
class CAM_A(params: CAMParams) extends GenericParameterizedBundle(params) {
val bits = new TLBundleA(params.a)
val fifoId = UInt(width = log2Up(params.domainsNeedingHelp))
val lut = UInt(width = 4)
}
class CAM_D(params: CAMParams) extends GenericParameterizedBundle(params) {
val data = UInt(width = params.a.dataBits)
}
}
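A hedged wiring sketch for the helper object above (placeholder names, default arguments assumed): the automata sits in front of a manager with no native AMO support and services Logical/Arithmetic requests itself.

  // Sketch only: `client.node` is assumed to be the outward TL node of some master;
  // TLRAM supports Get/Put but not AMOs, so the automata fills the gap.
  val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff)))
  ram.node := TLAtomicAutomata()(client.node)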
/** Synthesizable unit tests */

View File

@ -13,11 +13,11 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa
require (numTrackers > 0)
val node = TLAdapterNode(
clientFn = { case Seq(cp) =>
clientFn = { cp =>
cp.copy(clients = Seq(TLClientParameters(
sourceId = IdRange(0, 1 << log2Ceil(cp.endSourceId*4)))))
},
managerFn = { case Seq(mp) =>
managerFn = { mp =>
mp.copy(
endSinkId = numTrackers,
managers = mp.managers.map { m =>
@ -56,154 +56,152 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val clients = edgeIn.client.clients
val managers = edgeOut.manager.managers
val lineShift = log2Ceil(lineBytes)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val clients = edgeIn.client.clients
val managers = edgeOut.manager.managers
val lineShift = log2Ceil(lineBytes)
import TLBroadcastConstants._
import TLBroadcastConstants._
require (lineBytes >= edgeOut.manager.beatBytes)
// For the probe walker, we need to identify all the caches
val caches = clients.filter(_.supportsProbe).map(_.sourceId)
val cache_targets = caches.map(c => UInt(c.start))
require (lineBytes >= edgeOut.manager.beatBytes)
// For the probe walker, we need to identify all the caches
val caches = clients.filter(_.supportsProbe).map(_.sourceId)
val cache_targets = caches.map(c => UInt(c.start))
// Create the request tracker queues
val trackers = Seq.tabulate(numTrackers) { id =>
Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io
// Create the request tracker queues
val trackers = Seq.tabulate(numTrackers) { id =>
Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io
}
// We always accept E
in.e.ready := Bool(true)
(trackers zip UIntToOH(in.e.bits.sink).toBools) foreach { case (tracker, select) =>
tracker.e_last := select && in.e.fire()
}
// Depending on the high source bits, we might transform D
val d_high = log2Ceil(edgeIn.client.endSourceId)
val d_what = out.d.bits.source(d_high+1, d_high)
val d_drop = d_what === DROP
val d_hasData = edgeOut.hasData(out.d.bits)
val d_normal = Wire(in.d)
val d_trackerOH = Vec(trackers.map { t => !t.idle && t.source === d_normal.bits.source }).asUInt
assert (!out.d.valid || !d_drop || out.d.bits.opcode === TLMessages.AccessAck)
out.d.ready := d_normal.ready || d_drop
d_normal.valid := out.d.valid && !d_drop
d_normal.bits := out.d.bits // truncates source
when (d_what(1)) { // TRANSFORM_*
d_normal.bits.opcode := Mux(d_hasData, TLMessages.GrantData, TLMessages.ReleaseAck)
d_normal.bits.param := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0))
}
d_normal.bits.sink := OHToUInt(d_trackerOH)
assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck))
// A tracker response is anything neither dropped nor a ReleaseAck
val d_response = d_hasData || !d_what(1)
val d_last = edgeIn.last(d_normal)
(trackers zip d_trackerOH.toBools) foreach { case (tracker, select) =>
tracker.d_last := select && d_normal.fire() && d_response && d_last
tracker.probedack := select && out.d.fire() && d_drop
}
// Incoming C can be:
// ProbeAck => decrement tracker, drop
// ProbeAckData => decrement tracker, send out A as PutFull(DROP)
// ReleaseData => send out A as PutFull(TRANSFORM)
// Release => send out D as ReleaseAck
val c_probeack = in.c.bits.opcode === TLMessages.ProbeAck
val c_probeackdata = in.c.bits.opcode === TLMessages.ProbeAckData
val c_releasedata = in.c.bits.opcode === TLMessages.ReleaseData
val c_release = in.c.bits.opcode === TLMessages.Release
val c_trackerOH = trackers.map { t => t.line === (in.c.bits.address >> lineShift) }
val c_trackerSrc = Mux1H(c_trackerOH, trackers.map { _.source })
// Decrement the tracker's outstanding probe counter
(trackers zip c_trackerOH) foreach { case (tracker, select) =>
tracker.probenack := in.c.fire() && c_probeack && select
}
val releaseack = Wire(in.d)
val putfull = Wire(out.a)
in.c.ready := c_probeack || Mux(c_release, releaseack.ready, putfull.ready)
releaseack.valid := in.c.valid && c_release
releaseack.bits := edgeIn.ReleaseAck(in.c.bits.address, UInt(0), in.c.bits.source, in.c.bits.size)
val put_what = Mux(c_releasedata, TRANSFORM_B, DROP)
val put_who = Mux(c_releasedata, in.c.bits.source, c_trackerSrc)
putfull.valid := in.c.valid && (c_probeackdata || c_releasedata)
putfull.bits := edgeOut.Put(Cat(put_what, put_who), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2
// Combine ReleaseAck or the modified D
TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
// Combine the PutFull with the trackers
TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))
// The Probe FSM walks all caches and probes them
val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
val probe_line = Reg(UInt())
val probe_perms = Reg(UInt(width = 2))
val probe_next = probe_todo & ~(leftOR(probe_todo) << 1)
val probe_busy = probe_todo.orR()
val probe_target = if (caches.size == 0) UInt(0) else Mux1H(probe_next, cache_targets)
// Probe whatever the FSM wants to do next
in.b.valid := probe_busy
if (caches.size != 0) {
in.b.bits := edgeIn.Probe(probe_line << lineShift, probe_target, UInt(lineShift), probe_perms)._2
}
when (in.b.fire()) { probe_todo := probe_todo & ~probe_next }
// Which cache does a request come from?
val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt
val a_first = edgeIn.first(in.a)
// To accept a request from A, the probe FSM must be idle and there must be a matching tracker
val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt
val freeTracker = freeTrackers.orR()
val matchTrackers = Vec(trackers.map { t => t.line === in.a.bits.address >> lineShift }).asUInt
val matchTracker = matchTrackers.orR()
val allocTracker = freeTrackers & ~(leftOR(freeTrackers) << 1)
val selectTracker = Mux(matchTracker, matchTrackers, allocTracker)
val trackerReady = Vec(trackers.map(_.in_a.ready)).asUInt
in.a.ready := (!a_first || !probe_busy) && (selectTracker & trackerReady).orR()
(trackers zip selectTracker.toBools) foreach { case (t, select) =>
t.in_a.valid := in.a.valid && select && (!a_first || !probe_busy)
t.in_a.bits := in.a.bits
t.in_a_first := a_first
t.probe := (if (caches.size == 0) UInt(0) else Mux(a_cache.orR(), UInt(caches.size-1), UInt(caches.size)))
}
when (in.a.fire() && a_first) {
probe_todo := ~a_cache // probe all but the cache who poked us
probe_line := in.a.bits.address >> lineShift
probe_perms := MuxLookup(in.a.bits.opcode, Wire(UInt(width = 2)), Array(
TLMessages.PutFullData -> TLPermissions.toN,
TLMessages.PutPartialData -> TLPermissions.toN,
TLMessages.ArithmeticData -> TLPermissions.toN,
TLMessages.LogicalData -> TLPermissions.toN,
TLMessages.Get -> TLPermissions.toB,
TLMessages.Hint -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
TLHints.PREFETCH_READ -> TLPermissions.toB,
TLHints.PREFETCH_WRITE -> TLPermissions.toN)),
TLMessages.Acquire -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
TLPermissions.NtoB -> TLPermissions.toB,
TLPermissions.NtoT -> TLPermissions.toN,
TLPermissions.BtoT -> TLPermissions.toN))))
}
// The outer TL connections may not be cached
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
}
}
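The pattern repeated across these files is visible above: instead of binding io.in(0) and node.edgesIn(0) once, the adapter body now runs once per connected port. A condensed sketch of that idiom, with the per-port body elided:

  // Sketch of the per-port adapter idiom used throughout this patch.
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val in  = node.bundleIn
      val out = node.bundleOut
    }
    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach {
      case ((in, out), (edgeIn, edgeOut)) =>
        // ... per-port logic, exactly as in the bodies shown above ...
    }
  }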

View File

@ -18,8 +18,8 @@ class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe:
require (e >= 0)
val node = TLAdapterNode(
clientFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
managerFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
clientFn = { p => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
managerFn = { p => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
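The TLBuffer hunk above shows the other half of the API change: clientFn and managerFn now receive one port's parameters directly rather than a Seq of exactly one. A minimal sketch of an adapter node under the new form, with the old form kept as a comment:

  val node = TLAdapterNode(
    // old API: clientFn = { case Seq(p) => p }, managerFn = { case Seq(p) => p }
    clientFn  = { p => p },
    managerFn = { p => p })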

View File

@ -12,10 +12,10 @@ import TLMessages._
class TLCacheCork(unsafe: Boolean = false)(implicit p: Parameters) extends LazyModule
{
val node = TLAdapterNode(
clientFn = { case Seq(cp) =>
clientFn = { case cp =>
cp.copy(clients = cp.clients.map { c => c.copy(
sourceId = IdRange(c.sourceId.start*2, c.sourceId.end*2))})},
managerFn = { case Seq(mp) =>
managerFn = { case mp =>
mp.copy(managers = mp.managers.map { m => m.copy(
regionType = if (m.regionType == RegionType.UNCACHED) RegionType.TRACKED else m.regionType,
supportsAcquireB = m.supportsGet,
@ -27,93 +27,89 @@ class TLCacheCork(unsafe: Boolean = false)(implicit p: Parameters) extends LazyM
val out = node.bundleOut
}
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
require (edgeIn.client.clients.size == 1 || unsafe, "Only one client can safely use a TLCacheCork")
require (edgeIn.client.clients.filter(_.supportsProbe).size == 1, "Only one caching client allowed")
edgeOut.manager.managers.foreach { case m =>
require (!m.supportsAcquireB, "Cannot support caches beyond the Cork")
}
// The Cork turns [Acquire=>Get] => [AccessAckData=>GrantData]
// and [ReleaseData=>PutFullData] => [AccessAck=>ReleaseAck]
// We need to encode information sufficient to reverse the transformation in output.
// A caveat is that we get Acquire+Release with the same source and must keep the
// source unique after transformation onto the A channel.
// The coding scheme is:
// Put: 1, Release: 0 => AccessAck
// *: 0, Acquire: 1 => AccessAckData
// Take requests from A to A
val isPut = in.a.bits.opcode === PutFullData || in.a.bits.opcode === PutPartialData
val a_a = Wire(out.a)
a_a <> in.a
a_a.bits.source := in.a.bits.source << 1 | Mux(isPut, UInt(1), UInt(0))
// Transform Acquire into Get
when (in.a.bits.opcode === Acquire) {
a_a.bits.opcode := Get
a_a.bits.param := UInt(0)
a_a.bits.source := in.a.bits.source << 1 | UInt(1)
}
// Take ReleaseData from C to A; Release from C to D
val c_a = Wire(out.a)
c_a.valid := in.c.valid && in.c.bits.opcode === ReleaseData
c_a.bits.opcode := PutFullData
c_a.bits.param := UInt(0)
c_a.bits.size := in.c.bits.size
c_a.bits.source := in.c.bits.source << 1
c_a.bits.address := in.c.bits.address
c_a.bits.mask := edgeOut.mask(in.c.bits.address, in.c.bits.size)
c_a.bits.data := in.c.bits.data
val c_d = Wire(in.d)
c_d.valid := in.c.valid && in.c.bits.opcode === Release
c_d.bits.opcode := ReleaseAck
c_d.bits.param := UInt(0)
c_d.bits.size := in.c.bits.size
c_d.bits.source := in.c.bits.source
c_d.bits.sink := UInt(0)
c_d.bits.addr_lo := in.c.bits.address
c_d.bits.data := UInt(0)
c_d.bits.error := Bool(false)
assert (!in.c.valid || in.c.bits.opcode === Release || in.c.bits.opcode === ReleaseData)
in.c.ready := Mux(in.c.bits.opcode === Release, c_d.ready, c_a.ready)
// Discard E
in.e.ready := Bool(true)
// Block B; should never happen
out.b.ready := Bool(false)
assert (!out.b.valid)
// Take responses from D and transform them
val d_d = Wire(in.d)
d_d <> out.d
d_d.bits.source := out.d.bits.source >> 1
when (out.d.bits.opcode === AccessAckData && out.d.bits.source(0)) {
d_d.bits.opcode := GrantData
d_d.bits.param := TLPermissions.toT
}
when (out.d.bits.opcode === AccessAck && !out.d.bits.source(0)) {
d_d.bits.opcode := ReleaseAck
}
// Combine the sources of messages into the channels
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (edgeOut.numBeats1(c_a.bits), c_a), (edgeOut.numBeats1(a_a.bits), a_a))
TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeIn .numBeats1(d_d.bits), d_d), (UInt(0), Queue(c_d, 2)))
// Tie off unused ports
in.b.valid := Bool(false)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
}
}
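The Cork's source recoding above is easiest to see with concrete numbers; this is only an illustration of the shifts in the hunk, in plain Scala:

  // Inner source 3: Acquire vs ReleaseData after the << 1 recoding above.
  val inner      = 3
  val acquireSrc = (inner << 1) | 1  // 7: low bit set, so AccessAckData comes back as GrantData
  val releaseSrc =  inner << 1       // 6: low bit clear, so AccessAck comes back as ReleaseAck
  assert((acquireSrc >> 1) == inner && (releaseSrc >> 1) == inner)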

View File

@ -11,8 +11,8 @@ import scala.math.{min,max}
class TLFilter(select: AddressSet)(implicit p: Parameters) extends LazyModule
{
val node = TLAdapterNode(
clientFn = { case Seq(cp) => cp },
managerFn = { case Seq(mp) =>
clientFn = { cp => cp },
managerFn = { mp =>
mp.copy(managers = mp.managers.map { m =>
val filtered = m.address.map(_.intersect(select)).flatten
val alignment = select.alignment /* alignment 0 means 'select' selected everything */
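TLFilter's managerFn above keeps only the parts of each manager's address ranges that fall inside select; AddressSet.intersect returns an Option, so non-overlapping ranges simply drop out. A small illustrative use (values are examples only):

  val select   = AddressSet(0x8000, 0x7fff)                // upper 32 KiB window
  val device   = AddressSet(0x0,    0xffff)                // a 64 KiB device
  val filtered = Seq(device).flatMap(_.intersect(select))  // Seq(AddressSet(0x8000, 0x7fff))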

View File

@ -41,8 +41,8 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
sourceId = IdRange(c.sourceId.start << fragmentBits, c.sourceId.end << fragmentBits))
val node = TLAdapterNode(
clientFn = { case Seq(c) => c.copy(clients = c.clients.map(mapClient)) },
managerFn = { case Seq(m) => m.copy(managers = m.managers.map(mapManager)) })
clientFn = { c => c.copy(clients = c.clients.map(mapClient)) },
managerFn = { m => m.copy(managers = m.managers.map(mapManager)) })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@ -50,204 +50,201 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
val out = node.bundleOut
}
// All managers must share a common FIFO domain (responses might end up interleaved)
val edgeOut = node.edgesOut(0)
val edgeIn = node.edgesIn(0)
val manager = edgeOut.manager
val managers = manager.managers
val beatBytes = manager.beatBytes
val fifoId = managers(0).fifoId
require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _))
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
// All managers must share a common FIFO domain (responses might end up interleaved)
val manager = edgeOut.manager
val managers = manager.managers
val beatBytes = manager.beatBytes
val fifoId = managers(0).fifoId
require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _))
// We don't support fragmenting to sub-beat accesses
require (minSize >= beatBytes)
// We can't support devices which are cached on both sides of us
require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe)
// We don't support fragmenting to sub-beat accesses
require (minSize >= beatBytes)
// We can't support devices which are cached on both sides of us
require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe)
/* The Fragmenter is a bit tricky, because there are 5 sizes in play:
* max size -- the maximum transfer size possible
* orig size -- the original pre-fragmenter size
* frag size -- the modified post-fragmenter size
* min size -- the threshold below which frag=orig
* beat size -- the amount transferred on any given beat
*
* The relationships are as follows:
* max >= orig >= frag
* max > min >= beat
* It IS possible that orig <= min (then frag=orig; ie: no fragmentation)
*
* The fragment# (sent via TL.source) is measured in multiples of min size.
* Meanwhile, to track the progress, counters measure in multiples of beat size.
*
* Here is an example of a bus with max=256, min=8, beat=4 and a device supporting 16.
*
* in.A out.A (frag#) out.D (frag#) in.D gen# ack#
* get64 get16 6 ackD16 6 ackD64 12 15
* ackD16 6 ackD64 14
* ackD16 6 ackD64 13
* ackD16 6 ackD64 12
* get16 4 ackD16 4 ackD64 8 11
* ackD16 4 ackD64 10
* ackD16 4 ackD64 9
* ackD16 4 ackD64 8
* get16 2 ackD16 2 ackD64 4 7
* ackD16 2 ackD64 6
* ackD16 2 ackD64 5
* ackD16 2 ackD64 4
* get16 0 ackD16 0 ackD64 0 3
* ackD16 0 ackD64 2
* ackD16 0 ackD64 1
* ackD16 0 ackD64 0
*
* get8 get8 0 ackD8 0 ackD8 0 1
* ackD8 0 ackD8 0
*
* get4 get4 0 ackD4 0 ackD4 0 0
* get1 get1 0 ackD1 0 ackD1 0 0
*
* put64 put16 6 15
* put64 put16 6 14
* put64 put16 6 13
* put64 put16 6 ack16 6 12 12
* put64 put16 4 11
* put64 put16 4 10
* put64 put16 4 9
* put64 put16 4 ack16 4 8 8
* put64 put16 2 7
* put64 put16 2 6
* put64 put16 2 5
* put64 put16 2 ack16 2 4 4
* put64 put16 0 3
* put64 put16 0 2
* put64 put16 0 1
* put64 put16 0 ack16 0 ack64 0 0
*
* put8 put8 0 1
* put8 put8 0 ack8 0 ack8 0 0
*
* put4 put4 0 ack4 0 ack4 0 0
* put1 put1 0 ack1 0 ack1 0 0
*/
val in = io.in(0)
val out = io.out(0)
val counterBits = log2Up(maxSize/beatBytes)
val maxDownSize = if (alwaysMin) minSize else manager.maxTransfer
val counterBits = log2Up(maxSize/beatBytes)
val maxDownSize = if (alwaysMin) minSize else manager.maxTransfer
// First, handle the return path
val acknum = RegInit(UInt(0, width = counterBits))
val dOrig = Reg(UInt())
val dFragnum = out.d.bits.source(fragmentBits-1, 0)
val dFirst = acknum === UInt(0)
val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1)
val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize))
val dHasData = edgeOut.hasData(out.d.bits)
// First, handle the return path
val acknum = RegInit(UInt(0, width = counterBits))
val dOrig = Reg(UInt())
val dFragnum = out.d.bits.source(fragmentBits-1, 0)
val dFirst = acknum === UInt(0)
val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1)
val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize))
val dHasData = edgeOut.hasData(out.d.bits)
// calculate new acknum
val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes)
val acknum_size = dsizeOH1 >> log2Ceil(beatBytes)
assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0))
val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0))
val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes))
// calculate the original size
val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1)
// calculate new acknum
val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes)
val acknum_size = dsizeOH1 >> log2Ceil(beatBytes)
assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0))
val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0))
val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes))
// calculate the original size
val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1)
when (out.d.fire()) {
acknum := Mux(dFirst, dFirst_acknum, acknum - ack_decrement)
when (dFirst) { dOrig := dFirst_size }
}
// Swallow up non-data ack fragments
val drop = !dHasData && (dFragnum =/= UInt(0))
out.d.ready := in.d.ready || drop
in.d.valid := out.d.valid && !drop
in.d.bits := out.d.bits // pass most stuff unchanged
in.d.bits.addr_lo := out.d.bits.addr_lo & ~dsizeOH1
in.d.bits.source := out.d.bits.source >> fragmentBits
in.d.bits.size := Mux(dFirst, dFirst_size, dOrig)
// Combine the error flag
val r_error = RegInit(Bool(false))
val d_error = r_error | out.d.bits.error
when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
in.d.bits.error := d_error
// What maximum transfer sizes do downstream devices support?
val maxArithmetics = managers.map(_.supportsArithmetic.max)
val maxLogicals = managers.map(_.supportsLogical.max)
val maxGets = managers.map(_.supportsGet.max)
val maxPutFulls = managers.map(_.supportsPutFull.max)
val maxPutPartials = managers.map(_.supportsPutPartial.max)
val maxHints = managers.map(m => if (m.supportsHint) maxDownSize else 0)
// We assume that the request is valid => size 0 is impossible
val lgMinSize = UInt(log2Ceil(minSize))
val maxLgArithmetics = maxArithmetics.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgLogicals = maxLogicals .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgGets = maxGets .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgPutFulls = maxPutFulls .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgPutPartials = maxPutPartials.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgHints = maxHints .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
// Make the request repeatable
val repeater = Module(new Repeater(in.a.bits))
repeater.io.enq <> in.a
val in_a = repeater.io.deq
// If this is in front of a single manager, these become constants
val find = manager.findFast(edgeIn.address(in_a.bits))
val maxLgArithmetic = Mux1H(find, maxLgArithmetics)
val maxLgLogical = Mux1H(find, maxLgLogicals)
val maxLgGet = Mux1H(find, maxLgGets)
val maxLgPutFull = Mux1H(find, maxLgPutFulls)
val maxLgPutPartial = Mux1H(find, maxLgPutPartials)
val maxLgHint = Mux1H(find, maxLgHints)
val limit = if (alwaysMin) lgMinSize else
MuxLookup(in_a.bits.opcode, lgMinSize, Array(
TLMessages.PutFullData -> maxLgPutFull,
TLMessages.PutPartialData -> maxLgPutPartial,
TLMessages.ArithmeticData -> maxLgArithmetic,
TLMessages.LogicalData -> maxLgLogical,
TLMessages.Get -> maxLgGet,
TLMessages.Hint -> maxLgHint))
val aOrig = in_a.bits.size
val aFrag = Mux(aOrig > limit, limit, aOrig)
val aOrigOH1 = UIntToOH1(aOrig, log2Ceil(maxSize))
val aFragOH1 = UIntToOH1(aFrag, log2Up(maxDownSize))
val aHasData = node.edgesIn(0).hasData(in_a.bits)
val aMask = Mux(aHasData, UInt(0), aFragOH1)
val gennum = RegInit(UInt(0, width = counterBits))
val aFirst = gennum === UInt(0)
val old_gennum1 = Mux(aFirst, aOrigOH1 >> log2Ceil(beatBytes), gennum - UInt(1))
val new_gennum = ~(~old_gennum1 | (aMask >> log2Ceil(beatBytes))) // ~(~x|y) is width safe
val aFragnum = ~(~(old_gennum1 >> log2Ceil(minSize/beatBytes)) | (aFragOH1 >> log2Ceil(minSize)))
when (out.a.fire()) { gennum := new_gennum }
repeater.io.repeat := !aHasData && aFragnum =/= UInt(0)
out.a <> in_a
out.a.bits.address := in_a.bits.address | (~aFragnum << log2Ceil(minSize) & aOrigOH1)
out.a.bits.source := Cat(in_a.bits.source, aFragnum)
out.a.bits.size := aFrag
// Optimize away some of the Repeater's registers
assert (!repeater.io.full || !aHasData)
out.a.bits.data := in.a.bits.data
val fullMask = UInt((BigInt(1) << beatBytes) - 1)
assert (!repeater.io.full || in_a.bits.mask === fullMask)
out.a.bits.mask := Mux(repeater.io.full, fullMask, in.a.bits.mask)
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
}
}
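A hedged wiring sketch for the fragmenter (placeholder names; the companion object is assumed to follow the same apply pattern as TLAtomicAutomata earlier in this patch): large client bursts are carved into fragments no bigger than the limit negotiated above.

  // Sketch only: TLFragmenter(minSize = 4, maxSize = 256) in front of a 4-byte-beat RAM.
  val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = 4))
  ram.node := TLFragmenter(4, 256)(client.node)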

View File

@ -12,8 +12,8 @@ import diplomacy._
class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = false, passthrough: Boolean = true)(implicit p: Parameters) extends LazyModule
{
val node = TLAdapterNode(
clientFn = { case Seq(c) => if (!supportClients) c else c.copy(minLatency = min(1, c.minLatency), clients = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) },
managerFn = { case Seq(m) => if (!supportManagers) m else m.copy(minLatency = min(1, m.minLatency), managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) })
clientFn = { c => if (!supportClients) c else c.copy(minLatency = min(1, c.minLatency), clients = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) },
managerFn = { m => if (!supportManagers) m else m.copy(minLatency = min(1, m.minLatency), managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@ -21,79 +21,76 @@ class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = f
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
// Don't add support for clients if there is no BCE channel
val bce = edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe
require (!supportClients || bce)
// Don't add support for clients if there is no BCE channel
val bce = edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe
require (!supportClients || bce)
// Does it even make sense to add the HintHandler?
val smartClients = edgeIn.client.clients.map(_.supportsHint.max == edgeIn.client.maxTransfer).reduce(_&&_)
val smartManagers = edgeOut.manager.managers.map(_.supportsHint.max == edgeOut.manager.maxTransfer).reduce(_&&_)
// Does it even make sense to add the HintHandler?
val smartClients = edgeIn.client.clients.map(_.supportsHint.max == edgeIn.client.maxTransfer).reduce(_&&_)
val smartManagers = edgeOut.manager.managers.map(_.supportsHint.max == edgeOut.manager.maxTransfer).reduce(_&&_)
if (supportManagers && !(passthrough && smartManagers)) {
val address = edgeIn.address(in.a.bits)
val handleA = if (passthrough) !edgeOut.manager.supportsHintFast(address, edgeIn.size(in.a.bits)) else Bool(true)
val hintBitsAtA = handleA && in.a.bits.opcode === TLMessages.Hint
val hint = Wire(out.d)
if (supportManagers && !(passthrough && smartManagers)) {
val address = edgeIn.address(in.a.bits)
val handleA = if (passthrough) !edgeOut.manager.supportsHintFast(address, edgeIn.size(in.a.bits)) else Bool(true)
val hintBitsAtA = handleA && in.a.bits.opcode === TLMessages.Hint
val hint = Wire(out.d)
hint.valid := in.a.valid && hintBitsAtA
out.a.valid := in.a.valid && !hintBitsAtA
in.a.ready := Mux(hintBitsAtA, hint.ready, out.a.ready)
hint.valid := in.a.valid && hintBitsAtA
out.a.valid := in.a.valid && !hintBitsAtA
in.a.ready := Mux(hintBitsAtA, hint.ready, out.a.ready)
hint.bits := edgeIn.HintAck(in.a.bits, UInt(0))
out.a.bits := in.a.bits
hint.bits := edgeIn.HintAck(in.a.bits, UInt(0))
out.a.bits := in.a.bits
TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeOut.numBeats1(out.d.bits), out.d), (UInt(0), Queue(hint, 1)))
} else {
out.a.valid := in.a.valid
in.a.ready := out.a.ready
out.a.bits := in.a.bits
TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeOut.numBeats1(out.d.bits), out.d), (UInt(0), Queue(hint, 1)))
} else {
out.a.valid := in.a.valid
in.a.ready := out.a.ready
out.a.bits := in.a.bits
in.d.valid := out.d.valid
out.d.ready := in.d.ready
in.d.bits := out.d.bits
}
in.d.valid := out.d.valid
out.d.ready := in.d.ready
in.d.bits := out.d.bits
}
if (supportClients && !(passthrough && smartClients)) {
val handleB = if (passthrough) !edgeIn.client.supportsHint(out.b.bits.source, edgeOut.size(out.b.bits)) else Bool(true)
val hintBitsAtB = handleB && out.b.bits.opcode === TLMessages.Hint
val hint = Wire(in.c)
if (supportClients && !(passthrough && smartClients)) {
val handleB = if (passthrough) !edgeIn.client.supportsHint(out.b.bits.source, edgeOut.size(out.b.bits)) else Bool(true)
val hintBitsAtB = handleB && out.b.bits.opcode === TLMessages.Hint
val hint = Wire(in.c)
hint.valid := out.b.valid && hintBitsAtB
in.b.valid := out.b.valid && !hintBitsAtB
out.b.ready := Mux(hintBitsAtB, hint.ready, in.b.ready)
hint.valid := out.b.valid && hintBitsAtB
in.b.valid := out.b.valid && !hintBitsAtB
out.b.ready := Mux(hintBitsAtB, hint.ready, in.b.ready)
hint.bits := edgeOut.HintAck(out.b.bits)
in.b.bits := out.b.bits
hint.bits := edgeOut.HintAck(out.b.bits)
in.b.bits := out.b.bits
TLArbiter(TLArbiter.lowestIndexFirst)(out.c, (edgeIn.numBeats1(in.c.bits), in.c), (UInt(0), Queue(hint, 1)))
} else if (bce) {
in.b.valid := out.b.valid
out.b.ready := in.b.ready
in.b.bits := out.b.bits
TLArbiter(TLArbiter.lowestIndexFirst)(out.c, (edgeIn.numBeats1(in.c.bits), in.c), (UInt(0), Queue(hint, 1)))
} else if (bce) {
in.b.valid := out.b.valid
out.b.ready := in.b.ready
in.b.bits := out.b.bits
out.c.valid := in.c.valid
in.c.ready := out.c.ready
out.c.bits := in.c.bits
} else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
}
out.c.valid := in.c.valid
in.c.ready := out.c.ready
out.c.bits := in.c.bits
} else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
}
if (bce) {
// Pass E through unchanged
out.e.valid := in.e.valid
in.e.ready := out.e.ready
out.e.bits := in.e.bits
} else {
in.e.ready := Bool(true)
out.e.valid := Bool(false)
if (bce) {
// Pass E through unchanged
out.e.valid := in.e.valid
in.e.ready := out.e.ready
out.e.bits := in.e.bits
} else {
in.e.ready := Bool(true)
out.e.valid := Bool(false)
}
}
}
}
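For the passthrough case above, a manager only avoids the synthesized HintAck path if it already advertises full-size Hint support. An illustrative manager description (field values are examples, not from this commit):

  TLManagerParameters(
    address      = Seq(AddressSet(0x0, 0xfff)),
    supportsGet  = TransferSizes(1, 64),
    supportsHint = TransferSizes(1, 64))  // full-size Hints: the handler leaves A untouched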

View File

@ -81,16 +81,16 @@ object IntImp extends NodeImp[IntSourcePortParameters, IntSinkPortParameters, In
case class IntIdentityNode() extends IdentityNode(IntImp)
case class IntSourceNode(num: Int) extends SourceNode(IntImp)(
IntSourcePortParameters(Seq(IntSourceParameters(num))), (if (num == 0) 0 else 1) to 1)
if (num == 0) Seq() else Seq(IntSourcePortParameters(Seq(IntSourceParameters(num)))))
case class IntSinkNode() extends SinkNode(IntImp)(
IntSinkPortParameters(Seq(IntSinkParameters())))
Seq(IntSinkPortParameters(Seq(IntSinkParameters()))))
case class IntAdapterNode(
case class IntNexusNode(
sourceFn: Seq[IntSourcePortParameters] => IntSourcePortParameters,
sinkFn: Seq[IntSinkPortParameters] => IntSinkPortParameters,
numSourcePorts: Range.Inclusive = 1 to 1,
numSinkPorts: Range.Inclusive = 1 to 1)
extends InteriorNode(IntImp)(sourceFn, sinkFn, numSourcePorts, numSinkPorts)
numSourcePorts: Range.Inclusive = 0 to 128,
numSinkPorts: Range.Inclusive = 0 to 128)
extends NexusNode(IntImp)(sourceFn, sinkFn, numSourcePorts, numSinkPorts)
case class IntOutputNode() extends OutputNode(IntImp)
case class IntInputNode() extends InputNode(IntImp)
@ -103,9 +103,7 @@ case class IntInternalInputNode(num: Int) extends InternalInputNode(IntImp)(Seq(
class IntXbar()(implicit p: Parameters) extends LazyModule
{
val intnode = IntAdapterNode(
numSourcePorts = 0 to 128,
numSinkPorts = 0 to 128,
val intnode = IntNexusNode(
sinkFn = { _ => IntSinkPortParameters(Seq(IntSinkParameters())) },
sourceFn = { seq =>
IntSourcePortParameters((seq zip seq.map(_.num).scanLeft(0)(_+_).init).map {
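The sourceFn above renumbers incoming interrupt ranges back-to-back; the scanLeft produces each port's starting offset. In plain Scala:

  // Three upstream ports with 2, 1 and 3 interrupts land at offsets 0, 2 and 3.
  val nums    = Seq(2, 1, 3)
  val offsets = nums.scanLeft(0)(_ + _).init  // Seq(0, 2, 3)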

View File

@ -86,29 +86,33 @@ object TLImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TL
// Nodes implemented inside modules
case class TLIdentityNode() extends IdentityNode(TLImp)
case class TLClientNode(portParams: TLClientPortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SourceNode(TLImp)(portParams, numPorts)
case class TLManagerNode(portParams: TLManagerPortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SinkNode(TLImp)(portParams, numPorts)
case class TLClientNode(portParams: Seq[TLClientPortParameters]) extends SourceNode(TLImp)(portParams)
case class TLManagerNode(portParams: Seq[TLManagerPortParameters]) extends SinkNode(TLImp)(portParams)
object TLClientNode
{
def apply(params: TLClientParameters) =
new TLClientNode(TLClientPortParameters(Seq(params)), 1 to 1)
new TLClientNode(Seq(TLClientPortParameters(Seq(params))))
}
object TLManagerNode
{
def apply(beatBytes: Int, params: TLManagerParameters) =
new TLManagerNode(TLManagerPortParameters(Seq(params), beatBytes, minLatency = 0), 1 to 1)
new TLManagerNode(Seq(TLManagerPortParameters(Seq(params), beatBytes, minLatency = 0)))
}
case class TLAdapterNode(
clientFn: TLClientPortParameters => TLClientPortParameters,
managerFn: TLManagerPortParameters => TLManagerPortParameters,
num: Range.Inclusive = 0 to 999)
extends AdapterNode(TLImp)(clientFn, managerFn, num)
case class TLNexusNode(
clientFn: Seq[TLClientPortParameters] => TLClientPortParameters,
managerFn: Seq[TLManagerPortParameters] => TLManagerPortParameters,
numClientPorts: Range.Inclusive = 1 to 1,
numManagerPorts: Range.Inclusive = 1 to 1)
extends InteriorNode(TLImp)(clientFn, managerFn, numClientPorts, numManagerPorts)
numClientPorts: Range.Inclusive = 1 to 999,
numManagerPorts: Range.Inclusive = 1 to 999)
extends NexusNode(TLImp)(clientFn, managerFn, numClientPorts, numManagerPorts)
// Nodes passed from an inner module
case class TLOutputNode() extends OutputNode(TLImp)
@ -169,17 +173,15 @@ case class TLAsyncIdentityNode() extends IdentityNode(TLAsyncImp)
case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp)
case class TLAsyncInputNode() extends InputNode(TLAsyncImp)
case class TLAsyncSourceNode(sync: Int) extends MixedNode(TLImp, TLAsyncImp)(
dFn = { case (1, Seq(p)) => Seq(TLAsyncClientPortParameters(p)) },
uFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, // discard cycles in other clock domain
numPO = 1 to 1,
numPI = 1 to 1)
case class TLAsyncSourceNode(sync: Int)
extends MixedAdapterNode(TLImp, TLAsyncImp)(
dFn = { p => TLAsyncClientPortParameters(p) },
uFn = { p => p.base.copy(minLatency = sync+1) }) // discard cycles in other clock domain
case class TLAsyncSinkNode(depth: Int, sync: Int) extends MixedNode(TLAsyncImp, TLImp)(
dFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) },
uFn = { case (1, Seq(p)) => Seq(TLAsyncManagerPortParameters(depth, p)) },
numPO = 1 to 1,
numPI = 1 to 1)
case class TLAsyncSinkNode(depth: Int, sync: Int)
extends MixedAdapterNode(TLAsyncImp, TLImp)(
dFn = { p => p.base.copy(minLatency = sync+1) },
uFn = { p => TLAsyncManagerPortParameters(depth, p) })
object TLRationalImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TLEdgeParameters, TLEdgeParameters, TLRationalBundle]
{
@ -205,14 +207,12 @@ case class TLRationalIdentityNode() extends IdentityNode(TLRationalImp)
case class TLRationalOutputNode() extends OutputNode(TLRationalImp)
case class TLRationalInputNode() extends InputNode(TLRationalImp)
case class TLRationalSourceNode() extends MixedNode(TLImp, TLRationalImp)(
dFn = { case (_, s) => s },
uFn = { case (_, s) => s.map(p => p.copy(minLatency = 1)) }, // discard cycles from other clock domain
numPO = 0 to 999,
numPI = 0 to 999)
case class TLRationalSourceNode()
extends MixedAdapterNode(TLImp, TLRationalImp)(
dFn = { p => p },
uFn = { p => p.copy(minLatency = 1) }) // discard cycles from other clock domain
case class TLRationalSinkNode() extends MixedNode(TLRationalImp, TLImp)(
dFn = { case (_, s) => s.map(p => p.copy(minLatency = 1)) },
uFn = { case (_, s) => s },
numPO = 0 to 999,
numPI = 0 to 999)
case class TLRationalSinkNode()
extends MixedAdapterNode(TLRationalImp, TLImp)(
dFn = { p => p.copy(minLatency = 1) },
uFn = { p => p })
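With the constructors above, a node's port count is simply the length of the parameter Seq it is built with, so a multi-ported client no longer needs a separate Range argument. A minimal sketch:

  // Sketch only: a client node exposing two identical ports under the Seq-based API.
  val node = TLClientNode(Seq.fill(2)(
    TLClientPortParameters(Seq(TLClientParameters(sourceId = IdRange(0, 4))))))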

View File

@ -5,6 +5,7 @@ package uncore.tilelink2
import Chisel._
import config._
import diplomacy._
import util.GenericParameterizedBundle
// We detect concurrent puts that put memory into an undefined state.
// put0, put0Ack, put1, put1Ack => ok: defined
@ -31,268 +32,271 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
val out = node.bundleOut
}
// !!! support multiple clients via clock division
require (io.out.size == 1)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val edge = edgeIn
val endAddress = edge.manager.maxAddress + 1
val endSourceId = edge.client.endSourceId
val maxTransfer = edge.manager.maxTransfer
val beatBytes = edge.manager.beatBytes
val endAddressHi = (endAddress / beatBytes).intValue
val maxLgBeats = log2Up(maxTransfer/beatBytes)
val shift = log2Ceil(beatBytes)
val decTrees = log2Up(maxTransfer/beatBytes)
val addressBits = log2Up(endAddress)
val countBits = log2Up(endSourceId)
val sizeBits = edge.bundle.sizeBits
val in = io.in(0)
val out = io.out(0)
// Reset control logic
val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1))
val wipe = !wipeIndex(log2Ceil(endAddressHi))
wipeIndex := wipeIndex + wipe.asUInt
val edge = node.edgesIn(0)
val endAddress = edge.manager.maxAddress + 1
val endSourceId = edge.client.endSourceId
val maxTransfer = edge.manager.maxTransfer
val beatBytes = edge.manager.beatBytes
val endAddressHi = (endAddress / beatBytes).intValue
val maxLgBeats = log2Up(maxTransfer/beatBytes)
val shift = log2Ceil(beatBytes)
val decTrees = log2Up(maxTransfer/beatBytes)
val addressBits = log2Up(endAddress)
val countBits = log2Up(endSourceId)
val sizeBits = edge.bundle.sizeBits
// Block traffic while wiping Mems
in.a.ready := out.a.ready && !wipe
out.a.valid := in.a.valid && !wipe
out.a.bits := in.a.bits
out.d.ready := in.d.ready && !wipe
in.d.valid := out.d.valid && !wipe
in.d.bits := out.d.bits
// Reset control logic
val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1))
val wipe = !wipeIndex(log2Ceil(endAddressHi))
wipeIndex := wipeIndex + wipe.asUInt
// BCE unsupported
in.b.valid := Bool(false)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
out.b.ready := Bool(true)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// Block traffic while wiping Mems
in.a.ready := out.a.ready && !wipe
out.a.valid := in.a.valid && !wipe
out.a.bits := in.a.bits
out.d.ready := in.d.ready && !wipe
in.d.valid := out.d.valid && !wipe
in.d.bits := out.d.bits
val params = TLRAMModel.MonitorParameters(addressBits, sizeBits)
// BCE unsupported
in.b.valid := Bool(false)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
out.b.ready := Bool(true)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// Infer as simple dual port BRAM/M10k with write-first/new-data semantics (bypass needed)
val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new TLRAMModel.ByteMonitor(params)) }
val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
class ByteMonitor extends Bundle {
val valid = Bool()
val value = UInt(width = 8)
}
class FlightMonitor extends Bundle {
val base = UInt(width = addressBits)
val size = UInt(width = sizeBits)
val opcode = UInt(width = 3)
}
val shadow_wen = Wire(init = Fill(beatBytes, wipe))
val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe))
val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe))
val inc_trees_wen = Wire(init = Fill(decTrees, wipe))
val dec_trees_wen = Wire(init = Fill(decTrees, wipe))
// Infer as simple dual port BRAM/M10k with write-first/new-data semantics (bypass needed)
val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new ByteMonitor) }
val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
// This must be registers b/c we build a CAM from it
val flight = Reg(Vec(endSourceId, new TLRAMModel.FlightMonitor(params)))
val valid = Reg(Vec(endSourceId, Bool()))
val shadow_wen = Wire(init = Fill(beatBytes, wipe))
val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe))
val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe))
val inc_trees_wen = Wire(init = Fill(decTrees, wipe))
val dec_trees_wen = Wire(init = Fill(decTrees, wipe))
// We want to cross flight data from A to D in the same cycle (for combinational TL2 devices)
val a_flight = Wire(new TLRAMModel.FlightMonitor(params))
a_flight.base := edge.address(in.a.bits)
a_flight.size := edge.size(in.a.bits)
a_flight.opcode := in.a.bits.opcode
// This must be registers b/c we build a CAM from it
val flight = Reg(Vec(endSourceId, new FlightMonitor))
val valid = Reg(Vec(endSourceId, Bool()))
when (in.a.fire()) { flight(in.a.bits.source) := a_flight }
val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source
val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source)))
// We want to cross flight data from A to D in the same cycle (for combinational TL2 devices)
val a_flight = Wire(new FlightMonitor)
a_flight.base := edge.address(in.a.bits)
a_flight.size := edge.size(in.a.bits)
a_flight.opcode := in.a.bits.opcode
// Process A access requests
val a = Reg(next = in.a.bits)
val a_fire = Reg(next = in.a.fire(), init = Bool(false))
val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire)
val a_size = edge.size(a)
val a_sizeOH = UIntToOH(a_size)
val a_address = a.address | a_address_inc
val a_addr_hi = edge.addr_hi(a_address)
val a_base = edge.address(a)
val a_mask = edge.mask(a_base, a_size)
val a_fifo = edge.manager.hasFifoIdFast(a_base)
when (in.a.fire()) { flight(in.a.bits.source) := a_flight }
val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source
val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source)))
// Grab the concurrency state we need
val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi))
val a_dec_bytes = dec_bytes.map(_.read(a_addr_hi))
val a_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_inc_tree = a_inc_trees.fold(UInt(0))(_ + _)
val a_dec_tree = a_dec_trees.fold(UInt(0))(_ + _)
val a_inc = a_inc_bytes.map(_ + a_inc_tree)
val a_dec = a_dec_bytes.map(_ + a_dec_tree)
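// The byte counters (`inc_bytes`/`dec_bytes`) track beat-sized operations, while
// `inc_trees`/`dec_trees` form a binary tree over the address space: level i covers
// naturally aligned 2^(i+1)-beat regions, so a large burst updates one tree counter
// instead of every byte it touches. The in-flight count for a byte is therefore its
// byte counter plus the sum of every tree level containing it, which is exactly what
// `a_inc`/`a_dec` compute above.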
// Process A access requests
val a = Reg(next = in.a.bits)
val a_fire = Reg(next = in.a.fire(), init = Bool(false))
val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire)
val a_size = edge.size(a)
val a_sizeOH = UIntToOH(a_size)
val a_address = a.address | a_address_inc
val a_addr_hi = edge.addr_hi(a_address)
val a_base = edge.address(a)
val a_mask = edge.mask(a_base, a_size)
val a_fifo = edge.manager.hasFifoIdFast(a_base)
when (a_fire) {
// Record the request so we can handle its response
assert (a.opcode =/= TLMessages.Acquire)
// Grab the concurrency state we need
val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi))
val a_dec_bytes = dec_bytes.map(_.read(a_addr_hi))
val a_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_inc_tree = a_inc_trees.fold(UInt(0))(_ + _)
val a_dec_tree = a_dec_trees.fold(UInt(0))(_ + _)
val a_inc = a_inc_bytes.map(_ + a_inc_tree)
val a_dec = a_dec_bytes.map(_ + a_dec_tree)
// Mark the operation as valid
valid(a.source) := Bool(true)
when (a_fire) {
// Record the request so we can handle its response
assert (a.opcode =/= TLMessages.Acquire)
// Mark the operation as valid
valid(a.source) := Bool(true)
// Increase the per-byte flight counter for the whole transaction
when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) {
when (a_size <= UInt(shift)) {
inc_bytes_wen := a_mask
// Increase the per-byte flight counter for the whole transaction
when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) {
when (a_size <= UInt(shift)) {
inc_bytes_wen := a_mask
}
inc_trees_wen := a_sizeOH >> (shift+1)
}
inc_trees_wen := a_sizeOH >> (shift+1)
}
when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData ||
a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) {
shadow_wen := a.mask
for (i <- 0 until beatBytes) {
val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt
val byte = a.data(8*(i+1)-1, 8*i)
when (a.mask(i)) {
printf(log + " ")
when (a.opcode === TLMessages.PutFullData) { printf("PF") }
when (a.opcode === TLMessages.PutPartialData) { printf("PP") }
when (a.opcode === TLMessages.ArithmeticData) { printf("A ") }
when (a.opcode === TLMessages.LogicalData) { printf("L ") }
printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param)
when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData ||
a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) {
shadow_wen := a.mask
for (i <- 0 until beatBytes) {
val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt
val byte = a.data(8*(i+1)-1, 8*i)
when (a.mask(i)) {
printf(log + " ")
when (a.opcode === TLMessages.PutFullData) { printf("PF") }
when (a.opcode === TLMessages.PutPartialData) { printf("PP") }
when (a.opcode === TLMessages.ArithmeticData) { printf("A ") }
when (a.opcode === TLMessages.LogicalData) { printf("L ") }
printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param)
}
}
}
}
when (a.opcode === TLMessages.Get) {
printf(log + " G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
}
}
val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
for (i <- 0 until beatBytes) {
val data = Wire(new ByteMonitor)
val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
// !!! calculate the AMO?
data.value := a.data(8*(i+1)-1, 8*i)
when (shadow_wen(i)) {
shadow(i).write(a_waddr, data)
}
}
for (i <- 0 until beatBytes) {
val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1))
when (inc_bytes_wen(i)) {
inc_bytes(i).write(a_waddr, data)
}
}
for (i <- 0 until inc_trees.size) {
val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1))
when (inc_trees_wen(i)) {
inc_trees(i).write(a_waddr >> (i+1), data)
}
}
// Process D access responses
val d = RegNext(out.d.bits)
val d_fire = Reg(next = out.d.fire(), init = Bool(false))
val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire)
val d_size = edge.size(d)
val d_sizeOH = UIntToOH(d_size)
val d_base = d_flight.base
val d_address = d_base | d_address_inc
val d_addr_hi = edge.addr_hi(d_address)
val d_mask = edge.mask(d_base, d_size)
val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
// Grab the concurrency state we need
val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
val d_dec_bytes = dec_bytes.map(_.read(d_addr_hi))
val d_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_inc_tree = d_inc_trees.fold(UInt(0))(_ + _)
val d_dec_tree = d_dec_trees.fold(UInt(0))(_ + _)
val d_inc = d_inc_bytes.map(_ + d_inc_tree)
val d_dec = d_dec_bytes.map(_ + d_dec_tree)
val d_shadow = shadow.map(_.read(d_addr_hi))
val d_valid = valid(d.source)
when (d_fire) {
// Check the response is correct
assert (d_size === d_flight.size)
// addr_lo is allowed to differ
when (d_flight.opcode === TLMessages.Hint) {
assert (d.opcode === TLMessages.HintAck)
}
// Decrease the per-byte flight counter for the whole transaction
when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) {
when (d_size <= UInt(shift)) {
dec_bytes_wen := d_mask
}
dec_trees_wen := d_sizeOH >> (shift+1)
// NOTE: D channel carries an uninterrupted multibeat op, so updating on last is fine
for (i <- 0 until endSourceId) {
// Does this modification overlap a Get? => wipe its valid flag
val f_base = flight(i).base
val f_size = flight(i).size
val f_bits = UIntToOH1(f_size, addressBits)
val d_bits = UIntToOH1(d_size, addressBits)
val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0)
when (overlap) { valid(i) := Bool(false) }
when (a.opcode === TLMessages.Get) {
printf(log + " G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
}
}
when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) {
assert (d.opcode === TLMessages.AccessAck)
printf(log + " ")
when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") }
when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") }
printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits))
val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
for (i <- 0 until beatBytes) {
val data = Wire(new TLRAMModel.ByteMonitor(params))
val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
// !!! calculate the AMO?
data.value := a.data(8*(i+1)-1, 8*i)
when (shadow_wen(i)) {
shadow(i).write(a_waddr, data)
}
}
when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) {
assert (d.opcode === TLMessages.AccessAckData)
for (i <- 0 until beatBytes) {
val got = d.data(8*(i+1)-1, 8*i)
val shadow = Wire(init = d_shadow(i))
when (d_mask(i)) {
val d_addr = d_addr_hi << shift | UInt(i)
printf(log + " ")
when (d_flight.opcode === TLMessages.Get) { printf("g ") }
when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") }
when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") }
printf(" 0x%x := 0x%x", d_addr, got)
when (!shadow.valid) {
printf(", undefined (uninitialized or prior overlapping puts)\n")
} .elsewhen (d_inc(i) =/= d_dec(i)) {
printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i))
} .elsewhen (!d_fifo && !d_valid) {
printf(", undefined (concurrent completed put)\n")
} .otherwise {
printf("\n")
assert (shadow.value === got)
for (i <- 0 until beatBytes) {
val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1))
when (inc_bytes_wen(i)) {
inc_bytes(i).write(a_waddr, data)
}
}
for (i <- 0 until inc_trees.size) {
val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1))
when (inc_trees_wen(i)) {
inc_trees(i).write(a_waddr >> (i+1), data)
}
}
// Process D access responses
val d = RegNext(out.d.bits)
val d_fire = Reg(next = out.d.fire(), init = Bool(false))
val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire)
val d_size = edge.size(d)
val d_sizeOH = UIntToOH(d_size)
val d_base = d_flight.base
val d_address = d_base | d_address_inc
val d_addr_hi = edge.addr_hi(d_address)
val d_mask = edge.mask(d_base, d_size)
val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
// Grab the concurrency state we need
val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
val d_dec_bytes = dec_bytes.map(_.read(d_addr_hi))
val d_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_inc_tree = d_inc_trees.fold(UInt(0))(_ + _)
val d_dec_tree = d_dec_trees.fold(UInt(0))(_ + _)
val d_inc = d_inc_bytes.map(_ + d_inc_tree)
val d_dec = d_dec_bytes.map(_ + d_dec_tree)
val d_shadow = shadow.map(_.read(d_addr_hi))
val d_valid = valid(d.source)
when (d_fire) {
// Check the response is correct
assert (d_size === d_flight.size)
// addr_lo is allowed to differ
when (d_flight.opcode === TLMessages.Hint) {
assert (d.opcode === TLMessages.HintAck)
}
// Decrease the per-byte flight counter for the whole transaction
when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) {
when (d_size <= UInt(shift)) {
dec_bytes_wen := d_mask
}
dec_trees_wen := d_sizeOH >> (shift+1)
// NOTE: D channel carries an uninterrupted multibeat op, so updating on last is fine
for (i <- 0 until endSourceId) {
// Does this modification overlap a Get? => wipe its valid flag
val f_base = flight(i).base
val f_size = flight(i).size
val f_bits = UIntToOH1(f_size, addressBits)
val d_bits = UIntToOH1(d_size, addressBits)
val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0)
when (overlap) { valid(i) := Bool(false) }
}
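// The overlap test relies on both ranges being naturally aligned power-of-two regions:
// `f_bits`/`d_bits` are (size-1) byte masks, so the ranges intersect exactly when the two
// bases agree on every bit above both masks. Worked example (hypothetical 8-bit addresses):
// f_base = 0x40, f_bits = 0x0f and d_base = 0x48, d_bits = 0x07 give
// ~(~(0x40 ^ 0x48) | 0x0f) = 0, i.e. the Get at 0x40-0x4f overlaps the put at 0x48-0x4f.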
}
when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) {
assert (d.opcode === TLMessages.AccessAck)
printf(log + " ")
when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") }
when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") }
printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits))
}
when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) {
assert (d.opcode === TLMessages.AccessAckData)
for (i <- 0 until beatBytes) {
val got = d.data(8*(i+1)-1, 8*i)
val shadow = Wire(init = d_shadow(i))
when (d_mask(i)) {
val d_addr = d_addr_hi << shift | UInt(i)
printf(log + " ")
when (d_flight.opcode === TLMessages.Get) { printf("g ") }
when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") }
when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") }
printf(" 0x%x := 0x%x", d_addr, got)
when (!shadow.valid) {
printf(", undefined (uninitialized or prior overlapping puts)\n")
} .elsewhen (d_inc(i) =/= d_dec(i)) {
printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i))
} .elsewhen (!d_fifo && !d_valid) {
printf(", undefined (concurrent completed put)\n")
} .otherwise {
printf("\n")
assert (shadow.value === got)
}
}
}
}
}
}
val d_waddr = Mux(wipe, wipeIndex, d_addr_hi)
for (i <- 0 until beatBytes) {
val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1))
when (dec_bytes_wen(i)) {
dec_bytes(i).write(d_waddr, data)
val d_waddr = Mux(wipe, wipeIndex, d_addr_hi)
for (i <- 0 until beatBytes) {
val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1))
when (dec_bytes_wen(i)) {
dec_bytes(i).write(d_waddr, data)
}
}
}
for (i <- 0 until dec_trees.size) {
val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1))
when (dec_trees_wen(i)) {
dec_trees(i).write(d_waddr >> (i+1), data)
for (i <- 0 until dec_trees.size) {
val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1))
when (dec_trees_wen(i)) {
dec_trees(i).write(d_waddr >> (i+1), data)
}
}
}
}
}
object TLRAMModel
{
case class MonitorParameters(addressBits: Int, sizeBits: Int)
class ByteMonitor(params: MonitorParameters) extends GenericParameterizedBundle(params) {
val valid = Bool()
val value = UInt(width = 8)
}
class FlightMonitor(params: MonitorParameters) extends GenericParameterizedBundle(params) {
val base = UInt(width = params.addressBits)
val size = UInt(width = params.sizeBits)
val opcode = UInt(width = 3)
}
}

View File

@ -9,7 +9,7 @@ import regmapper._
import scala.math.{min,max}
class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
extends TLManagerNode(TLManagerPortParameters(
extends TLManagerNode(Seq(TLManagerPortParameters(
Seq(TLManagerParameters(
address = Seq(address),
executable = executable,
@ -18,7 +18,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int =
supportsPutFull = TransferSizes(1, beatBytes),
fifoId = Some(0))), // requests are handled in order
beatBytes = beatBytes,
minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
{
require (address.contiguous)

View File

@ -8,7 +8,7 @@ import diplomacy._
class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
{
val node = TLManagerNode(TLManagerPortParameters(
val node = TLManagerNode(Seq(TLManagerPortParameters(
Seq(TLManagerParameters(
address = List(address),
regionType = RegionType.UNCACHED,
@ -18,7 +18,7 @@ class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)
supportsPutFull = TransferSizes(1, beatBytes),
fifoId = Some(0))), // requests are handled in order
beatBytes = beatBytes,
minLatency = 1)) // no bypass needed for this device
minLatency = 1))) // no bypass needed for this device
// We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1)

View File

@ -15,8 +15,8 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
private val client = TLClientParameters(sourceId = IdRange(0, maxInFlight))
val node = TLAdapterNode(
// We erase all client information since we crush the source Ids
clientFn = { case _ => TLClientPortParameters(clients = Seq(client)) },
managerFn = { case Seq(mp) => mp })
clientFn = { _ => TLClientPortParameters(clients = Seq(client)) },
managerFn = { mp => mp })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@ -24,54 +24,51 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
val out = node.bundleOut
}
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val in = io.in(0)
val out = io.out(0)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
// Acquires cannot pass this adapter; it makes Probes impossible
require (!edgeIn.client.anySupportProbe ||
!edgeOut.manager.anySupportAcquireB)
// Acquires cannot pass this adapter; it makes Probes impossible
require (!edgeIn.client.anySupportProbe ||
!edgeOut.manager.anySupportAcquireB)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
if (maxInFlight >= edgeIn.client.endSourceId) {
out.a <> in.a
in.d <> out.d
} else {
// State tracking
val sourceIdMap = Mem(maxInFlight, in.a.bits.source)
val allocated = RegInit(UInt(0, width = maxInFlight))
val nextFreeOH = ~(leftOR(~allocated) << 1) & ~allocated
val nextFree = OHToUInt(nextFreeOH)
val full = allocated.andR()
if (maxInFlight >= edgeIn.client.endSourceId) {
out.a <> in.a
in.d <> out.d
} else {
// State tracking
val sourceIdMap = Mem(maxInFlight, in.a.bits.source)
val allocated = RegInit(UInt(0, width = maxInFlight))
val nextFreeOH = ~(leftOR(~allocated) << 1) & ~allocated
val nextFree = OHToUInt(nextFreeOH)
val full = allocated.andR()
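// `nextFreeOH` is the lowest clear bit of `allocated`, as a one-hot: `leftOR(~allocated)`
// propagates each free bit upward, shifting left by one marks "some free slot exists below
// me", and the final invert-and-mask keeps only the lowest free slot. Worked example
// (4 slots, hypothetical value): allocated = b1011 -> ~allocated = b0100, leftOR = b1100,
// shifted = b1000, so ~shifted & ~allocated = b0100 and nextFree = 2.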
val a_first = edgeIn.first(in.a)
val d_last = edgeIn.last(in.d)
val a_first = edgeIn.first(in.a)
val d_last = edgeIn.last(in.d)
val block = a_first && full
in.a.ready := out.a.ready && !block
out.a.valid := in.a.valid && !block
out.a.bits := in.a.bits
out.a.bits.source := holdUnless(nextFree, a_first)
val block = a_first && full
in.a.ready := out.a.ready && !block
out.a.valid := in.a.valid && !block
out.a.bits := in.a.bits
out.a.bits.source := holdUnless(nextFree, a_first)
in.d <> out.d
in.d.bits.source := sourceIdMap(out.d.bits.source)
in.d <> out.d
in.d.bits.source := sourceIdMap(out.d.bits.source)
when (a_first && in.a.fire()) {
sourceIdMap(nextFree) := in.a.bits.source
}
when (a_first && in.a.fire()) {
sourceIdMap(nextFree) := in.a.bits.source
val alloc = a_first && in.a.fire()
val free = d_last && in.d.fire()
val alloc_id = Mux(alloc, nextFreeOH, UInt(0))
val free_id = Mux(free, UIntToOH(out.d.bits.source), UInt(0))
allocated := (allocated | alloc_id) & ~free_id
}
val alloc = a_first && in.a.fire()
val free = d_last && in.d.fire()
val alloc_id = Mux(alloc, nextFreeOH, UInt(0))
val free_id = Mux(free, UIntToOH(out.d.bits.source), UInt(0))
allocated := (allocated | alloc_id) & ~free_id
}
}
}

View File

@ -10,12 +10,12 @@ import uncore.ahb._
import scala.math.{min, max}
import AHBParameters._
case class TLToAHBNode() extends MixedNode(TLImp, AHBImp)(
dFn = { case (1, Seq(TLClientPortParameters(clients, unsafeAtomics, minLatency))) =>
case class TLToAHBNode() extends MixedAdapterNode(TLImp, AHBImp)(
dFn = { case TLClientPortParameters(clients, unsafeAtomics, minLatency) =>
val masters = clients.map { case c => AHBMasterParameters(nodePath = c.nodePath) }
Seq(AHBMasterPortParameters(masters))
AHBMasterPortParameters(masters)
},
uFn = { case (1, Seq(AHBSlavePortParameters(slaves, beatBytes))) =>
uFn = { case AHBSlavePortParameters(slaves, beatBytes) =>
val managers = slaves.map { case s =>
TLManagerParameters(
address = s.address,
@ -26,10 +26,8 @@ case class TLToAHBNode() extends MixedNode(TLImp, AHBImp)(
supportsPutFull = s.supportsWrite, // but not PutPartial
fifoId = Some(0)) // a common FIFO domain
}
Seq(TLManagerPortParameters(managers, beatBytes, 1, 1))
},
numPO = 1 to 1,
numPI = 1 to 1)
TLManagerPortParameters(managers, beatBytes, 1, 1)
})
class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{
@ -41,91 +39,89 @@ class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends Laz
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val beatBytes = edgeOut.slave.beatBytes
val maxTransfer = edgeOut.slave.maxTransfer
val lgMax = log2Ceil(maxTransfer)
val lgBytes = log2Ceil(beatBytes)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val beatBytes = edgeOut.slave.beatBytes
val maxTransfer = edgeOut.slave.maxTransfer
val lgMax = log2Ceil(maxTransfer)
val lgBytes = log2Ceil(beatBytes)
// AHB has no cache coherence
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// AHB has no cache coherence
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// We need a skidpad to capture D output:
// We cannot know if the D response will be accepted until we have
// presented it on D as valid. We also can't back-pressure AHB in the
// data phase. Therefore, we must have enough space to save the data
// phase result. Whenever we have a queued response, we can not allow
// AHB to present new responses, so we must quash the address phase.
val d = Wire(in.d)
in.d <> Queue(d, 1, flow = true)
val a_quash = in.d.valid && !in.d.ready
// We need a skidpad to capture D output:
// We cannot know if the D response will be accepted until we have
// presented it on D as valid. We also can't back-pressure AHB in the
// data phase. Therefore, we must have enough space to save the data
// phase result. Whenever we have a queued response, we can not allow
// AHB to present new responses, so we must quash the address phase.
val d = Wire(in.d)
in.d <> Queue(d, 1, flow = true)
val a_quash = in.d.valid && !in.d.ready
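// One entry with `flow = true` is enough for this skid buffer: when the D channel is ready
// the response passes through combinationally (keeping the single-cycle latency), and
// because `a_quash` suppresses any new address phase while the entry is occupied, at most
// one data-phase result can ever be outstanding, so the queue cannot overflow.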
// Record what is coming out in d_phase
val d_valid = RegInit(Bool(false))
val d_hasData = Reg(Bool())
val d_error = Reg(Bool())
val d_addr_lo = Reg(UInt(width = lgBytes))
val d_source = Reg(UInt())
val d_size = Reg(UInt())
// Record what is coming out in d_phase
val d_valid = RegInit(Bool(false))
val d_hasData = Reg(Bool())
val d_error = Reg(Bool())
val d_addr_lo = Reg(UInt(width = lgBytes))
val d_source = Reg(UInt())
val d_size = Reg(UInt())
when (out.hreadyout) { d_error := d_error || out.hresp }
when (d.fire()) { d_valid := Bool(false) }
when (out.hreadyout) { d_error := d_error || out.hresp }
when (d.fire()) { d_valid := Bool(false) }
d.valid := d_valid && out.hreadyout
d.bits := edgeIn.AccessAck(d_addr_lo, UInt(0), d_source, d_size, out.hrdata, out.hresp || d_error)
d.bits.opcode := Mux(d_hasData, TLMessages.AccessAckData, TLMessages.AccessAck)
d.valid := d_valid && out.hreadyout
d.bits := edgeIn.AccessAck(d_addr_lo, UInt(0), d_source, d_size, out.hrdata, out.hresp || d_error)
d.bits.opcode := Mux(d_hasData, TLMessages.AccessAckData, TLMessages.AccessAck)
// We need an irrevocable input for AHB to stall on read bursts
// We also need the values to NOT change when valid goes low => 1 entry only
val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
val a_valid = a.valid && !a_quash
// We need an irrevocable input for AHB to stall on read bursts
// We also need the values to NOT change when valid goes low => 1 entry only
val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
val a_valid = a.valid && !a_quash
// This is a lot like TLEdge.firstlast, but also counts beats for single-beat TL types
val a_size = edgeIn.size(a.bits)
val a_beats1 = UIntToOH1(a_size, lgMax) >> lgBytes
val a_counter = RegInit(UInt(0, width = log2Up(maxTransfer/beatBytes)))
val a_counter1 = a_counter - UInt(1)
val a_first = a_counter === UInt(0)
val a_last = a_counter === UInt(1) || a_beats1 === UInt(0)
val a_offset = (a_beats1 & ~a_counter1) << lgBytes
val a_hasData = edgeIn.hasData(a.bits)
// This is a lot like TLEdge.firstlast, but also counts beats for single-beat TL types
val a_size = edgeIn.size(a.bits)
val a_beats1 = UIntToOH1(a_size, lgMax) >> lgBytes
val a_counter = RegInit(UInt(0, width = log2Up(maxTransfer/beatBytes)))
val a_counter1 = a_counter - UInt(1)
val a_first = a_counter === UInt(0)
val a_last = a_counter === UInt(1) || a_beats1 === UInt(0)
val a_offset = (a_beats1 & ~a_counter1) << lgBytes
val a_hasData = edgeIn.hasData(a.bits)
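// `a_beats1` is the beat count minus one: `UIntToOH1(a_size, lgMax)` is the (2^a_size - 1)
// byte mask, and shifting right by `lgBytes` converts bytes to beats. The expression
// `(a_beats1 & ~a_counter1) << lgBytes` recovers the byte offset of the current beat from
// the down-counting beat counter, so `haddr` below can present the address of every beat
// even though TileLink only supplies the burst's base address.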
// Expand no-data A-channel requests into multiple beats
a.ready := (a_hasData || a_last) && out.hreadyout && !a_quash
when (a_valid && out.hreadyout) {
a_counter := Mux(a_first, a_beats1, a_counter1)
d_valid := !a_hasData || a_last
// Record what will be in the data phase
when (a_first) {
d_hasData := !a_hasData
d_error := Bool(false)
d_addr_lo := a.bits.address
d_source := a.bits.source
d_size := a.bits.size
// Expand no-data A-channel requests into multiple beats
a.ready := (a_hasData || a_last) && out.hreadyout && !a_quash
when (a_valid && out.hreadyout) {
a_counter := Mux(a_first, a_beats1, a_counter1)
d_valid := !a_hasData || a_last
// Record what will be in the data phase
when (a_first) {
d_hasData := !a_hasData
d_error := Bool(false)
d_addr_lo := a.bits.address
d_source := a.bits.source
d_size := a.bits.size
}
}
// Transform TL size into AHB hsize+hburst
val a_size_bits = a_size.getWidth
val a_sizeDelta = Cat(UInt(0, width = 1), a_size) - UInt(lgBytes+1)
val a_singleBeat = a_sizeDelta(a_size_bits)
val a_logBeats1 = a_sizeDelta(a_size_bits-1, 0)
out.hmastlock := Bool(false) // for now
out.htrans := Mux(a_valid, Mux(a_first, TRANS_NONSEQ, TRANS_SEQ), Mux(a_first, TRANS_IDLE, TRANS_BUSY))
out.hsel := a_valid || !a_first
out.hready := out.hreadyout
out.hwrite := a_hasData
out.haddr := a.bits.address | a_offset
out.hsize := Mux(a_singleBeat, a.bits.size, UInt(lgBytes))
out.hburst := Mux(a_singleBeat, BURST_SINGLE, (a_logBeats1<<1) | UInt(1))
out.hprot := PROT_DEFAULT
out.hwdata := RegEnable(a.bits.data, a.fire())
}
// Transform TL size into AHB hsize+hburst
val a_size_bits = a_size.getWidth
val a_sizeDelta = Cat(UInt(0, width = 1), a_size) - UInt(lgBytes+1)
val a_singleBeat = a_sizeDelta(a_size_bits)
val a_logBeats1 = a_sizeDelta(a_size_bits-1, 0)
out.hmastlock := Bool(false) // for now
out.htrans := Mux(a_valid, Mux(a_first, TRANS_NONSEQ, TRANS_SEQ), Mux(a_first, TRANS_IDLE, TRANS_BUSY))
out.hsel := a_valid || !a_first
out.hready := out.hreadyout
out.hwrite := a_hasData
out.haddr := a.bits.address | a_offset
out.hsize := Mux(a_singleBeat, a.bits.size, UInt(lgBytes))
out.hburst := Mux(a_singleBeat, BURST_SINGLE, (a_logBeats1<<1) | UInt(1))
out.hprot := PROT_DEFAULT
out.hwdata := RegEnable(a.bits.data, a.fire())
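// The size arithmetic above maps a TL transfer onto AHB terms: `a_sizeDelta` computes
// `a_size - (lgBytes + 1)` with one extra bit, so its MSB (`a_singleBeat`) flags transfers
// that fit in a single bus beat and keep their TL size as `hsize`. Multi-beat transfers use
// full-width beats and encode the length as `(log2(beats) - 1) << 1 | 1`, which lands on the
// standard AHB HBURST codes INCR4/INCR8/INCR16; a 2-beat transfer falls back to the
// undefined-length INCR code.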
}
}

View File

@ -10,12 +10,12 @@ import uncore.apb._
import scala.math.{min, max}
import APBParameters._
case class TLToAPBNode() extends MixedNode(TLImp, APBImp)(
dFn = { case (1, Seq(TLClientPortParameters(clients, unsafeAtomics, minLatency))) =>
case class TLToAPBNode() extends MixedAdapterNode(TLImp, APBImp)(
dFn = { case TLClientPortParameters(clients, unsafeAtomics, minLatency) =>
val masters = clients.map { case c => APBMasterParameters(nodePath = c.nodePath) }
Seq(APBMasterPortParameters(masters))
APBMasterPortParameters(masters)
},
uFn = { case (1, Seq(APBSlavePortParameters(slaves, beatBytes))) =>
uFn = { case APBSlavePortParameters(slaves, beatBytes) =>
val managers = slaves.map { case s =>
TLManagerParameters(
address = s.address,
@ -27,10 +27,8 @@ case class TLToAPBNode() extends MixedNode(TLImp, APBImp)(
supportsPutFull = if (s.supportsWrite) TransferSizes(1, beatBytes) else TransferSizes.none,
fifoId = Some(0)) // a common FIFO domain
}
Seq(TLManagerPortParameters(managers, beatBytes, 1, 0))
},
numPO = 1 to 1,
numPI = 1 to 1)
TLManagerPortParameters(managers, beatBytes, 1, 0)
})
class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{
@ -42,51 +40,49 @@ class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends Laz
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val beatBytes = edgeOut.slave.beatBytes
val lgBytes = log2Ceil(beatBytes)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val beatBytes = edgeOut.slave.beatBytes
val lgBytes = log2Ceil(beatBytes)
// APB has no cache coherence
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// APB has no cache coherence
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// We need a skidpad to capture D output:
// We cannot know if the D response will be accepted until we have
// presented it on D as valid. We also can't back-pressure APB in the
// data phase. Therefore, we must have enough space to save the data
// phase result. Whenever we have a queued response, we can not allow
// APB to present new responses, so we must quash the address phase.
val d = Wire(in.d)
in.d <> Queue(d, 1, flow = true)
// We need a skidpad to capture D output:
// We cannot know if the D response will be accepted until we have
// presented it on D as valid. We also can't back-pressure APB in the
// data phase. Therefore, we must have enough space to save the data
// phase result. Whenever we have a queued response, we can not allow
// APB to present new responses, so we must quash the address phase.
val d = Wire(in.d)
in.d <> Queue(d, 1, flow = true)
// We need an irrevocable input for APB to stall
val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
// We need an irrevocable input for APB to stall
val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
val a_enable = RegInit(Bool(false))
val a_sel = a.valid && RegNext(!in.d.valid || in.d.ready)
val a_write = edgeIn.hasData(a.bits)
val a_enable = RegInit(Bool(false))
val a_sel = a.valid && RegNext(!in.d.valid || in.d.ready)
val a_write = edgeIn.hasData(a.bits)
when (a_sel) { a_enable := Bool(true) }
when (d.fire()) { a_enable := Bool(false) }
when (a_sel) { a_enable := Bool(true) }
when (d.fire()) { a_enable := Bool(false) }
out.psel := a_sel
out.penable := a_enable
out.pwrite := a_write
out.paddr := a.bits.address
out.pprot := PROT_DEFAULT
out.pwdata := a.bits.data
out.pstrb := Mux(a_write, a.bits.mask, UInt(0))
out.psel := a_sel
out.penable := a_enable
out.pwrite := a_write
out.paddr := a.bits.address
out.pprot := PROT_DEFAULT
out.pwdata := a.bits.data
out.pstrb := Mux(a_write, a.bits.mask, UInt(0))
a.ready := a_enable && out.pready
d.valid := a_enable && out.pready
assert (!d.valid || d.ready)
a.ready := a_enable && out.pready
d.valid := a_enable && out.pready
assert (!d.valid || d.ready)
d.bits := edgeIn.AccessAck(a.bits, UInt(0), out.prdata, out.pslverr)
d.bits.opcode := Mux(a_write, TLMessages.AccessAck, TLMessages.AccessAckData)
d.bits := edgeIn.AccessAck(a.bits, UInt(0), out.prdata, out.pslverr)
d.bits.opcode := Mux(a_write, TLMessages.AccessAck, TLMessages.AccessAckData)
}
}
}

View File

@ -10,16 +10,16 @@ import util.PositionalMultiQueue
import uncore.axi4._
import scala.math.{min, max}
case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
dFn = { case (1, _) =>
case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
dFn = { _ =>
// We must erase all client information, because we crush their source Ids
val masters = Seq(
AXI4MasterParameters(
id = IdRange(0, 1 << idBits),
aligned = true))
Seq(AXI4MasterPortParameters(masters))
AXI4MasterPortParameters(masters)
},
uFn = { case (1, Seq(p)) => Seq(TLManagerPortParameters(
uFn = { p => TLManagerPortParameters(
managers = p.slaves.map { case s =>
TLManagerParameters(
address = s.address,
@ -31,10 +31,8 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
supportsPutPartial = s.supportsWrite)},
// AXI4 is NEVER fifo in TL sense (R+W are independent)
beatBytes = p.beatBytes,
minLatency = p.minLatency))
},
numPO = 1 to 1,
numPI = 1 to 1)
minLatency = p.minLatency)
})
class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{
@ -46,185 +44,182 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameter
val out = node.bundleOut
}
val in = io.in(0)
val out = io.out(0)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val slaves = edgeOut.slave.slaves
val edgeIn = node.edgesIn(0)
val edgeOut = node.edgesOut(0)
val slaves = edgeOut.slave.slaves
// All pairs of slaves must promise that they will never interleave data
require (slaves(0).interleavedId.isDefined)
slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
// All pairs of slaves must promise that they will never interleave data
require (slaves(0).interleavedId.isDefined)
slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
// We need to ensure that a slave does not stall trying to send B while we need to receive R
// Since R&W have independent flow control, it is possible for a W to cut in-line and get into
// a slave's buffers, preventing us from getting all the R responses we need to release D for B.
// This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
// AR before working on AW might have an AW slipped between two AR fragments.
val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
// We need to ensure that a slave does not stall trying to send B while we need to receive R
// Since R&W have independent flow control, it is possible for a W to cut in-line and get into
// a slave's buffers, preventing us from getting all the R responses we need to release D for B.
// This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
// AR before working on AW might have an AW slipped between two AR fragments.
val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
// We need to keep the following state from A => D: (addr_lo, size, source)
// All of those fields could potentially require 0 bits (argh. Chisel.)
// We will pack as many of the lowest bits of state as fit into the AXI ID.
// Any bits left-over must be put into a bank of Queues.
// The Queues are indexed by as many of the source bits as fit into the AXI ID.
// The Queues are deep enough that every source has guaranteed space in its Queue.
// We need to keep the following state from A => D: (addr_lo, size, source)
// All of those fields could potentially require 0 bits (argh. Chisel.)
// We will pack as many of the lowest bits of state as fit into the AXI ID.
// Any bits left-over must be put into a bank of Queues.
// The Queues are indexed by as many of the source bits as fit into the AXI ID.
// The Queues are deep enough that every source has guaranteed space in its Queue.
val sourceBits = log2Ceil(edgeIn.client.endSourceId)
val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
val addrBits = log2Ceil(edgeIn.manager.beatBytes)
val stateBits = addrBits + sizeBits + sourceBits // could be 0
val sourceBits = log2Ceil(edgeIn.client.endSourceId)
val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
val addrBits = log2Ceil(edgeIn.manager.beatBytes)
val stateBits = addrBits + sizeBits + sourceBits // could be 0
val a_address = edgeIn.address(in.a.bits)
val a_addr_lo = edgeIn.addr_lo(a_address)
val a_source = in.a.bits.source
val a_size = edgeIn.size(in.a.bits)
val a_isPut = edgeIn.hasData(in.a.bits)
val a_last = edgeIn.last(in.a)
val a_address = edgeIn.address(in.a.bits)
val a_addr_lo = edgeIn.addr_lo(a_address)
val a_source = in.a.bits.source
val a_size = edgeIn.size(in.a.bits)
val a_isPut = edgeIn.hasData(in.a.bits)
val a_last = edgeIn.last(in.a)
// Make sure the fields are within the bounds we assumed
assert (a_source < UInt(BigInt(1) << sourceBits))
assert (a_size < UInt(BigInt(1) << sizeBits))
assert (a_addr_lo < UInt(BigInt(1) << addrBits))
// Make sure the fields are within the bounds we assumed
assert (a_source < UInt(BigInt(1) << sourceBits))
assert (a_size < UInt(BigInt(1) << sizeBits))
assert (a_addr_lo < UInt(BigInt(1) << addrBits))
// Carefully pack/unpack fields into the state we send
val baseEnd = 0
val (sourceEnd, sourceOff) = (sourceBits + baseEnd, baseEnd)
val (sizeEnd, sizeOff) = (sizeBits + sourceEnd, sourceEnd)
val (addrEnd, addrOff) = (addrBits + sizeEnd, sizeEnd)
require (addrEnd == stateBits)
// Carefully pack/unpack fields into the state we send
val baseEnd = 0
val (sourceEnd, sourceOff) = (sourceBits + baseEnd, baseEnd)
val (sizeEnd, sizeOff) = (sizeBits + sourceEnd, sourceEnd)
val (addrEnd, addrOff) = (addrBits + sizeEnd, sizeEnd)
require (addrEnd == stateBits)
val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
val a_id = if (idBits == 0) UInt(0) else a_state
val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
val a_id = if (idBits == 0) UInt(0) else a_state
val r_state = Wire(UInt(width = stateBits))
val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0)
val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0)
val r_state = Wire(UInt(width = stateBits))
val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0)
val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0)
val b_state = Wire(UInt(width = stateBits))
val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0)
val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
val b_state = Wire(UInt(width = stateBits))
val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0)
val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
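// The packed state word is laid out LSB-to-MSB as [ source | size | addr_lo ], so its
// low-order bits are exactly what is crushed into the AXI ID. When `stateBits <= idBits`
// the whole word rides in the ID and no side storage is needed; otherwise the bits above
// the ID are carried through the per-source bank queues below, reassembled into
// `r_state`/`b_state`, and recovered by the bit slices above.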
val r_last = out.r.bits.last
val r_id = out.r.bits.id
val b_id = out_b.bits.id
val r_last = out.r.bits.last
val r_id = out.r.bits.id
val b_id = out_b.bits.id
if (stateBits <= idBits) { // No need for any state tracking
r_state := r_id
b_state := b_id
} else {
val bankIndexBits = min(sourceBits, idBits)
val posBits = max(0, sourceBits - idBits)
val implicitBits = max(idBits, sourceBits)
val bankBits = stateBits - implicitBits
val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
if (stateBits <= idBits) { // No need for any state tracking
r_state := r_id
b_state := b_id
} else {
val bankIndexBits = min(sourceBits, idBits)
val posBits = max(0, sourceBits - idBits)
val implicitBits = max(idBits, sourceBits)
val bankBits = stateBits - implicitBits
val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
val banks = Seq.tabulate(numBanks) { i =>
// We know there can only be as many outstanding requests as TL sources
// However, AXI read and write queues are not mutually FIFO.
// Therefore, we want to pop them individually, but share the storage.
val bypass = combinational && edgeOut.slave.minLatency == 0
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
}
val banks = Seq.tabulate(numBanks) { i =>
// We know there can only be as many outstanding requests as TL sources
// However, AXI read and write queues are not mutually FIFO.
// Therefore, we want to pop them individually, but share the storage.
val bypass = combinational && edgeOut.slave.minLatency == 0
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
banks.zipWithIndex.foreach { case (q, i) =>
// Push a_state into the banks
q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
q.io.enq.bits.pos := a_bankPosition
q.io.enq.bits.data := a_state >> implicitBits
q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1))
// Pop the bank's ways
q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
// The FIFOs must be valid when we're ready to pop them...
assert (q.io.deq(0).valid || !q.io.deq(0).ready)
assert (q.io.deq(1).valid || !q.io.deq(1).ready)
}
val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
}
val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
// We need these Queues because AXI4 queues are irrevocable
val depth = if (combinational) 1 else 2
val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
val out_w = Wire(out.w)
out.w <> Queue.irrevocable(out_w, entries=depth, flow=combinational)
val queue_arw = Queue.irrevocable(out_arw, entries=depth, flow=combinational)
banks.zipWithIndex.foreach { case (q, i) =>
// Push a_state into the banks
q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
q.io.enq.bits.pos := a_bankPosition
q.io.enq.bits.data := a_state >> implicitBits
q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1))
// Pop the bank's ways
q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
// The FIFOs must be valid when we're ready to pop them...
assert (q.io.deq(0).valid || !q.io.deq(0).ready)
assert (q.io.deq(1).valid || !q.io.deq(1).ready)
}
// Fan out the ARW channel to AR and AW
out.ar.bits := queue_arw.bits
out.aw.bits := queue_arw.bits
out.ar.valid := queue_arw.valid && !queue_arw.bits.wen
out.aw.valid := queue_arw.valid && queue_arw.bits.wen
queue_arw.ready := Mux(queue_arw.bits.wen, out.aw.ready, out.ar.ready)
val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
val beatBytes = edgeIn.manager.beatBytes
val maxSize = UInt(log2Ceil(beatBytes))
val doneAW = RegInit(Bool(false))
when (in.a.fire()) { doneAW := !a_last }
def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
val arw = out_arw.bits
arw.wen := a_isPut
arw.id := a_id // truncated
arw.addr := a_address
arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
arw.size := Mux(a_size >= maxSize, maxSize, a_size)
arw.burst := AXI4Parameters.BURST_INCR
arw.lock := UInt(0) // not exclusive (LR/SC unsupported b/c no forward progress guarantee)
arw.cache := UInt(0) // do not allow AXI to modify our transactions
arw.prot := AXI4Parameters.PROT_PRIVILEDGED
arw.qos := UInt(0) // no QoS
in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
out_w.bits.data := in.a.bits.data
out_w.bits.strb := in.a.bits.mask
out_w.bits.last := a_last
// R and B => D arbitration
val r_holds_d = RegInit(Bool(false))
when (out.r.fire()) { r_holds_d := !out.r.bits.last }
// Give R higher priority than B
val r_wins = out.r.valid || r_holds_d
out.r.ready := in.d.ready
out_b.ready := in.d.ready && !r_wins
in.d.valid := Mux(r_wins, out.r.valid, out_b.valid)
val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY
val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
in.d.bits := Mux(r_wins, r_d, b_d)
in.d.bits.data := out.r.bits.data // avoid a costly Mux
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
}
// We need these Queues because AXI4 queues are irrevocable
val depth = if (combinational) 1 else 2
val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
val out_w = Wire(out.w)
out.w <> Queue.irrevocable(out_w, entries=depth, flow=combinational)
val queue_arw = Queue.irrevocable(out_arw, entries=depth, flow=combinational)
// Fan out the ARW channel to AR and AW
out.ar.bits := queue_arw.bits
out.aw.bits := queue_arw.bits
out.ar.valid := queue_arw.valid && !queue_arw.bits.wen
out.aw.valid := queue_arw.valid && queue_arw.bits.wen
queue_arw.ready := Mux(queue_arw.bits.wen, out.aw.ready, out.ar.ready)
val beatBytes = edgeIn.manager.beatBytes
val maxSize = UInt(log2Ceil(beatBytes))
val doneAW = RegInit(Bool(false))
when (in.a.fire()) { doneAW := !a_last }
val arw = out_arw.bits
arw.wen := a_isPut
arw.id := a_id // truncated
arw.addr := a_address
arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
arw.size := Mux(a_size >= maxSize, maxSize, a_size)
arw.burst := AXI4Parameters.BURST_INCR
arw.lock := UInt(0) // not exclusive (LR/SC unsupported b/c no forward progress guarantee)
arw.cache := UInt(0) // do not allow AXI to modify our transactions
arw.prot := AXI4Parameters.PROT_PRIVILEDGED
arw.qos := UInt(0) // no QoS
in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
out_w.bits.data := in.a.bits.data
out_w.bits.strb := in.a.bits.mask
out_w.bits.last := a_last
// R and B => D arbitration
val r_holds_d = RegInit(Bool(false))
when (out.r.fire()) { r_holds_d := !out.r.bits.last }
// Give R higher priority than B
val r_wins = out.r.valid || r_holds_d
out.r.ready := in.d.ready
out_b.ready := in.d.ready && !r_wins
in.d.valid := Mux(r_wins, out.r.valid, out_b.valid)
val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY
val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
in.d.bits := Mux(r_wins, r_d, b_d)
in.d.bits.data := out.r.bits.data // avoid a costly Mux
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
}
}

View File

@ -12,8 +12,8 @@ import scala.math.{min,max}
class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyModule
{
val node = TLAdapterNode(
clientFn = { case Seq(c) => c },
managerFn = { case Seq(m) => m.copy(beatBytes = innerBeatBytes) })
clientFn = { case c => c },
managerFn = { case m => m.copy(beatBytes = innerBeatBytes) })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@ -139,27 +139,24 @@ class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyMod
}
}
val edgeOut = node.edgesOut(0)
val edgeIn = node.edgesIn(0)
val in = io.in(0)
val out = io.out(0)
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
splice(edgeIn, in.a, edgeOut, out.a)
splice(edgeOut, out.d, edgeIn, in.d)
splice(edgeIn, in.a, edgeOut, out.a)
splice(edgeOut, out.d, edgeIn, in.d)
if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
splice(edgeOut, out.b, edgeIn, in.b)
splice(edgeIn, in.c, edgeOut, out.c)
in.e.ready := out.e.ready
out.e.valid := in.e.valid
out.e.bits := in.e.bits
} else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
splice(edgeOut, out.b, edgeIn, in.b)
splice(edgeIn, in.c, edgeOut, out.c)
in.e.ready := out.e.ready
out.e.valid := in.e.valid
out.e.bits := in.e.bits
} else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
}
}
}
}

View File

@ -34,7 +34,7 @@ class TLXbar(policy: TLArbiter.Policy = TLArbiter.lowestIndexFirst)(implicit p:
}
}
val node = TLAdapterNode(
val node = TLNexusNode(
numClientPorts = 1 to 32,
numManagerPorts = 1 to 32,
clientFn = { seq =>