commit 1e05fc052510c2a5daea779564996d3986970082
Author: Henry Cook
Date:   Wed Apr 29 13:18:26 2015 -0700

    First pages commit

diff --git a/uncore/.gitignore b/uncore/.gitignore
new file mode 100644
index 00000000..555feb41
--- /dev/null
+++ b/uncore/.gitignore
@@ -0,0 +1,2 @@
+target/
+project/target/
diff --git a/uncore/LICENSE b/uncore/LICENSE
new file mode 100644
index 00000000..7cff15e4
--- /dev/null
+++ b/uncore/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2012-2014, The Regents of the University of California
+(Regents). All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. Neither the name of the Regents nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/uncore/README.md b/uncore/README.md
new file mode 100644
index 00000000..003f9caa
--- /dev/null
+++ b/uncore/README.md
@@ -0,0 +1,11 @@
+Uncore Library
+==============
+
+This is the repository for uncore components associated with the Rocket chip
+project. To use these modules, include this repo as a git submodule within
+your chip repository and add it as a Project in your chip's build.scala.
+These components depend only on Chisel, i.e.
+
+    lazy val uncore = Project("uncore", file("uncore"), settings = buildSettings) dependsOn(chisel)
+
+Documentation about the uncore library will come in the near future.
diff --git a/uncore/build.sbt b/uncore/build.sbt
new file mode 100644
index 00000000..d78d02ca
--- /dev/null
+++ b/uncore/build.sbt
@@ -0,0 +1,13 @@
+organization := "edu.berkeley.cs"
+
+version := "2.0"
+
+name := "uncore"
+
+scalaVersion := "2.10.2"
+
+site.settings
+
+ghpages.settings
+
+git.remoteRepo := "git@github.com:ucb-bar/uncore.git"
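As the README above sketches, a chip repository consumes this library as a git submodule plus an sbt subproject. A minimal build.scala wiring, hedged as a sketch (the chisel and chip project names and the use of default settings are assumptions, not part of this commit):

    import sbt._
    import Keys._

    object ChipBuild extends Build {
      val buildSettings = Defaults.defaultSettings
      // Chisel as a sibling subproject; uncore depends only on it
      lazy val chisel = Project("chisel", file("chisel"), settings = buildSettings)
      lazy val uncore = Project("uncore", file("uncore"), settings = buildSettings) dependsOn(chisel)
      // The top-level chip project pulls in uncore (and transitively chisel)
      lazy val chip = Project("chip", file("."), settings = buildSettings) dependsOn(uncore)
    }

The chisel-dependent.sbt below instead substitutes a published Chisel when one is named on the command line, e.g. sbt -DchiselVersion=2.2.27 compile (version number illustrative).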
diff --git a/uncore/chisel-dependent.sbt b/uncore/chisel-dependent.sbt
new file mode 100644
index 00000000..88eb615c
--- /dev/null
+++ b/uncore/chisel-dependent.sbt
@@ -0,0 +1,8 @@
+// Provide a managed dependency on chisel if -DchiselVersion="" is
+// supplied on the command line.
+
+val chiselVersion_u = System.getProperty("chiselVersion", "None")
+
+// _u is a temporary fix until sbt 0.13.6; see https://github.com/sbt/sbt/issues/1465
+
+libraryDependencies ++= ( if (chiselVersion_u != "None" ) ("edu.berkeley.cs" %% "chisel" % chiselVersion_u) :: Nil; else Nil)
diff --git a/uncore/doc/TileLink0.3.1Specification.pdf b/uncore/doc/TileLink0.3.1Specification.pdf
new file mode 100644
index 00000000..23666814
Binary files /dev/null and b/uncore/doc/TileLink0.3.1Specification.pdf differ
diff --git a/uncore/index.html b/uncore/index.html
new file mode 100644
index 00000000..03f98016
--- /dev/null
+++ b/uncore/index.html
@@ -0,0 +1 @@
+My GitHub Page
diff --git a/uncore/src/main/scala/bigmem.scala b/uncore/src/main/scala/bigmem.scala
new file mode 100644
index 00000000..fbbc4c07
--- /dev/null
+++ b/uncore/src/main/scala/bigmem.scala
@@ -0,0 +1,80 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+
+class BigMem[T <: Data](n: Int, preLatency: Int, postLatency: Int, leaf: Mem[UInt], noMask: Boolean = false)(gen: => T) extends Module
+{
+  class Inputs extends Bundle {
+    val addr = UInt(INPUT, log2Up(n))
+    val rw = Bool(INPUT)
+    val wdata = gen.asInput
+    val wmask = gen.asInput
+    override def clone = new Inputs().asInstanceOf[this.type]
+  }
+  val io = new Bundle {
+    val in = Valid(new Inputs).flip
+    val rdata = gen.asOutput
+  }
+  val data = gen
+  val colMux = if (2*data.getWidth <= leaf.data.getWidth && n > leaf.n) 1 << math.floor(math.log(leaf.data.getWidth/data.getWidth)/math.log(2)).toInt else 1
+  val nWide = if (data.getWidth > leaf.data.getWidth) 1+(data.getWidth-1)/leaf.data.getWidth else 1
+  val nDeep = if (n > colMux*leaf.n) 1+(n-1)/(colMux*leaf.n) else 1
+  if (nDeep > 1 || colMux > 1)
+    require(isPow2(n) && isPow2(leaf.n))
+
+  val rdataDeep = Vec.fill(nDeep){Bits()}
+  val rdataSel = Vec.fill(nDeep){Bool()}
+  for (i <- 0 until nDeep) {
+    val in = Pipe(io.in.valid && (if (nDeep == 1) Bool(true) else UInt(i) === io.in.bits.addr(log2Up(n)-1, log2Up(n/nDeep))), io.in.bits, preLatency)
+    val idx = in.bits.addr(log2Up(n/nDeep/colMux)-1, 0)
+    val wdata = in.bits.wdata.toBits
+    val wmask = in.bits.wmask.toBits
+    val ren = in.valid && !in.bits.rw
+    val reg_ren = Reg(next=ren)
+    val rdata = Vec.fill(nWide){Bits()}
+
+    val r = Pipe(ren, in.bits.addr, postLatency)
+
+    for (j <- 0 until nWide) {
+      val mem = leaf.clone
+      var dout: Bits = null
+      val ridx = if (postLatency > 0) Reg(Bits()) else null
+
+      var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.getWidth*(j+1))-1, leaf.data.getWidth*j))
+      if (colMux > 1)
+        wmask0 = wmask0 & FillInterleaved(gen.getWidth, UIntToOH(in.bits.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux)))
+      val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.getWidth*(j+1))-1, leaf.data.getWidth*j))
+      when (in.valid) {
+        when (in.bits.rw) {
+          if (noMask)
+            mem.write(idx, wdata0)
+          else
+            mem.write(idx, wdata0, wmask0)
+        }
+        .otherwise { if (postLatency > 0) ridx := idx }
+      }
+
+      if (postLatency == 0) {
+        dout = mem(idx)
+      } else if (postLatency == 1) {
+        dout = mem(ridx)
+      } else
+        dout = Pipe(reg_ren, mem(ridx), postLatency-1).bits
+
+      rdata(j) := dout
+    }
+    val rdataWide = rdata.reduceLeft((x, y) => Cat(y, x))
+
+    var colMuxOut = rdataWide
+    if (colMux > 1) {
+      val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.getWidth*(k+1)-1, gen.getWidth*k)))
+      colMuxOut = colMuxIn(r.bits(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)))
+    }
+
+    rdataDeep(i) := colMuxOut
+    rdataSel(i) := r.valid
+  }
+
+  io.rdata := Mux1H(rdataSel, rdataDeep)
+}
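BigMem composes a small "leaf" SRAM into a larger logical memory: nWide leaves side by side for width, nDeep banks selected by high address bits, and colMux columns sharing one leaf row. A hedged instantiation sketch (the sizes and the surrounding module are illustrative only, in the Chisel 2 style used here):

    class BigMemExample extends Module {
      val io = new Bundle { val rdata = Bits(OUTPUT, 128) }
      // 1024-entry x 64-bit leaf, composed into a 4096-entry x 128-bit memory
      val leaf = Mem(UInt(width = 64), 1024, seqRead = true)
      val bigmem = Module(new BigMem(4096, 0, 1, leaf)(Bits(width = 128)))
      bigmem.io.in.valid := Bool(false)   // request port tied off in this sketch
      bigmem.io.in.bits.rw := Bool(false)
      bigmem.io.in.bits.addr := UInt(0)
      bigmem.io.in.bits.wdata := Bits(0)
      bigmem.io.in.bits.wmask := Bits(0)
      io.rdata := bigmem.io.rdata         // read data returns a few cycles later,
                                          // set by preLatency and postLatency
    }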
diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala
new file mode 100644
index 00000000..b554b231
--- /dev/null
+++ b/uncore/src/main/scala/broadcast.scala
@@ -0,0 +1,387 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+
+case object L2StoreDataQueueDepth extends Field[Int]
+
+trait BroadcastHubParameters extends CoherenceAgentParameters {
+  val sdqDepth = params(L2StoreDataQueueDepth)*innerDataBeats
+  val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(sdqDepth))
+  val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases
+}
+
+class DataQueueLocation extends Bundle with BroadcastHubParameters {
+  val idx = UInt(width = dqIdxBits)
+  val loc = UInt(width = log2Ceil(nDataQueueLocations))
+}
+
+object DataQueueLocation {
+  def apply(idx: UInt, loc: UInt) = {
+    val d = new DataQueueLocation
+    d.idx := idx
+    d.loc := loc
+    d
+  }
+}
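DataQueueLocation packs a queue index together with a tag naming which of the three data sources a beat lives in, so the data field of an internal Acquire can carry a pointer instead of a payload. A round-trip sketch using the definitions above (sdq_alloc_id and inStoreQueue are names from the hub below):

    // pack: stash the SDQ slot number in the acquire's data field
    val dqloc = DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits
    // unpack at the outer memory port to recover the pointer
    val ptr = new DataQueueLocation().fromBits(dqloc)
    // ptr.loc selects among sdq / vwbdq / release bypass; ptr.idx picks the entry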
+
+class L2BroadcastHub extends ManagerCoherenceAgent
+    with BroadcastHubParameters {
+  val internalDataBits = new DataQueueLocation().getWidth
+  val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)
+
+  // Create SHRs for outstanding transactions
+  val trackerList = (0 until nReleaseTransactors).map(id =>
+    Module(new BroadcastVoluntaryReleaseTracker(id), {case TLDataBits => internalDataBits})) ++
+    (nReleaseTransactors until nTransactors).map(id =>
+      Module(new BroadcastAcquireTracker(id), {case TLDataBits => internalDataBits}))
+
+  // Propagate incoherence flags
+  trackerList.map(_.io.incoherent := io.incoherent.toBits)
+
+  // Queue to store impending Put data
+  val sdq = Vec.fill(sdqDepth){ Reg(io.iacq().data) }
+  val sdq_val = Reg(init=Bits(0, sdqDepth))
+  val sdq_alloc_id = PriorityEncoder(~sdq_val)
+  val sdq_rdy = !sdq_val.andR
+  val sdq_enq = io.inner.acquire.fire() && io.iacq().hasData()
+  when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data }
+
+  // Handle acquire transaction initiation
+  val trackerAcquireIOs = trackerList.map(_.io.inner.acquire)
+  val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits
+  val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits
+  val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits
+  val acquire_idx = Mux(acquireMatches.orR,
+                      PriorityEncoder(acquireMatches),
+                      PriorityEncoder(acquireReadys))
+
+  val block_acquires = acquireConflicts.orR || !sdq_rdy
+  io.inner.acquire.ready := acquireReadys.orR && !block_acquires
+  trackerAcquireIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.bits := io.inner.acquire.bits
+      tracker.bits.data := DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits
+      tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i))
+  }
+
+  // Queue to store impending Voluntary Release data
+  val voluntary = io.irel().isVoluntary()
+  val vwbdq_enq = io.inner.release.fire() && voluntary && io.irel().hasData()
+  val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, innerDataBeats) //TODO Zero width
+  val vwbdq = Vec.fill(innerDataBeats){ Reg(io.irel().data) } //TODO Assumes nReleaseTransactors == 1
+  when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data }
+
+  // Handle releases, which might be voluntary and might have data
+  val trackerReleaseIOs = trackerList.map(_.io.inner.release)
+  val releaseReadys = Vec(trackerReleaseIOs.map(_.ready)).toBits
+  val releaseMatches = Vec(trackerList.map(_.io.has_release_match)).toBits
+  val release_idx = PriorityEncoder(releaseMatches)
+  io.inner.release.ready := releaseReadys(release_idx)
+  trackerReleaseIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.valid := io.inner.release.valid && (release_idx === UInt(i))
+      tracker.bits := io.inner.release.bits
+      tracker.bits.data := DataQueueLocation(rel_data_cnt,
+                             (if(i < nReleaseTransactors) inVolWBQueue
+                              else inClientReleaseQueue)).toBits
+  }
+  assert(!(io.inner.release.valid && !releaseMatches.orR),
+    "Non-voluntary release should always have a Tracker waiting for it.")
+
+  // Wire probe requests and grant reply to clients, finish acks from clients
+  // Note that we bypass the Grant data subbundles
+  io.inner.grant.bits.data := io.outer.grant.bits.data
+  io.inner.grant.bits.addr_beat := io.outer.grant.bits.addr_beat
+  doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
+  doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
+  doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+  // Create an arbiter for the one memory port
+  val outer_arb = Module(new ClientUncachedTileLinkIOArbiter(trackerList.size),
+                    { case TLId => params(OuterTLId)
+                      case TLDataBits => internalDataBits })
+  outer_arb.io.in <> trackerList.map(_.io.outer)
+  // Get the pending data out of the store data queue
+  val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
+  val is_in_sdq = outer_data_ptr.loc === inStoreQueue
+  val free_sdq = io.outer.acquire.fire() &&
+                  io.outer.acquire.bits.hasData() &&
+                  outer_data_ptr.loc === inStoreQueue
+  io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
+                                  inStoreQueue -> sdq(outer_data_ptr.idx),
+                                  inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
+  io.outer <> outer_arb.io.out
+
+  // Update SDQ valid bits
+  when (io.outer.acquire.valid || sdq_enq) {
+    sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) |
+               PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
+  }
+}
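The sdq_val update above is a bit-vector freelist: PriorityEncoderOH claims the lowest free slot when a Put beat enqueues, and UIntToOH releases a slot when the outer Acquire that points at it fires. The idiom in isolation (a sketch; depth, do_alloc, do_free, and free_idx are hypothetical names):

    val slot_valid = Reg(init = Bits(0, width = depth))
    val alloc_oh = PriorityEncoderOH(~slot_valid)   // lowest free slot, one-hot
    val free_oh  = UIntToOH(free_idx)               // slot being retired, one-hot
    slot_valid := slot_valid & ~(free_oh & Fill(depth, do_free)) |
                  alloc_oh & Fill(depth, do_alloc)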
+
+class BroadcastXactTracker extends XactTracker {
+  val io = new ManagerXactTrackerIO
+}
+
+class BroadcastVoluntaryReleaseTracker(trackerId: Int) extends BroadcastXactTracker {
+  val s_idle :: s_outer :: s_grant :: s_ack :: Nil = Enum(UInt(), 4)
+  val state = Reg(init=s_idle)
+
+  val xact = Reg(Bundle(new ReleaseFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(io.irel().data.clone) }
+  val coh = ManagerMetadata.onReset
+
+  val collect_irel_data = Reg(init=Bool(false))
+  val irel_data_valid = Reg(init=Bits(0, width = innerDataBeats))
+  val irel_data_done = connectIncomingDataBeatCounter(io.inner.release)
+  val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire)
+
+  io.has_acquire_conflict := Bool(false)
+  io.has_release_match := io.irel().isVoluntary()
+  io.has_acquire_match := Bool(false)
+
+  io.outer.acquire.valid := Bool(false)
+  io.outer.grant.ready := Bool(false)
+  io.inner.acquire.ready := Bool(false)
+  io.inner.probe.valid := Bool(false)
+  io.inner.release.ready := Bool(false)
+  io.inner.grant.valid := Bool(false)
+  io.inner.finish.ready := Bool(false)
+
+  io.inner.grant.bits := coh.makeGrant(xact, UInt(trackerId))
+
+  //TODO: Use io.outer.release instead?
+  io.outer.acquire.bits := Bundle(
+    PutBlock(
+      client_xact_id = UInt(trackerId),
+      addr_block = xact.addr_block,
+      addr_beat = oacq_data_cnt,
+      data = data_buffer(oacq_data_cnt)))(outerTLParams)
+
+  when(collect_irel_data) {
+    io.inner.release.ready := Bool(true)
+    when(io.inner.release.valid) {
+      data_buffer(io.irel().addr_beat) := io.irel().data
+      irel_data_valid(io.irel().addr_beat) := Bool(true)
+    }
+    when(irel_data_done) { collect_irel_data := Bool(false) }
+  }
+
+  switch (state) {
+    is(s_idle) {
+      io.inner.release.ready := Bool(true)
+      when( io.inner.release.valid ) {
+        xact := io.irel()
+        data_buffer(UInt(0)) := io.irel().data
+        collect_irel_data := io.irel().hasMultibeatData()
+        irel_data_valid := io.irel().hasData() << io.irel().addr_beat
+        state := Mux(io.irel().hasData(), s_outer,
+                   Mux(io.irel().requiresAck(), s_ack, s_idle))
+      }
+    }
+    is(s_outer) {
+      io.outer.acquire.valid := !collect_irel_data || irel_data_valid(oacq_data_cnt)
+      when(oacq_data_done) {
+        state := s_grant // converted irel to oacq, so expect grant TODO: Mux(xact.requiresAck(), s_grant, s_idle) ?
+      }
+    }
+    is(s_grant) { // Forward the Grant.voluntaryAck
+      io.outer.grant.ready := io.inner.grant.ready
+      io.inner.grant.valid := io.outer.grant.valid
+      when(io.inner.grant.fire()) {
+        state := Mux(io.ignt().requiresAck(), s_ack, s_idle)
+      }
+    }
+    is(s_ack) {
+      // TODO: This state is unnecessary if no client will ever issue the
+      // pending Acquire that caused this writeback until it receives the
+      // Grant.voluntaryAck for this writeback
+      io.inner.finish.ready := Bool(true)
+      when(io.inner.finish.valid) { state := s_idle }
+    }
+  }
+}
+
+class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
+  val s_idle :: s_probe :: s_mem_read :: s_mem_write :: s_make_grant :: s_mem_resp :: s_ack :: Nil = Enum(UInt(), 7)
+  val state = Reg(init=s_idle)
+
+  val xact = Reg(Bundle(new AcquireFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(io.iacq().data.clone) }
+  val coh = ManagerMetadata.onReset
+
+  assert(!(state != s_idle && xact.isBuiltInType() &&
+      Vec(Acquire.getType, Acquire.putType, Acquire.putAtomicType,
+        Acquire.prefetchType).contains(xact.a_type)),
+    "Broadcast Hub does not support PutAtomics, subblock Gets/Puts, or prefetches") // TODO
+
+  val release_count = Reg(init=UInt(0, width = log2Up(io.inner.tlNCachingClients+1)))
+  val pending_probes = Reg(init=Bits(0, width = io.inner.tlNCachingClients))
+  val curr_p_id = PriorityEncoder(pending_probes)
+  val full_sharers = coh.full()
+  val probe_self = io.inner.acquire.bits.requiresSelfProbe()
+  val mask_self_true = UInt(UInt(1) << io.inner.acquire.bits.client_id, width = io.inner.tlNCachingClients)
+  val mask_self_false = ~UInt(UInt(1) << io.inner.acquire.bits.client_id, width = io.inner.tlNCachingClients)
+  val mask_self = Mux(probe_self, full_sharers | mask_self_true, full_sharers & mask_self_false)
+  val mask_incoherent = mask_self & ~io.incoherent.toBits
+
+  val collect_iacq_data = Reg(init=Bool(false))
+  val iacq_data_valid = Reg(init=Bits(0, width = innerDataBeats))
+  val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire)
+  val irel_data_done = connectIncomingDataBeatCounter(io.inner.release)
+  val (ignt_data_cnt, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant)
+  val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire)
+  val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant)
+  val pending_ognt_ack = Reg(init=Bool(false))
+  val pending_outer_write = xact.hasData()
+  val pending_outer_write_ = io.iacq().hasData()
+  val pending_outer_read = io.ignt().hasData()
+  val pending_outer_read_ = coh.makeGrant(io.iacq(), UInt(trackerId)).hasData()
+
+  io.has_acquire_conflict := xact.conflicts(io.iacq()) &&
+                               (state != s_idle) &&
+                               !collect_iacq_data
+  io.has_acquire_match := xact.conflicts(io.iacq()) &&
+                            collect_iacq_data
+  io.has_release_match := xact.conflicts(io.irel()) &&
+                            !io.irel().isVoluntary() &&
+                            (state === s_probe)
+
+  val outer_write_acq = Bundle(PutBlock(
+                          client_xact_id = UInt(trackerId),
+                          addr_block = xact.addr_block,
+                          addr_beat = oacq_data_cnt,
+                          data = data_buffer(oacq_data_cnt)))(outerTLParams)
+  val outer_write_rel = Bundle(PutBlock(
+                          client_xact_id = UInt(trackerId),
+                          addr_block = xact.addr_block,
+                          addr_beat = io.irel().addr_beat,
+                          data = io.irel().data))(outerTLParams)
+  val outer_read = Bundle(GetBlock(
+                     client_xact_id = UInt(trackerId),
+                     addr_block = xact.addr_block))(outerTLParams)
+
+  io.outer.acquire.valid := Bool(false)
+  io.outer.acquire.bits := outer_read //default
+  io.outer.grant.ready := Bool(false)
+
+  io.inner.probe.valid := Bool(false)
+  io.inner.probe.bits := coh.makeProbe(curr_p_id, xact)
+
+  io.inner.grant.valid := Bool(false)
+  io.inner.grant.bits := coh.makeGrant(xact, UInt(trackerId)) // Data bypassed in parent
+
+  io.inner.acquire.ready := Bool(false)
+  io.inner.release.ready := Bool(false)
+  io.inner.finish.ready := Bool(false)
+
+  assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() &&
+      io.iacq().client_id != xact.client_id),
+    "AcquireTracker accepted data beat from different network source than initial request.")
+
+  assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() &&
+      io.iacq().client_xact_id != xact.client_xact_id),
+    "AcquireTracker accepted data beat from different client transaction than initial request.")
+
+  assert(!(state === s_idle && io.inner.acquire.fire() &&
+      io.iacq().addr_beat != UInt(0)),
+    "AcquireTracker initialized with a tail data beat.")
+
+  when(collect_iacq_data) {
+    io.inner.acquire.ready := Bool(true)
+    when(io.inner.acquire.valid) {
+      data_buffer(io.iacq().addr_beat) := io.iacq().data
+      iacq_data_valid(io.iacq().addr_beat) := Bool(true)
+    }
+    when(iacq_data_done) { collect_iacq_data := Bool(false) }
+  }
+
+  when(pending_ognt_ack) {
+    io.outer.grant.ready := Bool(true)
+    when(io.outer.grant.valid) { pending_ognt_ack := Bool(false) }
+    //TODO add finish queue if this isn't the last level manager
+  }
+
+  switch (state) {
+    is(s_idle) {
+      io.inner.acquire.ready := Bool(true)
+      when(io.inner.acquire.valid) {
+        xact := io.iacq()
+        data_buffer(UInt(0)) := io.iacq().data
+        collect_iacq_data := io.iacq().hasMultibeatData()
+        iacq_data_valid := io.iacq().hasData() << io.iacq().addr_beat
+        val needs_probes = mask_incoherent.orR
+        when(needs_probes) {
+          pending_probes := mask_incoherent
+          release_count := PopCount(mask_incoherent)
+        }
+        state := Mux(needs_probes, s_probe,
+                   Mux(pending_outer_write_, s_mem_write,
+                     Mux(pending_outer_read_, s_mem_read, s_make_grant)))
+      }
+    }
+    is(s_probe) {
+      // Generate probes
+      io.inner.probe.valid := pending_probes.orR
+      when(io.inner.probe.ready) {
+        pending_probes := pending_probes & ~UIntToOH(curr_p_id)
+      }
+
+      // Handle releases, which may have data to be written back
+      io.inner.release.ready := !io.irel().hasData() || io.outer.acquire.ready
+      when(io.inner.release.valid) {
+        when(io.irel().hasData()) {
+          io.outer.acquire.valid := Bool(true)
+          io.outer.acquire.bits := outer_write_rel
+          when(io.outer.acquire.ready) {
+            when(oacq_data_done) {
+              pending_ognt_ack := Bool(true)
+              release_count := release_count - UInt(1)
+              when(release_count === UInt(1)) {
+                state := Mux(pending_outer_write, s_mem_write,
+                           Mux(pending_outer_read, s_mem_read, s_make_grant))
+              }
+            }
+          }
+        } .otherwise {
+          release_count := release_count - UInt(1)
+          when(release_count === UInt(1)) {
+            state := Mux(pending_outer_write, s_mem_write,
+                       Mux(pending_outer_read, s_mem_read, s_make_grant))
+          }
+        }
+      }
+    }
+    is(s_mem_write) { // Write data to outer memory
+      io.outer.acquire.valid := !pending_ognt_ack || !collect_iacq_data || iacq_data_valid(oacq_data_cnt)
+      io.outer.acquire.bits := outer_write_acq
+      when(oacq_data_done) {
+        pending_ognt_ack := Bool(true)
+        state := Mux(pending_outer_read, s_mem_read, s_mem_resp)
+      }
+    }
+    is(s_mem_read) { // Read data from outer memory (possibly what was just written)
+      io.outer.acquire.valid := !pending_ognt_ack
+      io.outer.acquire.bits := outer_read
+      when(io.outer.acquire.fire()) { state := s_mem_resp }
+    }
+    is(s_mem_resp) { // Wait to forward grants from outer memory
+      io.outer.grant.ready := io.inner.grant.ready
+      io.inner.grant.valid := io.outer.grant.valid
+      when(ignt_data_done) {
+        state := Mux(io.ignt().requiresAck(), s_ack, s_idle)
+      }
+    }
+    is(s_make_grant) { // Manufacture a local grant (some kind of permission upgrade)
+      io.inner.grant.valid := Bool(true)
+      when(io.inner.grant.ready) {
+        state := Mux(io.ignt().requiresAck(), s_ack, s_idle)
+      }
+    }
+    is(s_ack) { // Wait for transaction to complete
+      io.inner.finish.ready := Bool(true)
+      when(io.inner.finish.valid) { state := s_idle }
+    }
+  }
+}
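broadcast.scala thus forms the simplest complete coherence manager: no local data storage, probes broadcast to every relevant caching client, and all block data sourced from or sunk into outer memory. A hedged sketch of instantiating it, following the two-argument Module pattern used for the trackers above (the TLId strings, port names, and incoherent-vector shape are assumptions):

    val hub = Module(new L2BroadcastHub,
      { case InnerTLId => "L1toL2"
        case OuterTLId => "L2toMC" })
    hub.io.inner <> clientNetwork.io.manager   // TileLink toward the L1s (hypothetical)
    outerMemPort <> hub.io.outer               // TileLink toward memory (hypothetical)
    hub.io.incoherent := Vec.fill(nCachingClients){ Bool(false) }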
diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala
new file mode 100644
index 00000000..7fcd1408
--- /dev/null
+++ b/uncore/src/main/scala/cache.scala
@@ -0,0 +1,1078 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+import scala.reflect.ClassTag
+
+case object CacheName extends Field[String]
+case object NSets extends Field[Int]
+case object NWays extends Field[Int]
+case object RowBits extends Field[Int]
+case object Replacer extends Field[() => ReplacementPolicy]
+case object AmoAluOperandBits extends Field[Int]
+case object L2DirectoryRepresentation extends Field[DirectoryRepresentation]
+case object NPrimaryMisses extends Field[Int]
+case object NSecondaryMisses extends Field[Int]
+case object CacheBlockBytes extends Field[Int]
+case object CacheBlockOffsetBits extends Field[Int]
+case object ECCCode extends Field[Option[Code]]
+
+abstract trait CacheParameters extends UsesParameters {
+  val nSets = params(NSets)
+  val blockOffBits = params(CacheBlockOffsetBits)
+  val idxBits = log2Up(nSets)
+  val untagBits = blockOffBits + idxBits
+  val tagBits = params(PAddrBits) - untagBits
+  val nWays = params(NWays)
+  val wayBits = log2Up(nWays)
+  val isDM = nWays == 1
+  val rowBits = params(RowBits)
+  val rowBytes = rowBits/8
+  val rowOffBits = log2Up(rowBytes)
+  val code = params(ECCCode).getOrElse(new IdentityCode)
+}
+
+abstract class CacheBundle extends Bundle with CacheParameters
+abstract class CacheModule extends Module with CacheParameters
+
+class StoreGen(typ: Bits, addr: Bits, dat: Bits) {
+  val byte = typ === MT_B || typ === MT_BU
+  val half = typ === MT_H || typ === MT_HU
+  val word = typ === MT_W || typ === MT_WU
+  def mask =
+    Mux(byte, Bits(  1) <<     addr(2,0),
+    Mux(half, Bits(  3) << Cat(addr(2,1), Bits(0,1)),
+    Mux(word, Bits( 15) << Cat(addr(2),   Bits(0,2)),
+              Bits(255))))
+  def data =
+    Mux(byte, Fill(8, dat( 7,0)),
+    Mux(half, Fill(4, dat(15,0)),
+              wordData))
+  lazy val wordData =
+    Mux(word, Fill(2, dat(31,0)),
+              dat)
+}
+
+class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) {
+  val t = new StoreGen(typ, addr, dat)
+  val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D
+
+  val wordShift = Mux(addr(2), dat(63,32), dat(31,0))
+  val word = Cat(Mux(t.word, Fill(32, sign && wordShift(31)), dat(63,32)), wordShift)
+  val halfShift = Mux(addr(1), word(31,16), word(15,0))
+  val half = Cat(Mux(t.half, Fill(48, sign && halfShift(15)), word(63,16)), halfShift)
+  val byteShift = Mux(zero, UInt(0), Mux(addr(0), half(15,8), half(7,0)))
+  val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift)
+}
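StoreGen turns an access type and the low address bits into a byte mask and replicated write data; LoadGen inverts the process, muxing the addressed subword down and sign- or zero-extending it. A worked sketch for a halfword at byte offset 6 of a 64-bit row (wdata and rdata are hypothetical wires):

    val sg = new StoreGen(MT_H, UInt(6), wdata)
    // half: mask = Bits(3) << Cat(addr(2,1), Bits(0,1)) = 0x3 << 6, i.e. byte lanes 7:6
    // data = Fill(4, wdata(15,0)), replicated so lanes 7:6 see the halfword
    val lg = new LoadGen(MT_H, UInt(6), rdata, Bool(false))
    // lg.half ends in rdata(63,48), sign-extended because MT_H is a signed type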
+
+class AMOALU extends CacheModule {
+  val operandBits = params(AmoAluOperandBits)
+  require(operandBits == 64)
+  val io = new Bundle {
+    val addr = Bits(INPUT, blockOffBits)
+    val cmd = Bits(INPUT, M_SZ)
+    val typ = Bits(INPUT, MT_SZ)
+    val lhs = Bits(INPUT, operandBits)
+    val rhs = Bits(INPUT, operandBits)
+    val out = Bits(OUTPUT, operandBits)
+  }
+
+  val storegen = new StoreGen(io.typ, io.addr, io.rhs)
+  val rhs = storegen.wordData
+
+  val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
+  val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
+  val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
+  val word = io.typ === MT_W || io.typ === MT_WU || // Logic minimization:
+               io.typ === MT_B || io.typ === MT_BU
+
+  val mask = SInt(-1,64) ^ (io.addr(2) << UInt(31))
+  val adder_out = (io.lhs & mask).toUInt + (rhs & mask)
+
+  val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
+  val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
+  val lt_lo = io.lhs(31,0) < rhs(31,0)
+  val lt_hi = io.lhs(63,32) < rhs(63,32)
+  val eq_hi = io.lhs(63,32) === rhs(63,32)
+  val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
+  val less = Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
+
+  val out = Mux(io.cmd === M_XA_ADD, adder_out,
+            Mux(io.cmd === M_XA_AND, io.lhs & rhs,
+            Mux(io.cmd === M_XA_OR,  io.lhs | rhs,
+            Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
+            Mux(Mux(less, min, max), io.lhs,
+                storegen.data)))))
+
+  val wmask = FillInterleaved(8, storegen.mask)
+  io.out := wmask & out | ~wmask & io.lhs
+}
+
+abstract class ReplacementPolicy {
+  def way: UInt
+  def miss: Unit
+  def hit: Unit
+}
+
+class RandomReplacement(ways: Int) extends ReplacementPolicy {
+  private val replace = Bool()
+  replace := Bool(false)
+  val lfsr = LFSR16(replace)
+
+  def way = if(ways == 1) UInt(0) else lfsr(log2Up(ways)-1,0)
+  def miss = replace := Bool(true)
+  def hit = {}
+}
+
+abstract class Metadata extends CacheBundle {
+  val tag = Bits(width = tagBits)
+  val coh: CoherenceMetadata
+}
+
+class MetaReadReq extends CacheBundle {
+  val idx = Bits(width = idxBits)
+}
+
+class MetaWriteReq[T <: Metadata](gen: T) extends MetaReadReq {
+  val way_en = Bits(width = nWays)
+  val data = gen.clone
+  override def clone = new MetaWriteReq(gen).asInstanceOf[this.type]
+}
+
+class MetadataArray[T <: Metadata](makeRstVal: () => T) extends CacheModule {
+  val rstVal = makeRstVal()
+  val io = new Bundle {
+    val read = Decoupled(new MetaReadReq).flip
+    val write = Decoupled(new MetaWriteReq(rstVal.clone)).flip
+    val resp = Vec.fill(nWays){rstVal.clone.asOutput}
+  }
+  val rst_cnt = Reg(init=UInt(0, log2Up(nSets+1)))
+  val rst = rst_cnt < UInt(nSets)
+  val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
+  val wdata = Mux(rst, rstVal, io.write.bits.data).toBits
+  val wmask = Mux(rst, SInt(-1), io.write.bits.way_en).toUInt
+  when (rst) { rst_cnt := rst_cnt+UInt(1) }
+
+  val metabits = rstVal.getWidth
+  val tag_arr = Mem(UInt(width = metabits*nWays), nSets, seqRead = true)
+  when (rst || io.write.valid) {
+    tag_arr.write(waddr, Fill(nWays, wdata), FillInterleaved(metabits, wmask))
+  }
+
+  val tags = tag_arr(RegEnable(io.read.bits.idx, io.read.valid))
+  io.resp := io.resp.fromBits(tags)
+  io.read.ready := !rst && !io.write.valid // so really this could be a 6T RAM
+  io.write.ready := !rst
+}
+
+abstract trait L2HellaCacheParameters extends CacheParameters with CoherenceAgentParameters {
+  val idxMSB = idxBits-1
+  val idxLSB = 0
+  val blockAddrBits = params(TLBlockAddrBits)
+  val refillCyclesPerBeat = outerDataBits/rowBits
+  val refillCycles = refillCyclesPerBeat*outerDataBeats
+  val internalDataBeats = params(CacheBlockBytes)*8/rowBits
+  require(refillCyclesPerBeat == 1)
+  val amoAluOperandBits = params(AmoAluOperandBits)
+  require(amoAluOperandBits <= innerDataBits)
+  require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states
+  val nSecondaryMisses = params(NSecondaryMisses)
+  val isLastLevelCache = true
+  val ignoresWriteMask = !params(ECCCode).isEmpty
+}
+
+abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters
+abstract class L2HellaCacheModule extends Module with L2HellaCacheParameters {
+  def doInternalOutputArbitration[T <: Data : ClassTag](
+      out: DecoupledIO[T],
+      ins: Seq[DecoupledIO[T]]) {
+    val arb = Module(new RRArbiter(out.bits.clone, ins.size))
+    out <> arb.io.out
+    arb.io.in <> ins
+  }
+
+  def doInternalInputRouting[T <: HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) {
+    outs.map(_.bits := in.bits)
+    outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && in.bits.id === UInt(i) }
+  }
+}
+
+trait HasL2Id extends Bundle with CoherenceAgentParameters {
+  val id = UInt(width = log2Up(nTransactors + 1))
+}
+
+trait HasL2InternalRequestState extends L2HellaCacheBundle {
+  val tag_match = Bool()
+  val meta = new L2Metadata
+  val way_en = Bits(width = nWays)
+}
+
+trait HasL2BeatAddr extends L2HellaCacheBundle {
+  val addr_beat = UInt(width = log2Up(refillCycles))
+}
+
+trait HasL2Data extends L2HellaCacheBundle
+    with HasL2BeatAddr {
+  val data = UInt(width = rowBits)
+  def hasData(dummy: Int = 0) = Bool(true)
+  def hasMultibeatData(dummy: Int = 0) = Bool(refillCycles > 1)
+}
+
+class L2Metadata extends Metadata with L2HellaCacheParameters {
+  val coh = new HierarchicalMetadata
+}
+
+object L2Metadata {
+  def apply(tag: Bits, coh: HierarchicalMetadata) = {
+    val meta = new L2Metadata
+    meta.tag := tag
+    meta.coh := coh
+    meta
+  }
+}
+
+class L2MetaReadReq extends MetaReadReq with HasL2Id {
+  val tag = Bits(width = tagBits)
+}
+
+class L2MetaWriteReq extends MetaWriteReq[L2Metadata](new L2Metadata)
+    with HasL2Id {
+  override def clone = new L2MetaWriteReq().asInstanceOf[this.type]
+}
+
+class L2MetaResp extends L2HellaCacheBundle
+  with HasL2Id
+  with HasL2InternalRequestState
+
+trait HasL2MetaReadIO extends L2HellaCacheBundle {
+  val read = Decoupled(new L2MetaReadReq)
+  val resp = Valid(new L2MetaResp).flip
+}
+
+trait HasL2MetaWriteIO extends L2HellaCacheBundle {
+  val write = Decoupled(new L2MetaWriteReq)
+}
+
+class L2MetaRWIO extends L2HellaCacheBundle with HasL2MetaReadIO with HasL2MetaWriteIO
+
+class L2MetadataArray extends L2HellaCacheModule {
+  val io = new L2MetaRWIO().flip
+
+  def onReset = L2Metadata(UInt(0), HierarchicalMetadata.onReset)
+  val meta = Module(new MetadataArray(onReset _))
+  meta.io.read <> io.read
+  meta.io.write <> io.write
+
+  val s1_tag = RegEnable(io.read.bits.tag, io.read.valid)
+  val s1_id = RegEnable(io.read.bits.id, io.read.valid)
+  def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
+  val s1_clk_en = Reg(next = io.read.fire())
+  val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === s1_tag)
+  val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.outer.isValid()).toBits
+  val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
+  val s2_tag_match = s2_tag_match_way.orR
+  val s2_hit_coh = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
+
+  val replacer = params(Replacer)()
+  val s1_replaced_way_en = UIntToOH(replacer.way)
+  val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
+  val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) =>
+    RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
+  when(!s2_tag_match) { replacer.miss }
+
+  io.resp.valid := Reg(next = s1_clk_en)
+  io.resp.bits.id := RegEnable(s1_id, s1_clk_en)
+  io.resp.bits.tag_match := s2_tag_match
+  io.resp.bits.meta := Mux(s2_tag_match,
+    L2Metadata(s2_repl_meta.tag, s2_hit_coh),
+    s2_repl_meta)
+  io.resp.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
+}
+
+class L2DataReadReq extends L2HellaCacheBundle
+    with HasL2BeatAddr
+    with HasL2Id {
+  val addr_idx = UInt(width = idxBits)
+  val way_en = Bits(width = nWays)
+}
+
+class L2DataWriteReq extends L2DataReadReq
+    with HasL2Data {
+  val wmask = Bits(width = rowBits/8)
+}
+
+class L2DataResp extends L2HellaCacheBundle with HasL2Id with HasL2Data
+
+trait HasL2DataReadIO extends L2HellaCacheBundle {
+  val read = Decoupled(new L2DataReadReq)
+  val resp = Valid(new L2DataResp).flip
+}
+
+trait HasL2DataWriteIO extends L2HellaCacheBundle {
+  val write = Decoupled(new L2DataWriteReq)
+}
+
+class L2DataRWIO extends L2HellaCacheBundle with HasL2DataReadIO with HasL2DataWriteIO
+
+class L2DataArray(delay: Int) extends L2HellaCacheModule {
+  val io = new L2DataRWIO().flip
+
+  val wmask = FillInterleaved(8, io.write.bits.wmask)
+  val reg_raddr = Reg(UInt())
+  val array = Mem(Bits(width=rowBits), nWays*nSets*refillCycles, seqRead = true)
+  val waddr = Cat(OHToUInt(io.write.bits.way_en), io.write.bits.addr_idx, io.write.bits.addr_beat)
+  val raddr = Cat(OHToUInt(io.read.bits.way_en), io.read.bits.addr_idx, io.read.bits.addr_beat)
+
+  when (io.write.bits.way_en.orR && io.write.valid) {
+    array.write(waddr, io.write.bits.data, wmask)
+  }.elsewhen (io.read.bits.way_en.orR && io.read.valid) {
+    reg_raddr := raddr
+  }
+
+  io.resp.valid := ShiftRegister(io.read.fire(), delay+1)
+  io.resp.bits.id := ShiftRegister(io.read.bits.id, delay+1)
+  io.resp.bits.addr_beat := ShiftRegister(io.read.bits.addr_beat, delay+1)
+  io.resp.bits.data := ShiftRegister(array(reg_raddr), delay)
+  io.read.ready := !io.write.valid
+  io.write.ready := Bool(true)
+}
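Like BigMem, L2DataArray is built on a Chisel 2 Mem with seqRead = true: the read address is captured in a register and the Mem is indexed by that register, which the backend can map onto a synchronous-read SRAM; ShiftRegister then pads the response out to the configured delay. The core idiom in isolation (a sketch, assuming Chisel 2 semantics):

    class SeqReadSketch(depth: Int, w: Int) extends Module {
      val io = new Bundle {
        val ren   = Bool(INPUT)
        val raddr = UInt(INPUT, log2Up(depth))
        val rdata = Bits(OUTPUT, w)
      }
      val array = Mem(Bits(width = w), depth, seqRead = true)
      val reg_raddr = Reg(UInt())
      when (io.ren) { reg_raddr := io.raddr }  // capture address on read enable
      io.rdata := array(reg_raddr)             // data appears the following cycle
    }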
+
+class L2HellaCacheBank extends HierarchicalCoherenceAgent with L2HellaCacheParameters {
+  require(isPow2(nSets))
+  require(isPow2(nWays))
+
+  val meta = Module(new L2MetadataArray) // TODO: add delay knob
+  val data = Module(new L2DataArray(1))
+  val tshrfile = Module(new TSHRFile)
+  tshrfile.io.inner <> io.inner
+  io.outer <> tshrfile.io.outer
+  io.incoherent <> tshrfile.io.incoherent
+  tshrfile.io.meta <> meta.io
+  tshrfile.io.data <> data.io
+}
+
+class TSHRFileIO extends HierarchicalTLIO {
+  val meta = new L2MetaRWIO
+  val data = new L2DataRWIO
+}
+
+class TSHRFile extends L2HellaCacheModule with HasCoherenceAgentWiringHelpers {
+  val io = new TSHRFileIO
+
+  // Create TSHRs for outstanding transactions
+  val trackerList = (0 until nReleaseTransactors).map(id => Module(new L2VoluntaryReleaseTracker(id))) ++
+    (nReleaseTransactors until nTransactors).map(id => Module(new L2AcquireTracker(id)))
+
+  // WritebackUnit evicts data from L2, including invalidating L1s
+  val wb = Module(new L2WritebackUnit(nTransactors))
+  doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req))
+  doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp))
+
+  // Propagate incoherence flags
+  (trackerList.map(_.io.incoherent) :+ wb.io.incoherent) foreach { _ := io.incoherent.toBits }
+
+  // Handle acquire transaction initiation
+  val trackerAcquireIOs = trackerList.map(_.io.inner.acquire)
+  val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits
+  val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits
+  val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits
+  val acquire_idx = Mux(acquireMatches.orR,
+                      PriorityEncoder(acquireMatches),
+                      PriorityEncoder(acquireReadys))
+  val block_acquires = acquireConflicts.orR
+  io.inner.acquire.ready := acquireReadys.orR && !block_acquires
+  trackerAcquireIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.bits := io.inner.acquire.bits
+      tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i))
+  }
+
+  // Wire releases from clients
+  val trackerReleaseIOs = trackerList.map(_.io.inner.release) :+ wb.io.inner.release
+  val releaseReadys = Vec(trackerReleaseIOs.map(_.ready)).toBits
+  val releaseMatches = Vec(trackerList.map(_.io.has_release_match) :+ wb.io.has_release_match).toBits
+  val release_idx = PriorityEncoder(releaseMatches)
+  io.inner.release.ready := releaseReadys(release_idx)
+  trackerReleaseIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.bits := io.inner.release.bits
+      tracker.valid := io.inner.release.valid && (release_idx === UInt(i))
+  }
+  assert(!(io.inner.release.valid && !releaseMatches.orR),
+    "Non-voluntary release should always have a Tracker waiting for it.")
+
+  // Wire probe requests and grant reply to clients, finish acks from clients
+  doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ wb.io.inner.probe)
+  doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
+  doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+  // Create an arbiter for the one memory port
+  val outerList = trackerList.map(_.io.outer) :+ wb.io.outer
+  val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size))(outerTLParams)
+  outer_arb.io.in <> outerList
+  io.outer <> outer_arb.io.out
+
+  // Wire local memory arrays
+  doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read))
+  doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write))
+  doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read)
+  doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write))
+  doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp))
+  doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp)
+}
+
+
+class L2XactTrackerIO extends HierarchicalXactTrackerIO {
+  val data = new L2DataRWIO
+  val meta = new L2MetaRWIO
+  val wb = new L2WritebackIO
+}
+
+abstract class L2XactTracker extends XactTracker with L2HellaCacheParameters {
+  class CacheBlockBuffer { // TODO
+    val buffer = Reg(Bits(width = params(CacheBlockBytes)*8))
+
+    def internal = Vec.fill(internalDataBeats){ Bits(width = rowBits) }.fromBits(buffer)
+    def inner = Vec.fill(innerDataBeats){ Bits(width = innerDataBits) }.fromBits(buffer)
+    def outer = Vec.fill(outerDataBeats){ Bits(width = outerDataBits) }.fromBits(buffer)
+  }
+
+  def connectDataBeatCounter[S <: L2HellaCacheBundle](inc: Bool, data: S, beat: UInt, full_block: Bool) = {
+    if(data.refillCycles > 1) {
+      val (multi_cnt, multi_done) = Counter(full_block && inc, data.refillCycles)
+      (Mux(!full_block, beat, multi_cnt), Mux(!full_block, inc, multi_done))
+    } else { (UInt(0), inc) }
+  }
+
+  def connectInternalDataBeatCounter[T <: HasL2BeatAddr](
+      in: DecoupledIO[T],
+      beat: UInt = UInt(0),
+      full_block: Bool = Bool(true)) = {
+    connectDataBeatCounter(in.fire(), in.bits, beat, full_block)
+  }
+
+  def connectInternalDataBeatCounter[T <: HasL2Data](
+      in: ValidIO[T],
+      full_block: Bool = Bool(true)) = {
+    connectDataBeatCounter(in.valid, in.bits, UInt(0), full_block)._2
+  }
+
+  def addPendingBitInternal[T <: HasL2BeatAddr](in: DecoupledIO[T]) =
+    Fill(in.bits.refillCycles, in.fire()) & UIntToOH(in.bits.addr_beat)
+
+  def addPendingBitInternal[T <: HasL2BeatAddr](in: ValidIO[T]) =
+    Fill(in.bits.refillCycles, in.valid) & UIntToOH(in.bits.addr_beat)
+
+  def dropPendingBit[T <: HasL2BeatAddr] (in: DecoupledIO[T]) =
+    ~Fill(in.bits.refillCycles, in.fire()) | ~UIntToOH(in.bits.addr_beat)
+
+  def dropPendingBitInternal[T <: HasL2BeatAddr] (in: ValidIO[T]) =
+    ~Fill(in.bits.refillCycles, in.valid) | ~UIntToOH(in.bits.addr_beat)
+
+  def addPendingBitWhenBeatHasPartialWritemask(in: DecoupledIO[AcquireFromSrc]): UInt = {
+    val a = in.bits
+    val isPartial = a.wmask() != Acquire.fullWriteMask
+    addPendingBitWhenBeat(in.fire() && isPartial && Bool(ignoresWriteMask), a)
+  }
+
+  def pinAllReadyValidLow[T <: Data](b: Bundle) {
+    b.elements.foreach {
+      _._2 match {
+        case d: DecoupledIO[T] =>
+          if(d.ready.dir == OUTPUT) d.ready := Bool(false)
+          else if(d.valid.dir == OUTPUT) d.valid := Bool(false)
+        case v: ValidIO[T] => if(v.valid.dir == OUTPUT) v.valid := Bool(false)
+        case b: Bundle => pinAllReadyValidLow(b)
+        case _ =>
+      }
+    }
+  }
+}
+
+class L2VoluntaryReleaseTracker(trackerId: Int) extends L2XactTracker {
+  val io = new L2XactTrackerIO
+  pinAllReadyValidLow(io)
+
+  val s_idle :: s_meta_read :: s_meta_resp :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 5)
+  val state = Reg(init=s_idle)
+
+  val xact = Reg(Bundle(new ReleaseFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) }
+  val xact_way_en = Reg{ Bits(width = nWays) }
+  val xact_old_meta = Reg{ new L2Metadata }
+  val coh = xact_old_meta.coh
+
+  val pending_irels = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_writes = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_ignt = Reg(init=Bool(false))
+
+  val all_pending_done =
+    !(pending_writes.orR ||
+      pending_ignt)
+
+  // Accept a voluntary Release (and any further beats of data)
+  pending_irels := (pending_irels & dropPendingBitWhenBeatHasData(io.inner.release))
+  io.inner.release.ready := state === s_idle || pending_irels.orR
+  when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
+
+  // Begin a transaction by getting the current block metadata
+  io.meta.read.valid := state === s_meta_read
+  io.meta.read.bits.id := UInt(trackerId)
+  io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.read.bits.tag := xact.addr_block >> UInt(idxBits)
+
+  // Write the voluntarily written back data to this cache
+  pending_writes := (pending_writes & dropPendingBit(io.data.write)) |
+                      addPendingBitWhenBeatHasData(io.inner.release)
+  val curr_write_beat = PriorityEncoder(pending_writes)
+  io.data.write.valid := state === s_busy && pending_writes.orR
+  io.data.write.bits.id := UInt(trackerId)
+  io.data.write.bits.way_en := xact_way_en
+  io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
+  io.data.write.bits.addr_beat := curr_write_beat
+  io.data.write.bits.wmask := SInt(-1)
+  io.data.write.bits.data := data_buffer(curr_write_beat)
+
+  // Send an acknowledgement
+  io.inner.grant.valid := state === s_busy && pending_ignt && !pending_irels
+  io.inner.grant.bits := coh.inner.makeGrant(xact, UInt(trackerId))
+  when(io.inner.grant.fire()) { pending_ignt := Bool(false) }
+
+  // End a transaction by updating the block metadata
+  io.meta.write.valid := state === s_meta_write
+  io.meta.write.bits.id := UInt(trackerId)
+  io.meta.write.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.write.bits.way_en := xact_way_en
+  io.meta.write.bits.data.tag := xact.addr_block >> UInt(idxBits)
+  io.meta.write.bits.data.coh.inner := xact_old_meta.coh.inner.onRelease(xact)
+  io.meta.write.bits.data.coh.outer := Mux(xact.hasData(),
+                                         xact_old_meta.coh.outer.onHit(M_XWR),
+                                         xact_old_meta.coh.outer)
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.inner.release.valid) {
+    xact := io.irel()
+    when(io.irel().hasMultibeatData()) {
+      pending_irels := dropPendingBitWhenBeatHasData(io.inner.release)
+    }.otherwise {
+      pending_irels := UInt(0)
+    }
+    pending_writes := addPendingBitWhenBeatHasData(io.inner.release)
+    pending_ignt := io.irel().requiresAck()
+    state := s_meta_read
+  }
+  when(state === s_meta_read && io.meta.read.ready) { state := s_meta_resp }
+  when(state === s_meta_resp && io.meta.resp.valid) {
+    xact_old_meta := io.meta.resp.bits.meta
+    xact_way_en := io.meta.resp.bits.way_en
+    state := s_busy
+  }
+  when(state === s_busy && all_pending_done) { state := s_meta_write }
+  when(state === s_meta_write && io.meta.write.ready) { state := s_idle }
+
+  // These IOs are used for routing in the parent
+  io.has_release_match := io.irel().isVoluntary()
+  io.has_acquire_match := Bool(false)
+  io.has_acquire_conflict := Bool(false)
+
+  // Checks for illegal behavior
+  assert(!(state === s_meta_resp && io.meta.resp.valid && !io.meta.resp.bits.tag_match),
+    "VoluntaryReleaseTracker accepted Release for a block not resident in this cache!")
+  assert(!(state === s_idle && io.inner.release.fire() && !io.irel().isVoluntary()),
+    "VoluntaryReleaseTracker accepted Release that wasn't voluntary!")
+}
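The tracker below also leans on connectTwoWayBeatCounter (defined elsewhere in the library's tracker base classes) to know when every Probe has been answered by a Release, or every Grant by a Finish. The underlying idea is an up/down count over fire() events; a standalone sketch (names hypothetical):

    val pending = Reg(init = UInt(0, width = log2Up(nClients + 1)))
    val up   = io.inner.probe.fire()     // one more answer now outstanding
    val down = io.inner.release.fire()   // one answer arrived
    pending := pending + up.toUInt - down.toUInt
    val all_answered = pending === UInt(0) && !up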
+
+
+class L2AcquireTracker(trackerId: Int) extends L2XactTracker {
+  val io = new L2XactTrackerIO
+  pinAllReadyValidLow(io)
+
+  val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9)
+  val state = Reg(init=s_idle)
+
+  // State holding transaction metadata
+  val xact = Reg(Bundle(new AcquireFromSrc, { case TLId => params(InnerTLId) }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) }
+  val wmask_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0,width = innerDataBits/8)) }
+  val xact_tag_match = Reg{ Bool() }
+  val xact_way_en = Reg{ Bits(width = nWays) }
+  val xact_old_meta = Reg{ new L2Metadata }
+  val pending_coh = Reg{ xact_old_meta.coh.clone }
+
+  // Secondary miss queue
+  val ignt_q = Module(new Queue(new SecondaryMissInfo, nSecondaryMisses))(innerTLParams)
+
+  // State holding progress made on processing this transaction
+  val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire)
+  val pending_irels = connectTwoWayBeatCounter(
+    max = io.inner.tlNCachingClients,
+    up = io.inner.probe,
+    down = io.inner.release)._1
+  val (pending_ognt, oacq_data_idx, oacq_data_done, ognt_data_idx, ognt_data_done) =
+    connectTwoWayBeatCounter(
+      max = 1,
+      up = io.outer.acquire,
+      down = io.outer.grant,
+      beat = xact.addr_beat)
+  val (ignt_data_idx, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat)
+  val pending_ifins = connectTwoWayBeatCounter(
+    max = nSecondaryMisses,
+    up = io.inner.grant,
+    down = io.inner.finish,
+    track = (g: Grant) => g.requiresAck())._1
+  val pending_puts = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCachingClients))
+  val pending_reads = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_writes = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_resps = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_ignt_data = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_meta_write = Reg{ Bool() }
+
+  val all_pending_done =
+    !(pending_reads.orR ||
+      pending_writes.orR ||
+      pending_resps.orR ||
+      pending_puts.orR ||
+      pending_ognt ||
+      ignt_q.io.count > UInt(0) ||
+      //pending_meta_write || // Has own state: s_meta_write
+      pending_ifins)
+
+  // Provide a single ALU per tracker to merge Puts and AMOs with data being
+  // refilled, written back, or extant in the cache
+  val amoalu = Module(new AMOALU)
+  amoalu.io.addr := xact.addr()
+  amoalu.io.cmd := xact.op_code()
+  amoalu.io.typ := xact.op_size()
+  amoalu.io.lhs := io.data.resp.bits.data // default, overwritten by calls to mergeData
+  amoalu.io.rhs := data_buffer.head       // default, overwritten by calls to mergeData
+  val amo_result = xact.data // Reuse xact buffer space to store AMO result
+
+  // Utility functions for updating the data and metadata that will be kept in
+  // the cache or granted to the original requestor after this transaction:
+
+  def updatePendingCohWhen(flag: Bool, next: HierarchicalMetadata) {
+    when(flag && pending_coh != next) {
+      pending_meta_write := Bool(true)
+      pending_coh := next
+    }
+  }
+
+  def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
+    val old_data = incoming         // Refilled, written back, or de-cached data
+    val new_data = data_buffer(beat) // Newly Put data is already in the buffer
+    amoalu.io.lhs := old_data >> xact.amo_shift_bits()
+    amoalu.io.rhs := new_data >> xact.amo_shift_bits()
+    val wmask = FillInterleaved(8, wmask_buffer(beat))
+    data_buffer(beat) := ~wmask & old_data |
+                          wmask & Mux(xact.isBuiltInType(Acquire.putAtomicType),
+                                    amoalu.io.out << xact.amo_shift_bits(),
+                                    new_data)
+    wmask_buffer(beat) := SInt(-1)
+    when(xact.is(Acquire.putAtomicType) && xact.addr_beat === beat) { amo_result := old_data }
+  }
+  def mergeDataInternal[T <: HasL2Data with HasL2BeatAddr](in: ValidIO[T]) {
+    when(in.valid) { mergeData(rowBits)(in.bits.addr_beat, in.bits.data) }
+  }
+  def mergeDataInner[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[T]) {
+    when(in.fire() && in.bits.hasData()) {
+      mergeData(innerDataBits)(in.bits.addr_beat, in.bits.data)
+    }
+  }
+  def mergeDataOuter[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[T]) {
+    when(in.fire() && in.bits.hasData()) {
+      mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data)
+    }
+  }
+
+  // Actual transaction processing logic begins here:
+  //
+  // First, take care of accepting new requests or secondary misses
+  // For now, the only allowed secondary miss types are Gets-under-Get
+  // and Puts-under-Put from the same client
+  val can_merge_iacq_get = (xact.isBuiltInType(Acquire.getType) &&
+                             io.iacq().isBuiltInType(Acquire.getType)) &&
+                           xact.client_id === io.iacq().client_id && //TODO remove
+                           xact.conflicts(io.iacq()) &&
+                           state != s_idle && state != s_meta_write &&
+                           !all_pending_done &&
+                           xact.allocate() &&
+                           !io.inner.release.fire() &&
+                           !io.outer.grant.fire() &&
+                           !io.data.resp.valid &&
+                           ignt_q.io.enq.ready
+
+  // This logic also allows the tail beats of a PutBlock to be merged in
+  val can_merge_iacq_put = ((xact.isBuiltInType(Acquire.putType) &&
+                              io.iacq().isBuiltInType(Acquire.putType)) ||
+                             (xact.isBuiltInType(Acquire.putBlockType) &&
+                              io.iacq().isBuiltInType(Acquire.putBlockType))) &&
+                           xact.client_id === io.iacq().client_id && //TODO remove
+                           xact.conflicts(io.iacq()) &&
+                           state != s_idle && state != s_meta_write &&
+                           !all_pending_done &&
+                           xact.allocate() &&
+                           !io.inner.release.fire() &&
+                           !io.outer.grant.fire() &&
+                           !io.data.resp.valid &&
+                           ignt_q.io.enq.ready
+
+  io.inner.acquire.ready := state === s_idle ||
+                              can_merge_iacq_put ||
+                              can_merge_iacq_get
+
+  // Enqueue secondary miss information
+  ignt_q.io.enq.valid := iacq_data_done
+  ignt_q.io.enq.bits.client_xact_id := io.iacq().client_xact_id
+  ignt_q.io.enq.bits.addr_beat := io.iacq().addr_beat
+  // TODO add ignt.dst <- iacq.src
+
+  // Track whether any beats are missing from a PutBlock
+  pending_puts := (pending_puts & dropPendingBitWhenBeatHasData(io.inner.acquire))
+
+  // Begin a transaction by getting the current block metadata
+  io.meta.read.valid := state === s_meta_read
+  io.meta.read.bits.id := UInt(trackerId)
+  io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.read.bits.tag := xact.addr_block >> UInt(idxBits)
+
+  // Issue a request to the writeback unit
+  io.wb.req.valid := state === s_wb_req
+  io.wb.req.bits.id := UInt(trackerId)
+  io.wb.req.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.wb.req.bits.tag := xact_old_meta.tag
+  io.wb.req.bits.coh := xact_old_meta.coh
+  io.wb.req.bits.way_en := xact_way_en
+
+  // Track which clients yet need to be probed and make Probe message
+  pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe)
+  val curr_probe_dst = PriorityEncoder(pending_iprbs)
+  io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR
+  io.inner.probe.bits := pending_coh.inner.makeProbe(curr_probe_dst, xact)
+
+  // Handle incoming releases from clients, which may reduce sharer counts
+  // and/or write back dirty data
+  io.inner.release.ready := state === s_inner_probe
+  val pending_coh_on_irel = HierarchicalMetadata(
+    pending_coh.inner.onRelease(io.irel()), // Drop sharer
+    Mux(io.irel().hasData(),                // Dirty writeback
+      pending_coh.outer.onHit(M_XWR),
+      pending_coh.outer))
+  updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+  mergeDataInner(io.inner.release)
+
+  // Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory:
+  //
+  // If we're allocating in this cache, we can use the current metadata
+  // to make an appropriate custom Acquire, otherwise we copy over the
+  // built-in Acquire from the inner TL to the outer TL
+  io.outer.acquire.valid := state === s_outer_acquire
+  io.outer.acquire.bits := Mux(
+    xact.allocate(),
+    xact_old_meta.coh.outer.makeAcquire(
+      client_xact_id = UInt(0),
+      addr_block = xact.addr_block,
+      op_code = xact.op_code()),
+    Bundle(Acquire(xact))(outerTLParams))
+
+  // Handle the response from outer memory
+  io.outer.grant.ready := state === s_busy
+  val pending_coh_on_ognt = HierarchicalMetadata(
+    ManagerMetadata.onReset,
+    pending_coh.outer.onGrant(io.outer.grant.bits, xact.op_code()))
+  updatePendingCohWhen(ognt_data_done, pending_coh_on_ognt)
+  mergeDataOuter(io.outer.grant)
+
+  // Going back to the original inner transaction, we can issue a Grant as
+  // soon as the data is released, granted, put, or read from the cache
+  pending_ignt_data := pending_ignt_data |
+                         addPendingBitWhenBeatHasData(io.inner.release) |
+                         addPendingBitWhenBeatHasData(io.outer.grant) |
+                         addPendingBitInternal(io.data.resp)
+  ignt_q.io.deq.ready := ignt_data_done
+  io.inner.grant.valid := state === s_busy &&
+                            ignt_q.io.deq.valid &&
+                            (!io.ignt().hasData() || pending_ignt_data(ignt_data_idx))
+  // Make the Grant message using the data stored in the secondary miss queue
+  io.inner.grant.bits := pending_coh.inner.makeGrant(
+    pri = xact,
+    sec = ignt_q.io.deq.bits,
+    manager_xact_id = UInt(trackerId),
+    data = Mux(xact.is(Acquire.putAtomicType),
+             amo_result,
+             data_buffer(ignt_data_idx)))
+  io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter
+
+  val pending_coh_on_ignt = HierarchicalMetadata(
+    pending_coh.inner.onGrant(io.ignt()),
+    Mux(ognt_data_done,
+      pending_coh_on_ognt.outer,
+      pending_coh.outer))
+  updatePendingCohWhen(io.inner.grant.fire(), pending_coh_on_ignt)
+
+  // We must wait for as many Finishes as we sent Grants
+  io.inner.finish.ready := state === s_busy
+
+  // We read from the cache at this level if data wasn't written back or refilled.
+  // We may merge Gets, requiring further beats to be read.
+  // If ECC requires a full writemask, we'll read out data on partial writes as well.
+  pending_reads := (pending_reads &
+                     dropPendingBit(io.data.read) &
+                     dropPendingBitWhenBeatHasData(io.inner.release) &
+                     dropPendingBitWhenBeatHasData(io.outer.grant)) |
+                   addPendingBitWhenBeatIsGetOrAtomic(io.inner.acquire) |
+                   addPendingBitWhenBeatHasPartialWritemask(io.inner.acquire)
+  val curr_read_beat = PriorityEncoder(pending_reads)
+  io.data.read.valid := state === s_busy && pending_reads.orR && !pending_ognt
+  io.data.read.bits.id := UInt(trackerId)
+  io.data.read.bits.way_en := xact_way_en
+  io.data.read.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
+  io.data.read.bits.addr_beat := curr_read_beat
+
+  pending_resps := (pending_resps & dropPendingBitInternal(io.data.resp)) |
+                     addPendingBitInternal(io.data.read)
+  mergeDataInternal(io.data.resp)
+
+  // We write data to the cache at this level if it was Put here with allocate flag,
+  // written back dirty, or refilled from outer memory.
+  pending_writes := (pending_writes & dropPendingBit(io.data.write)) |
+                      addPendingBitWhenBeatHasData(io.inner.acquire) |
+                      addPendingBitWhenBeatHasData(io.inner.release) |
+                      addPendingBitWhenBeatHasData(io.outer.grant)
+  val curr_write_beat = PriorityEncoder(pending_writes)
+  io.data.write.valid := state === s_busy &&
+                           pending_writes.orR &&
+                           !pending_ognt &&
+                           !pending_reads(curr_write_beat) &&
+                           !pending_resps(curr_write_beat)
+  io.data.write.bits.id := UInt(trackerId)
+  io.data.write.bits.way_en := xact_way_en
+  io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
+  io.data.write.bits.addr_beat := curr_write_beat
+  io.data.write.bits.wmask := wmask_buffer(curr_write_beat)
+  io.data.write.bits.data := data_buffer(curr_write_beat)
+
+  // End a transaction by updating the block metadata
+  io.meta.write.valid := state === s_meta_write
+  io.meta.write.bits.id := UInt(trackerId)
+  io.meta.write.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.write.bits.way_en := xact_way_en
+  io.meta.write.bits.data.tag := xact.addr_block >> UInt(idxBits)
+  io.meta.write.bits.data.coh := pending_coh
+
+  // Handling of secondary misses (Gets and Puts only for now)
+  when(io.inner.acquire.fire() && io.iacq().hasData()) { // state <= s_meta_write
+    val beat = io.iacq().addr_beat
+    val wmask = io.iacq().wmask()
+    val full = FillInterleaved(8, wmask)
+    data_buffer(beat) := (~full & data_buffer(beat)) | (full & io.iacq().data)
+    wmask_buffer(beat) := wmask | Mux(state === s_idle, Bits(0), wmask_buffer(beat))
+  }
+
+  // Defined here because of Chisel default wire demands, used in s_meta_resp
+  val pending_coh_on_hit = HierarchicalMetadata(
+    io.meta.resp.bits.meta.coh.inner,
+    io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code()))
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.inner.acquire.valid) {
+    xact := io.iacq()
+    xact.data := UInt(0)
+    pending_puts := Mux( // Make sure to collect all data from a PutBlock
+      io.iacq().isBuiltInType(Acquire.putBlockType),
+      dropPendingBitWhenBeatHasData(io.inner.acquire),
+      UInt(0))
+    pending_reads := Mux( // GetBlocks and custom types read all beats
+      io.iacq().isBuiltInType(Acquire.getBlockType) || !io.iacq().isBuiltInType(),
+      SInt(-1, width = innerDataBeats),
(addPendingBitWhenBeatIsGetOrAtomic(io.inner.acquire) | + addPendingBitWhenBeatHasPartialWritemask(io.inner.acquire)).toUInt) + pending_writes := addPendingBitWhenBeatHasData(io.inner.acquire) + pending_resps := UInt(0) + pending_ignt_data := UInt(0) + pending_meta_write := UInt(0) + state := s_meta_read + } + when(state === s_meta_read && io.meta.read.ready) { state := s_meta_resp } + when(state === s_meta_resp && io.meta.resp.valid) { + xact_tag_match := io.meta.resp.bits.tag_match + xact_old_meta := io.meta.resp.bits.meta + xact_way_en := io.meta.resp.bits.way_en + val coh = io.meta.resp.bits.meta.coh + val tag_match = io.meta.resp.bits.tag_match + val is_hit = (if(!isLastLevelCache) tag_match && coh.outer.isHit(xact.op_code()) + else xact.isBuiltInType(Acquire.putBlockType) || + tag_match && coh.outer.isValid()) + val needs_writeback = !tag_match && + xact.allocate() && + (coh.outer.requiresVoluntaryWriteback() || + coh.inner.requiresProbesOnVoluntaryWriteback()) + val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact) + when(!tag_match || is_hit && pending_coh_on_hit != coh) { pending_meta_write := Bool(true) } + pending_coh := Mux(is_hit, pending_coh_on_hit, coh) + when(needs_inner_probes) { + val full_sharers = coh.inner.full() + val mask_self = Mux( + xact.requiresSelfProbe(), + coh.inner.full() | UIntToOH(xact.client_id), + coh.inner.full() & ~UIntToOH(xact.client_id)) + val mask_incoherent = mask_self & ~io.incoherent.toBits + pending_iprbs := mask_incoherent + } + state := Mux(needs_writeback, s_wb_req, + Mux(needs_inner_probes, s_inner_probe, + Mux(!is_hit, s_outer_acquire, s_busy))) + } + when(state === s_wb_req && io.wb.req.ready) { state := s_wb_resp } + when(state === s_wb_resp && io.wb.resp.valid) { + // If we're overwriting the whole block in a last level cache we can + // just do it without fetching any data from memory + val skip_outer_acquire = Bool(isLastLevelCache) && xact.isBuiltInType(Acquire.putBlockType) + state := Mux(!skip_outer_acquire, s_outer_acquire, s_busy) + } + when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels)) { + // Tag matches, so if this is the last level cache we can use the data without upgrading permissions + val skip_outer_acquire = + (if(!isLastLevelCache) xact_old_meta.coh.outer.isHit(xact.op_code()) + else xact.isBuiltInType(Acquire.putBlockType) || xact_old_meta.coh.outer.isValid()) + state := Mux(!skip_outer_acquire, s_outer_acquire, s_busy) + } + when(state === s_outer_acquire && oacq_data_done) { state := s_busy } + when(state === s_busy && all_pending_done) { state := s_meta_write } + when(state === s_meta_write && (io.meta.write.ready || !pending_meta_write)) { + wmask_buffer.foreach { w => w := UInt(0) } + state := s_idle + } + + // These IOs are used for routing in the parent + val in_same_set = xact.addr_block(idxMSB,idxLSB) === io.iacq().addr_block(idxMSB,idxLSB) + io.has_release_match := xact.conflicts(io.irel()) && !io.irel().isVoluntary() && io.inner.release.ready + io.has_acquire_match := can_merge_iacq_put || can_merge_iacq_get + io.has_acquire_conflict := in_same_set && (state != s_idle) && !io.has_acquire_match + //TODO: relax from in_same_set to xact.conflicts(io.iacq())? 
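+
+  // All of the bookkeeping above follows a single idiom: each flavor of
+  // per-beat work is a bitvector updated every cycle as
+  // (pending & ~drop) | add, with the lowest-numbered pending beat issued
+  // first. A minimal self-contained sketch of that idiom (BeatScoreboard is
+  // a hypothetical name, not part of this library; Chisel 2 assumed):
+  //
+  //   class BeatScoreboard(nBeats: Int) extends Module {
+  //     val io = new Bundle {
+  //       val add  = UInt(INPUT, nBeats)          // beats that become pending
+  //       val fire = Bool(INPUT)                  // current beat was accepted
+  //       val beat = UInt(OUTPUT, log2Up(nBeats)) // beat to issue next
+  //       val busy = Bool(OUTPUT)                 // any work outstanding?
+  //     }
+  //     val pending = Reg(init = UInt(0, nBeats))
+  //     val curr = PriorityEncoder(pending)
+  //     val drop = Mux(io.fire, UIntToOH(curr, nBeats), UInt(0, nBeats))
+  //     pending := (pending & ~drop) | io.add
+  //     io.beat := curr
+  //     io.busy := pending.orR
+  //   }
+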
+ + // Checks for illegal behavior + assert(!(state != s_idle && io.inner.acquire.fire() && + io.inner.acquire.bits.client_id != xact.client_id), + "AcquireTracker accepted data beat from different network source than initial request.") +} + +class L2WritebackReq extends L2Metadata with HasL2Id { + val idx = Bits(width = idxBits) + val way_en = Bits(width = nWays) +} + +class L2WritebackResp extends L2HellaCacheBundle with HasL2Id + +class L2WritebackIO extends L2HellaCacheBundle { + val req = Decoupled(new L2WritebackReq) + val resp = Valid(new L2WritebackResp).flip +} + +class L2WritebackUnitIO extends HierarchicalXactTrackerIO { + val wb = new L2WritebackIO().flip + val data = new L2DataRWIO +} + +class L2WritebackUnit(trackerId: Int) extends L2XactTracker { + val io = new L2WritebackUnitIO + pinAllReadyValidLow(io) + + val s_idle :: s_inner_probe :: s_busy :: s_outer_grant :: s_wb_resp :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_idle) + + val xact = Reg(new L2WritebackReq) + val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) } + val xact_addr_block = Cat(xact.tag, xact.idx) + + val pending_irels = + connectTwoWayBeatCounter(max = io.inner.tlNCachingClients, up = io.inner.probe, down = io.inner.release)._1 + val (pending_ognt, orel_data_idx, orel_data_done, ognt_data_idx, ognt_data_done) = + connectTwoWayBeatCounter(max = 1, up = io.outer.release, down = io.outer.grant) + val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCachingClients)) + val pending_reads = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_resps = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_orel_data = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + + // Start the writeback sub-transaction + io.wb.req.ready := state === s_idle + + // Track which clients yet need to be probed and make Probe message + pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe) + val curr_probe_dst = PriorityEncoder(pending_iprbs) + io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR + io.inner.probe.bits := xact.coh.inner.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block) + + // Handle incoming releases from clients, which may reduce sharer counts + // and/or write back dirty data + val inner_coh_on_irel = xact.coh.inner.onRelease(io.irel()) + val outer_coh_on_irel = xact.coh.outer.onHit(M_XWR) + io.inner.release.ready := state === s_inner_probe || state === s_busy + when(io.inner.release.fire()) { + xact.coh.inner := inner_coh_on_irel + when(io.irel().hasData()) { xact.coh.outer := outer_coh_on_irel } // WB is a write + data_buffer(io.inner.release.bits.addr_beat) := io.inner.release.bits.data + } + + // If a release didn't write back data, have to read it from data array + pending_reads := (pending_reads & + dropPendingBit(io.data.read) & + dropPendingBitWhenBeatHasData(io.inner.release)) + val curr_read_beat = PriorityEncoder(pending_reads) + io.data.read.valid := state === s_busy && pending_reads.orR + io.data.read.bits.id := UInt(trackerId) + io.data.read.bits.way_en := xact.way_en + io.data.read.bits.addr_idx := xact.idx + io.data.read.bits.addr_beat := curr_read_beat + io.data.write.valid := Bool(false) + + pending_resps := (pending_resps & dropPendingBitInternal(io.data.resp)) | + addPendingBitInternal(io.data.read) + when(io.data.resp.valid) { + data_buffer(io.data.resp.bits.addr_beat) := io.data.resp.bits.data + } + + // Once the data is buffered we can write it back to outer memory + pending_orel_data := 
pending_orel_data |
+                       addPendingBitWhenBeatHasData(io.inner.release) |
+                       addPendingBitInternal(io.data.resp)
+  io.outer.release.valid := state === s_busy &&
+                              (!io.orel().hasData() || pending_orel_data(orel_data_idx))
+  io.outer.release.bits := xact.coh.outer.makeVoluntaryWriteback(
+                             client_xact_id = UInt(trackerId),
+                             addr_block = xact_addr_block,
+                             addr_beat = orel_data_idx,
+                             data = data_buffer(orel_data_idx))
+
+  // Wait for an acknowledgement
+  io.outer.grant.ready := state === s_outer_grant
+
+  // Respond to the initiating transaction handler signalling completion of the writeback
+  io.wb.resp.valid := state === s_wb_resp
+  io.wb.resp.bits.id := xact.id
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.wb.req.valid) {
+    xact := io.wb.req.bits
+    val coh = io.wb.req.bits.coh
+    val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
+    when(needs_inner_probes) { pending_iprbs := coh.inner.full() & ~io.incoherent.toBits }
+    pending_reads := SInt(-1, width = innerDataBeats)
+    pending_resps := UInt(0)
+    pending_orel_data := UInt(0)
+    state := Mux(needs_inner_probes, s_inner_probe, s_busy)
+  }
+  when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels)) {
+    state := Mux(xact.coh.outer.requiresVoluntaryWriteback(), s_busy, s_wb_resp)
+  }
+  when(state === s_busy && orel_data_done) {
+    state := Mux(io.orel().requiresAck(), s_outer_grant, s_wb_resp)
+  }
+  when(state === s_outer_grant && ognt_data_done) { state := s_wb_resp }
+  when(state === s_wb_resp) { state := s_idle }
+
+  // These IOs are used for routing in the parent
+  io.has_release_match := io.irel().conflicts(xact_addr_block) && !io.irel().isVoluntary() && io.inner.release.ready
+  io.has_acquire_match := Bool(false)
+  io.has_acquire_conflict := Bool(false)
+}
diff --git a/uncore/src/main/scala/coherence.scala b/uncore/src/main/scala/coherence.scala
new file mode 100644
index 00000000..862eb484
--- /dev/null
+++ b/uncore/src/main/scala/coherence.scala
@@ -0,0 +1,688 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+
+/** The entire CoherencePolicy API consists of the following three traits:
+ * HasCustomTileLinkMessageTypes, used to define custom messages
+ * HasClientSideCoherencePolicy, for client coherence agents
+ * HasManagerSideCoherencePolicy, for manager coherence agents
+ */
+abstract class CoherencePolicy(val dir: DirectoryRepresentation) extends
+  HasCustomTileLinkMessageTypes with
+  HasClientSideCoherencePolicy with
+  HasManagerSideCoherencePolicy
+
+/** This API defines the custom, coherence-policy-defined message types,
+ * as opposed to the built-in ones found in tilelink.scala.
+ * Policies must enumerate the custom messages to be sent over each
+ * channel, as well as which of them have associated data.
+ */
+trait HasCustomTileLinkMessageTypes {
+  val nAcquireTypes: Int
+  def acquireTypeWidth = log2Up(nAcquireTypes)
+  val nProbeTypes: Int
+  def probeTypeWidth = log2Up(nProbeTypes)
+  val nReleaseTypes: Int
+  def releaseTypeWidth = log2Up(nReleaseTypes)
+  val nGrantTypes: Int
+  def grantTypeWidth = log2Up(nGrantTypes)
+
+  val acquireTypesWithData = Nil // Only built-in Acquire types have data for now
+  val releaseTypesWithData: Vec[UInt]
+  val grantTypesWithData: Vec[UInt]
+}
+
+/** This API contains all functions required for client coherence agents.
+ * Policies must enumerate the number of client states and define their
+ * permissions with respect to memory operations. 
Policies must fill in functions
+ * to control which messages are sent and how metadata is updated in response
+ * to coherence events. These functions are generally called from within the
+ * ClientMetadata class in metadata.scala
+ */
+trait HasClientSideCoherencePolicy {
+  // Client coherence states and their permissions
+  val nClientStates: Int
+  def clientStateWidth = log2Ceil(nClientStates)
+  val clientStatesWithReadPermission: Vec[UInt]
+  val clientStatesWithWritePermission: Vec[UInt]
+  val clientStatesWithDirtyData: Vec[UInt]
+
+  // Transaction initiation logic
+  def isValid(meta: ClientMetadata): Bool
+  def isHit(cmd: UInt, meta: ClientMetadata): Bool = {
+    Mux(isWriteIntent(cmd),
+      clientStatesWithWritePermission.contains(meta.state),
+      clientStatesWithReadPermission.contains(meta.state))
+  }
+  //TODO: Assumes all states with write permissions also have read permissions
+  def requiresAcquireOnSecondaryMiss(
+      first_cmd: UInt,
+      second_cmd: UInt,
+      meta: ClientMetadata): Bool = {
+    isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
+  }
+  //TODO: Assumes all cache ctrl ops writeback dirty data, and
+  //      doesn't issue transaction when e.g. downgrading Exclusive to Shared:
+  def requiresReleaseOnCacheControl(cmd: UInt, meta: ClientMetadata): Bool =
+    clientStatesWithDirtyData.contains(meta.state)
+
+  // Determine which custom message type to use
+  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt
+  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt
+  def getReleaseType(p: Probe, meta: ClientMetadata): UInt
+
+  // Mutate ClientMetadata based on messages or cmds
+  def clientMetadataOnReset: ClientMetadata
+  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata): ClientMetadata
+}
+
+/** This API contains all functions required for manager coherence agents.
+ * Policies must enumerate the number of manager states. Policies must fill
+ * in functions to control which Probe and Grant messages are sent and how
+ * metadata should be updated in response to coherence events. 
These functions
+ * are generally called from within the ManagerMetadata class in metadata.scala
+ */
+trait HasManagerSideCoherencePolicy extends HasDirectoryRepresentation {
+  val nManagerStates: Int
+  def masterStateWidth = log2Ceil(nManagerStates)
+
+  // Transaction probing logic
+  def requiresProbes(acq: Acquire, meta: ManagerMetadata): Bool
+  def requiresProbes(cmd: UInt, meta: ManagerMetadata): Bool
+
+  // Determine which custom message type to use in response
+  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt
+  def getProbeType(acq: Acquire, meta: ManagerMetadata): UInt
+  def getGrantType(acq: Acquire, meta: ManagerMetadata): UInt
+
+  // Mutate ManagerMetadata based on messages or cmds
+  def managerMetadataOnReset: ManagerMetadata
+  def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata): ManagerMetadata
+  def managerMetadataOnGrant(outgoing: Grant, dst: UInt, meta: ManagerMetadata) =
+    ManagerMetadata(sharers=Mux(outgoing.isBuiltInType(), // Assumes all built-ins are uncached
+                      meta.sharers,
+                      dir.push(meta.sharers, dst)))
+                    //state = meta.state) TODO: Fix 0-width wires in Chisel
+}
+
+/** The following concrete implementations of CoherencePolicy each provide the
+ * functionality of one particular protocol.
+ */
+
+/** A simple protocol with only two Client states.
+ * Data is always assumed to be dirty.
+ * Only a single client may ever have a copy of a block at a time.
+ */
+class MICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+  // Message types
+  val nAcquireTypes = 1
+  val nProbeTypes = 2
+  val nReleaseTypes = 4
+  val nGrantTypes = 1
+
+  val acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
+  val probeInvalidate :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
+  val releaseInvalidateData :: releaseCopyData :: releaseInvalidateAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
+  val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)
+
+  val releaseTypesWithData = Vec(releaseInvalidateData, releaseCopyData)
+  val grantTypesWithData = Vec(grantExclusive)
+
+  // Client states and functions
+  val nClientStates = 2
+  val clientInvalid :: clientValid :: Nil = Enum(UInt(), nClientStates)
+
+  val clientStatesWithReadPermission = Vec(clientValid)
+  val clientStatesWithWritePermission = Vec(clientValid)
+  val clientStatesWithDirtyData = Vec(clientValid)
+
+  def isValid (meta: ClientMetadata): Bool = meta.state != clientInvalid
+
+  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = acquireExclusive
+
+  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+    val dirty = clientStatesWithDirtyData.contains(meta.state)
+    MuxLookup(cmd, releaseCopyAck, Array(
+      M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+      M_PRODUCE -> Mux(dirty, releaseCopyData, releaseCopyAck),
+      M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+  }
+
+  def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt =
+    MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+      probeInvalidate -> getReleaseType(M_FLUSH, meta),
+      probeCopy -> getReleaseType(M_CLEAN, meta)))
+
+  def clientMetadataOnReset = ClientMetadata(clientInvalid)
+
+  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = meta
+
+  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+    ClientMetadata(Mux(cmd === M_FLUSH, clientInvalid, meta.state))
+
+  def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) =
+    ClientMetadata(Mux(incoming.isBuiltInType(), clientInvalid, clientValid))
+
+  def 
clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata(Mux(incoming.p_type === probeInvalidate, + clientInvalid, meta.state)) + + // Manager states and functions: + val nManagerStates = 0 // We don't actually need any states for this protocol + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + probeInvalidate) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = grantExclusive + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A simple protocol with only three Client states. + * Data is marked as dirty when written. + * Only a single client may ever have a copy of a block at a time. + */ +class MEICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 1 + + val acquireExclusiveClean :: acquireExclusiveDirty :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantExclusive :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + val grantTypesWithData = Vec(grantExclusive) + + // Client states and functions + val nClientStates = 3 + val clientInvalid :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty) + + def isValid (meta: ClientMetadata) = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusiveDirty, acquireExclusiveClean) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_CLEAN, meta))) + + def clientMetadataOnReset = 
ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state)))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientExclusiveClean, + probeCopy -> meta.state))) + + // Manager states and functions: + val nManagerStates = 0 // We don't actually need any states for this protocol + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = !dir.none(meta.sharers) + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + probeInvalidate) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = grantExclusive + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A protocol with only three Client states. + * Data is always assumed to be dirty. + * Multiple clients may share read permissions on a block at the same time. 
+ */ +class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 3 + + val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + val grantTypesWithData = Vec(grantShared, grantExclusive) + + // Client states and functions + val nClientStates = 3 + val clientInvalid :: clientShared :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientShared, clientExclusiveDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty) + + def isValid(meta: ClientMetadata): Bool = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusive, acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_CLEAN, meta))) + + def clientMetadataOnReset = ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(clientStatesWithWritePermission.contains(meta.state), + clientShared, meta.state)))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> clientExclusiveDirty, + grantExclusiveAck -> clientExclusiveDirty)))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientShared, + probeCopy -> meta.state))) + + // Manager states and functions: + val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing + // only a single sharer (also would need + // notification msg to track clean drops) + // Also could avoid probes on outer WBs. 
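+
+  // Worked example of the client-side functions above: a client holding a
+  // block in clientExclusiveDirty that receives a probeDowngrade must write
+  // its dirty copy back and fall to clientShared. A sketch of how an agent
+  // queries the policy (nClients and prb are assumed to be in scope):
+  //
+  //   val policy = new MSICoherence(new FullRepresentation(nClients))
+  //   val meta   = ClientMetadata(policy.clientExclusiveDirty)
+  //   policy.getReleaseType(prb, meta)        // releaseDowngradeData
+  //   policy.clientMetadataOnProbe(prb, meta) // state becomes clientShared
+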
+ + def requiresProbes(a: Acquire, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type != acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate))) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.a_type === acquireShared, + Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + grantExclusive) + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A protocol with four Client states. + * Data is marked as dirty when written. + * Multiple clients may share read permissions on a block at the same time. + */ +class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 3 + + val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + val grantTypesWithData = Vec(grantShared, grantExclusive) + + // Client states and functions + val nClientStates = 4 + val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientShared, clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty) + + def isValid (meta: ClientMetadata): Bool = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusive, acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> 
getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_CLEAN, meta))) + + def clientMetadataOnReset = ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(clientStatesWithWritePermission.contains(meta.state), + clientShared, meta.state), + M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state)))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean), + grantExclusiveAck -> clientExclusiveDirty)))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientShared, + probeCopy -> meta.state))) + + // Manager states and functions: + val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing + // only a single sharer (also would need + // notification msg to track clean drops) + // Also could avoid probes on outer WBs. + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type != acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate))) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.a_type === acquireShared, + Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + grantExclusive) + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +class MigratoryCoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 3 + val nProbeTypes = 4 + val nReleaseTypes = 10 + val nGrantTypes = 4 + + val acquireShared :: acquireExclusive :: acquireInvalidateOthers :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: probeInvalidateOthers :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: releaseDowngradeDataMigratory 
:: releaseDowngradeAckHasCopy :: releaseInvalidateDataMigratory :: releaseInvalidateAckMigratory :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: grantReadMigratory :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData, releaseInvalidateDataMigratory, releaseDowngradeDataMigratory) + val grantTypesWithData = Vec(grantShared, grantExclusive, grantReadMigratory) + + // Client states and functions + val nClientStates = 7 + val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: clientSharedByTwo :: clientMigratoryClean :: clientMigratoryDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientShared, clientExclusiveClean, clientExclusiveDirty, clientSharedByTwo, clientMigratoryClean, clientMigratoryDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveClean, clientExclusiveDirty, clientMigratoryClean, clientMigratoryDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty, clientMigratoryDirty) + + def isValid (meta: ClientMetadata): Bool = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), + Mux(meta.state === clientInvalid, acquireExclusive, acquireInvalidateOthers), + acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + val with_data = MuxLookup(incoming.p_type, releaseInvalidateData, Array( + probeInvalidate -> Mux(Vec(clientExclusiveDirty, clientMigratoryDirty).contains(meta.state), + releaseInvalidateDataMigratory, releaseInvalidateData), + probeDowngrade -> Mux(meta.state === clientMigratoryDirty, + releaseDowngradeDataMigratory, releaseDowngradeData), + probeCopy -> releaseCopyData)) + val without_data = MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> Mux(clientExclusiveClean === meta.state, + releaseInvalidateAckMigratory, releaseInvalidateAck), + probeInvalidateOthers -> Mux(clientSharedByTwo === meta.state, + releaseInvalidateAckMigratory, releaseInvalidateAck), + probeDowngrade -> Mux(meta.state != clientInvalid, + releaseDowngradeAckHasCopy, releaseDowngradeAck), + probeCopy -> releaseCopyAck)) + Mux(dirty, with_data, without_data) + } + + def clientMetadataOnReset = ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(isWrite(cmd), MuxLookup(meta.state, clientExclusiveDirty, Array( + clientExclusiveClean -> clientExclusiveDirty, + clientMigratoryClean -> clientMigratoryDirty)), + meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(clientStatesWithWritePermission.contains(meta.state), + clientShared, meta.state), + M_CLEAN -> MuxLookup(meta.state, meta.state, Array( + clientExclusiveDirty -> clientExclusiveClean, + clientMigratoryDirty -> clientMigratoryClean))))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, 
meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean), + grantExclusiveAck -> clientExclusiveDirty, + grantReadMigratory -> Mux(isWrite(cmd), clientMigratoryDirty, clientMigratoryClean))))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeInvalidateOthers -> clientInvalid, + probeCopy -> meta.state, + probeDowngrade -> MuxLookup(meta.state, clientShared, Array( + clientExclusiveClean -> clientSharedByTwo, + clientExclusiveDirty -> clientSharedByTwo, + clientSharedByTwo -> clientShared, + clientMigratoryClean -> clientSharedByTwo, + clientMigratoryDirty -> clientInvalid))))) + + // Manager states and functions: + val nManagerStates = 0 // TODO: we could add some states to reduce the number of message types + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type != acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate, + acquireInvalidateOthers -> probeInvalidateOthers))) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = + MuxLookup(a.a_type, grantShared, Array( + acquireShared -> Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + acquireExclusive -> grantExclusive, + acquireInvalidateOthers -> grantExclusiveAck)) //TODO: add this to MESI for broadcast? + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped, + incoming.is(releaseInvalidateDataMigratory) -> popped, + incoming.is(releaseInvalidateAckMigratory) -> popped)) + } +} diff --git a/uncore/src/main/scala/consts.scala b/uncore/src/main/scala/consts.scala new file mode 100644 index 00000000..69cdc41f --- /dev/null +++ b/uncore/src/main/scala/consts.scala @@ -0,0 +1,48 @@ +// See LICENSE for license details. 
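+// Example of decoding one memory operation with the predicates defined
+// below (a sketch; each predicate yields a hardware Bool):
+//
+//   import uncore.constants.MemoryOpConstants._
+//   val cmd = M_XA_ADD          // "b01000", an atomic fetch-and-add
+//   isAMO(cmd)                  // true: bit 3 marks the atomic ops
+//   isRead(cmd) && isWrite(cmd) // true: AMOs both read and write memory
+//   isPrefetch(cmd)             // false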
+ +package uncore +package constants + +import Chisel._ + +object MemoryOpConstants extends MemoryOpConstants +trait MemoryOpConstants { + val MT_SZ = 3 + val MT_X = Bits("b???") + val MT_B = Bits("b000") + val MT_H = Bits("b001") + val MT_W = Bits("b010") + val MT_D = Bits("b011") + val MT_BU = Bits("b100") + val MT_HU = Bits("b101") + val MT_WU = Bits("b110") + + val NUM_XA_OPS = 9 + val M_SZ = 5 + val M_X = Bits("b?????"); + val M_XRD = Bits("b00000"); // int load + val M_XWR = Bits("b00001"); // int store + val M_PFR = Bits("b00010"); // prefetch with intent to read + val M_PFW = Bits("b00011"); // prefetch with intent to write + val M_XA_SWAP = Bits("b00100"); + val M_NOP = Bits("b00101"); + val M_XLR = Bits("b00110"); + val M_XSC = Bits("b00111"); + val M_XA_ADD = Bits("b01000"); + val M_XA_XOR = Bits("b01001"); + val M_XA_OR = Bits("b01010"); + val M_XA_AND = Bits("b01011"); + val M_XA_MIN = Bits("b01100"); + val M_XA_MAX = Bits("b01101"); + val M_XA_MINU = Bits("b01110"); + val M_XA_MAXU = Bits("b01111"); + val M_FLUSH = Bits("b10000") // write back dirty data and cede R/W permissions + val M_PRODUCE = Bits("b10001") // write back dirty data and cede W permissions + val M_CLEAN = Bits("b10011") // write back dirty data and retain R/W permissions + + def isAMO(cmd: Bits) = cmd(3) || cmd === M_XA_SWAP + def isPrefetch(cmd: Bits) = cmd === M_PFR || cmd === M_PFW + def isRead(cmd: Bits) = cmd === M_XRD || cmd === M_XLR || isAMO(cmd) + def isWrite(cmd: Bits) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd) + def isWriteIntent(cmd: Bits) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR +} diff --git a/uncore/src/main/scala/directory.scala b/uncore/src/main/scala/directory.scala new file mode 100644 index 00000000..db555ad3 --- /dev/null +++ b/uncore/src/main/scala/directory.scala @@ -0,0 +1,43 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +// This class encapsulates transformations on different directory information +// storage formats +abstract class DirectoryRepresentation(val width: Int) { + def pop(prev: UInt, id: UInt): UInt + def push(prev: UInt, id: UInt): UInt + def flush: UInt + def none(s: UInt): Bool + def one(s: UInt): Bool + def count(s: UInt): UInt + def next(s: UInt): UInt + def full(s: UInt): UInt +} + +abstract trait HasDirectoryRepresentation { + val dir: DirectoryRepresentation +} + +class NullRepresentation(nClients: Int) extends DirectoryRepresentation(1) { + def pop(prev: UInt, id: UInt) = UInt(0) + def push(prev: UInt, id: UInt) = UInt(0) + def flush = UInt(0) + def none(s: UInt) = Bool(false) + def one(s: UInt) = Bool(false) + def count(s: UInt) = UInt(nClients) + def next(s: UInt) = UInt(0) + def full(s: UInt) = SInt(-1, width = nClients).toUInt +} + +class FullRepresentation(nClients: Int) extends DirectoryRepresentation(nClients) { + def pop(prev: UInt, id: UInt) = prev & ~UIntToOH(id) + def push(prev: UInt, id: UInt) = prev | UIntToOH(id) + def flush = UInt(0, width = width) + def none(s: UInt) = s === UInt(0) + def one(s: UInt) = PopCount(s) === UInt(1) + def count(s: UInt) = PopCount(s) + def next(s: UInt) = PriorityEncoder(s) + def full(s: UInt) = s +} diff --git a/uncore/src/main/scala/ecc.scala b/uncore/src/main/scala/ecc.scala new file mode 100644 index 00000000..b5864b2c --- /dev/null +++ b/uncore/src/main/scala/ecc.scala @@ -0,0 +1,146 @@ +// See LICENSE for license details. 
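+// Usage sketch for the Code interface defined below. For k = 8 data bits,
+// (new SECCode).width(8) == 12 and (new SECDEDCode).width(8) == 13: a
+// Hamming code plus one overall parity bit. Here data8 is an assumed
+// 8-bit Bits signal:
+//
+//   val code = new SECDEDCode
+//   val enc  = code.encode(data8)                       // 13-bit codeword
+//   val dec  = code.decode(enc ^ UIntToOH(UInt(3), 13)) // inject a 1-bit error
+//   // dec.corrected recovers data8 and dec.correctable is asserted;
+//   // flipping a second bit would assert dec.uncorrectable instead.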
+ +package uncore + +import Chisel._ + +abstract class Decoding +{ + def uncorrected: Bits + def corrected: Bits + def correctable: Bool + def uncorrectable: Bool + def error = correctable || uncorrectable +} + +abstract class Code +{ + def width(w0: Int): Int + def encode(x: Bits): Bits + def decode(x: Bits): Decoding +} + +class IdentityCode extends Code +{ + def width(w0: Int) = w0 + def encode(x: Bits) = x + def decode(y: Bits) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = Bool(false) + def uncorrectable = Bool(false) + } +} + +class ParityCode extends Code +{ + def width(w0: Int) = w0+1 + def encode(x: Bits) = Cat(x.xorR, x) + def decode(y: Bits) = new Decoding { + def uncorrected = y(y.getWidth-2,0) + def corrected = uncorrected + def correctable = Bool(false) + def uncorrectable = y.xorR + } +} + +class SECCode extends Code +{ + def width(k: Int) = { + val m = new Unsigned(k).log2 + 1 + k + m + (if((1 << m) < m+k+1) 1 else 0) + } + def encode(x: Bits) = { + val k = x.getWidth + require(k > 0) + val n = width(k) + + val y = for (i <- 1 to n) yield { + if (isPow2(i)) { + val r = for (j <- 1 to n; if j != i && (j & i) != 0) + yield x(mapping(j)) + r reduce (_^_) + } else + x(mapping(i)) + } + Vec(y).toBits + } + def decode(y: Bits) = new Decoding { + val n = y.getWidth + require(n > 0 && !isPow2(n)) + + val p2 = for (i <- 0 until log2Up(n)) yield 1 << i + val syndrome = p2 map { i => + val r = for (j <- 1 to n; if (j & i) != 0) + yield y(j-1) + r reduce (_^_) + } + val s = Vec(syndrome).toBits + + private def swizzle(z: Bits) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))).toBits + def uncorrected = swizzle(y) + def corrected = swizzle(((y.toUInt << UInt(1)) ^ UIntToOH(s)) >> UInt(1)) + def correctable = s.orR + def uncorrectable = Bool(false) + } + private def mapping(i: Int) = i-1-log2Up(i) +} + +class SECDEDCode extends Code +{ + private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def encode(x: Bits) = par.encode(sec.encode(x)) + def decode(x: Bits) = new Decoding { + val secdec = sec.decode(x(x.getWidth-2,0)) + val pardec = par.decode(x) + + def uncorrected = secdec.uncorrected + def corrected = secdec.corrected + def correctable = pardec.uncorrectable + def uncorrectable = !pardec.uncorrectable && secdec.correctable + } +} + +object ErrGen +{ + // generate a 1-bit error with approximate probability 2^-f + def apply(width: Int, f: Int): Bits = { + require(width > 0 && f >= 0 && log2Up(width) + f <= 16) + UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) + } + def apply(x: Bits, f: Int): Bits = x ^ apply(x.getWidth, f) +} + +class SECDEDTest extends Module +{ + val code = new SECDEDCode + val k = 4 + val n = code.width(k) + + val io = new Bundle { + val original = Bits(OUTPUT, k) + val encoded = Bits(OUTPUT, n) + val injected = Bits(OUTPUT, n) + val uncorrected = Bits(OUTPUT, k) + val corrected = Bits(OUTPUT, k) + val correctable = Bool(OUTPUT) + val uncorrectable = Bool(OUTPUT) + } + + val c = Counter(Bool(true), 1 << k) + val numErrors = Counter(c._2, 3)._1 + val e = code.encode(c._1) + val i = e ^ Mux(numErrors < UInt(1), UInt(0), ErrGen(n, 1)) ^ Mux(numErrors < UInt(2), UInt(0), ErrGen(n, 1)) + val d = code.decode(i) + + io.original := c._1 + io.encoded := e + io.injected := i + io.uncorrected := d.uncorrected + io.corrected := d.corrected + io.correctable := d.correctable + io.uncorrectable := d.uncorrectable +} diff --git a/uncore/src/main/scala/htif.scala 
b/uncore/src/main/scala/htif.scala new file mode 100644 index 00000000..8a79cda1 --- /dev/null +++ b/uncore/src/main/scala/htif.scala @@ -0,0 +1,255 @@ +// See LICENSE for license details. + +package uncore + +import Chisel._ +import Node._ +import uncore._ + +case object HTIFWidth extends Field[Int] +case object HTIFNSCR extends Field[Int] +case object HTIFOffsetBits extends Field[Int] +case object HTIFNCores extends Field[Int] + +abstract trait HTIFParameters extends UsesParameters { + val dataBits = params(TLDataBits) + val dataBeats = params(TLDataBeats) + val w = params(HTIFWidth) + val nSCR = params(HTIFNSCR) + val offsetBits = params(HTIFOffsetBits) + val nCores = params(HTIFNCores) +} + +abstract class HTIFBundle extends Bundle with HTIFParameters + +class HostIO extends HTIFBundle +{ + val clk = Bool(OUTPUT) + val clk_edge = Bool(OUTPUT) + val in = Decoupled(Bits(width = w)).flip + val out = Decoupled(Bits(width = w)) + val debug_stats_pcr = Bool(OUTPUT) +} + +class PCRReq extends Bundle +{ + val rw = Bool() + val addr = Bits(width = 12) + val data = Bits(width = 64) +} + +class HTIFIO extends HTIFBundle { + val reset = Bool(INPUT) + val id = UInt(INPUT, log2Up(nCores)) + val pcr_req = Decoupled(new PCRReq).flip + val pcr_rep = Decoupled(Bits(width = 64)) + val ipi_req = Decoupled(Bits(width = log2Up(nCores))) + val ipi_rep = Decoupled(Bool()).flip + val debug_stats_pcr = Bool(OUTPUT) + // wired directly to stats register + // expected to be used to quickly indicate to testbench to do logging b/c in 'interesting' work +} + +class SCRIO extends HTIFBundle { + val rdata = Vec.fill(nSCR){Bits(INPUT, 64)} + val wen = Bool(OUTPUT) + val waddr = UInt(OUTPUT, log2Up(nSCR)) + val wdata = Bits(OUTPUT, 64) +} + +class HTIFModuleIO extends HTIFBundle { + val host = new HostIO + val cpu = Vec.fill(nCores){new HTIFIO}.flip + val mem = new ClientUncachedTileLinkIO + val scr = new SCRIO +} + +class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { + val io = new HTIFModuleIO + + io.host.debug_stats_pcr := io.cpu.map(_.debug_stats_pcr).reduce(_||_) + // system is 'interesting' if any tile is 'interesting' + + val short_request_bits = 64 + val long_request_bits = short_request_bits + dataBits*dataBeats + require(short_request_bits % w == 0) + + val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits + val rx_count = Reg(init=UInt(0,rx_count_w)) + val rx_shifter = Reg(Bits(width = short_request_bits)) + val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) + val next_cmd = rx_shifter_in(3,0) + val cmd = Reg(Bits()) + val size = Reg(Bits()) + val pos = Reg(Bits()) + val seqno = Reg(Bits()) + val addr = Reg(Bits()) + when (io.host.in.valid && io.host.in.ready) { + rx_shifter := rx_shifter_in + rx_count := rx_count + UInt(1) + when (rx_count === UInt(short_request_bits/w-1)) { + cmd := next_cmd + size := rx_shifter_in(15,4) + pos := rx_shifter_in(15,4+offsetBits-3) + seqno := rx_shifter_in(23,16) + addr := rx_shifter_in(63,24) + } + } + + val rx_word_count = (rx_count >> UInt(log2Up(short_request_bits/w))) + val rx_word_done = io.host.in.valid && rx_count(log2Up(short_request_bits/w)-1,0).andR + val packet_ram_depth = long_request_bits/short_request_bits-1 + val packet_ram = Mem(Bits(width = short_request_bits), packet_ram_depth) + when (rx_word_done && io.host.in.ready) { + packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1)) := rx_shifter_in + } + + val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: 
Nil = Enum(UInt(), 6) + + val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.getWidth-1, 0) + val pcr_coreid = addr(log2Up(nCores)-1+20+1,20) + val pcr_wdata = packet_ram(0) + + val bad_mem_packet = size(offsetBits-1-3,0).orR || addr(offsetBits-1-3,0).orR + val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, bad_mem_packet, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UInt(1), + Bool(true))) + + val tx_count = Reg(init=UInt(0, rx_count_w)) + val tx_subword_count = tx_count(log2Up(short_request_bits/w)-1,0) + val tx_word_count = tx_count(rx_count_w-1, log2Up(short_request_bits/w)) + val packet_ram_raddr = tx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1) + when (io.host.out.valid && io.host.out.ready) { + tx_count := tx_count + UInt(1) + } + + val rx_done = rx_word_done && Mux(rx_word_count === UInt(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UInt(0)) + val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UInt(0)) + val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UInt(0) && packet_ram_raddr.andR) + + val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_rreq :: state_mem_wreq :: state_mem_rresp :: state_mem_wresp :: state_tx :: Nil = Enum(UInt(), 8) + val state = Reg(init=state_rx) + + val (cnt, cnt_done) = Counter((state === state_mem_wreq && io.mem.acquire.ready) || + (state === state_mem_rresp && io.mem.grant.valid), dataBeats) + val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) + when (state === state_rx && rx_done) { + state := Mux(rx_cmd === cmd_readmem, state_mem_rreq, + Mux(rx_cmd === cmd_writemem, state_mem_wreq, + Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, + state_tx))) + } + when (state === state_mem_wreq) { + when (cnt_done) { state := state_mem_wresp } + } + when (state === state_mem_rreq) { + when(io.mem.acquire.ready) { state := state_mem_rresp } + } + when (state === state_mem_wresp && io.mem.grant.valid) { + state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) + pos := pos - UInt(1) + addr := addr + UInt(1 << offsetBits-3) + } + when (state === state_mem_rresp && cnt_done) { + state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) + pos := pos - UInt(1) + addr := addr + UInt(1 << offsetBits-3) + } + when (state === state_tx && tx_done) { + when (tx_word_count === tx_size) { + rx_count := UInt(0) + tx_count := UInt(0) + } + state := Mux(cmd === cmd_readmem && pos != UInt(0), state_mem_rreq, state_rx) + } + + val n = dataBits/short_request_bits + val mem_req_data = (0 until n).map { i => + val ui = UInt(i, log2Up(n)) + when (state === state_mem_rresp && io.mem.grant.valid) { + packet_ram(Cat(io.mem.grant.bits.addr_beat, ui)) := + io.mem.grant.bits.data((i+1)*short_request_bits-1, i*short_request_bits) + } + packet_ram(Cat(cnt, ui)) + }.reverse.reduce(_##_) + + val init_addr = addr.toUInt >> UInt(offsetBits-3) + io.mem.acquire.valid := state === state_mem_rreq || state === state_mem_wreq + io.mem.acquire.bits := Mux(cmd === cmd_writemem, + PutBlock( + addr_block = init_addr, + addr_beat = cnt, + client_xact_id = UInt(0), + data = mem_req_data), + GetBlock(addr_block = init_addr)) + io.mem.grant.ready := Bool(true) + + val pcrReadData = Reg(Bits(width = io.cpu(0).pcr_rep.bits.getWidth)) + for (i <- 0 until nCores) { + val my_reset = Reg(init=Bool(true)) + val my_ipi = Reg(init=Bool(false)) 
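+
+    // The wiring below forms an all-to-all IPI fabric: any core j may name
+    // core i on its ipi_req, which sets my_ipi here until core i drains it
+    // via the ipi_rep handshake. The same idea in isolation (a sketch with
+    // assumed ipi_req/ipi_rep Vecs of Decoupled interfaces and a my_ipi Vec
+    // of Regs):
+    //
+    //   for (i <- 0 until nCores) {
+    //     val hit = (0 until nCores).map(j => ipi_req(j).valid && ipi_req(j).bits === UInt(i))
+    //     when (ipi_rep(i).ready) { my_ipi(i) := Bool(false) }
+    //     ipi_rep(i).valid := my_ipi(i)
+    //     when (hit.reduce(_||_)) { my_ipi(i) := Bool(true) }
+    //   }
+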
+ + val cpu = io.cpu(i) + val me = pcr_coreid === UInt(i) + cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != UInt(pcr_RESET) + cpu.pcr_req.bits.rw := cmd === cmd_writecr + cpu.pcr_req.bits.addr := pcr_addr + cpu.pcr_req.bits.data := pcr_wdata + cpu.reset := my_reset + + when (cpu.ipi_rep.ready) { + my_ipi := Bool(false) + } + cpu.ipi_rep.valid := my_ipi + cpu.ipi_req.ready := Bool(true) + for (j <- 0 until nCores) { + when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UInt(i)) { + my_ipi := Bool(true) + } + } + + when (cpu.pcr_req.valid && cpu.pcr_req.ready) { + state := state_pcr_resp + } + when (state === state_pcr_req && me && pcr_addr === UInt(pcr_RESET)) { + when (cmd === cmd_writecr) { + my_reset := pcr_wdata(0) + } + pcrReadData := my_reset.toBits + state := state_tx + } + + cpu.pcr_rep.ready := Bool(true) + when (cpu.pcr_rep.valid) { + pcrReadData := cpu.pcr_rep.bits + state := state_tx + } + } + + val scr_addr = addr(log2Up(nSCR)-1, 0) + val scr_rdata = Vec.fill(io.scr.rdata.size){Bits(width = 64)} + for (i <- 0 until scr_rdata.size) + scr_rdata(i) := io.scr.rdata(i) + scr_rdata(0) := UInt(nCores) + scr_rdata(1) := UInt((BigInt(dataBits*dataBeats/8) << params(TLBlockAddrBits)) >> 20) + + io.scr.wen := Bool(false) + io.scr.wdata := pcr_wdata + io.scr.waddr := scr_addr.toUInt + when (state === state_pcr_req && pcr_coreid === SInt(-1)) { + io.scr.wen := cmd === cmd_writecr + pcrReadData := scr_rdata(scr_addr) + state := state_tx + } + + val tx_cmd = Mux(nack, cmd_nack, cmd_ack) + val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) + val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) + val tx_data = Mux(tx_word_count === UInt(0), tx_header, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData, + packet_ram(packet_ram_raddr))) + + io.host.in.ready := state === state_rx + io.host.out.valid := state === state_tx + io.host.out.bits := tx_data >> Cat(tx_count(log2Up(short_request_bits/w)-1,0), Bits(0, log2Up(w))) +} diff --git a/uncore/src/main/scala/memserdes.scala b/uncore/src/main/scala/memserdes.scala new file mode 100644 index 00000000..7497e2e4 --- /dev/null +++ b/uncore/src/main/scala/memserdes.scala @@ -0,0 +1,584 @@ +// See LICENSE for license details. 
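+// The serializers below all size their shift registers and counters with the
+// same ceiling division: a b-bit message crosses a w-bit link in ceil(b/w)
+// beats. A sketch of the arithmetic (plain Scala, no hardware; beatsOf is a
+// hypothetical helper):
+//
+//   def beatsOf(b: Int, w: Int) = (b + w - 1) / w
+//   beatsOf(70, 16)  // == 5: a 70-bit command needs five 16-bit beats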
+ +package uncore +import Chisel._ +import scala.math._ + +case object PAddrBits extends Field[Int] +case object VAddrBits extends Field[Int] +case object PgIdxBits extends Field[Int] +case object PgLevels extends Field[Int] +case object PgLevelBits extends Field[Int] +case object ASIdBits extends Field[Int] +case object PPNBits extends Field[Int] +case object VPNBits extends Field[Int] + +case object MIFAddrBits extends Field[Int] +case object MIFDataBits extends Field[Int] +case object MIFTagBits extends Field[Int] +case object MIFDataBeats extends Field[Int] + +trait MIFParameters extends UsesParameters { + val mifTagBits = params(MIFTagBits) + val mifAddrBits = params(MIFAddrBits) + val mifDataBits = params(MIFDataBits) + val mifDataBeats = params(MIFDataBeats) +} + +abstract class MIFBundle extends Bundle with MIFParameters +abstract class MIFModule extends Module with MIFParameters + +trait HasMemData extends MIFBundle { + val data = Bits(width = mifDataBits) +} + +trait HasMemAddr extends MIFBundle { + val addr = UInt(width = mifAddrBits) +} + +trait HasMemTag extends MIFBundle { + val tag = UInt(width = mifTagBits) +} + +class MemReqCmd extends HasMemAddr with HasMemTag { + val rw = Bool() +} + +class MemTag extends HasMemTag +class MemData extends HasMemData +class MemResp extends HasMemData with HasMemTag + +class MemIO extends Bundle { + val req_cmd = Decoupled(new MemReqCmd) + val req_data = Decoupled(new MemData) + val resp = Decoupled(new MemResp).flip +} + +class MemPipeIO extends Bundle { + val req_cmd = Decoupled(new MemReqCmd) + val req_data = Decoupled(new MemData) + val resp = Valid(new MemResp).flip +} + +class MemSerializedIO(w: Int) extends Bundle +{ + val req = Decoupled(Bits(width = w)) + val resp = Valid(Bits(width = w)).flip +} + +class MemSerdes(w: Int) extends MIFModule +{ + val io = new Bundle { + val wide = new MemIO().flip + val narrow = new MemSerializedIO(w) + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + val out_buf = Reg(Bits()) + val in_buf = Reg(Bits()) + + val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_idle) + val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w))) + val data_send_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w) + val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w) + + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := send_cnt + UInt(1) + out_buf := out_buf >> UInt(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + when (state === s_idle && io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) + } + when (state === s_read_addr && adone) { + state := s_idle + send_cnt := UInt(0) + } + when (state === s_write_addr && adone) { + state := s_write_idle + send_cnt := UInt(0) + } + when (state === s_write_idle && io.wide.req_data.valid) { + state := s_write_data + } + when (state === s_write_data && ddone) { + data_send_cnt := 
data_send_cnt + UInt(1) + state := Mux(data_send_cnt === UInt(mifDataBeats-1), s_idle, s_write_idle) + send_cnt := UInt(0) + } + + val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w))) + val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val resp_val = Reg(init=Bool(false)) + + resp_val := Bool(false) + when (io.narrow.resp.valid) { + recv_cnt := recv_cnt + UInt(1) + when (recv_cnt === UInt((rbits-1)/w)) { + recv_cnt := UInt(0) + data_recv_cnt := data_recv_cnt + UInt(1) + resp_val := Bool(true) + } + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + + io.wide.resp.valid := resp_val + io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf) +} + +class MemDesserIO(w: Int) extends Bundle { + val narrow = new MemSerializedIO(w).flip + val wide = new MemIO +} + +class MemDesser(w: Int) extends Module // test rig side +{ + val io = new MemDesserIO(w) + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + val mifDataBeats = params(MIFDataBeats) + + require(dbits >= abits && rbits >= dbits) + val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w))) + val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w) + val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w) + val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w) + + val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_cmd_recv) + + val in_buf = Reg(Bits()) + when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { + recv_cnt := recv_cnt + UInt(1) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv + + when (state === s_cmd_recv && adone) { + state := s_cmd + recv_cnt := UInt(0) + } + when (state === s_cmd && io.wide.req_cmd.ready) { + state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply) + } + when (state === s_data_recv && ddone) { + state := s_data + recv_cnt := UInt(0) + } + when (state === s_data && io.wide.req_data.ready) { + state := s_data_recv + when (data_recv_cnt === UInt(mifDataBeats-1)) { + state := s_cmd_recv + } + data_recv_cnt := data_recv_cnt + UInt(1) + } + when (rdone) { // state === s_reply + when (data_recv_cnt === UInt(mifDataBeats-1)) { + state := s_cmd_recv + } + recv_cnt := UInt(0) + data_recv_cnt := data_recv_cnt + UInt(1) + } + + val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w) + io.wide.req_cmd.valid := state === s_cmd + io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd) + + io.wide.req_data.valid := state === s_data + io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w) + + val dataq = Module(new Queue(new MemResp, mifDataBeats)) + dataq.io.enq <> io.wide.resp + dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w) + + io.narrow.resp.valid := dataq.io.deq.valid + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UInt(w)) +} + +//Adapter betweewn an UncachedTileLinkIO and a mem controller MemIO +class MemIOTileLinkIOConverter(qDepth: Int) extends TLModule with MIFParameters { + val io = new Bundle { + val tl = new ManagerTileLinkIO + val mem = new MemIO + } + val dataBits = tlDataBits*tlDataBeats + val dstIdBits = params(LNHeaderBits) + require(tlDataBits*tlDataBeats == mifDataBits*mifDataBeats, "Data sizes between LLC and MC don't agree") + require(dstIdBits + 
tlClientXactIdBits < mifTagBits, "MemIO converter is going truncate tags: " + dstIdBits + " + " + tlClientXactIdBits + " >= " + mifTagBits) + + io.tl.acquire.ready := Bool(false) + io.tl.probe.valid := Bool(false) + io.tl.release.ready := Bool(false) + io.tl.finish.ready := Bool(true) + io.mem.resp.ready := Bool(false) + + val gnt_arb = Module(new Arbiter(new GrantToDst, 2)) + io.tl.grant <> gnt_arb.io.out + + val dst_off = dstIdBits + tlClientXactIdBits + val acq_has_data = io.tl.acquire.bits.hasData() + val rel_has_data = io.tl.release.bits.hasData() + + // Decompose outgoing TL Acquires into MemIO cmd and data + val active_out = Reg(init=Bool(false)) + val cmd_sent_out = Reg(init=Bool(false)) + val tag_out = Reg(UInt(width = mifTagBits)) + val addr_out = Reg(UInt(width = mifAddrBits)) + val has_data = Reg(init=Bool(false)) + val data_from_rel = Reg(init=Bool(false)) + val (tl_cnt_out, tl_wrap_out) = + Counter((io.tl.acquire.fire() && acq_has_data) || + (io.tl.release.fire() && rel_has_data), tlDataBeats) + val tl_done_out = Reg(init=Bool(false)) + val make_grant_ack = Reg(init=Bool(false)) + + gnt_arb.io.in(1).valid := Bool(false) + gnt_arb.io.in(1).bits := Grant( + dst = (if(dstIdBits > 0) tag_out(dst_off, tlClientXactIdBits + 1) else UInt(0)), + is_builtin_type = Bool(true), + g_type = Mux(data_from_rel, Grant.voluntaryAckType, Grant.putAckType), + client_xact_id = tag_out >> UInt(1), + manager_xact_id = UInt(0)) + + if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) { + val mem_cmd_q = Module(new Queue(new MemReqCmd, qDepth)) + val mem_data_q = Module(new Queue(new MemData, qDepth)) + mem_cmd_q.io.enq.valid := Bool(false) + mem_data_q.io.enq.valid := Bool(false) + val (mif_cnt_out, mif_wrap_out) = Counter(mem_data_q.io.enq.fire(), mifDataBeats) + val mif_done_out = Reg(init=Bool(false)) + val tl_buf_out = Vec.fill(tlDataBeats){ Reg(io.tl.acquire.bits.data.clone) } + val mif_buf_out = Vec.fill(mifDataBeats){ new MemData } + mif_buf_out := mif_buf_out.fromBits(tl_buf_out.toBits) + val mif_prog_out = (mif_cnt_out+UInt(1, width = log2Up(mifDataBeats+1)))*UInt(mifDataBits) + val tl_prog_out = tl_cnt_out*UInt(tlDataBits) + + when(!active_out){ + io.tl.release.ready := Bool(true) + io.tl.acquire.ready := !io.tl.release.valid + when(io.tl.release.valid) { + active_out := Bool(true) + cmd_sent_out := Bool(false) + tag_out := Cat(io.tl.release.bits.client_id, + io.tl.release.bits.client_xact_id, + io.tl.release.bits.isVoluntary()) + addr_out := io.tl.release.bits.addr_block + has_data := rel_has_data + data_from_rel := Bool(true) + make_grant_ack := io.tl.release.bits.requiresAck() + tl_done_out := tl_wrap_out + tl_buf_out(tl_cnt_out) := io.tl.release.bits.data + } .elsewhen(io.tl.acquire.valid) { + active_out := Bool(true) + cmd_sent_out := Bool(false) + tag_out := Cat(io.tl.release.bits.client_id, + io.tl.acquire.bits.client_xact_id, + io.tl.acquire.bits.isBuiltInType()) + addr_out := io.tl.acquire.bits.addr_block + has_data := acq_has_data + data_from_rel := Bool(false) + make_grant_ack := acq_has_data + tl_done_out := tl_wrap_out + tl_buf_out(tl_cnt_out) := io.tl.acquire.bits.data + } + } + when(active_out) { + mem_cmd_q.io.enq.valid := !cmd_sent_out + cmd_sent_out := cmd_sent_out || mem_cmd_q.io.enq.fire() + when(has_data) { + when(!tl_done_out) { + io.tl.acquire.ready := Bool(true) + when(io.tl.acquire.valid) { + tl_buf_out(tl_cnt_out) := Mux(data_from_rel, + io.tl.release.bits.data, + io.tl.acquire.bits.data) + } + } + when(!mif_done_out) { + mem_data_q.io.enq.valid := 
tl_done_out || mif_prog_out <= tl_prog_out + } + } + when(tl_wrap_out) { tl_done_out := Bool(true) } + when(mif_wrap_out) { mif_done_out := Bool(true) } + when(tl_done_out && make_grant_ack) { + gnt_arb.io.in(1).valid := Bool(true) + when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) } + } + when(cmd_sent_out && (!has_data || mif_done_out) && !make_grant_ack) { + active_out := Bool(false) + } + } + + mem_cmd_q.io.enq.bits.rw := has_data + mem_cmd_q.io.enq.bits.tag := tag_out + mem_cmd_q.io.enq.bits.addr := addr_out + mem_data_q.io.enq.bits.data := mif_buf_out(mif_cnt_out).data + io.mem.req_cmd <> mem_cmd_q.io.deq + io.mem.req_data <> mem_data_q.io.deq + } else { // Don't make the data buffers and try to flow cmd and data + io.mem.req_cmd.valid := Bool(false) + io.mem.req_data.valid := Bool(false) + io.mem.req_cmd.bits.rw := has_data + io.mem.req_cmd.bits.tag := tag_out + io.mem.req_cmd.bits.addr := addr_out + io.mem.req_data.bits.data := Mux(data_from_rel, + io.tl.release.bits.data, + io.tl.acquire.bits.data) + when(!active_out){ + io.tl.release.ready := io.mem.req_data.ready + io.tl.acquire.ready := io.mem.req_data.ready && !io.tl.release.valid + io.mem.req_data.valid := (io.tl.release.valid && rel_has_data) || + (io.tl.acquire.valid && acq_has_data) + when(io.mem.req_data.ready && (io.tl.release.valid || io.tl.acquire.valid)) { + active_out := !io.mem.req_cmd.ready || io.mem.req_data.valid + io.mem.req_cmd.valid := Bool(true) + cmd_sent_out := io.mem.req_cmd.ready + tl_done_out := tl_wrap_out + when(io.tl.release.valid) { + data_from_rel := Bool(true) + make_grant_ack := io.tl.release.bits.requiresAck() + io.mem.req_data.bits.data := io.tl.release.bits.data + val tag = Cat(io.tl.release.bits.client_id, + io.tl.release.bits.client_xact_id, + io.tl.release.bits.isVoluntary()) + val addr = io.tl.release.bits.addr_block + io.mem.req_cmd.bits.tag := tag + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := rel_has_data + tag_out := tag + addr_out := addr + has_data := rel_has_data + } .elsewhen(io.tl.acquire.valid) { + data_from_rel := Bool(false) + make_grant_ack := acq_has_data // i.e. is it a Put + io.mem.req_data.bits.data := io.tl.acquire.bits.data + io.mem.req_cmd.bits.rw := acq_has_data + val tag = Cat(io.tl.acquire.bits.client_id, + io.tl.acquire.bits.client_xact_id, + io.tl.acquire.bits.isBuiltInType()) + val addr = io.tl.acquire.bits.addr_block + io.mem.req_cmd.bits.tag := tag + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := acq_has_data + tag_out := tag + addr_out := addr + has_data := acq_has_data + } + } + } + when(active_out) { + io.mem.req_cmd.valid := !cmd_sent_out + cmd_sent_out := cmd_sent_out || io.mem.req_cmd.fire() + when(has_data && !tl_done_out) { + when(data_from_rel) { + io.tl.release.ready := io.mem.req_data.ready + io.mem.req_data.valid := io.tl.release.valid + } .otherwise { + io.tl.acquire.ready := io.mem.req_data.ready + io.mem.req_data.valid := io.tl.acquire.valid + } + } + when(tl_wrap_out) { tl_done_out := Bool(true) } + when(tl_done_out && make_grant_ack) { + gnt_arb.io.in(1).valid := Bool(true) // TODO: grants for voluntary acks? 
+ when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) } + } + when(cmd_sent_out && (!has_data || tl_done_out) && !make_grant_ack) { + active_out := Bool(false) + } + } + } + + // Aggregate incoming MemIO responses into TL Grants + val active_in = Reg(init=Bool(false)) + val (tl_cnt_in, tl_wrap_in) = Counter(io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats) + val tag_in = Reg(UInt(width = mifTagBits)) + + if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) { + val (mif_cnt_in, mif_wrap_in) = Counter(io.mem.resp.fire(), mifDataBeats) // TODO: Assumes all resps have data + val mif_done_in = Reg(init=Bool(false)) + val mif_buf_in = Vec.fill(mifDataBeats){ Reg(new MemData) } + val tl_buf_in = Vec.fill(tlDataBeats){ io.tl.acquire.bits.data.clone } + tl_buf_in := tl_buf_in.fromBits(mif_buf_in.toBits) + val tl_prog_in = (tl_cnt_in+UInt(1, width = log2Up(tlDataBeats+1)))*UInt(tlDataBits) + val mif_prog_in = mif_cnt_in*UInt(mifDataBits) + gnt_arb.io.in(0).bits := Grant( + dst = (if(dstIdBits > 0) tag_in(dst_off, tlClientXactIdBits + 1) else UInt(0)), + is_builtin_type = tag_in(0), + g_type = Mux(tag_in(0), Grant.getDataBlockType, UInt(0)), // TODO: Assumes MI or MEI protocol + client_xact_id = tag_in >> UInt(1), + manager_xact_id = UInt(0), + addr_beat = tl_cnt_in, + data = tl_buf_in(tl_cnt_in)) + + when(!active_in) { + io.mem.resp.ready := Bool(true) + when(io.mem.resp.valid) { + active_in := Bool(true) + mif_done_in := mif_wrap_in + tag_in := io.mem.resp.bits.tag + mif_buf_in(tl_cnt_in).data := io.mem.resp.bits.data + } + } + when(active_in) { + gnt_arb.io.in(0).valid := mif_done_in || tl_prog_in <= mif_prog_in + when(!mif_done_in) { + io.mem.resp.ready := Bool(true) + when(io.mem.resp.valid) { + mif_buf_in(mif_cnt_in).data := io.mem.resp.bits.data + } + } + when(mif_wrap_in) { mif_done_in := Bool(true) } + when(tl_wrap_in) { active_in := Bool(false) } + } + } else { // Don't generate all the uneeded data buffers and flow resp + gnt_arb.io.in(0).valid := io.mem.resp.valid + io.mem.resp.ready := gnt_arb.io.in(0).ready + gnt_arb.io.in(0).bits := Grant( + dst = (if(dstIdBits > 0) io.mem.resp.bits.tag(dst_off, tlClientXactIdBits + 1) else UInt(0)), + is_builtin_type = io.mem.resp.bits.tag(0), + g_type = Mux(io.mem.resp.bits.tag(0), Grant.getDataBlockType, UInt(0)), // TODO: Assumes MI or MEI protocol + client_xact_id = io.mem.resp.bits.tag >> UInt(1), + manager_xact_id = UInt(0), + addr_beat = tl_cnt_in, + data = io.mem.resp.bits.data) + } +} + +class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module +{ + val io = new QueueIO(data, entries) + require(entries > 1) + + val do_flow = Bool() + val do_enq = io.enq.fire() && !do_flow + val do_deq = io.deq.fire() && !do_flow + + val maybe_full = Reg(init=Bool(false)) + val enq_ptr = Counter(do_enq, entries)._1 + val (deq_ptr, deq_done) = Counter(do_deq, entries) + when (do_enq != do_deq) { maybe_full := do_enq } + + val ptr_match = enq_ptr === deq_ptr + val empty = ptr_match && !maybe_full + val full = ptr_match && maybe_full + val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2) + do_flow := empty && io.deq.ready + + val ram = Mem(data, entries, seqRead = true) + val ram_addr = Reg(Bits()) + val ram_out_valid = Reg(Bool()) + ram_out_valid := Bool(false) + when (do_enq) { ram(enq_ptr) := io.enq.bits } + when (io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)) { + ram_out_valid := Bool(true) + ram_addr := Mux(io.deq.valid, Mux(deq_done, UInt(0), deq_ptr + UInt(1)), deq_ptr) + } + + 
io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) + io.enq.ready := !full + io.deq.bits := Mux(empty, io.enq.bits, ram(ram_addr)) +} + +class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module +{ + val io = new QueueIO(data, entries) + + val fq = Module(new HellaFlowQueue(entries)(data)) + io.enq <> fq.io.enq + io.deq <> Queue(fq.io.deq, 1, pipe = true) +} + +object HellaQueue +{ + def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = { + val q = Module((new HellaQueue(entries)) { enq.bits.clone }) + q.io.enq.valid := enq.valid // not using <> so that override is allowed + q.io.enq.bits := enq.bits + enq.ready := q.io.enq.ready + q.io.deq + } +} + +class MemPipeIOMemIOConverter(numRequests: Int) extends MIFModule { + val io = new Bundle { + val cpu = new MemIO().flip + val mem = new MemPipeIO + } + + val numEntries = numRequests * mifDataBeats + val size = log2Down(numEntries) + 1 + + val inc = Bool() + val dec = Bool() + val count = Reg(init=UInt(numEntries, size)) + val watermark = count >= UInt(mifDataBeats) + + when (inc && !dec) { + count := count + UInt(1) + } + when (!inc && dec) { + count := count - UInt(mifDataBeats) + } + when (inc && dec) { + count := count - UInt(mifDataBeats-1) + } + + val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark + + io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask + io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask + io.mem.req_cmd.bits := io.cpu.req_cmd.bits + + io.mem.req_data <> io.cpu.req_data + + // Have separate queues to allow for different mem implementations + val resp_data_q = Module((new HellaQueue(numEntries)) { new MemData }) + resp_data_q.io.enq.valid := io.mem.resp.valid + resp_data_q.io.enq.bits.data := io.mem.resp.bits.data + + val resp_tag_q = Module((new HellaQueue(numEntries)) { new MemTag }) + resp_tag_q.io.enq.valid := io.mem.resp.valid + resp_tag_q.io.enq.bits.tag := io.mem.resp.bits.tag + + io.cpu.resp.valid := resp_data_q.io.deq.valid && resp_tag_q.io.deq.valid + io.cpu.resp.bits.data := resp_data_q.io.deq.bits.data + io.cpu.resp.bits.tag := resp_tag_q.io.deq.bits.tag + resp_data_q.io.deq.ready := io.cpu.resp.ready + resp_tag_q.io.deq.ready := io.cpu.resp.ready + + inc := resp_data_q.io.deq.fire() && resp_tag_q.io.deq.fire() + dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw +} + +class MemPipeIOTileLinkIOConverter(outstanding: Int) extends MIFModule { + val io = new Bundle { + val tl = new ManagerTileLinkIO + val mem = new MemPipeIO + } + + val a = Module(new MemIOTileLinkIOConverter(1)) + val b = Module(new MemPipeIOMemIOConverter(outstanding)) + a.io.tl <> io.tl + b.io.cpu.req_cmd <> Queue(a.io.mem.req_cmd, 2, pipe=true) + b.io.cpu.req_data <> Queue(a.io.mem.req_data, mifDataBeats, pipe=true) + a.io.mem.resp <> b.io.cpu.resp + b.io.mem <> io.mem +} diff --git a/uncore/src/main/scala/metadata.scala b/uncore/src/main/scala/metadata.scala new file mode 100644 index 00000000..b8d4446c --- /dev/null +++ b/uncore/src/main/scala/metadata.scala @@ -0,0 +1,315 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +/** Base class to represent coherence information in clients and managers */ +abstract class CoherenceMetadata extends Bundle { + val co = params(TLCoherencePolicy) + val id = params(TLId) +} + +/** Stores the client-side coherence information, + * such as permissions on the data and whether the data is dirty. + * Its API can be used to make TileLink messages in response to + * memory operations or [[uncore.Probe]] messages. 
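+ *
+ * For illustration, a minimal sketch (meta, op, and the io/xact signals are
+ * hypothetical; the method calls are the API defined below):
+ * {{{
+ *   // sketch: turn a miss into a custom Acquire using this metadata
+ *   when (meta.isMiss(op)) {
+ *     io.acquire.valid := Bool(true)
+ *     io.acquire.bits := meta.makeAcquire(xact_id, miss_block, op)
+ *   }
+ * }}}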
+ */
+class ClientMetadata extends CoherenceMetadata {
+  /** Actual state information stored in this bundle */
+  val state = UInt(width = co.clientStateWidth)
+
+  /** Metadata equality */
+  def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
+  def !=(rhs: ClientMetadata): Bool = !this.===(rhs)
+
+  /** Is the block's data present in this cache */
+  def isValid(dummy: Int = 0): Bool = co.isValid(this)
+  /** Does this cache have permissions on this block sufficient to perform op */
+  def isHit(op_code: UInt): Bool = co.isHit(op_code, this)
+  /** Does this cache lack permissions on this block sufficient to perform op */
+  def isMiss(op_code: UInt): Bool = !co.isHit(op_code, this)
+  /** Does a secondary miss on the block require another Acquire message */
+  def requiresAcquireOnSecondaryMiss(first_op: UInt, second_op: UInt): Bool =
+    co.requiresAcquireOnSecondaryMiss(first_op, second_op, this)
+  /** Does op require a Release to be made to outer memory */
+  def requiresReleaseOnCacheControl(op_code: UInt): Bool =
+    co.requiresReleaseOnCacheControl(op_code: UInt, this)
+  /** Does an eviction require a Release to be made to outer memory */
+  def requiresVoluntaryWriteback(dummy: Int = 0): Bool =
+    co.requiresReleaseOnCacheControl(M_FLUSH, this)
+
+  /** Constructs an Acquire message based on this metadata and a memory operation
+   *
+   * @param client_xact_id client's transaction id
+   * @param addr_block address of the cache block
+   * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+   */
+  def makeAcquire(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      op_code: UInt): Acquire = {
+    Bundle(Acquire(
+      is_builtin_type = Bool(false),
+      a_type = co.getAcquireType(op_code, this),
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      union = Cat(op_code, Bool(true))),
+      { case TLId => id })
+  }
+
+  /** Constructs a Release message based on this metadata on an eviction
+   *
+   * @param client_xact_id client's transaction id
+   * @param addr_block address of the cache block
+   * @param addr_beat sub-block address (which beat)
+   * @param data data being written back
+   */
+  def makeVoluntaryWriteback(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release = {
+    Bundle(Release(
+      voluntary = Bool(true),
+      r_type = co.getReleaseType(M_FLUSH, this),
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      data = data), { case TLId => id })
+  }
+
+  /** Constructs a Release message based on this metadata and a [[uncore.Probe]]
+   *
+   * @param prb the incoming [[uncore.Probe]]
+   * @param addr_beat sub-block address (which beat)
+   * @param data data being released
+   */
+  def makeRelease(
+      prb: Probe,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release = {
+    Bundle(Release(
+      voluntary = Bool(false),
+      r_type = co.getReleaseType(prb, this),
+      client_xact_id = UInt(0),
+      addr_block = prb.addr_block,
+      addr_beat = addr_beat,
+      data = data), { case TLId => id })
+  }
+
+  /** New metadata after receiving a [[uncore.Grant]]
+   *
+   * @param incoming the incoming [[uncore.Grant]]
+   * @param pending the mem op that triggered this transaction
+   */
+  def onGrant(incoming: Grant, pending: UInt): ClientMetadata =
+    Bundle(co.clientMetadataOnGrant(incoming, pending, this), { case TLId => id })
+
+  /** New metadata after receiving a [[uncore.Probe]]
+   *
+   * @param incoming the incoming [[uncore.Probe]]
+   */
+  def onProbe(incoming: Probe): ClientMetadata =
+    Bundle(co.clientMetadataOnProbe(incoming, this), { case TLId => id })
+
+  /** New metadata after an op_code hits this block
+   *
+   * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+   */
+  def onHit(op_code: UInt): ClientMetadata =
+    Bundle(co.clientMetadataOnHit(op_code, this), { case TLId => id })
+
+  /** New metadata after a cache control operation on this block
+   *
+   * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+   */
+  def onCacheControl(op_code: UInt): ClientMetadata =
+    Bundle(co.clientMetadataOnCacheControl(op_code, this), { case TLId => id })
+}
+
+/** Factories for ClientMetadata, including on reset */
+object ClientMetadata {
+  def apply(state: UInt) = {
+    val meta = new ClientMetadata
+    meta.state := state
+    meta
+  }
+  def onReset = new ClientMetadata().co.clientMetadataOnReset
+}
+
+/** Stores manager-side information about the status
+ * of a cache block, including whether it has any known sharers.
+ *
+ * Its API can be used to create [[uncore.Probe]] and [[uncore.Grant]] messages.
+ */
+class ManagerMetadata extends CoherenceMetadata {
+  // Currently no coherence policies assume manager-side state information
+  // val state = UInt(width = co.masterStateWidth) TODO: Fix 0-width wires in Chisel
+
+  /** The directory information for this block */
+  val sharers = UInt(width = co.dir.width)
+
+  /** Metadata equality */
+  def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
+    this.sharers === rhs.sharers
+  def !=(rhs: ManagerMetadata): Bool = !this.===(rhs)
+
+  /** Converts the directory info into an N-hot sharer bitvector (i.e. full representation) */
+  def full(dummy: Int = 0): UInt = co.dir.full(this.sharers)
+
+  /** Does this [[uncore.Acquire]] require [[uncore.Probe Probes]] to be sent */
+  def requiresProbes(acq: Acquire): Bool = co.requiresProbes(acq, this)
+  /** Does this memory op require [[uncore.Probe Probes]] to be sent */
+  def requiresProbes(op_code: UInt): Bool = co.requiresProbes(op_code, this)
+  /** Does an eviction require [[uncore.Probe Probes]] to be sent */
+  def requiresProbesOnVoluntaryWriteback(dummy: Int = 0): Bool =
+    co.requiresProbes(M_FLUSH, this)
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
+   *
+   * @param dst Destination client id for this Probe
+   * @param acq Acquire message triggering this Probe
+   */
+  def makeProbe(dst: UInt, acq: Acquire): ProbeToDst =
+    Bundle(Probe(dst, co.getProbeType(acq, this), acq.addr_block), { case TLId => id })
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given mem op
+   *
+   * @param dst Destination client id for this Probe
+   * @param op_code memory operation triggering this Probe
+   * @param addr_block address of the cache block being probed
+   */
+  def makeProbe(dst: UInt, op_code: UInt, addr_block: UInt): ProbeToDst =
+    Bundle(Probe(dst, co.getProbeType(op_code, this), addr_block), { case TLId => id })
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for an eviction
+   *
+   * @param dst Destination client id for this Probe
+   * @param addr_block address of the cache block being probed prior to eviction
+   */
+  def makeProbeForVoluntaryWriteback(dst: UInt, addr_block: UInt): ProbeToDst =
+    makeProbe(dst, M_FLUSH, addr_block)
+
+  /** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]]
+   *
+   * @param rel Release message being acknowledged by this Grant
+   * @param manager_xact_id manager's transaction id
+   */
+  def makeGrant(rel: ReleaseFromSrc, manager_xact_id: UInt): GrantToDst = {
+
Bundle(Grant( + dst = rel.client_id, + is_builtin_type = Bool(true), + g_type = Grant.voluntaryAckType, + client_xact_id = rel.client_xact_id, + manager_xact_id = manager_xact_id), { case TLId => id }) + } + + /** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] + * + * May contain single or multiple beats of data, or just be a permissions upgrade. + * + * @param acq Acquire message being responded to by this Grant + * @param manager_xact_id manager's transaction id + * @param addr_beat beat id of the data + * @param data data being refilled to the original requestor + */ + def makeGrant( + acq: AcquireFromSrc, + manager_xact_id: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)): GrantToDst = { + Bundle(Grant( + dst = acq.client_id, + is_builtin_type = acq.isBuiltInType(), + g_type = Mux(acq.isBuiltInType(), + acq.getBuiltInGrantType(), + co.getGrantType(acq, this)), + client_xact_id = acq.client_xact_id, + manager_xact_id = manager_xact_id, + addr_beat = addr_beat, + data = data), { case TLId => id }) + } + + /** Construct an [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] with some overrides + * + * Used to respond to secondary misses merged into this transaction. + * May contain single or multiple beats of data. + * + * @param pri Primary miss's Acquire message, used to get g_type and dst + * @param sec Secondary miss info, used to get beat and client_xact_id + * @param manager_xact_id manager's transaction id + * @param data data being refilled to the original requestor + */ + def makeGrant( + pri: AcquireFromSrc, + sec: SecondaryMissInfo, + manager_xact_id: UInt, + data: UInt): GrantToDst = { + val g = makeGrant(pri, manager_xact_id, sec.addr_beat, data) + g.client_xact_id := sec.client_xact_id + g + } + + /** New metadata after receiving a [[uncore.ReleaseFromSrc]] + * + * @param incoming the incoming [[uncore.ReleaseFromSrc]] + */ + def onRelease(incoming: ReleaseFromSrc): ManagerMetadata = + Bundle(co.managerMetadataOnRelease(incoming, incoming.client_id, this), { case TLId => id }) + + /** New metadata after sending a [[uncore.GrantToDst]] + * + * @param outgoing the outgoing [[uncore.GrantToDst]] + */ + def onGrant(outgoing: GrantToDst): ManagerMetadata = + Bundle(co.managerMetadataOnGrant(outgoing, outgoing.client_id, this), { case TLId => id }) +} + +/** Factories for ManagerMetadata, including on reset */ +object ManagerMetadata { + def apply(sharers: UInt, state: UInt = UInt(width = 0)) = { + val meta = new ManagerMetadata + //meta.state := state TODO: Fix 0-width wires in Chisel + meta.sharers := sharers + meta + } + def apply() = { + val meta = new ManagerMetadata + //meta.state := UInt(width = 0) TODO: Fix 0-width wires in Chisel + meta.sharers := meta.co.dir.flush + meta + } + def onReset = new ManagerMetadata().co.managerMetadataOnReset +} + +/** HierarchicalMetadata is used in a cache in a multi-level memory hierarchy + * that is a manager with respect to some inner caches and a client with + * respect to some outer cache. + * + * This class makes use of two different sets of TileLink parameters, which are + * applied by contextually mapping [[uncore.TLId]] to one of + * [[uncore.InnerTLId]] or [[uncore.OuterTLId]]. 
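+ *
+ * For illustration, a sketch of how a hierarchical tracker might use the two
+ * views (the io and dst/xact signals are hypothetical):
+ * {{{
+ *   val meta = HierarchicalMetadata.onReset
+ *   // sketch: probe inner clients with the manager-side view
+ *   io.inner.probe.bits := meta.inner.makeProbe(dst, io.inner.acquire.bits)
+ *   // sketch: refill from the outer hierarchy with the client-side view
+ *   io.outer.acquire.bits := meta.outer.makeAcquire(xact_id, addr_block, op)
+ * }}}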
+ */ +class HierarchicalMetadata extends CoherenceMetadata { + val inner: ManagerMetadata = Bundle(new ManagerMetadata, {case TLId => params(InnerTLId)}) + val outer: ClientMetadata = Bundle(new ClientMetadata, {case TLId => params(OuterTLId)}) + def ===(rhs: HierarchicalMetadata): Bool = + this.inner === rhs.inner && this.outer === rhs.outer + def !=(rhs: HierarchicalMetadata): Bool = !this.===(rhs) +} + +/** Factories for HierarchicalMetadata, including on reset */ +object HierarchicalMetadata { + def apply(inner: ManagerMetadata, outer: ClientMetadata): HierarchicalMetadata = { + val m = new HierarchicalMetadata + m.inner := inner + m.outer := outer + m + } + def onReset: HierarchicalMetadata = apply(ManagerMetadata.onReset, ClientMetadata.onReset) +} + +/** Identifies the TLId of the inner network in a hierarchical cache controller */ +case object InnerTLId extends Field[String] +/** Identifies the TLId of the outer network in a hierarchical cache controller */ +case object OuterTLId extends Field[String] diff --git a/uncore/src/main/scala/network.scala b/uncore/src/main/scala/network.scala new file mode 100644 index 00000000..4b00091d --- /dev/null +++ b/uncore/src/main/scala/network.scala @@ -0,0 +1,104 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +case object LNEndpoints extends Field[Int] +case object LNHeaderBits extends Field[Int] + +class PhysicalHeader(n: Int) extends Bundle { + val src = UInt(width = log2Up(n)) + val dst = UInt(width = log2Up(n)) +} + +class PhysicalNetworkIO[T <: Data](n: Int, dType: T) extends Bundle { + val header = new PhysicalHeader(n) + val payload = dType.clone + override def clone = new PhysicalNetworkIO(n,dType).asInstanceOf[this.type] +} + +class BasicCrossbarIO[T <: Data](n: Int, dType: T) extends Bundle { + val in = Vec.fill(n){Decoupled(new PhysicalNetworkIO(n,dType))}.flip + val out = Vec.fill(n){Decoupled(new PhysicalNetworkIO(n,dType))} +} + +abstract class PhysicalNetwork extends Module + +class BasicCrossbar[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Option[PhysicalNetworkIO[T] => Bool] = None) extends PhysicalNetwork { + val io = new BasicCrossbarIO(n, dType) + + val rdyVecs = List.fill(n){Vec.fill(n)(Bool())} + + io.out.zip(rdyVecs).zipWithIndex.map{ case ((out, rdys), i) => { + val rrarb = Module(new LockingRRArbiter(io.in(0).bits, n, count, needsLock)) + (rrarb.io.in, io.in, rdys).zipped.map{ case (arb, in, rdy) => { + arb.valid := in.valid && (in.bits.header.dst === UInt(i)) + arb.bits := in.bits + rdy := arb.ready && (in.bits.header.dst === UInt(i)) + }} + out <> rrarb.io.out + }} + for(i <- 0 until n) { + io.in(i).ready := rdyVecs.map(r => r(i)).reduceLeft(_||_) + } +} + +abstract class LogicalNetwork extends Module + +class LogicalHeader extends Bundle { + val src = UInt(width = params(LNHeaderBits)) + val dst = UInt(width = params(LNHeaderBits)) +} + +class LogicalNetworkIO[T <: Data](dType: T) extends Bundle { + val header = new LogicalHeader + val payload = dType.clone + override def clone = { new LogicalNetworkIO(dType).asInstanceOf[this.type] } +} + +object DecoupledLogicalNetworkIOWrapper { + def apply[T <: Data]( + in: DecoupledIO[T], + src: UInt = UInt(0), + dst: UInt = UInt(0)): DecoupledIO[LogicalNetworkIO[T]] = { + val out = Decoupled(new LogicalNetworkIO(in.bits.clone)).asDirectionless + out.valid := in.valid + out.bits.payload := in.bits + out.bits.header.dst := dst + out.bits.header.src := src + in.ready := out.ready + out + } +} + +object DecoupledLogicalNetworkIOUnwrapper { 
+ def apply[T <: Data](in: DecoupledIO[LogicalNetworkIO[T]]): DecoupledIO[T] = { + val out = Decoupled(in.bits.payload.clone).asDirectionless + out.valid := in.valid + out.bits := in.bits.payload + in.ready := out.ready + out + } +} + +object DefaultFromPhysicalShim { + def apply[T <: Data](in: DecoupledIO[PhysicalNetworkIO[T]]): DecoupledIO[LogicalNetworkIO[T]] = { + val out = Decoupled(new LogicalNetworkIO(in.bits.payload)).asDirectionless + out.bits.header := in.bits.header + out.bits.payload := in.bits.payload + out.valid := in.valid + in.ready := out.ready + out + } +} + +object DefaultToPhysicalShim { + def apply[T <: Data](n: Int, in: DecoupledIO[LogicalNetworkIO[T]]): DecoupledIO[PhysicalNetworkIO[T]] = { + val out = Decoupled(new PhysicalNetworkIO(n, in.bits.payload)).asDirectionless + out.bits.header := in.bits.header + out.bits.payload := in.bits.payload + out.valid := in.valid + in.ready := out.ready + out + } +} diff --git a/uncore/src/main/scala/package.scala b/uncore/src/main/scala/package.scala new file mode 100644 index 00000000..2c6c4a5f --- /dev/null +++ b/uncore/src/main/scala/package.scala @@ -0,0 +1,6 @@ +// See LICENSE for license details. + +package object uncore extends uncore.constants.MemoryOpConstants +{ + implicit def toOption[A](a: A) = Option(a) +} diff --git a/uncore/src/main/scala/slowio.scala b/uncore/src/main/scala/slowio.scala new file mode 100644 index 00000000..95ca34e6 --- /dev/null +++ b/uncore/src/main/scala/slowio.scala @@ -0,0 +1,70 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +class SlowIO[T <: Data](val divisor_max: Int)(data: => T) extends Module +{ + val io = new Bundle { + val out_fast = Decoupled(data).flip + val out_slow = Decoupled(data) + val in_fast = Decoupled(data) + val in_slow = Decoupled(data).flip + val clk_slow = Bool(OUTPUT) + val set_divisor = Valid(Bits(width = 32)).flip + val divisor = Bits(OUTPUT, 32) + } + + require(divisor_max >= 8 && divisor_max <= 65536 && isPow2(divisor_max)) + val divisor = Reg(init=UInt(divisor_max-1)) + val d_shadow = Reg(init=UInt(divisor_max-1)) + val hold = Reg(init=UInt(divisor_max/4-1)) + val h_shadow = Reg(init=UInt(divisor_max/4-1)) + when (io.set_divisor.valid) { + d_shadow := io.set_divisor.bits(log2Up(divisor_max)-1, 0).toUInt + h_shadow := io.set_divisor.bits(log2Up(divisor_max)-1+16, 16).toUInt + } + io.divisor := hold << UInt(16) | divisor + + val count = Reg{UInt(width = log2Up(divisor_max))} + val myclock = Reg{Bool()} + count := count + UInt(1) + + val rising = count === (divisor >> UInt(1)) + val falling = count === divisor + val held = count === (divisor >> UInt(1)) + hold + + when (falling) { + divisor := d_shadow + hold := h_shadow + count := UInt(0) + myclock := Bool(false) + } + when (rising) { + myclock := Bool(true) + } + + val in_slow_rdy = Reg(init=Bool(false)) + val out_slow_val = Reg(init=Bool(false)) + val out_slow_bits = Reg(data) + + val fromhost_q = Module(new Queue(data,1)) + fromhost_q.io.enq.valid := rising && (io.in_slow.valid && in_slow_rdy || this.reset) + fromhost_q.io.enq.bits := io.in_slow.bits + fromhost_q.io.deq <> io.in_fast + + val tohost_q = Module(new Queue(data,1)) + tohost_q.io.enq <> io.out_fast + tohost_q.io.deq.ready := rising && io.out_slow.ready && out_slow_val + + when (held) { + in_slow_rdy := fromhost_q.io.enq.ready + out_slow_val := tohost_q.io.deq.valid + out_slow_bits := Mux(this.reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits) + } + + io.in_slow.ready := in_slow_rdy + io.out_slow.valid := out_slow_val + 
io.out_slow.bits := out_slow_bits
+  io.clk_slow := myclock
+}
diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala
new file mode 100644
index 00000000..306c8731
--- /dev/null
+++ b/uncore/src/main/scala/tilelink.scala
@@ -0,0 +1,1221 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+import scala.math.max
+
+/** Parameters exposed to the top-level design, set based on
+ * external requirements or design space exploration
+ */
+/** Unique name per TileLink network */
+case object TLId extends Field[String]
+/** Coherency policy used to define custom message types */
+case object TLCoherencePolicy extends Field[CoherencePolicy]
+/** Number of manager agents */
+case object TLNManagers extends Field[Int]
+/** Number of client agents */
+case object TLNClients extends Field[Int]
+/** Number of client agents that cache data and use custom [[uncore.Acquire]] types */
+case object TLNCachingClients extends Field[Int]
+/** Number of client agents that do not cache data and use built-in [[uncore.Acquire]] types */
+case object TLNCachelessClients extends Field[Int]
+/** Maximum number of unique outstanding transactions per client */
+case object TLMaxClientXacts extends Field[Int]
+/** Maximum number of clients multiplexed onto a single port */
+case object TLMaxClientsPerPort extends Field[Int]
+/** Maximum number of unique outstanding transactions per manager */
+case object TLMaxManagerXacts extends Field[Int]
+/** Width of cache block addresses */
+case object TLBlockAddrBits extends Field[Int]
+/** Width of data beats */
+case object TLDataBits extends Field[Int]
+/** Number of data beats per cache block */
+case object TLDataBeats extends Field[Int]
+/** Whether the underlying physical network preserves point-to-point ordering of messages */
+case object TLNetworkIsOrderedP2P extends Field[Boolean]
+
+/** Utility trait for building Modules and Bundles that use TileLink parameters */
+trait TileLinkParameters extends UsesParameters {
+  val tlCoh = params(TLCoherencePolicy)
+  val tlNManagers = params(TLNManagers)
+  val tlNClients = params(TLNClients)
+  val tlNCachingClients = params(TLNCachingClients)
+  val tlNCachelessClients = params(TLNCachelessClients)
+  val tlClientIdBits = log2Up(tlNClients)
+  val tlManagerIdBits = log2Up(tlNManagers)
+  val tlMaxClientXacts = params(TLMaxClientXacts)
+  val tlMaxClientsPerPort = params(TLMaxClientsPerPort)
+  val tlMaxManagerXacts = params(TLMaxManagerXacts)
+  val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientsPerPort)
+  val tlManagerXactIdBits = log2Up(tlMaxManagerXacts)
+  val tlBlockAddrBits = params(TLBlockAddrBits)
+  val tlDataBits = params(TLDataBits)
+  val tlDataBytes = tlDataBits/8
+  val tlDataBeats = params(TLDataBeats)
+  val tlWriteMaskBits = if(tlDataBits/8 < 1) 1 else tlDataBits/8
+  val tlBeatAddrBits = log2Up(tlDataBeats)
+  val tlByteAddrBits = log2Up(tlWriteMaskBits)
+  val tlMemoryOpcodeBits = M_SZ
+  val tlMemoryOperandSizeBits = MT_SZ
+  val tlAcquireTypeBits = max(log2Up(Acquire.nBuiltInTypes),
+                              tlCoh.acquireTypeWidth)
+  val tlAcquireUnionBits = max(tlWriteMaskBits,
+                               (tlByteAddrBits +
+                                tlMemoryOperandSizeBits +
+                                tlMemoryOpcodeBits)) + 1
+  val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes),
+                            tlCoh.grantTypeWidth) + 1
+  val tlNetworkPreservesPointToPointOrdering = params(TLNetworkIsOrderedP2P)
+  val tlNetworkDoesNotInterleaveBeats = true
+  val amoAluOperandBits = params(AmoAluOperandBits)
+}
+
+abstract class TLBundle extends Bundle with TileLinkParameters
+abstract 
class TLModule extends Module with TileLinkParameters
+
+/** Base trait for all TileLink channels */
+trait TileLinkChannel extends TLBundle {
+  def hasData(dummy: Int = 0): Bool
+  def hasMultibeatData(dummy: Int = 0): Bool
+}
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+trait ClientToManagerChannel extends TileLinkChannel
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+trait ManagerToClientChannel extends TileLinkChannel
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+trait ClientToClientChannel extends TileLinkChannel // Unused for now
+
+/** Common signals that are used in multiple channels.
+ * These traits are useful for type parameterizing bundle wiring functions.
+ */
+
+/** Address of a cache block. */
+trait HasCacheBlockAddress extends TLBundle {
+  val addr_block = UInt(width = tlBlockAddrBits)
+
+  def conflicts(that: HasCacheBlockAddress) = this.addr_block === that.addr_block
+  def conflicts(addr: UInt) = this.addr_block === addr
+}
+
+/** Sub-block address or beat id of multi-beat data */
+trait HasTileLinkBeatId extends TLBundle {
+  val addr_beat = UInt(width = tlBeatAddrBits)
+}
+
+/** Client-side transaction id. Usually Miss Status Handling Register File index */
+trait HasClientTransactionId extends TLBundle {
+  val client_xact_id = Bits(width = tlClientXactIdBits)
+}
+
+/** Manager-side transaction id. Usually Transaction Status Handling Register File index. */
+trait HasManagerTransactionId extends TLBundle {
+  val manager_xact_id = Bits(width = tlManagerXactIdBits)
+}
+
+/** A single beat of cache block data */
+trait HasTileLinkData extends HasTileLinkBeatId {
+  val data = UInt(width = tlDataBits)
+
+  def hasData(dummy: Int = 0): Bool
+  def hasMultibeatData(dummy: Int = 0): Bool
+}
+
+/** The id of a client source or destination. Used in managers. */
+trait HasClientId extends TLBundle {
+  val client_id = UInt(width = tlClientIdBits)
+}
+
+/** TileLink channel bundle definitions */
+
+/** The Acquire channel is used to initiate coherence protocol transactions in
+ * order to gain access to a cache block's data with certain permissions
+ * enabled. Messages sent over this channel may be custom types defined by
+ * a [[uncore.CoherencePolicy]] for cached data accesses or may be built-in types
+ * used for uncached data accesses. Acquires may contain data for Put or
+ * PutAtomic built-in types. After sending an Acquire, clients must
+ * wait for a manager to send them a [[uncore.Grant]] message in response.
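+ *
+ * For illustration, a minimal sketch (the io and req_* signals here are
+ * hypothetical, but [[uncore.Get]] is the built-in factory defined below):
+ * {{{
+ *   // sketch: issue a built-in single-beat read on the Acquire channel
+ *   io.acquire.valid := req_valid
+ *   io.acquire.bits := Get(client_xact_id = UInt(0),
+ *                          addr_block = req_block,
+ *                          addr_beat = req_beat)
+ * }}}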
+ */
+class Acquire extends ClientToManagerChannel
+    with HasCacheBlockAddress
+    with HasClientTransactionId
+    with HasTileLinkData {
+  // Actual bundle fields:
+  val is_builtin_type = Bool()
+  val a_type = UInt(width = tlAcquireTypeBits)
+  val union = Bits(width = tlAcquireUnionBits)
+
+  // Utility funcs for accessing subblock union:
+  val opCodeOff = 1
+  val opSizeOff = tlMemoryOpcodeBits + opCodeOff
+  val addrByteOff = tlMemoryOperandSizeBits + opSizeOff
+  val addrByteMSB = tlByteAddrBits + addrByteOff
+  /** Hint whether to allocate the block in any intervening caches */
+  def allocate(dummy: Int = 0) = union(0)
+  /** Op code for [[uncore.PutAtomic]] operations */
+  def op_code(dummy: Int = 0) = Mux(
+    isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType),
+    M_XWR, union(opSizeOff-1, opCodeOff))
+  /** Operand size for [[uncore.PutAtomic]] */
+  def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
+  /** Byte address for [[uncore.PutAtomic]] operand */
+  def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
+  private def amo_offset(dummy: Int = 0) = addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBits/8))
+  /** Bit offset of [[uncore.PutAtomic]] operand */
+  def amo_shift_bits(dummy: Int = 0) = UInt(amoAluOperandBits)*amo_offset()
+  /** Write mask for [[uncore.Put]], [[uncore.PutBlock]], [[uncore.PutAtomic]] */
+  def wmask(dummy: Int = 0) =
+    Mux(isBuiltInType(Acquire.putAtomicType),
+      FillInterleaved(amoAluOperandBits/8, UIntToOH(amo_offset())),
+      Mux(isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType),
+        union(tlWriteMaskBits, 1),
+        UInt(0, width = tlWriteMaskBits)))
+  /** Full, beat-sized writemask */
+  def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask())
+  /** Complete physical address for block, beat or operand */
+  def addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, this.addr_byte())
+
+  // Other helper functions:
+  /** Message type equality */
+  def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===?
+
+  /** Is this message a built-in or custom type */
+  def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+  /** Is this message a particular built-in type */
+  def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t
+
+  /** Does this message refer to subblock operands using info in the Acquire.union subbundle */
+  def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesOnSubBlocks.contains(a_type)
+
+  /** Is this message a built-in prefetch message */
+  def isPrefetch(dummy: Int = 0): Bool = isBuiltInType() && is(Acquire.prefetchType)
+
+  /** Does this message contain data? Assumes that no custom message types have data. */
+  def hasData(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesWithData.contains(a_type)
+
+  /** Does this message contain multiple beats of data? Assumes that no custom message types have data. */
+  def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() &&
+                                               Acquire.typesWithMultibeatData.contains(a_type)
+
+  /** Does this message require the manager to probe the very client that sent it?
+   * Needed if multiple caches are attached to the same port.
+   */
+  def requiresSelfProbe(dummy: Int = 0) = Bool(false)
+
+  /** Mapping between each built-in Acquire type (defined in companion object)
+   * and a built-in Grant type.
+   */
+  def getBuiltInGrantType(dummy: Int = 0): UInt = {
+    MuxLookup(this.a_type, Grant.putAckType, Array(
+      Acquire.getType -> Grant.getDataBeatType,
+      Acquire.getBlockType -> Grant.getDataBlockType,
+      Acquire.putType -> Grant.putAckType,
+      Acquire.putBlockType -> Grant.putAckType,
+      Acquire.putAtomicType -> Grant.getDataBeatType,
+      Acquire.prefetchType -> Grant.prefetchAckType))
+  }
+}
+
+/** [[uncore.Acquire]] with an extra field stating its source id */
+class AcquireFromSrc extends Acquire with HasClientId
+
+/** Contains definitions of the built-in Acquire types and a factory
+ * for [[uncore.Acquire]]
+ *
+ * In general you should avoid using this factory directly and use
+ * [[uncore.ClientMetadata.makeAcquire]] for custom cached Acquires and
+ * [[uncore.Get]], [[uncore.Put]], etc. for built-in uncached Acquires.
+ *
+ * @param is_builtin_type built-in or custom type message?
+ * @param a_type built-in type enum or custom type enum
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param data data being put outwards
+ * @param union additional fields used for uncached types
+ */
+object Acquire {
+  val nBuiltInTypes = 5
+  //TODO: Use Enum
+  def getType = UInt("b000") // Get a single beat of data
+  def getBlockType = UInt("b001") // Get a whole block of data
+  def putType = UInt("b010") // Put a single beat of data
+  def putBlockType = UInt("b011") // Put a whole block of data
+  def putAtomicType = UInt("b100") // Perform an atomic memory op
+  def prefetchType = UInt("b101") // Prefetch a whole block of data
+  def typesWithData = Vec(putType, putBlockType, putAtomicType)
+  def typesWithMultibeatData = Vec(putBlockType)
+  def typesOnSubBlocks = Vec(putType, getType, putAtomicType)
+
+  def fullWriteMask = SInt(-1, width = new Acquire().tlWriteMaskBits).toUInt
+
+  // Most generic constructor
+  def apply(
+      is_builtin_type: Bool,
+      a_type: Bits,
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0),
+      union: UInt = UInt(0)): Acquire = {
+    val acq = new Acquire
+    acq.is_builtin_type := is_builtin_type
+    acq.a_type := a_type
+    acq.client_xact_id := client_xact_id
+    acq.addr_block := addr_block
+    acq.addr_beat := addr_beat
+    acq.data := data
+    acq.union := union
+    acq
+  }
+  // Copy constructor
+  def apply(a: Acquire): Acquire = {
+    val acq = new Acquire
+    acq := a
+    acq
+  }
+}
+
+/** Get a single beat of data from the outer memory hierarchy
+ *
+ * The client can hint whether the block containing this beat should be
+ * allocated in the intervening levels of the hierarchy.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param alloc hint whether the block should be allocated in intervening caches
+ */
+object Get {
+  def apply(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt,
+      alloc: Bool = Bool(true)): Acquire = {
+    Acquire(
+      is_builtin_type = Bool(true),
+      a_type = Acquire.getType,
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      union = Cat(M_XRD, alloc))
+  }
+}
+
+/** Get a whole cache block of data from the outer memory hierarchy
+ *
+ * The client can hint whether the block should be allocated in the
+ * intervening levels of the hierarchy.
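+ *
+ * A brief usage sketch (hypothetical miss-handling code; miss_block is an
+ * assumed signal holding the missing block's address):
+ * {{{
+ *   // sketch: request a full-block refill, hinting no allocation en route
+ *   val refill_acq = GetBlock(client_xact_id = UInt(0),
+ *                             addr_block = miss_block,
+ *                             alloc = Bool(false))
+ * }}}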
+ * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param alloc hint whether the block should be allocated in intervening caches + */ +object GetBlock { + def apply( + client_xact_id: UInt = UInt(0), + addr_block: UInt, + alloc: Bool = Bool(true)): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.getBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + union = Cat(M_XRD, alloc)) + } +} + +/** Prefetch a cache block into the next-outermost level of the memory hierarchy + * with read permissions. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + */ +object GetPrefetch { + def apply( + client_xact_id: UInt, + addr_block: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.prefetchType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = UInt(0), + union = Cat(M_XRD, Bool(true))) + } +} + +/** Put a single beat of data into the outer memory hierarchy + * + * The block will be allocated in the next-outermost level of the hierarchy. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat) + * @param data data being refilled to the original requestor + * @param wmask per-byte write mask for this beat + */ +object Put { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + wmask: UInt = Acquire.fullWriteMask): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putType, + addr_block = addr_block, + addr_beat = addr_beat, + client_xact_id = client_xact_id, + data = data, + union = Cat(wmask, Bool(true))) + } +} + +/** Put a whole cache block of data into the outer memory hierarchy + * + * If the write mask is not full, the block will be allocated in the + * next-outermost level of the hierarchy. If the write mask is full, the + * client can hint whether the block should be allocated or not. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat of several) + * @param data data being refilled to the original requestor + * @param wmask per-byte write mask for this beat + * @param alloc hint whether the block should be allocated in intervening caches + */ +object PutBlock { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + wmask: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Cat(wmask, (wmask != Acquire.fullWriteMask))) + } + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + alloc: Bool = Bool(true)): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Cat(Acquire.fullWriteMask, alloc)) + } +} + +/** Prefetch a cache block into the next-outermost level of the memory hierarchy + * with write permissions. 
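+ *
+ * A brief usage sketch (hypothetical prefetcher code; predicted_block is an
+ * assumed signal):
+ * {{{
+ *   // sketch: hint that a store to predicted_block is coming
+ *   val pf_acq = PutPrefetch(client_xact_id = UInt(1), addr_block = predicted_block)
+ * }}}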
+ * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + */ +object PutPrefetch { + def apply( + client_xact_id: UInt, + addr_block: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.prefetchType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = UInt(0), + union = Cat(M_XWR, Bool(true))) + } +} + +/** Perform an atomic memory operation in the next-outermost level of the memory hierarchy + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (within which beat) + * @param addr_byte sub-block address (which byte) + * @param atomic_opcode {swap, add, xor, and, min, max, minu, maxu} from [[uncore.MemoryOpConstants]] + * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]] + * @param data source operand data + */ +object PutAtomic { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + addr_byte: UInt, + atomic_opcode: UInt, + operand_size: UInt, + data: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putAtomicType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Cat(addr_byte, operand_size, atomic_opcode, Bool(true))) + } +} + +/** The Probe channel is used to force clients to release data or cede permissions + * on a cache block. Clients respond to Probes with [[uncore.Release]] messages. + * The available types of Probes are customized by a particular + * [[uncore.CoherencePolicy]]. + */ +class Probe extends ManagerToClientChannel + with HasCacheBlockAddress { + val p_type = UInt(width = tlCoh.probeTypeWidth) + + def is(t: UInt) = p_type === t + def hasData(dummy: Int = 0) = Bool(false) + def hasMultibeatData(dummy: Int = 0) = Bool(false) +} + +/** [[uncore.Probe]] with an extra field stating its destination id */ +class ProbeToDst extends Probe with HasClientId + +/** Contains factories for [[uncore.Probe]] and [[uncore.ProbeToDst]] + * + * In general you should avoid using these factories directly and use + * [[uncore.ManagerMetadata.makeProbe(UInt,Acquire)*]] instead. + * + * @param dst id of client to which probe should be sent + * @param p_type custom probe type + * @param addr_block address of the cache block + */ +object Probe { + def apply(p_type: UInt, addr_block: UInt): Probe = { + val prb = new Probe + prb.p_type := p_type + prb.addr_block := addr_block + prb + } + def apply(dst: UInt, p_type: UInt, addr_block: UInt): ProbeToDst = { + val prb = new ProbeToDst + prb.client_id := dst + prb.p_type := p_type + prb.addr_block := addr_block + prb + } +} + +/** The Release channel is used to release data or permission back to the manager + * in response to [[uncore.Probe]] messages. It can also be used to voluntarily + * write back data, for example in the event that dirty data must be evicted on + * a cache miss. The available types of Release messages are always customized by + * a particular [[uncore.CoherencePolicy]]. Releases may contain data or may be + * simple acknowledgements. Voluntary Releases are acknowledged with [[uncore.Grant Grants]]. 
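+ *
+ * For illustration, a sketch of an eviction writeback (the victim_* signals
+ * and beat counter are hypothetical; the factory is the one defined on
+ * [[uncore.ClientMetadata]]):
+ * {{{
+ *   // sketch: stream out one beat per cycle of a dirty victim block
+ *   io.release.bits := victim_meta.makeVoluntaryWriteback(
+ *                        client_xact_id = wb_xact_id,
+ *                        addr_block = victim_block,
+ *                        addr_beat = beat_cnt,
+ *                        data = victim_data(beat_cnt))
+ * }}}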
+ */ +class Release extends ClientToManagerChannel + with HasCacheBlockAddress + with HasClientTransactionId + with HasTileLinkData { + val r_type = UInt(width = tlCoh.releaseTypeWidth) + val voluntary = Bool() + + // Helper funcs + def is(t: UInt) = r_type === t + def hasData(dummy: Int = 0) = tlCoh.releaseTypesWithData.contains(r_type) + //TODO: Assumes all releases write back full cache blocks: + def hasMultibeatData(dummy: Int = 0) = Bool(tlDataBeats > 1) && tlCoh.releaseTypesWithData.contains(r_type) + def isVoluntary(dummy: Int = 0) = voluntary + def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering) +} + +/** [[uncore.Release]] with an extra field stating its source id */ +class ReleaseFromSrc extends Release with HasClientId + +/** Contains a [[uncore.Release]] factory + * + * In general you should avoid using this factory directly and use + * [[uncore.ClientMetadata.makeRelease]] instead. + * + * @param voluntary is this a voluntary writeback + * @param r_type type enum defined by coherence protocol + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat beat id of the data + * @param data data being written back + */ +object Release { + def apply( + voluntary: Bool, + r_type: UInt, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)): Release = { + val rel = new Release + rel.r_type := r_type + rel.client_xact_id := client_xact_id + rel.addr_block := addr_block + rel.addr_beat := addr_beat + rel.data := data + rel.voluntary := voluntary + rel + } +} + +/** The Grant channel is used to refill data or grant permissions requested of the + * manager agent via an [[uncore.Acquire]] message. It is also used to acknowledge + * the receipt of voluntary writeback from clients in the form of [[uncore.Release]] + * messages. There are built-in Grant messages used for Gets and Puts, and + * coherence policies may also define custom Grant types. Grants may contain data + * or may be simple acknowledgements. Grants are responded to with [[uncore.Finish]]. 
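+ *
+ * For illustration, a client-side sketch (io here is a hypothetical port;
+ * requiresAck and makeFinish are the helpers defined below):
+ * {{{
+ *   // sketch: ack a received Grant when the network requires it
+ *   when (io.grant.fire() && io.grant.bits.requiresAck()) {
+ *     io.finish.valid := Bool(true)
+ *     io.finish.bits := io.grant.bits.makeFinish()
+ *   }
+ * }}}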
+ */
+class Grant extends ManagerToClientChannel
+    with HasTileLinkData
+    with HasClientTransactionId
+    with HasManagerTransactionId {
+  val is_builtin_type = Bool()
+  val g_type = UInt(width = tlGrantTypeBits)
+
+  // Helper funcs
+  def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+  def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t
+  def is(t: UInt): Bool = g_type === t
+  def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(),
+    Grant.typesWithData.contains(g_type),
+    tlCoh.grantTypesWithData.contains(g_type))
+  def hasMultibeatData(dummy: Int = 0): Bool =
+    Bool(tlDataBeats > 1) && Mux(isBuiltInType(),
+      Grant.typesWithMultibeatData.contains(g_type),
+      tlCoh.grantTypesWithData.contains(g_type))
+  def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType)
+  def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary()
+  def makeFinish(dummy: Int = 0): Finish = {
+    val f = Bundle(new Finish, { case TLMaxManagerXacts => tlMaxManagerXacts })
+    f.manager_xact_id := this.manager_xact_id
+    f
+  }
+}
+
+/** [[uncore.Grant]] with an extra field stating its destination */
+class GrantToDst extends Grant with HasClientId
+
+/** Contains definitions of the built-in grant types and factories
+ * for [[uncore.Grant]] and [[uncore.GrantToDst]]
+ *
+ * In general you should avoid using these factories directly and use
+ * [[uncore.ManagerMetadata.makeGrant(uncore.AcquireFromSrc* makeGrant]] instead.
+ *
+ * @param dst id of client to which grant should be sent
+ * @param is_builtin_type built-in or custom type message?
+ * @param g_type built-in type enum or custom type enum
+ * @param client_xact_id client's transaction id
+ * @param manager_xact_id manager's transaction id
+ * @param addr_beat beat id of the data
+ * @param data data being refilled to the original requestor
+ */
+object Grant {
+  val nBuiltInTypes = 5
+  def voluntaryAckType = UInt("b000") // For acking Releases
+  def prefetchAckType = UInt("b001") // For acking any kind of Prefetch
+  def putAckType = UInt("b011") // For acking any kind of non-prefetch Put
+  def getDataBeatType = UInt("b100") // Supplying a single beat of Get
+  def getDataBlockType = UInt("b101") // Supplying all beats of a GetBlock
+  def typesWithData = Vec(getDataBlockType, getDataBeatType)
+  def typesWithMultibeatData = Vec(getDataBlockType)
+
+  def apply(
+      is_builtin_type: Bool,
+      g_type: UInt,
+      client_xact_id: UInt,
+      manager_xact_id: UInt,
+      addr_beat: UInt,
+      data: UInt): Grant = {
+    val gnt = new Grant
+    gnt.is_builtin_type := is_builtin_type
+    gnt.g_type := g_type
+    gnt.client_xact_id := client_xact_id
+    gnt.manager_xact_id := manager_xact_id
+    gnt.addr_beat := addr_beat
+    gnt.data := data
+    gnt
+  }
+
+  def apply(
+      dst: UInt,
+      is_builtin_type: Bool,
+      g_type: UInt,
+      client_xact_id: UInt,
+      manager_xact_id: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): GrantToDst = {
+    val gnt = new GrantToDst
+    gnt.client_id := dst
+    gnt.is_builtin_type := is_builtin_type
+    gnt.g_type := g_type
+    gnt.client_xact_id := client_xact_id
+    gnt.manager_xact_id := manager_xact_id
+    gnt.addr_beat := addr_beat
+    gnt.data := data
+    gnt
+  }
+}
+
+/** The Finish channel is used to provide a global ordering of transactions
+ * in networks that do not guarantee point-to-point ordering of messages.
+ * A Finish message is sent as acknowledgement of receipt of a [[uncore.Grant]].
+ * When a Finish message is received, a manager knows it is safe to begin + * processing other transactions that touch the same cache block. + */ +class Finish extends ClientToManagerChannel with HasManagerTransactionId { + def hasData(dummy: Int = 0) = Bool(false) + def hasMultibeatData(dummy: Int = 0) = Bool(false) +} + +/** Complete IO definition for incoherent TileLink, including networking headers */ +class UncachedTileLinkIO extends TLBundle { + val acquire = new DecoupledIO(new LogicalNetworkIO(new Acquire)) + val grant = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip + val finish = new DecoupledIO(new LogicalNetworkIO(new Finish)) +} + +/** Complete IO definition for coherent TileLink, including networking headers */ +class TileLinkIO extends UncachedTileLinkIO { + val probe = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip + val release = new DecoupledIO(new LogicalNetworkIO(new Release)) +} + +/** This version of UncachedTileLinkIO does not contain network headers. + * It is intended for use within client agents. + * + * Headers are provided in the top-level that instantiates the clients and network, + * probably using a [[uncore.ClientTileLinkNetworkPort]] module. + * By eliding the header subbundles within the clients we can enable + * hierarchical P-and-R while minimizing unconnected port errors in GDS. + * + * Secondly, this version of the interface elides [[uncore.Finish]] messages, with the + * assumption that a [[uncore.FinishUnit]] has been coupled to the TileLinkIO port + * to deal with acking received [[uncore.Grant Grants]]. + */ +class ClientUncachedTileLinkIO extends TLBundle { + val acquire = new DecoupledIO(new Acquire) + val grant = new DecoupledIO(new Grant).flip +} + +/** This version of TileLinkIO does not contain network headers. + * It is intended for use within client agents. + */ +class ClientTileLinkIO extends ClientUncachedTileLinkIO { + val probe = new DecoupledIO(new Probe).flip + val release = new DecoupledIO(new Release) +} + +/** This version of TileLinkIO does not contain network headers, but + * every channel does include an extra client_id subbundle. + * It is intended for use within manager agents. + * + * Managers need to track where [[uncore.Acquire]] and [[uncore.Release]] messages + * originated so that they can send a [[uncore.Grant]] to the right place. + * Similarly they must be able to issue Probes to particular clients. + * However, we'd still prefer to have [[uncore.ManagerTileLinkNetworkPort]] fill in + * the header.src to enable hierarchical P-and-R of the managers. Additionally, + * coherent clients might be mapped to random network port ids, and we'll leave it to the + * [[uncore.ManagerTileLinkNetworkPort]] to apply the correct mapping. Managers do need to + * see Finish messages so they know when to allow new transactions on a cache + * block to proceed.
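+ * + * A wiring sketch at the level that instantiates the network; the module and port names (agent, network, managerId, idConvert) are hypothetical: + * {{{ + * val port = Module(new ManagerTileLinkNetworkPort(managerId, idConvert)) + * port.io.manager <> agent.io.inner + * port.io.network <> network.io.ports(managerId) + * }}}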
+ */ +class ManagerTileLinkIO extends TLBundle { + val acquire = new DecoupledIO(new AcquireFromSrc).flip + val grant = new DecoupledIO(new GrantToDst) + val finish = new DecoupledIO(new Finish).flip + val probe = new DecoupledIO(new ProbeToDst) + val release = new DecoupledIO(new ReleaseFromSrc).flip +} + +/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */ +object TileLinkIOWrapper { + def apply(utl: ClientUncachedTileLinkIO, p: Parameters): ClientTileLinkIO = { + val conv = Module(new ClientTileLinkIOWrapper)(p) + conv.io.in <> utl + conv.io.out + } + def apply(utl: ClientUncachedTileLinkIO): ClientTileLinkIO = { + val conv = Module(new ClientTileLinkIOWrapper) + conv.io.in <> utl + conv.io.out + } + def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl + def apply(utl: UncachedTileLinkIO, p: Parameters): TileLinkIO = { + val conv = Module(new TileLinkIOWrapper)(p) + conv.io.in <> utl + conv.io.out + } + def apply(utl: UncachedTileLinkIO): TileLinkIO = { + val conv = Module(new TileLinkIOWrapper) + conv.io.in <> utl + conv.io.out + } + def apply(tl: TileLinkIO): TileLinkIO = tl +} + +class TileLinkIOWrapper extends TLModule { + val io = new Bundle { + val in = new UncachedTileLinkIO().flip + val out = new TileLinkIO + } + io.out.acquire <> io.in.acquire + io.out.grant <> io.in.grant + io.out.finish <> io.in.finish + io.out.probe.ready := Bool(true) + io.out.release.valid := Bool(false) +} + +class ClientTileLinkIOWrapper extends TLModule { + val io = new Bundle { + val in = new ClientUncachedTileLinkIO().flip + val out = new ClientTileLinkIO + } + io.out.acquire <> io.in.acquire + io.out.grant <> io.in.grant + io.out.probe.ready := Bool(true) + io.out.release.valid := Bool(false) +} + +/** Used to track metadata for transactions where multiple secondary misses have been merged + * and handled by a single transaction tracker. + */ +class SecondaryMissInfo extends TLBundle // TODO: add a_type to merge e.g. Get+GetBlocks, and/or HasClientId + with HasTileLinkBeatId + with HasClientTransactionId + +/** A helper module that automatically issues [[uncore.Finish]] messages in response + * to [[uncore.Grant Grants]] that it receives from a manager and forwards to a client + */ +class FinishUnit(srcId: Int = 0, outstanding: Int = 2) extends TLModule with HasDataBeatCounters { + val io = new Bundle { + val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip + val refill = Decoupled(new Grant) + val finish = Decoupled(new LogicalNetworkIO(new Finish)) + val ready = Bool(OUTPUT) + } + + val g = io.grant.bits.payload + + if(tlNetworkPreservesPointToPointOrdering) { + io.finish.valid := Bool(false) + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := io.refill.ready + io.ready := Bool(true) + } else { + // We only want to send Finishes after we have collected all beats of + // a multibeat Grant. But Grants from multiple managers or transactions may + // get interleaved, so we may need a counter for each.
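+ // A single incoming-beat counter (the first branch below) suffices when the + // network never interleaves beats of different Grants; otherwise we keep one + // counter per possible client_xact_id and watch for any of them to finish.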
+ val done = if(tlNetworkDoesNotInterleaveBeats) { + connectIncomingDataBeatCounterWithHeader(io.grant) + } else { + val entries = 1 << tlClientXactIdBits + def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id + assert(getId(io.grant.bits) < UInt(entries), "Not enough grant beat counters, only " + entries + " entries.") + connectIncomingDataBeatCountersWithHeader(io.grant, entries, getId).reduce(_||_) + } + val q = Module(new FinishQueue(outstanding)) + q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done) + q.io.enq.bits.fin := g.makeFinish() + q.io.enq.bits.dst := io.grant.bits.header.src + + io.finish.bits.header.src := UInt(srcId) + io.finish.bits.header.dst := q.io.deq.bits.dst + io.finish.bits.payload := q.io.deq.bits.fin + io.finish.valid := q.io.deq.valid + q.io.deq.ready := io.finish.ready + + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready + io.ready := q.io.enq.ready + } +} + +class FinishQueueEntry extends TLBundle { + val fin = new Finish + val dst = UInt(width = log2Up(params(LNEndpoints))) +} + +class FinishQueue(entries: Int) extends Queue(new FinishQueueEntry, entries) + +/** A port to convert [[uncore.ClientTileLinkIO]].flip into [[uncore.TileLinkIO]] + * + * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages, + * calculating header.dst and filling in header.src. + * Strips headers from [[uncore.Probe Probes]]. + * Responds to [[uncore.Grant]] by automatically issuing [[uncore.Finish]] to the granting managers. + * + * @param clientId network port id of this agent + * @param addrConvert how a physical address maps to a destination manager port id + */ +class ClientTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt) extends TLModule { + val io = new Bundle { + val client = new ClientTileLinkIO().flip + val network = new TileLinkIO + } + + val finisher = Module(new FinishUnit(clientId)) + finisher.io.grant <> io.network.grant + io.network.finish <> finisher.io.finish + + val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert) + val rel_with_header = ClientTileLinkHeaderCreator(io.client.release, clientId, addrConvert) + val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe) + val gnt_without_header = finisher.io.refill + + io.network.acquire.bits := acq_with_header.bits + io.network.acquire.valid := acq_with_header.valid && finisher.io.ready + acq_with_header.ready := io.network.acquire.ready && finisher.io.ready + io.network.release <> rel_with_header + io.client.probe <> prb_without_header + io.client.grant <> gnt_without_header +} + +object ClientTileLinkHeaderCreator { + def apply[T <: ClientToManagerChannel with HasCacheBlockAddress]( + in: DecoupledIO[T], + clientId: Int, + addrConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = { + val out = new DecoupledIO(new LogicalNetworkIO(in.bits.clone)).asDirectionless + out.bits.payload := in.bits + out.bits.header.src := UInt(clientId) + out.bits.header.dst := addrConvert(in.bits.addr_block) + out.valid := in.valid + in.ready := out.ready + out + } +} + +/** A port to convert [[uncore.ManagerTileLinkIO]].flip into [[uncore.TileLinkIO]].flip + * + * Creates network headers for [[uncore.Probe]] and [[uncore.Grant]] messages, + * calculating header.dst and filling in header.src.
+ * Strips headers from [[uncore.Acquire]], [[uncore.Release]] and [[uncore.Finish]], + * but supplies client_id instead. + * + * @param managerId the network port id of this agent + * @param idConvert how a sharer id maps to a destination client port id + */ +class ManagerTileLinkNetworkPort(managerId: Int, idConvert: UInt => UInt) extends TLModule { + val io = new Bundle { + val manager = new ManagerTileLinkIO().flip + val network = new TileLinkIO().flip + } + io.network.grant <> ManagerTileLinkHeaderCreator(io.manager.grant, managerId, (u: UInt) => u) + io.network.probe <> ManagerTileLinkHeaderCreator(io.manager.probe, managerId, idConvert) + io.manager.acquire.bits.client_id := io.network.acquire.bits.header.src + io.manager.acquire <> DecoupledLogicalNetworkIOUnwrapper(io.network.acquire) + io.manager.release.bits.client_id := io.network.release.bits.header.src + io.manager.release <> DecoupledLogicalNetworkIOUnwrapper(io.network.release) + io.manager.finish <> DecoupledLogicalNetworkIOUnwrapper(io.network.finish) +} + +object ManagerTileLinkHeaderCreator { + def apply[T <: ManagerToClientChannel with HasClientId]( + in: DecoupledIO[T], + managerId: Int, + idConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = { + val out = new DecoupledIO(new LogicalNetworkIO(in.bits.clone)).asDirectionless + out.bits.payload := in.bits + out.bits.header.src := UInt(managerId) + out.bits.header.dst := idConvert(in.bits.client_id) + out.valid := in.valid + in.ready := out.ready + out + } +} + +/** Struct for describing per-channel queue depths */ +case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int) + +/** Optionally enqueues each [[uncore.TileLinkChannel]] individually */ +class TileLinkEnqueuer(depths: TileLinkDepths) extends Module { + val io = new Bundle { + val client = new TileLinkIO().flip + val manager = new TileLinkIO + } + io.manager.acquire <> (if(depths.acq > 0) Queue(io.client.acquire, depths.acq) else io.client.acquire) + io.client.probe <> (if(depths.prb > 0) Queue(io.manager.probe, depths.prb) else io.manager.probe) + io.manager.release <> (if(depths.rel > 0) Queue(io.client.release, depths.rel) else io.client.release) + io.client.grant <> (if(depths.gnt > 0) Queue(io.manager.grant, depths.gnt) else io.manager.grant) + io.manager.finish <> (if(depths.fin > 0) Queue(io.client.finish, depths.fin) else io.client.finish) +} + +object TileLinkEnqueuer { + def apply(in: TileLinkIO, depths: TileLinkDepths)(p: Parameters): TileLinkIO = { + val t = Module(new TileLinkEnqueuer(depths))(p) + t.io.client <> in + t.io.manager + } + def apply(in: TileLinkIO, depth: Int)(p: Parameters): TileLinkIO = { + apply(in, TileLinkDepths(depth, depth, depth, depth, depth))(p) + } +} + +/** Utility functions for constructing TileLinkIO arbiters */ +trait TileLinkArbiterLike extends TileLinkParameters { + // Some shorthand type variables + type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId + type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId + type ClientSourcedWithIdAndData = ClientToManagerChannel with HasClientTransactionId with HasTileLinkData + + val arbN: Int // The number of ports on the client side + + // These abstract funcs are filled in depending on whether the arbiter mucks with the + // outgoing client ids to track sourcing and then needs to revert them on the way back + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits + def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits + def 
arbIdx(in: ManagerSourcedWithId): UInt + + // The following functions are all wiring helpers for each of the different types of TileLink channels + + def hookupClientSource[M <: ClientSourcedWithIdAndData]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData() + val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, tlDataBeats, Some(hasData _))) + clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.payload.client_xact_id := clientSourcedClientXactId(req.bits.payload, id) + req.ready := arb.ready + }} + arb.io.out <> mngr + } + + def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData]( + clts: Seq[DecoupledIO[M]], + mngr: DecoupledIO[M]) { + def hasData(m: M) = m.hasMultibeatData() + val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, tlDataBeats, Some(hasData _))) + clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.client_xact_id := clientSourcedClientXactId(req.bits, id) + req.ready := arb.ready + }} + arb.io.out <> mngr + } + + def hookupManagerSourceWithHeader[M <: ManagerToClientChannel]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (mngr.bits.header.dst === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + } + } + + def hookupManagerSourceWithId[M <: ManagerSourcedWithId]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (arbIdx(mngr.bits.payload) === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload) + } + } + + def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId]( + clts: Seq[DecoupledIO[M]], + mngr: DecoupledIO[M]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (arbIdx(mngr.bits) === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits) + } + } + + def hookupManagerSourceBroadcast[M <: Data](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { + clts.map{ _.valid := mngr.valid } + clts.map{ _.bits := mngr.bits } + mngr.ready := clts.map(_.ready).reduce(_&&_) + } + + def hookupFinish[M <: LogicalNetworkIO[Finish]](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { + val arb = Module(new RRArbiter(mngr.bits.clone, arbN)) + arb.io.in <> clts + arb.io.out <> mngr + } +} + +/** Abstract base class for any Arbiters that have UncachedTileLinkIOs */ +abstract class UncachedTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike { + val io = new Bundle { + val in = Vec.fill(arbN){new UncachedTileLinkIO}.flip + val out = new UncachedTileLinkIO + } + hookupClientSource(io.in.map(_.acquire), io.out.acquire) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + +/** Abstract base class for any Arbiters that have cached TileLinkIOs */ +abstract class TileLinkIOArbiter(val arbN: Int) extends Module with 
TileLinkArbiterLike { + val io = new Bundle { + val in = Vec.fill(arbN){new TileLinkIO}.flip + val out = new TileLinkIO + } + hookupClientSource(io.in.map(_.acquire), io.out.acquire) + hookupClientSource(io.in.map(_.release), io.out.release) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + +/** Appends the port index of the arbiter to the client_xact_id */ +trait AppendsArbiterId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = + Cat(in.client_xact_id, UInt(id, log2Up(arbN))) + def managerSourcedClientXactId(in: ManagerSourcedWithId) = + in.client_xact_id >> UInt(log2Up(arbN)) + def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id(log2Up(arbN)-1,0).toUInt +} + +/** Uses the client_xact_id as is (assumes it has been set to port index) */ +trait PassesId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id + def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id + def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id +} + +/** Overwrites some default client_xact_id with the port idx */ +trait UsesNewId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN)) + def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0) + def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id +} + +// Now we can mix in the various id-generation traits to make concrete arbiter classes +class UncachedTileLinkIOArbiterThatAppendsArbiterId(val n: Int) extends UncachedTileLinkIOArbiter(n) with AppendsArbiterId +class UncachedTileLinkIOArbiterThatPassesId(val n: Int) extends UncachedTileLinkIOArbiter(n) with PassesId +class UncachedTileLinkIOArbiterThatUsesNewId(val n: Int) extends UncachedTileLinkIOArbiter(n) with UsesNewId +class TileLinkIOArbiterThatAppendsArbiterId(val n: Int) extends TileLinkIOArbiter(n) with AppendsArbiterId +class TileLinkIOArbiterThatPassesId(val n: Int) extends TileLinkIOArbiter(n) with PassesId +class TileLinkIOArbiterThatUsesNewId(val n: Int) extends TileLinkIOArbiter(n) with UsesNewId + +/** Concrete uncached client-side arbiter that appends the arbiter's port id to client_xact_id */ +class ClientUncachedTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike with AppendsArbiterId { + val io = new Bundle { + val in = Vec.fill(arbN){new ClientUncachedTileLinkIO}.flip + val out = new ClientUncachedTileLinkIO + } + hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) + hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant) +} + +/** Concrete client-side arbiter that appends the arbiter's port id to client_xact_id */ +class ClientTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike with AppendsArbiterId { + val io = new Bundle { + val in = Vec.fill(arbN){new ClientTileLinkIO}.flip + val out = new ClientTileLinkIO + } + hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) + hookupClientSourceHeaderless(io.in.map(_.release), io.out.release) + hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) + hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant) +} + +/** Utility trait containing wiring functions to keep track of how many data beats have + * been sent or received over a particular [[uncore.TileLinkChannel]] or pair of channels.
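+ * + * A sketch of waiting for a multibeat refill to complete; the port and state names (io.grant, state, s_meta_write) are hypothetical: + * {{{ + * val refill_done = connectIncomingDataBeatCounter(io.grant) + * when (refill_done) { state := s_meta_write } + * }}}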
+ * + * Won't count message types that don't have data. + * Used in [[uncore.XactTracker]] and [[uncore.FinishUnit]]. + */ +trait HasDataBeatCounters { + type HasBeat = TileLinkChannel with HasTileLinkBeatId + + /** Returns the current count on this channel and when a message is done + * @param inc increment the counter (usually .valid or .fire()) + * @param data the actual channel data + * @param beat count to return for single-beat messages + */ + def connectDataBeatCounter[S <: TileLinkChannel](inc: Bool, data: S, beat: UInt) = { + val multi = data.hasMultibeatData() + val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats) + val cnt = Mux(multi, multi_cnt, beat) + val done = Mux(multi, multi_done, inc) + (cnt, done) + } + + /** Counter for beats on outgoing [[chisel.DecoupledIO]] */ + def connectOutgoingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T], beat: UInt = UInt(0)): (UInt, Bool) = + connectDataBeatCounter(in.fire(), in.bits, beat) + + /** Returns done but not cnt. Use the addr_beat subbundle instead of cnt for beats on + * incoming channels in case of network reordering. + */ + def connectIncomingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T]): Bool = + connectDataBeatCounter(in.fire(), in.bits, UInt(0))._2 + + /** Counter for beats on incoming DecoupledIO[LogicalNetworkIO[]]s; returns done */ + def connectIncomingDataBeatCounterWithHeader[T <: TileLinkChannel](in: DecoupledIO[LogicalNetworkIO[T]]): Bool = + connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2 + + /** If the network might interleave beats from different messages, we need a Vec of counters, + * one for every outstanding message id that might be interleaved. + * + * @param getId mapping from Message to counter id + */ + def connectIncomingDataBeatCountersWithHeader[T <: TileLinkChannel with HasClientTransactionId]( + in: DecoupledIO[LogicalNetworkIO[T]], + entries: Int, + getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = { + Vec((0 until entries).map { i => + connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2 + }) + } + + /** Provides counters on two channels, as well as a meta-counter that tracks how many + * messages have been sent over the up channel but not yet responded to over the down channel + * + * @param max max number of outstanding ups with no down + * @param up outgoing channel + * @param down incoming channel + * @param beat overrides cnts on single-beat messages + * @param track whether up's message should be tracked + * @return a tuple containing whether there are outstanding messages, up's count, + * up's done, down's count, down's done + */ + def connectTwoWayBeatCounter[T <: TileLinkChannel, S <: TileLinkChannel]( + max: Int, + up: DecoupledIO[T], + down: DecoupledIO[S], + beat: UInt = UInt(0), + track: T => Bool = (t: T) => Bool(true)): (Bool, UInt, Bool, UInt, Bool) = { + val cnt = Reg(init = UInt(0, width = log2Up(max+1))) + val (up_idx, up_done) = connectDataBeatCounter(up.fire(), up.bits, beat) + val (down_idx, down_done) = connectDataBeatCounter(down.fire(), down.bits, beat) + val do_inc = up_done && track(up.bits) + val do_dec = down_done + cnt := Mux(do_dec, + Mux(do_inc, cnt, cnt - UInt(1)), + Mux(do_inc, cnt + UInt(1), cnt)) + (cnt > UInt(0), up_idx, up_done, down_idx, down_done) + } +} diff --git a/uncore/src/main/scala/uncore.scala b/uncore/src/main/scala/uncore.scala new file mode 100644 index 00000000..d7573aec --- /dev/null +++ b/uncore/src/main/scala/uncore.scala @@ -0,0 +1,129 @@ +// See LICENSE for 
license details. + +package uncore +import Chisel._ + +case object NReleaseTransactors extends Field[Int] +case object NProbeTransactors extends Field[Int] +case object NAcquireTransactors extends Field[Int] + +trait CoherenceAgentParameters extends UsesParameters { + val nReleaseTransactors = 1 + val nAcquireTransactors = params(NAcquireTransactors) + val nTransactors = nReleaseTransactors + nAcquireTransactors + def outerTLParams = params.alterPartial({ case TLId => params(OuterTLId)}) + val outerDataBeats = outerTLParams(TLDataBeats) + val outerDataBits = outerTLParams(TLDataBits) + val outerBeatAddrBits = log2Up(outerDataBeats) + val outerByteAddrBits = log2Up(outerDataBits/8) + def innerTLParams = params.alterPartial({case TLId => params(InnerTLId)}) + val innerDataBeats = innerTLParams(TLDataBeats) + val innerDataBits = innerTLParams(TLDataBits) + val innerBeatAddrBits = log2Up(innerDataBeats) + val innerByteAddrBits = log2Up(innerDataBits/8) + require(outerDataBeats == innerDataBeats) //TODO: must fix all xact_data Vecs to remove this requirement +} + +abstract class CoherenceAgentBundle extends Bundle with CoherenceAgentParameters +abstract class CoherenceAgentModule extends Module with CoherenceAgentParameters + +trait HasCoherenceAgentWiringHelpers { + def doOutputArbitration[T <: TileLinkChannel]( + out: DecoupledIO[T], + ins: Seq[DecoupledIO[T]]) { + def lock(o: T) = o.hasMultibeatData() + val arb = Module(new LockingRRArbiter(out.bits.clone, ins.size, out.bits.tlDataBeats, lock _)) + out <> arb.io.out + arb.io.in <> ins + } + + def doInputRouting[T <: HasManagerTransactionId]( + in: DecoupledIO[T], + outs: Seq[DecoupledIO[T]]) { + val idx = in.bits.manager_xact_id + outs.map(_.bits := in.bits) + outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) } + in.ready := Vec(outs.map(_.ready)).read(idx) + } +} + +trait HasInnerTLIO extends CoherenceAgentBundle { + val inner = Bundle(new ManagerTileLinkIO)(innerTLParams) + val incoherent = Vec.fill(inner.tlNCachingClients){Bool()}.asInput + def iacq(dummy: Int = 0) = inner.acquire.bits + def iprb(dummy: Int = 0) = inner.probe.bits + def irel(dummy: Int = 0) = inner.release.bits + def ignt(dummy: Int = 0) = inner.grant.bits + def ifin(dummy: Int = 0) = inner.finish.bits +} + +trait HasUncachedOuterTLIO extends CoherenceAgentBundle { + val outer = Bundle(new ClientUncachedTileLinkIO)(outerTLParams) + def oacq(dummy: Int = 0) = outer.acquire.bits + def ognt(dummy: Int = 0) = outer.grant.bits +} + +trait HasCachedOuterTLIO extends CoherenceAgentBundle { + val outer = Bundle(new ClientTileLinkIO)(outerTLParams) + def oacq(dummy: Int = 0) = outer.acquire.bits + def oprb(dummy: Int = 0) = outer.probe.bits + def orel(dummy: Int = 0) = outer.release.bits + def ognt(dummy: Int = 0) = outer.grant.bits +} + +class ManagerTLIO extends HasInnerTLIO with HasUncachedOuterTLIO + +abstract class CoherenceAgent extends CoherenceAgentModule { + def innerTL: ManagerTileLinkIO + def outerTL: ClientTileLinkIO + def incoherent: Vec[Bool] +} + +abstract class ManagerCoherenceAgent extends CoherenceAgent + with HasCoherenceAgentWiringHelpers { + val io = new ManagerTLIO + def innerTL = io.inner + def outerTL = TileLinkIOWrapper(io.outer, outerTLParams) + def incoherent = io.incoherent +} + +class HierarchicalTLIO extends HasInnerTLIO with HasCachedOuterTLIO + +abstract class HierarchicalCoherenceAgent extends CoherenceAgent { + val io = new HierarchicalTLIO + def innerTL = io.inner + def outerTL = io.outer + def incoherent = 
io.incoherent +} + +trait HasTrackerConflictIO extends Bundle { + val has_acquire_conflict = Bool(OUTPUT) + val has_acquire_match = Bool(OUTPUT) + val has_release_match = Bool(OUTPUT) +} + +class ManagerXactTrackerIO extends ManagerTLIO with HasTrackerConflictIO +class HierarchicalXactTrackerIO extends HierarchicalTLIO with HasTrackerConflictIO + +abstract class XactTracker extends CoherenceAgentModule with HasDataBeatCounters { + def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt = + Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat) + def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt = + ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat) + + def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt = + addPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits) + + def addPendingBitWhenBeatIsGetOrAtomic(in: DecoupledIO[AcquireFromSrc]): UInt = { + val a = in.bits + val isGetOrAtomic = a.isBuiltInType() && + (Vec(Acquire.getType, Acquire.getBlockType, Acquire.putAtomicType).contains(a.a_type)) + addPendingBitWhenBeat(in.fire() && isGetOrAtomic, a) + } + + def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt = + dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits) + + def dropPendingBitAtDest(in: DecoupledIO[ProbeToDst]): UInt = + ~Fill(in.bits.tlNCachingClients, in.fire()) | ~UIntToOH(in.bits.client_id) +} diff --git a/uncore/src/main/scala/util.scala b/uncore/src/main/scala/util.scala new file mode 100644 index 00000000..65c5d6cd --- /dev/null +++ b/uncore/src/main/scala/util.scala @@ -0,0 +1,106 @@ +// See LICENSE for license details. + +package uncore + +import Chisel._ +import scala.math._ + +class Unsigned(x: Int) { + require(x >= 0) + def clog2: Int = { require(x > 0); ceil(log(x)/log(2)).toInt } + def log2: Int = { require(x > 0); floor(log(x)/log(2)).toInt } + def isPow2: Boolean = x > 0 && (x & (x-1)) == 0 + def nextPow2: Int = if (x == 0) 1 else 1 << clog2 +} + +object MuxBundle { + def apply[T <: Data] (default: T, mapping: Seq[(Bool, T)]): T = { + mapping.reverse.foldLeft(default)((b, a) => Mux(a._1, a._2, b)) + } +} + +// Produces 0-width value when counting to 1 +class ZCounter(val n: Int) { + val value = Reg(init=UInt(0, log2Ceil(n))) + def inc(): Bool = { + if (n == 1) Bool(true) + else { + val wrap = value === UInt(n-1) + value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1)) + wrap + } + } +} + +object ZCounter { + def apply(n: Int) = new ZCounter(n) + def apply(cond: Bool, n: Int): (UInt, Bool) = { + val c = new ZCounter(n) + var wrap: Bool = null + when (cond) { wrap = c.inc() } + (c.value, cond && wrap) + } +} + +class FlowThroughSerializer[T <: HasTileLinkData](gen: T, n: Int) extends Module { + val io = new Bundle { + val in = Decoupled(gen.clone).flip + val out = Decoupled(gen.clone) + val cnt = UInt(OUTPUT, log2Up(n)) + val done = Bool(OUTPUT) + } + val narrowWidth = io.in.bits.data.getWidth / n + require(io.in.bits.data.getWidth % narrowWidth == 0) + + if(n == 1) { + io.in <> io.out + io.cnt := UInt(width = 0) + io.done := Bool(true) + } else { + val cnt = Reg(init=UInt(0, width = log2Up(n))) + val wrap = cnt === UInt(n-1) + val rbits = Reg(io.in.bits.clone) + val active = Reg(init=Bool(false)) + + val shifter = Vec.fill(n){Bits(width = narrowWidth)} + (0 until n).foreach { + i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth) + } + + io.done := Bool(false) + io.cnt := cnt + io.in.ready := !active + io.out.valid := active || io.in.valid + io.out.bits := 
io.in.bits + when(!active && io.in.valid) { + when(io.in.bits.hasData()) { + cnt := Mux(io.out.ready, UInt(1), UInt(0)) + rbits := io.in.bits + active := Bool(true) + } + io.done := !io.in.bits.hasData() + } + when(active) { + io.out.bits := rbits + io.out.bits.data := shifter(cnt) + when(io.out.ready) { + cnt := cnt + UInt(1) + when(wrap) { + cnt := UInt(0) + io.done := Bool(true) + active := Bool(false) + } + } + } + } +} + +object FlowThroughSerializer { + def apply[T <: HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = { + val fs = Module(new FlowThroughSerializer(in.bits, n)) + fs.io.in.valid := in.valid + fs.io.in.bits := in.bits + in.ready := fs.io.in.ready + fs.io.out + } +}
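+ +// A usage sketch for FlowThroughSerializer; the port name (mem_acq) and the +// width factor are hypothetical. Each wide data beat of mem_acq is re-presented +// as four sequential quarter-width beats on the returned channel: +// val narrow_acq = FlowThroughSerializer(mem_acq, 4)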