WIP uncore and rocket changes compile

2016-11-10 15:56:42 -08:00
parent 32fd11935c
commit afa1a6d549
9 changed files with 713 additions and 536 deletions
--- a/src/main/scala/groundtest/Tile.scala
+++ b/src/main/scala/groundtest/Tile.scala
@@ -3,6 +3,9 @@ package groundtest
 import Chisel._
 import rocket._
 import uncore.tilelink._
+import uncore.agents.CacheName
+import uncore.tilelink2._
+import diplomacy._
 import scala.util.Random
 import scala.collection.mutable.ListBuffer
 import junctions.HasAddrMapParameters
@@ -96,20 +99,25 @@ abstract class GroundTest(implicit val p: Parameters) extends Module
  val io = new GroundTestIO
 }

-class GroundTestTile(implicit val p: Parameters) extends LazyTile {
+class GroundTestTile(implicit val p: Parameters) extends LazyModule with HasGroundTestParameters {
+  val dcacheParams = p.alterPartial({ case CacheName => "L1D" })
  val slave = None
-  lazy val module = new TileImp(this) with HasGroundTestParameters {
-    val io = new TileIO(bc) {
+  val dcache = HellaCache(p(DCacheKey))(dcacheParams)
+  val ucLegacy = LazyModule(new TLLegacy()(p))
+
+  lazy val module = new LazyModuleImp(this) {
+    val io = new Bundle {
+      val cached = dcache.node.bundleOut
+      val uncached = ucLegacy.node.bundleOut
      val success = Bool(OUTPUT)
    }

    val test = p(BuildGroundTest)(dcacheParams)

    val ptwPorts = ListBuffer.empty ++= test.io.ptw
-    val memPorts = ListBuffer.empty ++= test.io.mem
+    val uncachedArbPorts = ListBuffer.empty ++= test.io.mem

    if (nCached > 0) {
-      val dcache_io = HellaCache(p(DCacheKey))(dcacheParams)
      val dcacheArb = Module(new HellaCacheArbiter(nCached)(dcacheParams))

      dcacheArb.io.requestor.zip(test.io.cache).foreach {
@@ -118,13 +126,12 @@ class GroundTestTile(implicit val p: Parameters) extends LazyTile {
          dcacheIF.io.requestor <> cache
          requestor <> dcacheIF.io.cache
      }
-      dcache_io.cpu <> dcacheArb.io.mem
-      io.cached.head <> dcache_io.mem
+      dcache.module.io.cpu <> dcacheArb.io.mem

      // SimpleHellaCacheIF leaves invalidate_lr dangling, so we wire it to false
-      dcache_io.cpu.invalidate_lr := Bool(false)
+      dcache.module.io.cpu.invalidate_lr := Bool(false)

-      ptwPorts += dcache_io.ptw
+      ptwPorts += dcache.module.io.ptw
    }

    if (ptwPorts.size > 0) {
@@ -132,10 +139,9 @@ class GroundTestTile(implicit val p: Parameters) extends LazyTile {
      ptw.io.requestors <> ptwPorts
    }

-    require(memPorts.size == io.uncached.size)
-    if (memPorts.size > 0) {
-      io.uncached <> memPorts
-    }
+    val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size))
+    uncachedArb.io.in <> uncachedArbPorts
+    ucLegacy.module.io.legacy <> uncachedArb.io.out

    io.success := test.io.status.finished
  }
--- a/src/main/scala/rocket/dcache.scala
+++ b/src/main/scala/rocket/dcache.scala
@@ -5,13 +5,12 @@ package rocket
 import Chisel._
 import junctions._
 import diplomacy._
-import uncore.tilelink._
 import uncore.tilelink2._
-import uncore.agents._
-import uncore.coherence._
 import uncore.constants._
+import uncore.agents._
 import uncore.util._
 import util._
+import TLMessages._
 import Chisel.ImplicitConversions._
 import cde.{Parameters, Field}

@@ -41,14 +40,21 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  }
 }

-class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends LazyModule with HasL1HellaCacheParameters {
+
+  val node = TLClientNode(TLClientParameters(supportsProbe = TransferSizes(cacheBlockBytes)))
+
+  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val cpu = (new HellaCacheIO).flip
      val ptw = new TLBPTWIO()
-    val mem = new ClientTileLinkIO
+      val mem = node.bundleOut
    }

-  val fq = Module(new FinishQueue(1))
+    val edge = node.edgesOut(0)
+    val tl_out = io.mem(0)
+
+    val grantackq = Module(new Queue(tl_out.e.bits,1))

    require(rowBits == encRowBits) // no ECC
    require(refillCyclesPerBeat == 1)
@@ -67,8 +73,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    dataArb.io.out.ready := true

    val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
-  val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false))
-  val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire())
+    val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false))
+    val probe_bits = RegEnable(tl_out.b.bits, tl_out.b.fire()) // TODO has data now :(
    val s1_nack = Wire(init=Bool(false))
    val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
    val s1_valid_not_nacked = s1_valid_masked && !s1_nack
@@ -93,7 +99,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack

    // I/O MSHRs
-  val maxUncachedInFlight = (1 << io.mem.acquire.bits.client_xact_id.getWidth) - 1
    val uncachedInFlight = Reg(init=Vec.fill(maxUncachedInFlight)(Bool(false)))
    val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq))

@@ -120,7 +125,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }

    val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
-  val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
+    val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr)(paddrBits-1, untagBits)
    val s1_victim_way = Wire(init = replacer.way)
    val (s1_hit_way, s1_hit_state, s1_victim_meta) =
      if (usingDataScratchpad) {
@@ -128,7 +133,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
        metaWriteArb.io.out.ready := true
        metaReadArb.io.out.ready := !metaWriteArb.io.out.valid
        val inScratchpad = addrMap(s"TL2:dmem${tileId}").containsAddress(s1_paddr)
-      val hitState = Mux(inScratchpad, ClientMetadata.onReset.onHit(M_XWR), ClientMetadata.onReset)
+        val hitState = Mux(inScratchpad, ClientMetadata.maximum, ClientMetadata.onReset)
        (inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset))
      } else {
        val meta = Module(new MetadataArray(onReset _))
@@ -164,20 +169,22 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
    val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
    val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
-  val s2_hit = s2_hit_state.isHit(s2_req.cmd)
+    val s2_hit_valid = s2_hit_state.isValid()
+    val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd)
    val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
    val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
    val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
    val s2_victimize = s2_valid_cached_miss || s2_flush_valid
    val s2_valid_uncached = s2_valid_miss && s2_uncached
-  val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
+    val s2_victim_way = Mux(s2_hit_valid && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
    val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid)
-  val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid))
+    val s2_victim_state = Mux(s2_hit_valid && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid))
    val s2_victim_valid = s2_victim_state.isValid()
-  val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
-  val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
+    val (prb_ack_data, s2_report_param, probeNewCoh)= s2_probe_state.onProbe(probe_bits.param)
+    val (needs_vol_wb, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH) 
+    val s2_victim_dirty = needs_vol_wb
    val s2_update_meta = s2_hit_state =/= s2_new_hit_state
-  io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready && !uncachedInFlight.asUInt.andR)
+    io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR)
    when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }

    // exceptions
@@ -250,105 +257,98 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    metaWriteArb.io.in(0).bits.data.coh := Mux(s2_valid_hit, s2_new_hit_state, ClientMetadata.onReset)
    metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)

-  // acquire
-  val xact_id = PriorityEncoder(~uncachedInFlight.asUInt)
-  val cachedGetMessage = s2_hit_state.makeAcquire(
-    client_xact_id = UInt(maxUncachedInFlight - 1),
-    addr_block = s2_req.addr(paddrBits-1, blockOffBits),
-    op_code = s2_req.cmd)
-  val uncachedGetMessage = Get(
-    client_xact_id = xact_id,
-    addr_block = s2_req.addr(paddrBits-1, blockOffBits),
-    addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
-    addr_byte = s2_req.addr(beatOffBits-1, 0),
-    operand_size = s2_req.typ,
-    alloc = Bool(false))
-  val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits)
-  val uncachedPutMessage = Put(
-    client_xact_id = xact_id,
-    addr_block = s2_req.addr(paddrBits-1, blockOffBits),
-    addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
-    data = Fill(beatWords, pstore1_storegen.data),
-    wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)),
-    alloc = Bool(false))
-  val uncachedPutAtomicMessage = PutAtomic(
-    client_xact_id = xact_id,
-    addr_block = s2_req.addr(paddrBits-1, blockOffBits),
-    addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
-    addr_byte = s2_req.addr(beatOffBits-1, 0),
-    atomic_opcode = s2_req.cmd,
-    operand_size = s2_req.typ,
-    data = Fill(beatWords, pstore1_storegen.data))
-  io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || (s2_valid_uncached && !uncachedInFlight.asUInt.andR)) && fq.io.enq.ready
-  io.mem.acquire.bits := cachedGetMessage
+    // Prepare a TileLink request message that initiates a transaction
+    val a_source = PriorityEncoder(~uncachedInFlight.asUInt)
+    val a_address = s2_req.addr
+    val a_size = s2_req.typ
+    val a_data = Fill(beatWords, pstore1_storegen.data)
+    val acquire = edge.Acquire(a_source, a_address, lgCacheBlockBytes, s2_grow_param)._2 // TODO check cacheability
+    val get     = edge.Get(a_source, a_address, a_size)._2
+    val put     = edge.Put(a_source, a_address, a_size, a_data)._2
+    val atomics = if (edge.manager.anySupportLogical) {
+      MuxLookup(s2_req.cmd, Wire(new TLBundleA(edge.bundle)), Array(
+        M_XA_SWAP -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.SWAP)._2,
+        M_XA_XOR  -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.XOR) ._2,
+        M_XA_OR   -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.OR)  ._2,
+        M_XA_AND  -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.AND) ._2,
+        M_XA_ADD  -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.ADD)._2,
+        M_XA_MIN  -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MIN)._2,
+        M_XA_MAX  -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAX)._2,
+        M_XA_MINU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MINU)._2,
+        M_XA_MAXU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAXU)._2))
+    } else {
+      // If no managers support atomics, assert fail if processor asks for them
+      assert (!(tl_out.a.valid && pstore1_amo && s2_write && s2_uncached))
+      Wire(new TLBundleA(edge.bundle))
+    }
+
+    tl_out.a.valid := grantackq.io.enq.ready && ((s2_valid_cached_miss && !s2_victim_dirty) ||
+                                          (s2_valid_uncached && !uncachedInFlight.asUInt.andR))
+    tl_out.a.bits := Mux(pstore1_amo && s2_write && s2_uncached, atomics,
+                       Mux(s2_write && s2_uncached, put,
+                        Mux(s2_uncached, get, acquire)))
+
+    // Set pending bits for outstanding TileLink transaction
+    when (tl_out.a.fire()) {
      when (s2_uncached) {
-    if (!usingDataScratchpad)
-      assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
-    io.mem.acquire.bits := uncachedGetMessage
-    when (s2_write) {
-      io.mem.acquire.bits := uncachedPutMessage
-      when (pstore1_amo) {
-        io.mem.acquire.bits := uncachedPutAtomicMessage
-      }
-    }
-  }
-  when (io.mem.acquire.fire()) {
-    when (s2_uncached) {
-      uncachedInFlight(xact_id) := true
-      uncachedReqs(xact_id) := s2_req
+        uncachedInFlight(a_source) := true
+        uncachedReqs(a_source) := s2_req
      }.otherwise {
        cached_grant_wait := true
      }
    }

    // grant
-  val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
-  val grantIsVoluntary = io.mem.grant.bits.isVoluntary()
-  val grantIsUncached = !grantIsRefill && !grantIsVoluntary
-  io.mem.grant.ready := true
-  when (io.mem.grant.fire()) {
-    when (grantIsRefill) { assert(cached_grant_wait) }
-    when (grantIsUncached) {
-      assert(uncachedInFlight(io.mem.grant.bits.client_xact_id))
-      uncachedInFlight(io.mem.grant.bits.client_xact_id) := false
-      s2_data := io.mem.grant.bits.data
-      val req = uncachedReqs(io.mem.grant.bits.client_xact_id)
+    val (d_first, d_last, d_address_inc) = edge.firstlast(tl_out.d)
+    val grantIsCached = tl_out.d.bits.opcode.isOneOf(Grant, GrantData)
+    val grantIsUncached = tl_out.d.bits.opcode.isOneOf(AccessAck, AccessAckData, HintAck)
+    val grantIsVoluntary = tl_out.d.bits.opcode === ReleaseAck // Clears a different pending bit
+    val grantIsRefill = tl_out.d.bits.opcode === GrantData     // Writes the data array
+    tl_out.d.ready := true
+    when (tl_out.d.fire() && d_last) {
+      when (grantIsCached) {
+        assert(cached_grant_wait, "A GrantData was unexpected by the dcache.")
+        cached_grant_wait := false
+      } .elsewhen (grantIsUncached) {
+        // TODO this requires that uncached accesses only take a single beat
+        val id = tl_out.d.bits.source
+        val req = uncachedReqs(id)
+        assert(uncachedInFlight(id), "An AccessAck was unexpected by the dcache.")
+        uncachedInFlight(id) := false
+        s2_data := tl_out.d.bits.data
        s2_req.cmd := req.cmd
        s2_req.typ := req.typ
        s2_req.tag := req.tag
        s2_req.addr := Cat(s1_paddr >> wordOffBits /* don't-care */, req.addr(wordOffBits-1, 0))
-    }
-    when (grantIsVoluntary) {
-      assert(release_ack_wait)
+      } .elsewhen (grantIsVoluntary) {
+        assert(release_ack_wait, "A ReleaseAck was unexpected by the dcache.")
        release_ack_wait := false
      }
    }
-  val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
-  when (io.mem.grant.fire() && refillDone) { cached_grant_wait := false }

    // data refill
-  val doRefillBeat = grantIsRefill && io.mem.grant.valid
+    val doRefillBeat = grantIsRefill && tl_out.d.valid
    dataArb.io.in(1).valid := doRefillBeat
    assert(dataArb.io.in(1).ready || !doRefillBeat)
    dataArb.io.in(1).bits.write := true
-  dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
+    dataArb.io.in(1).bits.addr := ((s2_req.addr >> idxLSB) << idxLSB) | d_address_inc // block-align the base first: s2_req.addr still holds the request's own offset bits, which must not be OR-ed with the beat offset
    dataArb.io.in(1).bits.way_en := s2_victim_way
-  dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data
+    dataArb.io.in(1).bits.wdata := tl_out.d.bits.data
    dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
    // tag updates on refill
-  metaWriteArb.io.in(1).valid := refillDone
+    metaWriteArb.io.in(1).valid := grantIsCached && tl_out.d.fire() && d_last
    assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
    metaWriteArb.io.in(1).bits.way_en := s2_victim_way
    metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
-  metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
+    metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(s2_req.cmd, tl_out.d.bits.param)
    metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
    // don't accept uncached grants if there's a structural hazard on s2_data...
    val blockUncachedGrant = Reg(Bool())
    blockUncachedGrant := dataArb.io.out.valid
    when (grantIsUncached) {
-    io.mem.grant.ready := !(blockUncachedGrant || s1_valid)
+      tl_out.d.ready := !(blockUncachedGrant || s1_valid)
      // ...but insert bubble to guarantee grant's eventual forward progress
-    when (io.mem.grant.valid && !io.mem.grant.ready) {
+      when (tl_out.d.valid && !tl_out.d.ready) {
        io.cpu.req.ready := false
        dataArb.io.in(1).valid := true
        dataArb.io.in(1).bits.write := false
@@ -356,58 +356,72 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
      }
    }

-  // finish
-  fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone)
-  fq.io.enq.bits := io.mem.grant.bits.makeFinish()
-  io.mem.finish <> fq.io.deq
-  when (fq.io.enq.valid) { assert(fq.io.enq.ready) }
-  when (refillDone) { replacer.miss }
+    // Finish TileLink transaction by issuing a GrantAck
+    grantackq.io.enq.valid := tl_out.d.fire() && d_last && edge.hasFollowUp(tl_out.d.bits)
+    grantackq.io.enq.bits := edge.GrantAck(tl_out.d.bits)
+    tl_out.e <> grantackq.io.deq
+    assert(!grantackq.io.enq.valid || grantackq.io.enq.ready, "Too many Grants received by dcache.")
+    when (tl_out.d.fire() && d_last) { replacer.miss }

-  // probe
+    // Handle an incoming TileLink Probe message
    val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
-  metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe
-  io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
-  metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block
+    metaReadArb.io.in(1).valid := tl_out.b.valid && !block_probe
+    tl_out.b.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
+    metaReadArb.io.in(1).bits.idx := tl_out.b.bits.address(idxMSB, idxLSB)
    metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays)

    // release
-  val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
-  val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
-  val releaseRejected = io.mem.release.valid && !io.mem.release.ready
+    val (writebackCount, writebackDone) = Counter(tl_out.c.fire() && inWriteback, refillCycles) //TODO firstlast?
+    val releaseDone = writebackDone || (tl_out.c.fire() && !inWriteback)
+    val releaseRejected = tl_out.c.valid && !tl_out.c.ready
    val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
    val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
    val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
-  io.mem.release.valid := s2_release_data_valid
-  io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
-  val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(maxUncachedInFlight - 1), UInt(0))
-  val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH)
-  val probeResponseMessage = s2_probe_state.makeRelease(probe_bits)
-  val probeNewCoh = s2_probe_state.onProbe(probe_bits)
+
+    val voluntaryReleaseMessage = edge.Release(
+                                    fromSource = UInt(maxUncachedInFlight - 1),
+                                    toAddress = probe_bits.address,
+                                    lgSize = lgCacheBlockBytes,
+                                    shrinkPermissions = s2_shrink_param,
+                                    data = s2_data)._2
+
+    val probeResponseMessage = Mux(prb_ack_data, // onProbe asked for data: select the data-carrying ProbeAck
+                                  edge.ProbeAck(
+                                    b = probe_bits,
+                                    reportPermissions = s2_report_param,
+                                    data = s2_data),
+                                  edge.ProbeAck( // otherwise a permissions-only ProbeAck
+                                    b = probe_bits,
+                                    reportPermissions = s2_report_param))
+
+    tl_out.c.valid := s2_release_data_valid
+    tl_out.c.bits := voluntaryReleaseMessage // TODO was ClientMetadata.onReset.makeRelease(probe_bits) ... s2_victim_state ok?
    val newCoh = Wire(init = probeNewCoh)
    releaseWay := s2_probe_way
+
    when (s2_victimize && s2_victim_dirty) {
-    assert(!(s2_valid && s2_hit_state.isValid()))
+      assert(!(s2_valid && s2_hit_valid))
      release_state := s_voluntary_writeback
-    probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB))
+      probe_bits.address := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) << rowOffBits
    }
    when (s2_probe) {
-    when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty }
+      when (needs_vol_wb) { release_state := s_probe_rep_dirty }
      .elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean }
      .otherwise {
-      io.mem.release.valid := true
+        tl_out.c.valid := true
        release_state := s_probe_rep_miss
      }
    }
    when (releaseDone) { release_state := s_ready }
    when (release_state.isOneOf(s_probe_rep_miss, s_probe_rep_clean)) {
-    io.mem.release.valid := true
+      tl_out.c.valid := true
    }
    when (release_state.isOneOf(s_probe_rep_clean, s_probe_rep_dirty)) {
-    io.mem.release.bits := probeResponseMessage
+      tl_out.c.bits := probeResponseMessage
      when (releaseDone) { release_state := s_probe_write_meta }
    }
    when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) {
-    io.mem.release.bits := voluntaryReleaseMessage
+      tl_out.c.bits := voluntaryReleaseMessage
      newCoh := voluntaryNewCoh
      releaseWay := s2_victim_way
      when (releaseDone) {
@@ -415,21 +429,20 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
        release_ack_wait := true
      }
    }
-  when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
-  io.mem.release.bits.addr_block := probe_bits.addr_block
-  io.mem.release.bits.addr_beat := writebackCount
-  io.mem.release.bits.data := s2_data
+    when (s2_probe && !tl_out.c.fire()) { s1_nack := true }
+    tl_out.c.bits.address := probe_bits.address
+    tl_out.c.bits.data := s2_data

    dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
    dataArb.io.in(2).bits.write := false
-  dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits
+    dataArb.io.in(2).bits.addr := tl_out.c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits)
    dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)

    metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
    metaWriteArb.io.in(2).bits.way_en := releaseWay
-  metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
+    metaWriteArb.io.in(2).bits.idx := tl_out.c.bits.address(idxMSB, idxLSB)
    metaWriteArb.io.in(2).bits.data.coh := newCoh
-  metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
+    metaWriteArb.io.in(2).bits.data.tag := tl_out.c.bits.address(paddrBits-1, untagBits)
    when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }

    // cached response
@@ -440,7 +453,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    io.cpu.ordered := !(s1_valid || s2_valid || cached_grant_wait || uncachedInFlight.asUInt.orR)

    // uncached response
-  io.cpu.replay_next := io.mem.grant.fire() && grantIsUncached
+    io.cpu.replay_next := tl_out.d.fire() && tl_out.d.bits.opcode <= AccessAckData
    val doUncachedResp = Reg(next = io.cpu.replay_next)
    when (doUncachedResp) {
      assert(!s2_valid_hit)
@@ -473,7 +486,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
    val flushed = Reg(init=Bool(true))
    val flushing = Reg(init=Bool(false))
    val flushCounter = Counter(nSets * nWays)
-  when (io.mem.acquire.fire() && !s2_uncached) { flushed := false }
+    when (tl_out.a.fire() && !s2_uncached) { flushed := false }
    when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
      io.cpu.s2_nack := !flushed
      when (!flushed) {
@@ -496,6 +509,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
      }
    }
  }
+}

 class ScratchpadSlavePort(implicit val p: Parameters) extends LazyModule with HasCoreParameters {
  val node = TLManagerNode(TLManagerPortParameters(
--- a/src/main/scala/rocket/nbdcache.scala
+++ b/src/main/scala/rocket/nbdcache.scala
@@ -4,10 +4,11 @@ package rocket

 import Chisel._
 import uncore.tilelink._
-import uncore.coherence._
+import uncore.tilelink2._
 import uncore.agents._
 import uncore.constants._
 import uncore.util._
+import diplomacy._
 import util._
 import Chisel.ImplicitConversions._
 import cde.{Parameters, Field}
@@ -19,11 +20,19 @@ case class DCacheConfig(

 case object DCacheKey extends Field[DCacheConfig]

-trait HasL1HellaCacheParameters extends HasL1CacheParameters {
-  val wordBits = xLen // really, xLen max fLen
+trait HasL1HellaCacheParameters extends HasCacheParameters with HasCoreParameters {
+  val outerDataBeats = p(TLKey(p(TLId))).dataBeats
+  val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
+  val refillCyclesPerBeat = outerDataBits/rowBits
+  val refillCycles = refillCyclesPerBeat*outerDataBeats
+
+  val cacheBlockBytes = p(CacheBlockBytes)
+  val lgCacheBlockBytes = log2Up(cacheBlockBytes)
+
+  val wordBits = xLen // really, xLen max 
  val wordBytes = wordBits/8
  val wordOffBits = log2Up(wordBytes)
-  val beatBytes = p(CacheBlockBytes) / outerDataBeats
+  val beatBytes = cacheBlockBytes / outerDataBeats
  val beatWords = beatBytes / wordBytes
  val beatOffBits = log2Up(beatBytes)
  val idxMSB = untagBits-1
@@ -310,10 +319,10 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa
  rpq.io.enq.bits := io.req_bits
  rpq.io.deq.ready := (io.replay.ready && state === s_drain_rpq) || state === s_invalid

-  val coh_on_grant = req.old_meta.coh.onGrant(
-                          incoming = io.mem_grant.bits,
-                          pending = Mux(dirties_coh, M_XWR, req.cmd))
-  val coh_on_hit =  io.req_bits.old_meta.coh.onHit(io.req_bits.cmd)
+  val coh_on_grant = req.old_meta.coh.onGrant(UInt(0), UInt(0))
+                          //incoming = io.mem_grant.bits,
+                          //pending = Mux(dirties_coh, M_XWR, req.cmd))
+  val coh_on_hit =  coh_on_grant //io.req_bits.old_meta.coh.onHit(io.req_bits.cmd)

  when (state === s_drain_rpq && !rpq.io.deq.valid) {
    state := s_invalid
@@ -355,14 +364,14 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa
    req := io.req_bits
    dirties_coh := isWrite(io.req_bits.cmd)
    when (io.req_bits.tag_match) {
-      when(coh.isHit(io.req_bits.cmd)) { // set dirty bit
+      when(Bool(false)) { // TODO coh.isHit(io.req_bits.cmd)) { // set dirty bit
        state := s_meta_write_req
        new_coh_state := coh_on_hit
      }.otherwise { // upgrade permissions
        state := s_refill_req
      }
    }.otherwise { // writback if necessary and refill
-      state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear)
+      //TODO state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear)
    }
  }

@@ -390,22 +399,22 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa
  io.meta_write.valid := state.isOneOf(s_meta_write_req, s_meta_clear)
  io.meta_write.bits.idx := req_idx
  io.meta_write.bits.data.coh := Mux(state === s_meta_clear,
-                                      req.old_meta.coh.onCacheControl(M_FLUSH),
+                                      req.old_meta.coh.onCacheControl(M_FLUSH)._2,
                                      new_coh_state)
  io.meta_write.bits.data.tag := io.tag
  io.meta_write.bits.way_en := req.way_en

  io.wb_req.valid := state === s_wb_req
-  io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback(
-                      client_xact_id = UInt(id),
-                      addr_block = Cat(req.old_meta.tag, req_idx))
+  //TODO io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback(
+  //                    client_xact_id = UInt(id),
+  //                    addr_block = Cat(req.old_meta.tag, req_idx))
  io.wb_req.bits.way_en := req.way_en

  io.mem_req.valid := state === s_refill_req && fq.io.enq.ready
-  io.mem_req.bits := req.old_meta.coh.makeAcquire(
-                       addr_block = Cat(io.tag, req_idx),
-                       client_xact_id = Bits(id),
-                       op_code = req.cmd)
+  //TODO io.mem_req.bits := req.old_meta.coh.makeAcquire(
+  //                     addr_block = Cat(io.tag, req_idx),
+  //                     client_xact_id = Bits(id),
+  //                     op_code = req.cmd)

  io.meta_read.valid := state === s_drain_rpq
  io.meta_read.bits.idx := req_idx
@@ -669,10 +678,10 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {

  val miss_coh = ClientMetadata.onReset
  val reply_coh = Mux(tag_matches, old_coh, miss_coh)
-  val reply = reply_coh.makeRelease(req)
+  //TODO val reply = reply_coh.makeRelease(req)
  io.req.ready := state === s_invalid
  io.rep.valid := state === s_release
-  io.rep.bits := reply
+  //TODO io.rep.bits := reply

  assert(!io.rep.valid || !io.rep.bits.hasData(),
    "ProbeUnit should not send releases with data")
@@ -685,10 +694,10 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  io.meta_write.bits.way_en := way_en
  io.meta_write.bits.idx := req.addr_block
  io.meta_write.bits.data.tag := req.addr_block >> idxBits
-  io.meta_write.bits.data.coh := old_coh.onProbe(req)
+  //TODO io.meta_write.bits.data.coh := old_coh.onProbe(req)

  io.wb_req.valid := state === s_writeback_req
-  io.wb_req.bits := reply
+  //TODO io.wb_req.bits := reply
  io.wb_req.bits.way_en := way_en

  // state === s_invalid
@@ -716,7 +725,7 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  }

  when (state === s_mshr_resp) {
-    val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback() 
+    val needs_writeback = tag_matches // TODO && old_coh.requiresVoluntaryWriteback() 
    state := Mux(needs_writeback, s_writeback_req, s_release)
  }

@@ -912,9 +921,8 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache
  val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
  val s2_tag_match = s2_tag_match_way.orR
  val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
-  val s2_hit = s2_tag_match && 
-                s2_hit_state.isHit(s2_req.cmd) && 
-                s2_hit_state === s2_hit_state.onHit(s2_req.cmd)
+  val (s2_has_permission, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd)
+  val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state

  // load-reserved/store-conditional
  val lrsc_count = Reg(init=UInt(0))
@@ -1236,7 +1244,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
 }

 object HellaCache {
-  def apply(cfg: DCacheConfig)(implicit p: Parameters) =
-    if (cfg.nMSHRs == 0) Module(new DCache()).io
-    else Module(new HellaCache(cfg)).io
+  def apply(cfg: DCacheConfig)(implicit p: Parameters) = LazyModule(new DCache)
+  //  if (cfg.nMSHRs == 0) Module(new DCache()).io
+  //  else Module(new HellaCache(cfg)).io
 }
--- a/src/main/scala/rocket/tile.scala
+++ b/src/main/scala/rocket/tile.scala
@@ -11,10 +11,9 @@ import uncore.converters._
 import uncore.devices._
 import util._
 import cde.{Parameters, Field}
+import scala.collection.mutable.ListBuffer

 case object BuildRoCC extends Field[Seq[RoccParameters]]
-case object NCachedTileLinkPorts extends Field[Int]
-case object NUncachedTileLinkPorts extends Field[Int]
 case object TileId extends Field[Int]

 case class RoccParameters(
@@ -24,61 +23,38 @@ case class RoccParameters(
  nPTWPorts : Int = 0,
  useFPU: Boolean = false)

-case class TileBundleConfig(
-  nCachedTileLinkPorts: Int,
-  nUncachedTileLinkPorts: Int,
-  xLen: Int)
-
-class TileIO(c: TileBundleConfig, node: Option[TLInwardNode] = None)(implicit p: Parameters) extends Bundle {
-  val cached = Vec(c.nCachedTileLinkPorts, new ClientTileLinkIO)
-  val uncached = Vec(c.nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
-  val hartid = UInt(INPUT, c.xLen)
-  val interrupts = new TileInterrupts().asInput
-  val slave = node.map(_.inward.bundleIn)
-  val resetVector = UInt(INPUT, c.xLen)
-
-  override def cloneType = new TileIO(c).asInstanceOf[this.type]
-}
-
-abstract class TileImp(l: LazyTile)(implicit val p: Parameters) extends LazyModuleImp(l) {
-  val io: TileIO
-}
-
-abstract class LazyTile(implicit p: Parameters) extends LazyModule {
-  val nCachedTileLinkPorts = p(NCachedTileLinkPorts)
-  val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts)
+class RocketTile(implicit p: Parameters) extends LazyModule {
  val dcacheParams = p.alterPartial({ case CacheName => "L1D" })
-  val bc = TileBundleConfig(
-    nCachedTileLinkPorts = nCachedTileLinkPorts,
-    nUncachedTileLinkPorts = nUncachedTileLinkPorts,
-    xLen = p(XLen))
+  val icacheParams = p.alterPartial({ case CacheName => "L1I" })

-  val module: TileImp
-  val slave: Option[TLInputNode]
+  val slaveNode = if (p(DataScratchpadSize) == 0) None else Some(TLInputNode())
+  val scratch = if (p(DataScratchpadSize) == 0) None else Some(LazyModule(new ScratchpadSlavePort()(dcacheParams)))
+  val dcache = HellaCache(p(DCacheKey))(dcacheParams)
+  val ucLegacy = LazyModule(new TLLegacy()(p))
+
+  (slaveNode zip scratch) foreach { case (node, lm) => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(node) }
+  
+  lazy val module = new LazyModuleImp(this) {
+    val io = new Bundle {
+      val cached = dcache.node.bundleOut
+      val uncached = ucLegacy.node.bundleOut
+      val slave = slaveNode.map(_.bundleIn)
+      val hartid = UInt(INPUT, p(XLen))
+      val interrupts = new TileInterrupts().asInput
+      val resetVector = UInt(INPUT, p(XLen))
    }

-class RocketTile(implicit p: Parameters) extends LazyTile {
-  val slave = if (p(DataScratchpadSize) == 0) None else Some(TLInputNode())
-  val scratch = if (p(DataScratchpadSize) == 0) None else Some(LazyModule(new ScratchpadSlavePort()(dcacheParams)))
-
-  (slave zip scratch) foreach { case (node, lm) => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(node) }
-
-  lazy val module = new TileImp(this) {
-    val io = new TileIO(bc, slave)
    val buildRocc = p(BuildRoCC)
    val usingRocc = !buildRocc.isEmpty
    val nRocc = buildRocc.size
    val nFPUPorts = buildRocc.filter(_.useFPU).size

    val core = Module(new Rocket)
-    val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" })))
-    val dcache = HellaCache(p(DCacheKey))(dcacheParams)
+    val icache = Module(new Frontend()(icacheParams))

-    val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw)
-    val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem)
-    val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem)
-    val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]()
-    val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem)
+    val ptwPorts = ListBuffer(icache.io.ptw, dcache.module.io.ptw)
+    val dcPorts = ListBuffer(core.io.dmem)
+    val uncachedArbPorts = ListBuffer(icache.io.mem)
    core.io.interrupts := io.interrupts
    core.io.hartid := io.hartid
    icache.io.cpu <> core.io.imem
@@ -129,19 +105,12 @@ class RocketTile(implicit p: Parameters) extends LazyTile {
      respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))

      ptwPorts ++= roccs.flatMap(_.io.ptw)
-      uncachedPorts ++= roccs.flatMap(_.io.utl)
+      uncachedArbPorts ++= roccs.flatMap(_.io.utl) // TODO no difference between io.autl and io.utl for now
    }

    val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size))
    uncachedArb.io.in <> uncachedArbPorts
-    uncachedArb.io.out +=: uncachedPorts
-
-    // Connect the caches and RoCC to the outer memory system
-    io.uncached <> uncachedPorts
-    io.cached <> cachedPorts
-    // TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions
-    require(uncachedPorts.size == nUncachedTileLinkPorts)
-    require(cachedPorts.size == nCachedTileLinkPorts)
+    ucLegacy.module.io.legacy <> uncachedArb.io.out

    if (p(UseVM)) {
      val ptw = Module(new PTW(ptwPorts.size)(dcacheParams))
@@ -155,7 +124,7 @@ class RocketTile(implicit p: Parameters) extends LazyTile {
    require(dcPorts.size == core.dcacheArbPorts)
    val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
    dcArb.io.requestor <> dcPorts
-    dcache.cpu <> dcArb.io.mem
+    dcache.module.io.cpu <> dcArb.io.mem

    if (nFPUPorts == 0) {
      fpuOpt.foreach { fpu =>
--- a/src/main/scala/uncore/agents/Cache.scala
+++ b/src/main/scala/uncore/agents/Cache.scala
@@ -130,7 +130,6 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {

 abstract class Metadata(implicit p: Parameters) extends CacheBundle()(p) {
  val tag = Bits(width = tagBits)
-  val coh: CoherenceMetadata
 }

 class MetaReadReq(implicit p: Parameters) extends CacheBundle()(p) {
--- a/src/main/scala/uncore/tilelink2/Bundles.scala
+++ b/src/main/scala/uncore/tilelink2/Bundles.scala
@@ -43,6 +43,14 @@ object TLMessages
  def isD(x: UInt) = x <= ReleaseAck
 }

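+/**
+  * The three primary TileLink permissions are:
+  *   (T)runk: the agent is (or is on the path to) the global point of serialization.
+  *   (B)ranch: the agent holds a read-only copy of the data.
+  *   (N)one: the agent holds no copy of the data and has no permissions on it.
+  * These permissions are permuted by transfer operations in various ways.
+  * Messages for these operations carry a param that caps, grows, shrinks, or reports permissions.
+  */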
 object TLPermissions
 {
  // Cap types (Grant = new permissions, Probe = permisions <= target)
--- a/src/main/scala/uncore/tilelink2/Edges.scala
+++ b/src/main/scala/uncore/tilelink2/Edges.scala
@@ -238,7 +238,10 @@ class TLEdgeOut(
    (legal, c)
  }

-  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt) = {
+  def ProbeAck(b: TLBundleB, reportPermissions: UInt): TLBundleC =
+    ProbeAck(b.source, b.address, b.size, reportPermissions)
+
+  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt): TLBundleC = {
    val c = Wire(new TLBundleC(bundle))
    c.opcode  := TLMessages.ProbeAck
    c.param   := reportPermissions
@@ -250,7 +253,10 @@ class TLEdgeOut(
    c
  }

-  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt, data: UInt) = {
+  def ProbeAck(b: TLBundleB, reportPermissions: UInt, data: UInt): TLBundleC =
+    ProbeAck(b.source, b.address, b.size, reportPermissions, data)
+
+  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt, data: UInt): TLBundleC = {
    val c = Wire(new TLBundleC(bundle))
    c.opcode  := TLMessages.ProbeAckData
    c.param   := reportPermissions
@@ -262,7 +268,8 @@ class TLEdgeOut(
    c
  }

-  def GrantAck(toSink: UInt) = {
+  def GrantAck(d: TLBundleD): TLBundleE = GrantAck(d.sink)
+  def GrantAck(toSink: UInt): TLBundleE = {
    val e = Wire(new TLBundleE(bundle))
    e.sink := toSink
    e
--- a/src/main/scala/uncore/tilelink2/Metadata.scala
+++ b/src/main/scala/uncore/tilelink2/Metadata.scala
@@ -0,0 +1,149 @@
+// See LICENSE for license details.
+
+package uncore.tilelink2
+
+import Chisel._
+import chisel3.internal.sourceinfo.SourceInfo
+import util._
+import uncore.constants.MemoryOpConstants
+
+object ClientStates {
+  val width = 2 // bits per encoded state
+  // Every literal is sized to `width`: unsized UInt(0)/UInt(1) literals are only 1 bit wide, which misaligns Cat(..., state) lookup keys.
+  val Nothing = UInt(0, width)
+  val Branch  = UInt(1, width)
+  val Trunk   = UInt(2, width)
+  val Dirty   = UInt(3, width)
+  // Encodings are ordered by increasing permission, so the checks below are plain magnitude comparisons.
+  def hasReadPermission(state: UInt): Bool = state > Nothing
+  def hasWritePermission(state: UInt): Bool = state > Branch
+}
+
+object MemoryOpCategories extends MemoryOpConstants { // 2-bit op categories, encoded as Cat(isWrite, isWriteIntent)
+  val wr = Cat(Bool(true), Bool(true))   // Op actually writes
+  val wi = Cat(Bool(false), Bool(true))  // Future op will write
+  val rd = Cat(Bool(false), Bool(false)) // Op only reads
+
+  def categorize(cmd: UInt): UInt = Cat(isWrite(cmd), isWriteIntent(cmd)) // yields wr/wi/rd provided isWrite implies isWriteIntent — TODO confirm in MemoryOpConstants
+}
+
+/** Stores the client-side coherence information,
+  * such as permissions on the data and whether the data is dirty.
+  * Its API can be used to make TileLink messages in response to
+  * memory operations, cache control operations, or Probe messages.
+  */
+class ClientMetadata extends Bundle {
+  /** Actual state information stored in this bundle */
+  val state = UInt(width = ClientStates.width)
+
+  /** Metadata equality */
+  def ===(rhs: UInt): Bool = state === rhs
+  def ===(rhs: ClientMetadata): Bool = state === rhs.state
+  def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)
+
+  /** Is the block's data present in this cache */
+  def isValid(dummy: Int = 0): Bool = state > ClientStates.Nothing
+
+  /** Determine whether this cmd misses, and the new state (on hit) or param to be sent (on miss) */
+  private def growStarter(cmd: UInt): (Bool, UInt) = {
+    import MemoryOpCategories._
+    import TLPermissions._
+    import ClientStates._
+    MuxTLookup(Cat(categorize(cmd), state), (Bool(false), UInt(0)), Seq(
+    //(effect, am now) -> (was a hit,   next)
+      Cat(rd, Dirty)   -> (Bool(true),  Dirty),
+      Cat(rd, Trunk)   -> (Bool(true),  Trunk),
+      Cat(rd, Branch)  -> (Bool(true),  Branch),
+      Cat(wi, Dirty)   -> (Bool(true),  Dirty),
+      Cat(wi, Trunk)   -> (Bool(true),  Trunk),
+      Cat(wr, Dirty)   -> (Bool(true),  Dirty),
+      Cat(wr, Trunk)   -> (Bool(true),  Dirty),
+    //(effect, am now) -> (was a miss,  param)
+      Cat(rd, Nothing) -> (Bool(false), NtoB),
+      Cat(wi, Branch)  -> (Bool(false), BtoT),
+      Cat(wi, Nothing) -> (Bool(false), NtoT),
+      Cat(wr, Branch)  -> (Bool(false), BtoT),
+      Cat(wr, Nothing) -> (Bool(false), NtoT)))
+  }
+
+  /** Determine what state to go to after miss based on Grant param */
+  private def growFinisher(cmd: UInt, param: UInt): UInt = {
+    import MemoryOpCategories._
+    import TLPermissions._
+    import ClientStates._
+    MuxLookup(Cat(categorize(cmd), param), UInt(0), Seq(
+    //(effect param) -> (next)
+      Cat(rd, toB)   -> Branch,
+      Cat(rd, toT)   -> Trunk,
+      Cat(wi, toT)   -> Trunk,
+      Cat(wr, toT)   -> Dirty))
+  }
+
+
+  /** Does a secondary miss on the block require another Acquire message */
+  def requiresAcquireOnSecondaryMiss(first_cmd: UInt, second_cmd: UInt): Bool = {
+    import MemoryOpCategories._
+    isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
+  }
+
+  /** Does this cache have permissions on this block sufficient to perform op,
+    * and what to do next (Acquire message param or updated metadata). */
+  def onAccess(cmd: UInt): (Bool, UInt, ClientMetadata) = {
+    val r = growStarter(cmd)
+    (r._1, r._2, ClientMetadata(r._2))
+  }
+
+  /** Metadata change on a returned Grant */
+  def onGrant(cmd: UInt, param: UInt): ClientMetadata = ClientMetadata(growFinisher(cmd, param))
+
+  /** Map (permission cap, current state) to (dirty data must be written back, report param, next state) */
+  private def shrinkHelper(param: UInt): (Bool, UInt, UInt) = {
+    import ClientStates._
+    import TLPermissions._
+    MuxTLookup(Cat(param, state), (Bool(false), UInt(0), UInt(0)), Seq(
+    //(wanted, am now)  -> (dirtyWB      resp, next)
+      Cat(toT, Dirty)   -> (Bool(true),  TtoT, Trunk),
+      Cat(toT, Trunk)   -> (Bool(false), TtoT, Trunk),
+      Cat(toT, Branch)  -> (Bool(false), BtoB, Branch),
+      Cat(toT, Nothing) -> (Bool(false), NtoN, Nothing),
+      Cat(toB, Dirty)   -> (Bool(true),  TtoB, Branch),
+      Cat(toB, Trunk)   -> (Bool(false), TtoB, Branch),  // Policy: Don't notify on clean downgrade
+      Cat(toB, Branch)  -> (Bool(false), BtoB, Branch),
+      Cat(toB, Nothing) -> (Bool(false), NtoN, Nothing), // Nothing was held, so report NtoN like the other Nothing rows
+      Cat(toN, Dirty)   -> (Bool(true),  TtoN, Nothing),
+      Cat(toN, Trunk)   -> (Bool(false), TtoN, Nothing), // Policy: Don't notify on clean downgrade
+      Cat(toN, Branch)  -> (Bool(false), BtoN, Nothing), // Policy: Don't notify on clean downgrade
+      Cat(toN, Nothing) -> (Bool(false), NtoN, Nothing)))
+  }
+
+  /** Translate cache control cmds into Probe param */
+  private def cmdToPermCap(cmd: UInt): UInt = {
+    import MemoryOpCategories._
+    import TLPermissions._
+    MuxLookup(cmd, toN, Seq(
+      M_FLUSH   -> toN,
+      M_PRODUCE -> toB,
+      M_CLEAN   -> toT))
+  }
+
+  def onCacheControl(cmd: UInt): (Bool, UInt, ClientMetadata) = {
+    val r = shrinkHelper(cmdToPermCap(cmd))
+    (r._1, r._2, ClientMetadata(r._3))
+  }
+
+  def onProbe(param: UInt): (Bool, UInt, ClientMetadata) = { // returns (dirty data to write back, report param, next metadata)
+    val r = shrinkHelper(param)
+    (r._1, r._2, ClientMetadata(r._3)) // forward the dirty flag from shrinkHelper instead of forcing it true, as onCacheControl does
+  }
+}
+
+/** Factories for ClientMetadata, including on reset */
+object ClientMetadata {
+  def apply(perm: UInt) = { // wrap a raw state encoding in a fresh ClientMetadata wire
+    val meta = Wire(new ClientMetadata)
+    meta.state := perm
+    meta
+  }
+  def onReset = ClientMetadata(ClientStates.Nothing) // metadata in the Nothing state
+  def maximum = ClientMetadata(ClientStates.Dirty) // metadata in the Dirty state, the largest ClientStates encoding
+}
--- a/src/main/scala/util/Misc.scala
+++ b/src/main/scala/util/Misc.scala
@@ -36,6 +36,23 @@ object MuxT {
    (Mux(cond, con._1, alt._1), Mux(cond, con._2, alt._2), Mux(cond, con._3, alt._3))
 }

+/** Tuple-valued lookup: builds a cascade of MuxTs over `mapping`, searching for `key`.
+  * The earliest entry whose key equals `key` is selected; `default` is selected when none match. */
+object MuxTLookup {
+  /** Lookup yielding a pair of signals. */
+  def apply[S <: UInt, T <: Data, U <: Data](key: S, default: (T, U), mapping: Seq[(S, (T, U))]): (T, U) =
+    // Fold from the last entry so that earlier entries end up outermost in the cascade.
+    mapping.foldRight(default) { case ((candidate, value), otherwise) =>
+      MuxT(candidate === key, value, otherwise)
+    }
+
+  /** Lookup yielding a triple of signals. */
+  def apply[S <: UInt, T <: Data, U <: Data, W <: Data](key: S, default: (T, U, W), mapping: Seq[(S, (T, U, W))]): (T, U, W) =
+    mapping.foldRight(default) { case ((candidate, value), otherwise) =>
+      MuxT(candidate === key, value, otherwise)
+    }
+}
+
 object Str
 {
  def apply(s: String): UInt = {