reorganize moving non-submodule packages into src/main/scala

2016-08-19 10:58:56 -07:00
parent f78da0b0ea
commit 7b20609d4d
110 changed files with 3 additions and 381 deletions
--- a/src/main/scala/groundtest/BusMasterTest.scala
+++ b/src/main/scala/groundtest/BusMasterTest.scala
@ -0,0 +1,115 @@
+package groundtest
+
+import Chisel._
+import uncore.tilelink._
+import uncore.agents._
+import uncore.coherence.{InnerTLId, OuterTLId}
+import uncore.util._
+import junctions.HasAddrMapParameters
+import cde.Parameters
+
+/**
+ * An example bus-mastering device that writes some preset data to memory.
+ * When it receives an MMIO put request, it starts writing out the data.
+ * When it receives an MMIO get request, it responds with the progress of
+ * the write. A grant data of 1 means it is still writing, grant data 0
+ * means it has finished.
+ *
+ * The data written is a single PutBlock to the first block of the "mem"
+ * region, in which every beat carries its own beat index as data.
+ */
+class ExampleBusMaster(implicit val p: Parameters) extends Module
+    with HasAddrMapParameters
+    with HasTileLinkParameters {
+  // The MMIO port is parameterized by the inner TileLink id, the memory
+  // port by the outer one.
+  val mmioParams = p.alterPartial({ case TLId => p(InnerTLId) })
+  val memParams = p.alterPartial({ case TLId => p(OuterTLId) })
+  // Start of the "mem" region as a byte address and as a block address.
+  val memStart = addrMap("mem").start
+  val memStartBlock = memStart >> p(CacheBlockOffsetBits)
+
+  val io = new Bundle {
+    val mmio = new ClientUncachedTileLinkIO()(mmioParams).flip
+    val mem = new ClientUncachedTileLinkIO()(memParams)
+  }
+
+  // s_idle: no write in progress; s_put: sending PutBlock beats;
+  // s_resp: waiting for the grant from memory.
+  val s_idle :: s_put :: s_resp :: Nil = Enum(Bits(), 3)
+  val state = Reg(init = s_idle)
+  // High while an MMIO grant is owed for the request latched in r_acq.
+  val send_resp = Reg(init = Bool(false))
+  val r_acq = Reg(new AcquireMetadata)
+
+  // Accept one MMIO request at a time: no new acquire until its grant is sent.
+  io.mmio.acquire.ready := !send_resp
+  io.mmio.grant.valid := send_resp
+  io.mmio.grant.bits := Grant(
+    is_builtin_type = Bool(true),
+    g_type = r_acq.getBuiltInGrantType(),
+    client_xact_id = r_acq.client_xact_id,
+    manager_xact_id = UInt(0),
+    addr_beat = r_acq.addr_beat,
+    // Progress report: 0 when idle, 1 while the write is in flight.
+    data = Mux(state === s_idle, UInt(0), UInt(1)))
+
+  when (io.mmio.acquire.fire()) {
+    send_resp := Bool(true)
+    r_acq := io.mmio.acquire.bits
+    // Only a request carrying data (a put) received while idle starts a write.
+    when (state === s_idle && io.mmio.acquire.bits.hasData()) { state := s_put }
+  }
+  when (io.mmio.grant.fire()) { send_resp := Bool(false) }
+
+  // Counts the PutBlock beats accepted by memory; put_done marks the last one.
+  val (put_beat, put_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
+  when (put_done) { state := s_resp }
+  when (io.mem.grant.fire()) { state := s_idle }
+
+  io.mem.acquire.valid := state === s_put
+  io.mem.acquire.bits := PutBlock(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock),
+    addr_beat = put_beat,
+    data = put_beat)
+  io.mem.grant.ready := state === s_resp
+}
+
+/**
+ * Ground test for the bus-mastering device mapped at "io:ext:busmaster".
+ *
+ * Sequence: send a Put to the device to start it, poll it with Gets until
+ * the returned data is 0, then GetBlock the first block of memory and
+ * check that the data of each beat equals the number of beats received
+ * before it.
+ */
+class BusMasterTest(implicit p: Parameters) extends GroundTest()(p)
+    with HasTileLinkParameters {
+  // Each request has an s_req_* state (acquire pending) followed by an
+  // s_resp_* state (grant pending).
+  val (s_idle :: s_req_start :: s_resp_start :: s_req_poll :: s_resp_poll ::
+       s_req_check :: s_resp_check :: s_done :: Nil) = Enum(Bits(), 8)
+  val state = Reg(init = s_idle)
+
+  val busMasterBlock = addrMap("io:ext:busmaster").start >> p(CacheBlockOffsetBits)
+  // Put that kicks off the device.
+  val start_acq = Put(
+    client_xact_id = UInt(0),
+    addr_block = UInt(busMasterBlock),
+    addr_beat = UInt(0),
+    data = UInt(1))
+  // Get that reads back the device's progress.
+  val poll_acq = Get(
+    client_xact_id = UInt(0),
+    addr_block = UInt(busMasterBlock),
+    addr_beat = UInt(0))
+  // Block read of the memory location the device writes to.
+  val check_acq = GetBlock(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock))
+
+  val acq = io.mem.head.acquire
+  val gnt = io.mem.head.grant
+
+  acq.valid := state.isOneOf(s_req_start, s_req_poll, s_req_check)
+  // check_acq is the default, so it is what s_req_check sends.
+  acq.bits := MuxLookup(state, check_acq, Seq(
+    s_req_start -> start_acq,
+    s_req_poll -> poll_acq))
+  gnt.ready := state.isOneOf(s_resp_start, s_resp_poll, s_resp_check)
+
+  // Counts the beats of the GetBlock response.
+  val (get_beat, get_done) = Counter(
+    state === s_resp_check && gnt.valid, tlDataBeats)
+
+  when (state === s_idle) { state := s_req_start }
+  when (state === s_req_start && acq.ready) { state := s_resp_start }
+  when (state === s_resp_start && gnt.valid) { state := s_req_poll }
+  when (state === s_req_poll && acq.ready) { state := s_resp_poll }
+  when (state === s_resp_poll && gnt.valid) {
+    // Data 0 means the device has finished; anything else means poll again.
+    when (gnt.bits.data === UInt(0)) {
+      state := s_req_check
+    } .otherwise { state := s_req_poll }
+  }
+  when (state === s_req_check && acq.ready) { state := s_resp_check }
+  when (get_done) { state := s_done }
+
+  io.status.finished := state === s_done
+  // Failures are reported only through the assertion below, so drive the
+  // remaining status valids low instead of leaving them unconnected
+  // (CacheFillTest ties them off the same way).
+  io.status.timeout.valid := Bool(false)
+  io.status.error.valid := Bool(false)
+
+  assert(state =/= s_resp_check || !gnt.valid ||
+         gnt.bits.data === get_beat,
+         "BusMasterTest: data does not match")
+}
--- a/src/main/scala/groundtest/CacheFillTest.scala
+++ b/src/main/scala/groundtest/CacheFillTest.scala
@ -0,0 +1,50 @@
+package groundtest
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.agents._
+import uncore.util._
+import cde.{Parameters, Field}
+
+/**
+ * Ground test that sweeps a cache-sized range of memory twice: first with
+ * GetPrefetch requests for every block, then with GetBlock requests for
+ * the same blocks. The range starts at memStartBlock and its size is taken
+ * from the "L2_CAPACITY_IN_KB" parameter.
+ */
+class CacheFillTest(implicit p: Parameters) extends GroundTest()(p)
+    with HasTileLinkParameters {
+  val capacityKb: Int = p("L2_CAPACITY_IN_KB")
+  // Number of cache blocks that make up capacityKb.
+  val nblocks = capacityKb * 1024 / p(CacheBlockBytes)
+  val s_start :: s_prefetch :: s_retrieve :: s_finished :: Nil = Enum(Bits(), 4)
+  val state = Reg(init = s_start)
+
+  val active = state.isOneOf(s_prefetch, s_retrieve)
+
+  // One bit per client transaction id; a set bit means that id is in flight.
+  val xact_pending = Reg(init = UInt(0, tlMaxClientXacts))
+  // Lowest-numbered id that is currently free.
+  val xact_id = PriorityEncoder(~xact_pending)
+
+  // Block index within the current round; round_done marks the last request.
+  val (req_block, round_done) = Counter(io.mem.head.acquire.fire(), nblocks)
+
+  // Issue requests only while some transaction id is still free.
+  io.mem.head.acquire.valid := active && !xact_pending.andR
+  io.mem.head.acquire.bits := Mux(state === s_prefetch,
+    GetPrefetch(xact_id, UInt(memStartBlock) + req_block),
+    GetBlock(xact_id, UInt(memStartBlock) + req_block))
+  io.mem.head.grant.ready := xact_pending.orR
+
+  /** One-hot mask of the id claimed by `acq` this cycle, or 0 if it did not fire. */
+  def add_pending(acq: DecoupledIO[Acquire]): UInt =
+    Mux(acq.fire(), UIntToOH(acq.bits.client_xact_id), UInt(0))
+
+  /**
+   * AND-mask that clears the id released by `gnt` this cycle. An id is
+   * released only on the last beat of its grant (or on a single-beat grant).
+   */
+  def remove_pending(gnt: DecoupledIO[Grant]): UInt = {
+    val last_grant = !gnt.bits.hasMultibeatData() ||
+                      gnt.bits.addr_beat === UInt(tlDataBeats - 1)
+    ~Mux(gnt.fire() && last_grant, UIntToOH(gnt.bits.client_xact_id), UInt(0))
+  }
+
+  xact_pending := (xact_pending |
+    add_pending(io.mem.head.acquire)) &
+    remove_pending(io.mem.head.grant)
+
+  when (state === s_start) { state := s_prefetch }
+  when (state === s_prefetch && round_done) { state := s_retrieve }
+  when (state === s_retrieve && round_done) { state := s_finished }
+
+  // The test finishes once the last GetBlock has been issued; it never
+  // reports a timeout or an error itself.
+  io.status.finished := (state === s_finished)
+  io.status.timeout.valid := Bool(false)
+  io.status.error.valid := Bool(false)
+}
--- a/src/main/scala/groundtest/Comparator.scala
+++ b/src/main/scala/groundtest/Comparator.scala
@ -0,0 +1,387 @@
+package groundtest
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import junctions._
+import rocket._
+import scala.util.Random
+import cde.{Parameters, Field}
+
+/**
+ * Configuration of the comparator ground test.
+ *
+ * @param targets    base addresses of the regions being compared; one
+ *                   ComparatorClient is created per entry
+ * @param width      number of address bits covered by each region; limits
+ *                   the block addresses the source generates
+ * @param operations number of Acquires emitted by the ComparatorSource
+ * @param atomics    when false, PutAtomic requests are replaced by plain Puts
+ * @param prefetches when false, prefetch requests are replaced by a plain
+ *                   Get or Put
+ */
+case class ComparatorParameters(
+    targets: Seq[Long],
+    width: Int,
+    operations: Int,
+    atomics: Boolean,
+    prefetches: Boolean)
+
+/** Parameter key under which the [[ComparatorParameters]] are looked up. */
+case object ComparatorKey extends Field[ComparatorParameters]
+
+/** Mixin exposing the fields of the [[ComparatorParameters]] stored under [[ComparatorKey]]. */
+trait HasComparatorParameters {
+  implicit val p: Parameters
+
+  val comparatorParams: ComparatorParameters = p(ComparatorKey)
+
+  val targets: Seq[Long] = comparatorParams.targets
+  val nTargets: Int = targets.length
+  val targetWidth: Int = comparatorParams.width
+  val nOperations: Int = comparatorParams.operations
+  val atomics: Boolean = comparatorParams.atomics
+  val prefetches: Boolean = comparatorParams.prefetches
+}
+
+/**
+ * 64-bit linear feedback shift register used as a pseudo-random source.
+ *
+ * Every call that relies on the default `seed` takes the next value of an
+ * elaboration-time counter (1, 2, 3, ...), so separate instances reset to
+ * different register values.
+ */
+object LFSR64 {
+  // Last default seed handed out.
+  private var counter = 0
+
+  private def next: Int = {
+    counter = counter + 1
+    counter
+  }
+
+  /**
+   * @param increment the register shifts only in cycles where this is high
+   * @param seed      scrambled into the reset value; a seed of 0 resets the
+   *                  register to 0, which the feedback can never leave
+   * @return the current 64-bit register value
+   */
+  def apply(increment: Bool = Bool(true), seed: Int = next): UInt = {
+    val nBits = 64
+    val resetValue = (seed * 0xDEADBEEFCAFEBAB1L) >>> 1
+    val state = RegInit(UInt(resetValue, width = nBits))
+    // The bit shifted in at the top is the XOR of the four tapped low bits.
+    val feedback = Seq(0, 1, 3, 4).map(i => state(i)).reduce(_ ^ _)
+    when (increment) { state := Cat(feedback, state(nBits - 1, 1)) }
+    state
+  }
+}
+
+/** Produces `wide` pseudo-random bits by concatenating as many LFSR64 instances as needed. */
+object NoiseMaker {
+  /**
+   * @param wide      number of noise bits to return
+   * @param increment advance enable passed on to every underlying LFSR
+   */
+  def apply(wide: Int, increment: Bool = Bool(true)): UInt = {
+    val nLfsrs = (wide + 63) / 64 // ceil(wide / 64)
+    val noise = Cat(Seq.fill(nLfsrs) { LFSR64(increment) })
+    noise(wide - 1, 0)
+  }
+}
+
+/** Builds a `wide`-bit value in which position i is set exactly when i < `bits`. */
+object MaskMaker {
+  def apply(wide: Int, bits: UInt): UInt = {
+    val lanes = Vec.tabulate(wide) { i => UInt(i) < bits }
+    lanes.asUInt
+  }
+}
+
+/**
+ * Generates the stream of Acquires used by the comparator test.
+ *
+ * It first emits one PutBlock per block of the tested address range so the
+ * range has known contents, then emits randomly chosen operations. In
+ * total (wipe included) nOperations Acquires are sent, each of which is
+ * also logged with printf.
+ */
+class ComparatorSource(implicit val p: Parameters) extends Module
+    with HasComparatorParameters
+    with HasTileLinkParameters
+{
+  val io = new Bundle {
+    val out = Decoupled(new Acquire)
+    val finished = Bool(OUTPUT)
+  }
+  
+  // Output exactly nOperations of Acquires
+  val finished = RegInit(Bool(false))
+  val valid    = RegInit(Bool(false))
+  
+  // valid is low only in the first cycle after reset.
+  valid := Bool(true)
+  
+  io.finished  := finished
+  io.out.valid := !finished && valid
+  
+  // Generate random operand sizes
+  // Every noise source below advances only when an Acquire is accepted, so
+  // the offered request does not change while the output is stalled.
+  val inc = io.out.fire()
+  val raw_operand_size = NoiseMaker(2, inc) | UInt(0, M_SZ)
+  val max_operand_size = UInt(log2Up(tlDataBytes))
+  val get_operand_size = Mux(raw_operand_size > max_operand_size, max_operand_size, raw_operand_size)
+  val atomic_operand_size = UInt(2) + NoiseMaker(1, inc) // word or dword
+  
+  // Generate random, but valid addr_bytes
+  // Clearing the low operand_size bits aligns the address to the operand.
+  val raw_addr_byte = NoiseMaker(tlByteAddrBits, inc)
+  val get_addr_byte    = raw_addr_byte & ~MaskMaker(tlByteAddrBits, get_operand_size)
+  val atomic_addr_byte = raw_addr_byte & ~MaskMaker(tlByteAddrBits, atomic_operand_size)
+  
+  // Only allow some of the possible choices (M_XA_MAXU untested)
+  val atomic_opcode = MuxLookup(NoiseMaker(3, inc), M_XA_SWAP, Array(
+    UInt("b000") -> M_XA_ADD,
+    UInt("b001") -> M_XA_XOR,
+    UInt("b010") -> M_XA_OR,
+    UInt("b011") -> M_XA_AND,
+    UInt("b100") -> M_XA_MIN,
+    UInt("b101") -> M_XA_MAX,
+    UInt("b110") -> M_XA_MINU,
+    UInt("b111") -> M_XA_SWAP))
+  
+  // Addr_block range
+  // targetWidth counts byte-address bits; subtracting the beat and byte
+  // bits leaves the number of block-address bits that may vary.
+  val addr_block_mask = MaskMaker(tlBlockAddrBits, UInt(targetWidth-tlBeatAddrBits-tlByteAddrBits))
+  
+  // Generate some random values
+  val addr_block = NoiseMaker(tlBlockAddrBits, inc) & addr_block_mask
+  val addr_beat  = NoiseMaker(tlBeatAddrBits, inc)
+  val wmask      = NoiseMaker(tlDataBytes, inc)
+  val data       = NoiseMaker(tlDataBits, inc)
+  val client_xact_id = UInt(0) // filled by Client
+  
+  // Random transactions
+  // When atomics/prefetches are disabled the corresponding slots fall back
+  // to a plain put or get, so every selector value still yields a request.
+  val get         = Get(client_xact_id, addr_block, addr_beat, get_addr_byte, get_operand_size, Bool(false))
+  val getBlock    = GetBlock(client_xact_id, addr_block)
+  val put         = Put(client_xact_id, addr_block, addr_beat, data, Some(wmask))
+  val putBlock    = PutBlock(client_xact_id, addr_block, UInt(0), data)
+  val putAtomic   = if (atomics)
+    PutAtomic(client_xact_id, addr_block, addr_beat,
+      atomic_addr_byte, atomic_opcode, atomic_operand_size, data)
+    else put
+  val putPrefetch = if (prefetches)
+    PutPrefetch(client_xact_id, addr_block)
+    else put
+  val getPrefetch = if (prefetches)
+    GetPrefetch(client_xact_id, addr_block)
+    else get
+  val a_type_sel  = NoiseMaker(3, inc)
+
+  // We must initially putBlock all of memory to have a consistent starting state
+  val final_addr_block = addr_block_mask + UInt(1)
+  val wipe_addr_block  = RegInit(UInt(0, width = tlBlockAddrBits))
+  val done_wipe        = wipe_addr_block === final_addr_block
+
+  io.out.bits := Mux(!done_wipe,
+    // Override whatever else we were going to do if we are wiping
+    PutBlock(client_xact_id, wipe_addr_block, UInt(0), data),
+    // Generate a random a_type
+    // (selector b111 has no entry and takes the default, a Get)
+    MuxLookup(a_type_sel, get, Array(
+      UInt("b000") -> get,
+      UInt("b001") -> getBlock,
+      UInt("b010") -> put,
+      UInt("b011") -> putBlock,
+      UInt("b100") -> putAtomic,
+      UInt("b101") -> getPrefetch,
+      UInt("b110") -> putPrefetch)))
+  
+  // Index of the Acquire being sent; also counts the wipe PutBlocks.
+  val idx = Reg(init = UInt(0, log2Up(nOperations)))
+  when (io.out.fire()) {
+    when (idx === UInt(nOperations - 1)) { finished := Bool(true) }
+    when (!done_wipe) {
+      printf("[acq %d]: PutBlock(addr_block = %x, data = %x)\n",
+        idx, wipe_addr_block, data)
+      wipe_addr_block := wipe_addr_block + UInt(1)
+    } .otherwise {
+      // Log the request actually selected above, including the fallbacks
+      // used when atomics or prefetches are disabled.
+      switch (a_type_sel) {
+        is (UInt("b000")) {
+          printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
+            idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
+        }
+        is (UInt("b001")) {
+          printf("[acq %d]: GetBlock(addr_block = %x)\n", idx, addr_block)
+        }
+        is (UInt("b010")) {
+          printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
+            idx, addr_block, addr_beat, data, wmask)
+        }
+        is (UInt("b011")) {
+          printf("[acq %d]: PutBlock(addr_block = %x, data = %x)\n", idx, addr_block, data)
+        }
+        is (UInt("b100")) {
+          if (atomics) {
+            printf("[acq %d]: PutAtomic(addr_block = %x, addr_beat = %x, addr_byte = %x, " +
+                   "opcode = %x, op_size = %x, data = %x)\n",
+                   idx, addr_block, addr_beat, atomic_addr_byte,
+                   atomic_opcode, atomic_operand_size, data)
+          } else {
+            printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
+              idx, addr_block, addr_beat, data, wmask)
+          }
+        }
+        is (UInt("b101")) {
+          if (prefetches) {
+            printf("[acq %d]: GetPrefetch(addr_block = %x)\n", idx, addr_block)
+          } else {
+            printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
+              idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
+          }
+        }
+        is (UInt("b110")) {
+          if (prefetches) {
+            printf("[acq %d]: PutPrefetch(addr_block = %x)\n", idx, addr_block)
+          } else {
+            printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
+              idx, addr_block, addr_beat, data, wmask)
+          }
+        }
+        is (UInt("b111")) {
+          printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
+            idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
+        }
+      }
+    }
+    idx := idx + UInt(1)
+  }
+}
+
+/**
+ * Replays the shared Acquire stream against one target.
+ *
+ * Block addresses are offset by the target's base, multi-beat requests are
+ * expanded into their beats, the acquire and grant handshakes are randomly
+ * throttled, and the collected grants are handed to `out` in the order the
+ * requests were issued.
+ *
+ * @param target base byte address of the region this client exercises
+ */
+class ComparatorClient(val target: Long)(implicit val p: Parameters) extends Module
+    with HasComparatorParameters
+    with HasTileLinkParameters
+{
+  val io = new Bundle {
+    val in  = Decoupled(new Acquire).flip
+    val tl  = new ClientUncachedTileLinkIO()
+    val out = Decoupled(new Grant)
+    val finished = Bool(OUTPUT)
+    val timeout = Bool(OUTPUT)
+  }
+
+  val xacts = tlMaxClientXacts
+  // Target base expressed as a block address.
+  val offset = (UInt(target) >> UInt(tlBeatAddrBits+tlByteAddrBits))
+
+  // Track the status of inflight requests
+  // issued(i): id i has a request outstanding; ready(i): its final grant
+  // beat has arrived; result(i): the most recent grant received for id i.
+  val issued  = RegInit(Vec.fill(xacts) {Bool(false)})
+  val ready   = RegInit(Vec.fill(xacts) {Bool(false)})
+  val result  = Reg(Vec(xacts, new Grant))
+  
+  // buffer holds incoming Acquires; queue remembers the order in which
+  // transaction ids were issued.
+  val buffer = Queue(io.in, xacts)
+  val queue  = Module(new Queue(io.tl.acquire.bits.client_xact_id, xacts))
+  
+  val isMultiOut = buffer.bits.hasMultibeatData()
+  val isMultiIn  = io.tl.grant.bits.hasMultibeatData()
+  
+  // Beat counter for outgoing multi-beat acquires.
+  // NOTE(review): never reset explicitly; presumably relies on wrapping
+  // back to 0 after the last beat (tlDataBeats == 2^tlBeatAddrBits) - confirm.
+  val beatOut  = RegInit(UInt(0, width = tlBeatAddrBits))
+  val lastBeat = UInt(tlDataBeats-1)
+  val isFirstBeatOut= Mux(isMultiOut, beatOut === UInt(0),  Bool(true))
+  val isLastBeatOut = Mux(isMultiOut, beatOut === lastBeat, Bool(true))
+  val isLastBeatIn  = Mux(isMultiIn,  io.tl.grant.bits.addr_beat === lastBeat, Bool(true))
+  
+  // Remove this once HoldUnless is in chisel3
+  // Passes `in` through while `enable` is high and otherwise returns the
+  // value `in` had the last time `enable` was high.
+  def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in)
+
+  // Potentially issue a request, using a free xact id
+  // NOTE: we may retract valid and change xact_id on a !ready (allowed by spec)
+  // allow_acq is a random throttle that also requires a free id; xact_id is
+  // chosen on the first beat and held for the remaining beats.
+  val allow_acq = NoiseMaker(1)(0) && issued.map(!_).reduce(_ || _)
+  val xact_id   = holdUnless(PriorityEncoder(issued.map(!_)), isFirstBeatOut)
+  // The buffered Acquire is consumed only once its last beat has been sent.
+  buffer.ready        := allow_acq && io.tl.acquire.ready && isLastBeatOut
+  io.tl.acquire.valid := allow_acq && buffer.valid
+  io.tl.acquire.bits  := buffer.bits
+  io.tl.acquire.bits.addr_block := buffer.bits.addr_block + offset
+  io.tl.acquire.bits.client_xact_id := xact_id
+  when (isMultiOut) {
+    val dataOut = (buffer.bits.data << beatOut) + buffer.bits.data // mix the data up a bit
+    io.tl.acquire.bits.addr_beat := beatOut
+    io.tl.acquire.bits.data := dataOut
+  }
+  
+  when (io.tl.acquire.fire()) {
+    // The id counts as issued only once the final beat has been accepted.
+    issued(xact_id) := isLastBeatOut
+    when (isMultiOut) { beatOut := beatOut + UInt(1) }
+  }
+  
+  // Remember the xact ID so we can return results in-order
+  queue.io.enq.valid := io.tl.acquire.fire() && isLastBeatOut
+  queue.io.enq.bits  := xact_id
+  assert (queue.io.enq.ready || !queue.io.enq.valid) // should be big enough
+  
+  // Capture the results from the manager
+  // Grants are accepted at random to exercise back-pressure.
+  io.tl.grant.ready := NoiseMaker(1)(0)
+  when (io.tl.grant.fire()) {
+    val id = io.tl.grant.bits.client_xact_id
+    assert (!ready(id)) // got same xact_id twice?
+    ready(id) := isLastBeatIn
+    // Every beat overwrites the previous one, so only the last beat is kept.
+    result(id) := io.tl.grant.bits
+  }
+  
+  // Bad xact_id returned if ready but not issued!
+  assert ((ready zip issued) map {case (r,i) => i || !r} reduce (_ && _))
+  
+  // When we have the next grant result, send it to the sink
+  val next_id = queue.io.deq.bits
+  queue.io.deq.ready := io.out.ready && ready(next_id) // TODO: only compares last getBlock
+  io.out.valid := queue.io.deq.valid && ready(next_id)
+  io.out.bits  := result(queue.io.deq.bits)
+  
+  // Handing a result to the sink frees its transaction id.
+  when (io.out.fire()) {
+    ready(next_id) := Bool(false)
+    issued(next_id) := Bool(false)
+  }
+  
+  // Finished when nothing is buffered and no transaction is outstanding.
+  io.finished := !buffer.valid && !issued.reduce(_ || _)
+
+  // Count of completed acquires, kept only for debugging.
+  val (idx, acq_done) = Counter(
+    io.tl.acquire.fire() && io.tl.acquire.bits.last(), nOperations)
+  debug(idx)
+
+  // Per-id watchdog, started by the first acquire beat and stopped by the
+  // first grant beat of the same transaction id.
+  val timer = Module(new Timer(8192, xacts))
+  timer.io.start.valid := io.tl.acquire.fire() && io.tl.acquire.bits.first()
+  timer.io.start.bits  := xact_id
+  timer.io.stop.valid  := io.tl.grant.fire() && io.tl.grant.bits.first()
+  timer.io.stop.bits   := io.tl.grant.bits.client_xact_id
+  assert(!timer.io.timeout.valid, "Comparator TL client timed out")
+  io.timeout := timer.io.timeout.valid
+}
+
+/**
+ * Compares the grants returned by all targets.
+ *
+ * One grant per target is dequeued in lockstep and each is checked against
+ * the grant of target 0. A mismatch trips an assertion and is reported on
+ * `io.error`, whose bits hold the index of the failed check:
+ * 0 = not a builtin grant, 1 = g_type, 2 = addr_beat, 3 = data.
+ */
+class ComparatorSink(implicit val p: Parameters) extends Module
+    with HasComparatorParameters
+    with HasTileLinkParameters
+    with HasGroundTestConstants
+{
+  val io = new Bundle {
+    val in = Vec(nTargets, Decoupled(new Grant)).flip
+    val finished = Bool(OUTPUT)
+    val error = Valid(UInt(width = errorCodeBits))
+  }
+  
+  // could use a smaller Queue here, but would couple targets flow controls together
+  val queues = io.in.map(Queue(_, nOperations))
+  
+  // Finished once every queue has drained; grants are consumed only when
+  // every target has one available, so all queues advance together.
+  io.finished := queues.map(!_.valid).reduce(_ && _)
+  val all_valid = queues.map(_.valid).reduce(_ && _)
+  queues.foreach(_.ready := all_valid)
+  
+  // Grant of target 0, the reference every other target is compared with.
+  val base = queues(0).bits
+  val idx = Reg(init = UInt(0, log2Up(nOperations)))
+
+  // Report "no error" by default. Without these, io.error was driven only
+  // inside the failing branches of check() and undefined otherwise.
+  io.error.valid := Bool(false)
+  io.error.bits := UInt(0)
+
+  /** Compares grant `g` with `base`, asserting on and reporting any mismatch. */
+  def check(g: Grant) = {
+    when (g.hasData() && base.data =/= g.data) {
+      printf("%d: %x =/= %x, g_type = %x\n", idx, base.data, g.data, g.g_type)
+    }
+
+    // Same conditions as the assertions below; the position in this list
+    // is the error code.
+    val assert_conds = Seq(
+      g.is_builtin_type,
+      base.g_type === g.g_type,
+      base.addr_beat === g.addr_beat || !g.hasData(),
+      base.data === g.data || !g.hasData())
+
+    assert (g.is_builtin_type, "grant not builtin")
+    assert (base.g_type === g.g_type, "g_type mismatch")
+    assert (base.addr_beat === g.addr_beat || !g.hasData(), "addr_beat mismatch")
+    assert (base.data === g.data || !g.hasData(), "data mismatch")
+
+    // If several checks fail at once, the highest index is reported.
+    assert_conds.zipWithIndex.foreach { case (cond, i) =>
+      when (!cond) {
+        io.error.valid := Bool(true)
+        io.error.bits := UInt(i)
+      }
+    }
+  }
+  when (all_valid) {
+    when (base.hasData()) {
+      printf("[gnt %d]: g_type = %x, addr_beat = %x, data = %x\n",
+        idx, base.g_type, base.addr_beat, base.data)
+    } .otherwise {
+      printf("[gnt %d]: g_type = %x\n", idx, base.g_type)
+    }
+    queues.drop(1).map(_.bits).foreach(check)
+    idx := idx + UInt(1)
+  }
+}
+
+/**
+ * Top level of the comparator test: one source broadcast to a client per
+ * target, with every client's grants collected by a single sink.
+ */
+class ComparatorCore(implicit p: Parameters) extends GroundTest()(p)
+    with HasComparatorParameters
+    with HasTileLinkParameters {
+
+  // One memory port is needed per target.
+  require (io.mem.size == nTargets)
+
+  val source = Module(new ComparatorSource)
+  val sink   = Module(new ComparatorSink)
+  val broadcast = Broadcaster(source.io.out, nTargets)
+
+  // Build and wire one client per target, in target order.
+  val clients = for ((target, index) <- targets.zipWithIndex) yield {
+    val client = Module(new ComparatorClient(target))
+    client.io.in <> broadcast(index)
+    io.mem(index) <> client.io.tl
+    sink.io.in(index) <> client.io.out
+    client
+  }
+  val client_timeouts = clients.map(_.io.timeout)
+
+  // Done only when the source, the sink and every client are done.
+  val all_clients_finished = clients.map(_.io.finished).reduce(_ && _)
+  io.status.finished := source.io.finished && sink.io.finished && all_clients_finished
+
+  // Report the index of the first client that timed out.
+  val timeout_cases = client_timeouts.zipWithIndex.map {
+    case (timeout, i) => timeout -> UInt(i)
+  }
+  io.status.timeout.valid := client_timeouts.reduce(_ || _)
+  io.status.timeout.bits := MuxCase(UInt(0), timeout_cases)
+  io.status.error := sink.io.error
+}
--- a/src/main/scala/groundtest/Generator.scala
+++ b/src/main/scala/groundtest/Generator.scala
@ -0,0 +1,212 @@
+package groundtest
+
+import Chisel._
+import uncore.tilelink._
+import uncore.devices.NTiles
+import uncore.constants._
+import junctions._
+import rocket._
+import scala.util.Random
+import cde.{Parameters, Field}
+
+/**
+ * Configuration of the traffic generators.
+ *
+ * @param maxRequests  number of requests a generator issues in each of its
+ *                     write and read phases
+ * @param startAddress base byte address the generated addresses are added to
+ */
+case class GeneratorParameters(maxRequests: Int, startAddress: BigInt)
+
+/** Parameter key under which the [[GeneratorParameters]] are looked up. */
+case object GeneratorKey extends Field[GeneratorParameters]
+
+/** Constants shared by the traffic generators, derived from [[GeneratorKey]] and GroundTestKey. */
+trait HasGeneratorParameters extends HasGroundTestParameters {
+  implicit val p: Parameters
+
+  val genParams: GeneratorParameters = p(GeneratorKey)
+
+  // Total number of generators: cached plus uncached over every GroundTestKey entry.
+  val nGens = p(GroundTestKey)
+    .map(tile => tile.uncached + tile.cached)
+    .reduce((a, b) => a + b)
+
+  // Cycle count passed to the generators' Timers.
+  val genTimeout = 8192
+  val maxRequests: Int = genParams.maxRequests
+  val startAddress: BigInt = genParams.startAddress
+
+  // Generators operate on 32-bit words.
+  val genWordBits = 32
+  val genWordBytes = genWordBits / 8
+  // log2 of the word size in bytes, as a Scala Int and as a hardware literal.
+  val wordOffset = log2Ceil(genWordBytes)
+  val wordSize = UInt(wordOffset)
+
+  // The start address has to be word aligned.
+  require(startAddress % BigInt(genWordBytes) == 0)
+}
+
+/**
+ * Uncached TileLink traffic generator.
+ *
+ * Writes maxRequests words with Puts, then reads the same words back with
+ * Gets and checks the returned data. Only one request is outstanding at a
+ * time.
+ *
+ * @param id index of this generator; it is part of both the addresses and
+ *           the data pattern, and is reported on timeout or error
+ */
+class UncachedTileLinkGenerator(id: Int)
+    (implicit p: Parameters) extends TLModule()(p) with HasGeneratorParameters {
+
+  // Number of address bits below the block address.
+  private val tlBlockOffset = tlBeatAddrBits + tlByteAddrBits
+
+  val io = new Bundle {
+    val mem = new ClientUncachedTileLinkIO
+    val status = new GroundTestStatus
+  }
+
+  val (s_start :: s_put :: s_get :: s_finished :: Nil) = Enum(Bits(), 4)
+  val state = Reg(init = s_start)
+
+  // Counts completed requests; req_wrap marks the last grant of a phase.
+  val (req_cnt, req_wrap) = Counter(io.mem.grant.fire(), maxRequests)
+
+  // High while a request is waiting to be sent, low while its grant is awaited.
+  val sending = Reg(init = Bool(false))
+
+  when (state === s_start) {
+    sending := Bool(true)
+    state := s_put
+  }
+
+  when (io.mem.acquire.fire()) { sending := Bool(false) }
+  when (io.mem.grant.fire()) { sending := Bool(true) }
+  // Put phase is followed by the get phase, the get phase by completion.
+  when (req_wrap) { state := Mux(state === s_put, s_get, s_finished) }
+
+  val timeout = Timer(genTimeout, io.mem.acquire.fire(), io.mem.grant.fire())
+  assert(!timeout, s"Uncached generator ${id} timed out waiting for grant")
+
+  io.status.finished := (state === s_finished)
+  io.status.timeout.valid := timeout
+  io.status.timeout.bits := UInt(id)
+
+  // Low address bits: the generator id (omitted when there is only one
+  // generator) above a zero word offset.
+  val part_of_full_addr =
+    if (log2Ceil(nGens) > 0) {
+      Cat(UInt(id, log2Ceil(nGens)),
+          UInt(0, wordOffset))
+    } else {
+      UInt(0, wordOffset)
+    }
+  // Byte address of the current request: the request count sits above the id bits.
+  val full_addr = UInt(startAddress) + Cat(req_cnt, part_of_full_addr)
+
+  val addr_block = full_addr >> UInt(tlBlockOffset)
+  val addr_beat = full_addr(tlBlockOffset - 1, tlByteAddrBits)
+  val addr_byte = full_addr(tlByteAddrBits - 1, 0)
+
+  // Data pattern built from id, request count and the low address bits,
+  // resized to one generator word.
+  val data_prefix = Cat(UInt(id, log2Up(nGens)), req_cnt)
+  val word_data = Wire(UInt(width = genWordBits))
+  word_data := Cat(data_prefix, part_of_full_addr)
+  // The word is replicated across the beat; wmask selects the addressed copy.
+  val beat_data = Fill(tlDataBits / genWordBits, word_data)
+  val wshift = Cat(beatOffset(full_addr), UInt(0, wordOffset))
+  val wmask = Fill(genWordBits / 8, Bits(1, 1)) << wshift
+
+  val put_acquire = Put(
+    client_xact_id = UInt(0),
+    addr_block = addr_block,
+    addr_beat = addr_beat,
+    data = beat_data,
+    wmask = Some(wmask),
+    alloc = Bool(false))
+
+  val get_acquire = Get(
+    client_xact_id = UInt(0),
+    addr_block = addr_block,
+    addr_beat = addr_beat,
+    addr_byte = addr_byte,
+    operand_size = wordSize,
+    alloc = Bool(false))
+
+  io.mem.acquire.valid := sending && !io.status.finished
+  io.mem.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
+  io.mem.grant.ready := !sending && !io.status.finished
+
+  /** Extracts from beat data `dat` the word selected by byte address `addr`. */
+  def wordFromBeat(addr: UInt, dat: UInt) = {
+    // Word index within the beat, scaled to a bit offset (bytes per word * 8).
+    val shift = Cat(beatOffset(addr), UInt(0, wordOffset + 3))
+    (dat >> shift)(genWordBits - 1, 0)
+  }
+
+  // A Get response must return the word written during the put phase.
+  val data_mismatch = io.mem.grant.fire() && state === s_get &&
+    wordFromBeat(full_addr, io.mem.grant.bits.data) =/= word_data
+
+  io.status.error.valid := data_mismatch
+  io.status.error.bits := UInt(id)
+
+  assert(!data_mismatch,
+    s"Get received incorrect data in uncached generator ${id}")
+
+  /** Index of the word within a beat that `addr` falls into (0 if a beat holds one word). */
+  def beatOffset(addr: UInt) = // TODO zero-width
+    if (tlByteAddrBits > wordOffset) addr(tlByteAddrBits - 1, wordOffset)
+    else UInt(0)
+}
+
+/**
+ * Traffic generator for a HellaCache port.
+ *
+ * Issues maxRequests word writes followed by maxRequests word reads of the
+ * same addresses, one request at a time, and checks the data of every
+ * response that carries data.
+ *
+ * @param id index of this generator; it is part of both the addresses and
+ *           the data pattern, and is reported on timeout or error
+ */
+class HellaCacheGenerator(id: Int)
+    (implicit p: Parameters) extends L1HellaCacheModule()(p) with HasGeneratorParameters {
+  val io = new Bundle {
+    val mem = new HellaCacheIO
+    val status = new GroundTestStatus
+  }
+
+  val timeout = Timer(genTimeout, io.mem.req.fire(), io.mem.resp.valid)
+  assert(!timeout, s"Cached generator ${id} timed out waiting for response")
+  io.status.timeout.valid := timeout
+  io.status.timeout.bits := UInt(id)
+
+  val (s_start :: s_write :: s_read :: s_finished :: Nil) = Enum(Bits(), 4)
+  val state = Reg(init = s_start)
+  // High while a request is waiting to be sent, low while its response is awaited.
+  val sending = Reg(init = Bool(false))
+
+  // Counts responses; req_wrap marks the last response of a phase.
+  val (req_cnt, req_wrap) = Counter(io.mem.resp.valid, maxRequests)
+
+  // Low address bits: the generator id (omitted when there is only one
+  // generator) above a zero word offset.
+  val part_of_req_addr =
+    if (log2Ceil(nGens) > 0) {
+      Cat(UInt(id, log2Ceil(nGens)),
+          UInt(0, wordOffset))
+    } else {
+      UInt(0, wordOffset)
+    }
+  val req_addr = UInt(startAddress) + Cat(req_cnt, part_of_req_addr)
+  // Data pattern built from id, request count and the low address bits.
+  val req_data = Cat(UInt(id, log2Up(nGens)), req_cnt, part_of_req_addr)
+
+  io.mem.req.valid := sending && !io.status.finished
+  io.mem.req.bits.addr := req_addr
+  io.mem.req.bits.data := req_data
+  io.mem.req.bits.typ  := wordSize
+  io.mem.req.bits.cmd  := Mux(state === s_write, M_XWR, M_XRD)
+  io.mem.req.bits.tag  := UInt(0)
+
+  when (state === s_start) { sending := Bool(true); state := s_write }
+
+  when (io.mem.req.fire()) { sending := Bool(false) }
+  when (io.mem.resp.valid) { sending := Bool(true) }
+
+  // Write phase is followed by the read phase, the read phase by completion.
+  when (req_wrap) { state := Mux(state === s_write, s_read, s_finished) }
+
+  io.status.finished := (state === s_finished)
+
+  /** Compares `recv` and `expected` after resizing both to one generator word. */
+  def data_match(recv: Bits, expected: Bits): Bool = {
+    val recv_resized = Wire(Bits(width = genWordBits))
+    val exp_resized = Wire(Bits(width = genWordBits))
+
+    recv_resized := recv
+    exp_resized := expected
+    recv_resized === exp_resized
+  }
+
+  // Only responses that carry data are compared with the expected pattern.
+  val data_mismatch = io.mem.resp.valid && io.mem.resp.bits.has_data &&
+    !data_match(io.mem.resp.bits.data, req_data)
+
+  io.status.error.valid := data_mismatch
+  io.status.error.bits := UInt(id)
+
+  assert(!data_mismatch,
+    s"Received incorrect data in cached generator ${id}")
+}
+
+/**
+ * Ground test that instantiates one generator per cached and per uncached
+ * port of this tile and combines their status outputs.
+ */
+class GeneratorTest(implicit p: Parameters)
+    extends GroundTest()(p) with HasGeneratorParameters {
+
+  // Id of this tile's first generator: the number of generators belonging
+  // to the GroundTestKey entries before this tile.
+  val idStart = p(GroundTestKey).take(tileId)
+    .map(settings => settings.cached + settings.uncached)
+    .sum
+
+  // Cached generators take the first ids, uncached generators the ones after.
+  val cached = List.tabulate(nCached) { i =>
+    Module(new HellaCacheGenerator(idStart + i))
+  }
+
+  val uncached = List.tabulate(nUncached) { i =>
+    Module(new UncachedTileLinkGenerator(idStart + nCached + i))
+  }
+
+  io.cache <> cached.map(_.io.mem)
+  io.mem <> uncached.map(_.io.mem)
+
+  val gen_debug = cached.map(_.io.status) ++ uncached.map(_.io.status)
+  io.status := DebugCombiner(gen_debug)
+}
--- a/src/main/scala/groundtest/NastiTest.scala
+++ b/src/main/scala/groundtest/NastiTest.scala
@ -0,0 +1,121 @@
+package groundtest
+
+import Chisel._
+import uncore.tilelink._
+import uncore.converters._
+import junctions._
+import cde.Parameters
+
+/**
+ * NASTI traffic generator.
+ *
+ * Writes maxRequests words (one address beat followed by one data beat
+ * each), then issues maxRequests single-beat reads of the same addresses
+ * and checks every read response against `ref_data`.
+ *
+ * @param id index of this generator; it forms part of the generated addresses
+ */
+class NastiGenerator(id: Int)(implicit val p: Parameters) extends Module
+    with HasNastiParameters
+    with HasMIFParameters
+    with HasAddrMapParameters
+    with HasGeneratorParameters {
+
+  val io = new Bundle {
+    val status = new GroundTestStatus
+    val mem = new NastiIO
+  }
+
+  val mifDataBytes = mifDataBits / 8
+
+  // s_write_addr and s_write_data alternate per write; s_read issues the
+  // read addresses; s_wait waits for the remaining read responses.
+  val (s_start :: s_write_addr :: s_write_data ::
+       s_read  :: s_wait :: s_finish :: Nil) = Enum(Bits(), 6)
+  val state = Reg(init = s_start)
+
+  /** Expected word for request number `idx`: a fixed constant plus idx * 8. */
+  def ref_data(idx: UInt) = UInt(0x35abffcd, genWordBits) + (idx << UInt(3))
+
+  // Low address bits: the generator id (omitted when there is only one
+  // generator) above a zero word offset.
+  val part_of_addr =
+    if (log2Ceil(nGens) > 0) {
+      Cat(UInt(id, log2Ceil(nGens)),
+          UInt(0, wordOffset))
+    } else {
+      UInt(0, wordOffset)
+    }
+
+  // Write side: the word is replicated across the data bus and the strobe
+  // selects the byte lanes of the addressed word.
+  val (write_idx, write_done) = Counter(io.mem.w.fire(), maxRequests)
+  val write_addr = UInt(startAddress) + Cat(write_idx, part_of_addr)
+  val write_data = Fill(mifDataBits / genWordBits, ref_data(write_idx))
+  val write_align = write_addr(log2Up(mifDataBytes) - 1, 0)
+  val write_mask = UInt((1 << genWordBytes) - 1, nastiWStrobeBits) << write_align
+
+  // Read side: counts the read addresses issued.
+  val (read_idx, read_done) = Counter(io.mem.ar.fire(), maxRequests)
+  val read_addr = UInt(startAddress) + Cat(read_idx, part_of_addr)
+
+  io.mem.aw.valid := (state === s_write_addr)
+  io.mem.aw.bits := NastiWriteAddressChannel(
+    id = write_idx(nastiXIdBits - 1, 0),
+    addr = write_addr,
+    len = UInt(0),
+    size = UInt(log2Ceil(genWordBytes)))
+
+  io.mem.w.valid := (state === s_write_data)
+  io.mem.w.bits := NastiWriteDataChannel(
+    data = write_data,
+    strb = Some(write_mask),
+    last = Bool(true))
+
+  io.mem.ar.valid := (state === s_read)
+  io.mem.ar.bits := NastiReadAddressChannel(
+    id = UInt(0),
+    addr = read_addr,
+    len = UInt(0),
+    size = UInt(log2Ceil(genWordBytes)))
+
+  // Responses are always accepted immediately.
+  io.mem.r.ready := Bool(true)
+  io.mem.b.ready := Bool(true)
+
+  io.status.finished := (state === s_finish)
+
+  // Counts read responses and recomputes the address each one belongs to
+  // so the addressed word can be extracted from the data bus.
+  val (read_resp_idx,  read_resp_done)  = Counter(io.mem.r.fire(), maxRequests)
+  val read_resp_addr = UInt(startAddress) + Cat(read_resp_idx, part_of_addr)
+  val read_offset = read_resp_addr(log2Up(nastiXDataBits / 8) - 1, 0)
+  val read_shift = Cat(read_offset, UInt(0, 3))
+  val read_data = (io.mem.r.bits.data >> read_shift)(genWordBits - 1, 0)
+
+  val data_mismatch = io.mem.r.valid && read_data =/= ref_data(read_resp_idx)
+  assert(!data_mismatch, "NASTI Test: results do not match")
+  io.status.error.valid := data_mismatch
+  io.status.error.bits := UInt(1)
+
+  // Later assignments take priority: on the final W beat write_done wins
+  // over the return to s_write_addr.
+  when (state === s_start) { state := s_write_addr }
+  when (io.mem.aw.fire()) { state := s_write_data  }
+  when (io.mem.w.fire()) { state := s_write_addr }
+  when (write_done) { state := s_read }
+  when (read_done) { state := s_wait }
+  when (read_resp_done) { state := s_finish }
+
+  // Watchdog from a read address to the last beat of its response.
+  val r_timer = Module(new Timer(1000, 2))
+  r_timer.io.start.valid := io.mem.ar.fire()
+  r_timer.io.start.bits := io.mem.ar.bits.id
+  r_timer.io.stop.valid := io.mem.r.fire() && io.mem.r.bits.last
+  r_timer.io.stop.bits := io.mem.r.bits.id
+  assert(!r_timer.io.timeout.valid, "NASTI Read timed out")
+
+  // Watchdog from a write address to its write response.
+  val w_timer = Module(new Timer(1000, 2))
+  w_timer.io.start.valid := io.mem.aw.fire()
+  w_timer.io.start.bits := io.mem.aw.bits.id
+  w_timer.io.stop.valid := io.mem.b.fire()
+  w_timer.io.stop.bits := io.mem.b.bits.id
+  assert(!w_timer.io.timeout.valid, "NASTI Write timed out")
+
+  // Timeout code: 1 for a read timeout, 2 otherwise.
+  io.status.timeout.valid := r_timer.io.timeout.valid || w_timer.io.timeout.valid
+  io.status.timeout.bits := Mux(r_timer.io.timeout.valid, UInt(1), UInt(2))
+}
+
+/**
+ * Ground test that drives a NastiGenerator through a
+ * TileLinkIONastiIOConverter onto the tile's single uncached memory port.
+ */
+class NastiConverterTest(implicit p: Parameters) extends GroundTest()(p)
+    with HasNastiParameters {
+  // Exactly one uncached port and no cached ports are supported.
+  require(tileSettings.uncached == 1 && tileSettings.cached == 0)
+
+  // Generator id: the number of generators belonging to the GroundTestKey
+  // entries before this tile.
+  val genId = p(GroundTestKey).take(tileId)
+    .map(settings => settings.cached + settings.uncached)
+    .sum
+
+  val test = Module(new NastiGenerator(genId))
+
+  // The converter is built with the "Outermost" TileLink parameters.
+  val converterParams = p.alterPartial { case TLId => "Outermost" }
+  val converter = Module(new TileLinkIONastiIOConverter()(converterParams))
+
+  converter.io.nasti <> test.io.mem
+  TileLinkWidthAdapter(io.mem.head, converter.io.tl)
+  io.status := test.io.status
+}
--- a/src/main/scala/groundtest/Regression.scala
+++ b/src/main/scala/groundtest/Regression.scala
@ -0,0 +1,776 @@
+package groundtest
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.agents._
+import uncore.util._
+import junctions.{ParameterizedBundle, HasAddrMapParameters, Timer}
+import rocket.HellaCacheIO
+import cde.{Parameters, Field}
+
+/**
+ * IO bundle shared by all regression modules: a start input, a cache port,
+ * an uncached TileLink port, and finished / errored status outputs.
+ */
+class RegressionIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
+  val start = Bool(INPUT)
+  val cache = new HellaCacheIO
+  val mem = new ClientUncachedTileLinkIO
+  val finished = Bool(OUTPUT)
+  val errored = Bool(OUTPUT)
+}
+
+/**
+ * Base class for the individual regression modules. It exposes a
+ * RegressionIO, the start of the "mem" region of the address map (both as a
+ * byte address and as a block address), and helpers that tie off whichever
+ * port a particular regression does not use.
+ */
+abstract class Regression(implicit val p: Parameters)
+    extends Module with HasTileLinkParameters with HasAddrMapParameters {
+  val memStart = addrMap("mem").start
+  val memStartBlock = memStart >> p(CacheBlockOffsetBits)
+  val io = new RegressionIO
+
+  /** Tie off the cache port: never issue a request, drive idle field values. */
+  def disableCache(): Unit = {
+    val req = io.cache.req
+    req.valid := Bool(false)
+    req.bits.addr := UInt(memStart)
+    req.bits.typ := UInt(0)
+    req.bits.cmd := M_XRD
+    req.bits.tag := UInt(0)
+    req.bits.data := Bits(0)
+    req.bits.phys := Bool(true)
+    io.cache.invalidate_lr := Bool(false)
+  }
+
+  /** Tie off the uncached port: never send an acquire, never accept a grant. */
+  def disableMem(): Unit = {
+    io.mem.acquire.valid := Bool(false)
+    io.mem.grant.ready := Bool(false)
+  }
+}
+
+/**
+ * Regression for a bug in which the TileLinkIONarrower logic misbehaved
+ * when a PutBlock request and a narrow Get request were sent to it at the
+ * same time. Repeating this sequence enough times would cause a queue to
+ * fill up and deadlock the system.
+ *
+ * Each run issues one PutBlock on the uncached port and, concurrently, one
+ * read of the "io:int:bootrom" region on the cache port, then waits for both
+ * to be acknowledged. The test finishes after nRuns runs and never reports
+ * an error; a failure shows up as a hang.
+ */
+class IOGetAfterPutBlockRegression(implicit p: Parameters) extends Regression()(p) {
+  val nRuns = 7
+  // Number of completed runs; wide enough to hold the terminal value nRuns.
+  val run = Reg(init = UInt(0, log2Up(nRuns + 1)))
+
+  // Beat counter for the PutBlock currently being sent.
+  val (put_beat, put_done) = Counter(
+    io.mem.acquire.fire() && io.mem.acquire.bits.hasData(), tlDataBeats)
+
+  val started = Reg(init = Bool(false))
+  val put_sent = Reg(init = Bool(false))
+  val get_sent = Reg(init = Bool(false))
+  val put_acked = Reg(init = Bool(false))
+  val get_acked = Reg(init = Bool(false))
+  val both_acked = put_acked && get_acked
+
+  when (!started && io.start) { started := Bool(true) }
+
+  io.mem.acquire.valid := !put_sent && started
+  io.mem.acquire.bits := PutBlock(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock),
+    addr_beat = put_beat,
+    data = UInt(0))
+  io.mem.grant.ready := Bool(true)
+
+  io.cache.req.valid := !get_sent && started
+  io.cache.req.bits.addr := UInt(addrMap("io:int:bootrom").start)
+  io.cache.req.bits.typ := UInt(log2Ceil(32 / 8))
+  io.cache.req.bits.cmd := M_XRD
+  io.cache.req.bits.tag := UInt(0)
+  // The request is a read, so the data field is unused; drive it and phys
+  // with the same values disableCache() uses so that no request field is
+  // left unconnected when RegressionTest forwards the whole request.
+  io.cache.req.bits.data := Bits(0)
+  io.cache.req.bits.phys := Bool(true)
+  io.cache.invalidate_lr := Bool(false)
+
+  when (put_done) { put_sent := Bool(true) }
+  when (io.cache.req.fire()) { get_sent := Bool(true) }
+  when (io.mem.grant.fire()) { put_acked := Bool(true) }
+  when (io.cache.resp.valid) { get_acked := Bool(true) }
+
+  // Once both requests of a run are acknowledged, start the next run
+  // (unless this was the last one) and clear the ack flags.
+  when (both_acked) {
+    when (run < UInt(nRuns - 1)) {
+      put_sent := Bool(false)
+      get_sent := Bool(false)
+    }
+    put_acked := Bool(false)
+    get_acked := Bool(false)
+    run := run + UInt(1)
+  }
+
+  io.finished := (run === UInt(nRuns))
+  // No data is checked here; drive errored explicitly because
+  // RegressionTest forwards it to the tile's error status.
+  io.errored := Bool(false)
+}
+
+/* This was a bug with merging two PutBlocks to the same address in the L2.
+ * The transactor would start accepting beats of the second transaction but
+ * acknowledge both of them when the first one finished.
+ * This left the transactor in an inconsistent state, because the next time
+ * around it would start the put in the middle.
+ *
+ * The test sends three PutBlocks back to back (two to the same block, one
+ * nSets blocks further on) and finishes once three grants have come back.
+ * It never reports an error; a failure shows up as a hang. */
+class PutBlockMergeRegression(implicit p: Parameters)
+    extends Regression()(p) with HasTileLinkParameters {
+  val s_idle :: s_put :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
+  val state = Reg(init = s_idle)
+
+  disableCache()
+
+  val l2params = p.alterPartial({ case CacheName => "L2Bank" })
+  val nSets = l2params(NSets)
+  // Block addresses of the PutBlocks, in the order they are sent.
+  val addr_blocks = Vec(Seq(0, 0, nSets).map(num => UInt(num + memStartBlock)))
+  val nSteps = addr_blocks.size
+  // acq_beat: beat within the current PutBlock; send_cnt: which PutBlock is
+  // being sent; ack_cnt: number of grants received so far.
+  val (acq_beat, acq_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
+  val (send_cnt, send_done) = Counter(acq_done, nSteps)
+  val (ack_cnt, ack_done) = Counter(io.mem.grant.fire(), nSteps)
+
+  io.mem.acquire.valid := (state === s_put)
+  io.mem.acquire.bits := PutBlock(
+    client_xact_id = send_cnt,
+    addr_block = addr_blocks(send_cnt),
+    addr_beat = acq_beat,
+    data = Cat(send_cnt, acq_beat))
+  io.mem.grant.ready := Bool(true)
+
+  when (state === s_idle && io.start) { state := s_put }
+  when (send_done) { state := s_wait }
+  when (ack_done) { state := s_done }
+
+  io.finished := (state === s_done)
+  // No data is checked here; drive errored explicitly because
+  // RegressionTest forwards it to the tile's error status.
+  io.errored := Bool(false)
+}
+
+/* Make sure the L2 does "the right thing" when a put is sent no-alloc but
+ * the block is already in cache. It should just treat the request as a
+ * regular allocating put.
+ *
+ * Sequence: GetPrefetch (id 0), then a no-alloc PutBlock (id 1), then a
+ * GetBlock (id 2) of the same block, whose data is compared to test_data. */
+class NoAllocPutHitRegression(implicit p: Parameters) extends Regression()(p) {
+  val (s_idle :: s_prefetch :: s_put :: s_get ::
+       s_wait :: s_done :: Nil) = Enum(Bits(), 6)
+  val state = Reg(init = s_idle)
+
+  val acq = io.mem.acquire.bits
+  val gnt = io.mem.grant.bits
+
+  val (put_beat, put_done) = Counter(io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
+  // Ack bitmap: bits [tlDataBeats - 1 : 0] record the GetBlock grant beats,
+  // bit tlDataBeats the prefetch ack and bit tlDataBeats + 1 the put ack.
+  val acked = Reg(init = UInt(0, tlDataBeats + 2))
+
+  val addr_block = UInt(memStartBlock + 2)
+  val test_data = UInt(0x3446)
+
+  val prefetch_acq = GetPrefetch(
+    client_xact_id = UInt(0),
+    addr_block = addr_block)
+  val put_acq = PutBlock(
+    client_xact_id = UInt(1),
+    addr_block = addr_block,
+    addr_beat = put_beat,
+    data = test_data,
+    alloc = Bool(false))
+  val get_acq = GetBlock(
+    client_xact_id = UInt(2),
+    addr_block = addr_block)
+
+  io.mem.acquire.valid := state.isOneOf(s_prefetch, s_get, s_put)
+  // get_acq is the default; it is only actually sent in s_get because valid
+  // is low in every state other than s_prefetch, s_put and s_get.
+  io.mem.acquire.bits := MuxCase(get_acq, Seq(
+    (state === s_prefetch) -> prefetch_acq,
+    (state === s_put) -> put_acq))
+  io.mem.grant.ready := Bool(true)
+
+  when (state === s_idle && io.start) { state := s_prefetch }
+  when (state === s_prefetch && io.mem.acquire.ready) { state := s_put }
+  when (put_done) { state := s_get }
+  when (state === s_get && io.mem.acquire.ready) { state := s_wait }
+  // Done once every bit of the ack bitmap has been set.
+  when (state === s_wait && acked.andR) { state := s_done }
+
+  // Record each grant in the ack bitmap, keyed by its transaction id.
+  when (io.mem.grant.fire()) {
+    switch (gnt.client_xact_id) {
+      is (UInt(0)) { acked := acked | UInt(1 << tlDataBeats) }
+      is (UInt(1)) { acked := acked | UInt(1 << (tlDataBeats + 1)) }
+      is (UInt(2)) { acked := acked | UIntToOH(gnt.addr_beat) }
+    }
+  }
+
+  // Every grant beat that carries data must match what the put wrote.
+  val data_mismatch = io.mem.grant.fire() && gnt.hasData() && gnt.data =/= test_data
+  assert(!data_mismatch, "NoAllocPutHitRegression: data does not match")
+
+  io.finished := (state === s_done)
+  io.errored := data_mismatch
+
+  disableCache()
+}
+
+/** Make sure L2 does the right thing when multiple puts are sent for the
+ *  same block, but only the first one has the alloc bit set.
+ *
+ *  Sequence: one PutPrefetch, then test_data.size single-beat Puts, then
+ *  the same number of Gets whose data is compared against test_data. */
+class MixedAllocPutRegression(implicit p: Parameters) extends Regression()(p) {
+  val (s_idle :: s_pf_send :: s_pf_wait :: s_put_send :: s_put_wait ::
+       s_get_send :: s_get_wait :: s_done :: Nil) = Enum(Bits(), 8)
+  val state = Reg(init = s_idle)
+
+  /** We have to test two cases: one when the block is already cached
+   *  and one when the block is not yet cached.
+   *  We use prefetching to assure the first case. */
+  // The four vectors below are indexed by request number: entry i gives the
+  // data, alloc bit, block address and beat of the i-th Put / Get.
+  val test_data = Vec(
+    UInt("h2222222211111111"),
+    UInt("h3333333333333333"),
+    UInt("h4444444444444444"),
+    UInt("h5555555555555555"))
+  val test_alloc = Vec(Bool(false), Bool(false), Bool(true), Bool(false))
+  // First two requests target the prefetched block (memStartBlock + 15),
+  // the last two target memStartBlock + 16, which is not prefetched.
+  val test_block = Vec(
+    Seq.fill(2) { UInt(memStartBlock + 15) } ++
+    Seq.fill(2) { UInt(memStartBlock + 16) })
+  val test_beat = Vec(UInt(0), UInt(2), UInt(1), UInt(2))
+
+  // Request / response counters for the put phase. They advance on ready
+  // (resp. valid) alone because the matching valid (resp. ready) signal is
+  // asserted for the whole state that gates them.
+  val (put_acq_id, put_acq_done) = Counter(
+    state === s_put_send && io.mem.acquire.ready, test_data.size)
+  val (put_gnt_cnt, put_gnt_done) = Counter(
+    state === s_put_wait && io.mem.grant.valid, test_data.size)
+
+  // Request / response counters for the get phase.
+  val (get_acq_id, get_acq_done) = Counter(
+    state === s_get_send && io.mem.acquire.ready, test_data.size)
+  val (get_gnt_cnt, get_gnt_done) = Counter(
+    state === s_get_wait && io.mem.grant.valid, test_data.size)
+
+  val pf_acquire = PutPrefetch(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock + 15))
+
+  val put_acquire = Put(
+    client_xact_id = put_acq_id,
+    addr_block = test_block(put_acq_id),
+    addr_beat = test_beat(put_acq_id),
+    data = test_data(put_acq_id),
+    alloc = test_alloc(put_acq_id))
+
+  val get_acquire = Get(
+    client_xact_id = get_acq_id,
+    addr_block = test_block(get_acq_id),
+    addr_beat = test_beat(get_acq_id))
+
+  io.mem.acquire.valid := state.isOneOf(s_pf_send, s_put_send, s_get_send)
+  io.mem.acquire.bits := MuxLookup(state, pf_acquire, Seq(
+    s_put_send -> put_acquire,
+    s_get_send -> get_acquire))
+  io.mem.grant.ready := state.isOneOf(s_pf_wait, s_put_wait, s_get_wait)
+
+  when (state === s_idle && io.start) { state := s_pf_send }
+  when (state === s_pf_send && io.mem.acquire.ready) { state := s_pf_wait }
+  when (state === s_pf_wait && io.mem.grant.valid) { state := s_put_send }
+  when (put_acq_done) { state := s_put_wait }
+  when (put_gnt_done) { state := s_get_send }
+  when (get_acq_done) { state := s_get_wait }
+  when (get_gnt_done) { state := s_done }
+
+  io.finished := (state === s_done)
+
+  // Each Get grant is checked against the entry selected by its transaction
+  // id, which was set to the request index when the Get was issued.
+  val data_mismatch = state === s_get_wait && io.mem.grant.fire() &&
+    io.mem.grant.bits.data =/= test_data(io.mem.grant.bits.client_xact_id)
+  assert(!data_mismatch, "MixedAllocPutRegression: data mismatch")
+  io.errored := data_mismatch
+
+  disableCache()
+}
+
+/* Make sure each no-alloc put triggers a request to outer memory.
+ * Unfortunately, there's no way to verify that this works except by looking
+ * at the waveform.
+ *
+ * The test sends nPuts no-alloc PutBlocks to the same block and finishes
+ * once a grant has been received for each transaction id. */
+class RepeatedNoAllocPutRegression(implicit p: Parameters) extends Regression()(p) {
+  disableCache()
+
+  val nPuts = 2
+  // put_beat: beat within the current PutBlock; req_cnt: which PutBlock.
+  val (put_beat, put_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
+  val (req_cnt, req_done) = Counter(put_done, nPuts)
+
+  val sending = Reg(init = Bool(false))
+  // One bit per put, set when the grant with that transaction id arrives.
+  val acked = Reg(init = UInt(0, nPuts))
+
+  when (!sending && io.start) { sending := Bool(true) }
+  when (sending && req_done) { sending := Bool(false) }
+
+  io.mem.acquire.valid := sending
+  io.mem.acquire.bits := PutBlock(
+    client_xact_id = req_cnt,
+    addr_block = UInt(memStartBlock + 5),
+    addr_beat = put_beat,
+    data = Cat(req_cnt, UInt(0, 8)),
+    alloc = Bool(false))
+  io.mem.grant.ready := Bool(true)
+
+  when (io.mem.grant.fire()) {
+    acked := acked | UIntToOH(io.mem.grant.bits.client_xact_id)
+  }
+
+  io.finished := acked.andR
+  // Nothing is checked in hardware; drive errored explicitly because
+  // RegressionTest forwards it to the tile's error status.
+  io.errored := Bool(false)
+}
+
+/* Make sure write masking works properly by writing a block of data
+ * piece by piece.
+ *
+ * The block is written by two PutBlocks (stage 0 and stage 1). In each
+ * stage only the beats whose low index bit equals the stage carry data and
+ * a full write mask; the other beats carry an all-zero mask. A GetBlock
+ * follows each PutBlock, and the one after stage 1 is checked beat by beat. */
+class WriteMaskedPutBlockRegression(implicit p: Parameters) extends Regression()(p) {
+  disableCache()
+
+  val (s_idle :: s_put_send :: s_put_ack :: s_stall ::
+       s_get_send :: s_get_ack :: s_done :: Nil) = Enum(Bits(), 7)
+  val state = Reg(init = s_idle)
+  // State to enter once the current stall period is over.
+  val post_stall_state = Reg(init = s_idle)
+
+  val gnt = io.mem.grant.bits
+  val acq = io.mem.acquire.bits
+
+  // 0 during the first put / get pair, 1 during the second.
+  val stage = Reg(init = UInt(0, 1))
+
+  val (put_beat, put_block_done) = Counter(
+    io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
+  // Per-beat pattern: base constant plus four times the beat index.
+  val put_data = UInt(0x30010040, tlDataBits) + (put_beat << UInt(2))
+
+  val put_acq = PutBlock(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock + 7),
+    addr_beat = put_beat,
+    data = Mux(put_beat(0) === stage, put_data, UInt(0)),
+    wmask = Some(Mux(put_beat(0) === stage, Acquire.fullWriteMask, Bits(0))))
+
+  // Stage 0 reads block memStartBlock + 6, stage 1 reads the block that was
+  // written (memStartBlock + 7).
+  val get_acq = GetBlock(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock + 6) + stage)
+
+  io.mem.acquire.valid := state.isOneOf(s_put_send, s_get_send)
+  io.mem.acquire.bits := Mux(state === s_get_send, get_acq, put_acq)
+  io.mem.grant.ready := state.isOneOf(s_put_ack, s_get_ack)
+
+  val (get_cnt, get_done) = Counter(
+    io.mem.grant.fire() && gnt.hasData(), tlDataBeats)
+  // Expected read data for the beat currently being received.
+  val get_data = UInt(0x30010040, tlDataBits) + (get_cnt << UInt(2))
+
+  // Each stall lasts 16 cycles spent in s_stall.
+  val (stall_cnt, stall_done) = Counter(state === s_stall, 16)
+
+  when (state === s_idle && io.start) { state := s_put_send }
+  when (put_block_done) { state := s_put_ack }
+  when (state === s_put_ack && io.mem.grant.valid) {
+    post_stall_state := s_get_send
+    state := s_stall
+  }
+  when (stall_done) { state := post_stall_state }
+  when (state === s_get_send && io.mem.acquire.ready) { state := s_get_ack }
+  when (get_done) {
+    // do a read in-between the two put-blocks to overwrite the data buffer
+    when (stage === UInt(0)) {
+      stage := stage + UInt(1)
+      post_stall_state := s_put_send
+      state := s_stall
+    } .otherwise { state := s_done }
+  }
+
+  io.finished := (state === s_done)
+
+  // Only the read issued in stage 1 is checked; the stage-0 read targets a
+  // different block and its data is ignored.
+  val data_mismatch = io.mem.grant.fire() && io.mem.grant.bits.hasData() &&
+                      stage =/= UInt(0) && io.mem.grant.bits.data =/= get_data
+  assert(!data_mismatch, "WriteMaskedPutBlockRegression: data does not match")
+  io.errored := data_mismatch
+}
+
+/* Make sure a prefetch that hits returns immediately.
+ *
+ * Sends a PutPrefetch followed by a GetPrefetch to the same block and
+ * finishes once both have been granted. No error is ever reported; a
+ * failure shows up as a hang. */
+class PrefetchHitRegression(implicit p: Parameters) extends Regression()(p) {
+  disableCache()
+
+  val sending = Reg(init = Bool(false))
+  val nPrefetches = 2
+  // Index of the prefetch currently being sent.
+  val (pf_cnt, pf_done) = Counter(io.mem.acquire.fire(), nPrefetches)
+  // One bit per prefetch, set when the grant with that id arrives.
+  val acked = Reg(init = UInt(0, nPrefetches))
+
+  val acq_bits = Vec(
+    PutPrefetch(client_xact_id = UInt(0), addr_block = UInt(memStartBlock + 12)),
+    GetPrefetch(client_xact_id = UInt(1), addr_block = UInt(memStartBlock + 12)))
+
+  io.mem.acquire.valid := sending
+  io.mem.acquire.bits := acq_bits(pf_cnt)
+  io.mem.grant.ready := Bool(true)
+
+  when (io.mem.grant.fire()) {
+    acked := acked | UIntToOH(io.mem.grant.bits.client_xact_id)
+  }
+
+  when (!sending && io.start) { sending := Bool(true) }
+  when (sending && pf_done) { sending := Bool(false) }
+
+  io.finished := acked.andR
+  io.errored := Bool(false)
+}
+
+/* This tests the sort of access pattern that Hwacha uses.
+ * Instead of using PutBlock/GetBlock, it uses word-sized puts and gets
+ * to the same block.
+ * Each request has the same client_xact_id, but there are multiple in flight.
+ * The responses therefore must come back in the order they are sent.
+ *
+ * The test issues one Get per beat of a single block, all with id 0, and
+ * checks that the grants come back in ascending beat order. */
+class SequentialSameIdGetRegression(implicit p: Parameters) extends Regression()(p) {
+  disableCache()
+
+  val sending = Reg(init = Bool(false))
+  val finished = Reg(init = Bool(false))
+
+  // send_cnt is the beat requested next; recv_cnt is the beat expected next.
+  val (send_cnt, send_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
+  val (recv_cnt, recv_done) = Counter(io.mem.grant.fire(), tlDataBeats)
+
+  when (!sending && io.start) { sending := Bool(true) }
+  when (send_done) { sending := Bool(false) }
+  when (recv_done) { finished := Bool(true) }
+
+  io.mem.acquire.valid := sending
+  io.mem.acquire.bits := Get(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock + 9),
+    addr_beat = send_cnt)
+  // Stop accepting grants once all expected responses have arrived.
+  io.mem.grant.ready := !finished
+
+  io.finished := finished
+
+  val beat_mismatch = io.mem.grant.fire() && io.mem.grant.bits.addr_beat =/= recv_cnt
+  assert(!beat_mismatch, "SequentialSameIdGetRegression: grant received out of order")
+  io.errored := beat_mismatch
+}
+
+/* Test that a writeback will occur by writing nWays + 1 blocks to the same
+ * set. This assumes that there is only a single cache bank. If we want to
+ * test multibank configurations, we'll have to think of some other way to
+ * determine which banks are conflicting.
+ *
+ * Phase 1 (s_put) writes nWays + 1 blocks spaced nSets blocks apart, one
+ * PutBlock at a time. Phase 2 (s_get) reads them back one GetBlock at a
+ * time and compares every beat with the value written. */
+class WritebackRegression(implicit p: Parameters) extends Regression()(p) {
+  disableCache()
+
+  val l2params = p.alterPartial({ case CacheName => "L2Bank" })
+  val nSets = l2params(NSets)
+  val nWays = l2params(NWays)
+
+  // Block addresses nSets apart, and the distinct value written to each.
+  val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(memStartBlock + i * nSets) }
+  val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1423) }
+
+  // put_beat / get_beat count beats within one PutBlock / one GetBlock grant.
+  val (put_beat, put_done) = Counter(
+    io.mem.acquire.fire() && io.mem.acquire.bits.hasData(), tlDataBeats)
+  val (get_beat, get_done) = Counter(
+    io.mem.grant.fire() && io.mem.grant.bits.hasData(), tlDataBeats)
+  // put_cnt / get_cnt select the block for the next PutBlock / GetBlock.
+  val (put_cnt, _) = Counter(put_done, nWays + 1)
+  val (get_cnt, _) = Counter(
+    io.mem.acquire.fire() && !io.mem.acquire.bits.hasData(), nWays + 1)
+  // ack_cnt counts completed transactions in the current phase: a grant
+  // without data (put ack) or the last beat of a GetBlock grant.
+  val (ack_cnt, ack_done) = Counter(
+    io.mem.grant.fire() && !io.mem.grant.bits.hasData() || get_done, nWays + 1)
+
+  val s_idle :: s_put :: s_get :: s_done :: Nil = Enum(Bits(), 4)
+  val state = Reg(init = s_idle)
+  // True while a request is being sent; grants are only accepted while it
+  // is false, so requests and responses strictly alternate.
+  val sending = Reg(init = Bool(false))
+
+  io.mem.acquire.valid := sending
+  io.mem.acquire.bits := Mux(state === s_put,
+    PutBlock(
+      client_xact_id = UInt(0),
+      addr_block = addr_blocks(put_cnt),
+      addr_beat = put_beat,
+      data = data(put_cnt)),
+    GetBlock(
+      client_xact_id = UInt(0),
+      addr_block = addr_blocks(get_cnt)))
+  io.mem.grant.ready := !sending
+
+  when (state === s_idle && io.start) { state := s_put; sending := Bool(true) }
+  // Stop sending after the last beat of a PutBlock or after a GetBlock fires.
+  when (put_done || state === s_get && io.mem.acquire.fire()) {
+    sending := Bool(false)
+  }
+  // Resume sending after a put ack, or after a GetBlock response completes
+  // unless it was the last one of the phase.
+  when (get_done && !ack_done || state === s_put && io.mem.grant.fire()) {
+    sending := Bool(true)
+  }
+  when (ack_done) { state := Mux(state === s_put, s_get, s_done) }
+
+  io.finished := (state === s_done)
+
+  // ack_cnt is the index of the block whose read response is in flight.
+  val data_mismatch = io.mem.grant.fire() && io.mem.grant.bits.hasData() &&
+                      io.mem.grant.bits.data =/= data(ack_cnt)
+  assert(!data_mismatch, "WritebackRegression: incorrect data")
+  io.errored := data_mismatch
+}
+
+/* Writes nWays + 1 values through the cache port to blocks spaced nSets
+ * blocks apart, then reads them all back and compares the data.
+ * NOTE(review): going by the class name and the "L1D" parameters, the
+ * nWays + 1 conflicting blocks are presumably meant to force the L1 to
+ * release a dirty line -- confirm. */
+class ReleaseRegression(implicit p: Parameters) extends Regression()(p) {
+  disableMem()
+
+  val l1params = p.alterPartial({ case CacheName => "L1D" })
+  val nSets = l1params(NSets)
+  val nWays = l1params(NWays)
+  val blockOffset = l1params(CacheBlockOffsetBits)
+
+  val startBlock = memStartBlock + 10
+  // Block addresses nSets apart, and the distinct value stored at each.
+  val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(startBlock + i * nSets) }
+  val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1522) }
+  // req_idx / resp_idx select the block of the next request / response.
+  val (req_idx, req_done) = Counter(io.cache.req.fire(), nWays + 1)
+  val (resp_idx, resp_done) = Counter(io.cache.resp.valid, nWays + 1)
+
+  // True while a request is pending; cleared when it fires and set again by
+  // the response, so only one request is outstanding at a time.
+  val sending = Reg(init = Bool(false))
+  val s_idle :: s_write :: s_read :: s_done :: Nil = Enum(Bits(), 4)
+  val state = Reg(init = s_idle)
+
+  io.cache.req.valid := sending && state.isOneOf(s_write, s_read)
+  // Byte address of the start of the selected block.
+  io.cache.req.bits.addr := Cat(addr_blocks(req_idx), UInt(0, blockOffset))
+  io.cache.req.bits.typ := UInt(log2Ceil(64 / 8))
+  io.cache.req.bits.cmd := Mux(state === s_write, M_XWR, M_XRD)
+  io.cache.req.bits.tag := UInt(0)
+  io.cache.req.bits.data := data(req_idx)
+  io.cache.req.bits.phys := Bool(true)
+  io.cache.invalidate_lr := Bool(false)
+
+  when (state === s_idle && io.start) {
+    sending := Bool(true)
+    state := s_write
+  }
+
+  // After nWays + 1 responses, move from the write phase to the read phase,
+  // or from the read phase to done.
+  when (resp_done) { state := Mux(state === s_write, s_read, s_done) }
+  when (io.cache.req.fire()) { sending := Bool(false) }
+  when (io.cache.resp.valid) { sending := Bool(true) }
+
+  io.finished := (state === s_done)
+
+  val data_mismatch = io.cache.resp.valid && io.cache.resp.bits.has_data &&
+                      io.cache.resp.bits.data =/= data(resp_idx)
+  assert(!data_mismatch, "ReleaseRegression: data mismatch")
+  io.errored := data_mismatch
+}
+
+/* Sends a single-beat Put to one block immediately followed by a PutBlock
+ * to the next block, then waits for two grants. No data is checked and no
+ * error is ever reported; a failure shows up as a hang. */
+class PutBeforePutBlockRegression(implicit p: Parameters) extends Regression()(p) {
+  val (s_idle :: s_put :: s_putblock :: s_wait ::
+       s_finished :: Nil) = Enum(Bits(), 5)
+  val state = Reg(init = s_idle)
+
+  disableCache()
+
+  // Beat counter for the PutBlock. It advances on ready alone because valid
+  // is asserted for the whole of s_putblock.
+  val (put_block_beat, put_block_done) = Counter(
+    state === s_putblock && io.mem.acquire.ready, tlDataBeats)
+
+  val put_acquire = Put(
+    client_xact_id = UInt(0),
+    addr_block = UInt(memStartBlock),
+    addr_beat = UInt(0),
+    data = UInt(0),
+    wmask = Some(UInt((1 << 8) - 1)))
+
+  val put_block_acquire = PutBlock(
+    client_xact_id = UInt(1),
+    addr_block = UInt(memStartBlock + 1),
+    addr_beat = put_block_beat,
+    data = UInt(0))
+
+  // NOTE(review): put_acked is never read or written in this class; the
+  // grants are counted by ack_cnt below instead.
+  val put_acked = Reg(init = UInt(0, 2))
+
+  // all_acked is raised by the second grant.
+  val (ack_cnt, all_acked) = Counter(io.mem.grant.fire(), 2)
+
+  io.mem.acquire.valid := state.isOneOf(s_put, s_putblock)
+  io.mem.acquire.bits := Mux(state === s_put, put_acquire, put_block_acquire)
+  // Grants are only accepted once both requests have been sent in full.
+  io.mem.grant.ready := (state === s_wait)
+
+  when (state === s_idle && io.start) { state := s_put }
+  when (state === s_put && io.mem.acquire.ready) { state := s_putblock }
+  when (put_block_done) { state := s_wait }
+  when (all_acked) { state := s_finished }
+
+  io.finished := (state === s_finished)
+  io.errored := Bool(false)
+}
+
+/**
+ * Make sure that multiple gets to the same line and beat are merged
+ * correctly, even if it is a cache miss.
+ *
+ * Phase 1 (s_put) writes the same word to nWays + 1 conflicting lines, one
+ * Put at a time. Phase 2 (s_get) sends two Gets back to back for the first
+ * line and checks that both return the written word.
+ */
+class MergedGetRegression(implicit p: Parameters) extends Regression()(p) {
+  disableCache()
+
+  val l2params = p.alterPartial({ case CacheName => "L2Bank" })
+  val nSets = l2params(NSets)
+  val nWays = l2params(NWays)
+
+  val (s_idle :: s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 4)
+  val state = Reg(init = s_idle)
+
+  // Write NWays + 1 different conflicting lines to force an eviction of the first line
+  val (put_acq_cnt, put_acq_done) = Counter(state === s_put && io.mem.acquire.fire(), nWays + 1)
+  val (put_gnt_cnt, put_gnt_done) = Counter(state === s_put && io.mem.grant.fire(), nWays + 1)
+  // Consecutive puts are 2^log2Up(nSets) blocks apart.
+  val put_addr = UInt(memStartBlock) + Cat(put_acq_cnt, UInt(0, log2Up(nSets)))
+
+  val (get_acq_cnt, get_acq_done) = Counter(state === s_get && io.mem.acquire.fire(), 2)
+  val (get_gnt_cnt, get_gnt_done) = Counter(state === s_get && io.mem.grant.fire(), 2)
+  // Grants are only accepted while sending is false (see grant.ready below).
+  val sending = Reg(init = Bool(false))
+
+  when (state === s_idle && io.start) { state := s_put; sending := Bool(true) }
+  when (state === s_put) {
+    // One put at a time: stop after each acquire, resume after its grant.
+    when (io.mem.acquire.fire()) { sending := Bool(false) }
+    when (io.mem.grant.fire()) { sending := Bool(true) }
+    when (put_gnt_done) { state := s_get }
+  }
+  when (state === s_get) {
+    // Both gets are sent back to back before any grant is accepted.
+    when (get_acq_done) { sending := Bool(false) }
+    when (get_gnt_done) { state := s_done }
+  }
+
+  io.mem.acquire.valid := sending
+  io.mem.acquire.bits := Mux(state === s_put,
+    Put(
+      client_xact_id = UInt(0),
+      addr_block = put_addr,
+      addr_beat = UInt(3),
+      data = UInt("hdabb9321")),
+    Get(
+      client_xact_id = get_acq_cnt,
+      addr_block = UInt(memStartBlock),
+      addr_beat = UInt(3)))
+  io.mem.grant.ready := !sending
+
+  val data_mismatch = io.mem.grant.valid && io.mem.grant.bits.hasData() &&
+                      io.mem.grant.bits.data =/= UInt("hdabb9321")
+  // The message names this class so that a failure can be traced to it.
+  assert(!data_mismatch, "MergedGetRegression: wrong data back")
+
+  io.finished := state === s_done
+  io.errored := data_mismatch
+}
+
+/**
+ * Make sure that multiple puts to the same line and beat are merged
+ * correctly, even if there is a release from the L1
+ *
+ * The test first writes the line through the cache port, then sends
+ * tlMaxClientXacts Puts with increasing data to the same beat over the
+ * uncached port, and finally reads the beat back expecting the data of the
+ * last Put.
+ */
+class MergedPutRegression(implicit p: Parameters) extends Regression()(p)
+    with HasTileLinkParameters {
+  val (s_idle :: s_cache_req :: s_cache_wait ::
+       s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 6)
+  val state = Reg(init = s_idle)
+
+  io.cache.req.valid := (state === s_cache_req)
+  io.cache.req.bits.cmd := M_XWR
+  io.cache.req.bits.typ := UInt(log2Ceil(64 / 8))
+  io.cache.req.bits.addr := UInt(memStart)
+  io.cache.req.bits.data := UInt(1)
+  io.cache.req.bits.tag := UInt(0)
+  // Drive the remaining cache signals with the same values the other
+  // regressions in this file use, so that nothing forwarded by
+  // RegressionTest is left unconnected.
+  io.cache.req.bits.phys := Bool(true)
+  io.cache.invalidate_lr := Bool(false)
+
+  val sending = Reg(init = Bool(false))
+  // While set, no acquire is issued; see delay_cnt below.
+  val delaying = Reg(init = Bool(false))
+  val (put_cnt, put_done) = Counter(io.mem.acquire.fire(), tlMaxClientXacts)
+  // Every acquire is followed by a delay of 8 cycles.
+  val (delay_cnt, delay_done) = Counter(delaying, 8)
+  // One bit per Put. The width has to match the number of Puts sent
+  // (tlMaxClientXacts), otherwise andR would be reached too early or never.
+  val put_acked = Reg(UInt(width = tlMaxClientXacts), init = UInt(0))
+
+  io.mem.acquire.valid := sending && !delaying
+  io.mem.acquire.bits := Mux(state === s_put,
+    Put(
+      client_xact_id = put_cnt,
+      addr_block = UInt(memStartBlock),
+      addr_beat = UInt(0),
+      data = put_cnt + UInt(2)),
+    Get(
+      client_xact_id = UInt(0),
+      addr_block = UInt(memStartBlock),
+      addr_beat = UInt(0)))
+  io.mem.grant.ready := Bool(true)
+
+  when (state === s_idle && io.start) { state := s_cache_req }
+  when (io.cache.req.fire()) { state := s_cache_wait }
+  when (io.cache.resp.valid) { state := s_put; sending := Bool(true) }
+
+  when (io.mem.acquire.fire()) {
+    delaying := Bool(true)
+    // Stop after the last Put, or after the single Get.
+    when (put_done || state === s_get) { sending := Bool(false) }
+  }
+  when (delay_done) { delaying := Bool(false) }
+
+  when (io.mem.grant.fire()) {
+    when (state === s_put) {
+      put_acked := put_acked | UIntToOH(io.mem.grant.bits.client_xact_id)
+    }
+    when (state === s_get) { state := s_done }
+  }
+
+  // Move on to the read once every Put has been acknowledged.
+  when (state === s_put && put_acked.andR) {
+    state := s_get
+    sending := Bool(true)
+  }
+
+  // Data of the last Put: put_cnt + 2 with put_cnt = tlMaxClientXacts - 1.
+  val expected_data = UInt(2 + tlMaxClientXacts - 1)
+  val data_mismatch = io.mem.grant.valid && io.mem.grant.bits.hasData() &&
+    io.mem.grant.bits.data =/= expected_data
+
+  assert(!data_mismatch, "MergedPutRegression: data mismatch")
+
+  io.finished := (state === s_done)
+  io.errored := data_mismatch
+}
+
+/**
+ * Predefined lists of regression modules. Every call instantiates new
+ * modules, so the methods must be called while a parent module is being
+ * elaborated. The regressions run in the order listed.
+ */
+object RegressionTests {
+  /** Regressions in the cacheRegressions list, in run order. */
+  def cacheRegressions(implicit p: Parameters): Seq[Regression] = Seq(
+    Module(new PutBlockMergeRegression),
+    Module(new NoAllocPutHitRegression),
+    Module(new RepeatedNoAllocPutRegression),
+    Module(new WriteMaskedPutBlockRegression),
+    Module(new PrefetchHitRegression),
+    Module(new SequentialSameIdGetRegression),
+    Module(new WritebackRegression),
+    Module(new PutBeforePutBlockRegression),
+    Module(new MixedAllocPutRegression),
+    Module(new ReleaseRegression),
+    Module(new MergedGetRegression),
+    Module(new MergedPutRegression))
+
+  /** Regressions in the broadcastRegressions list, in run order. */
+  def broadcastRegressions(implicit p: Parameters): Seq[Regression] = Seq(
+    Module(new IOGetAfterPutBlockRegression),
+    Module(new WriteMaskedPutBlockRegression),
+    Module(new PutBeforePutBlockRegression),
+    Module(new ReleaseRegression))
+}
+
+// Configuration key: function that builds the list of regressions to run.
+case object GroundTestRegressions extends Field[Parameters => Seq[Regression]]
+
+/**
+ * Ground test that runs the regressions selected by GroundTestRegressions
+ * one after another. Only the regression selected by regress_idx is
+ * connected to the tile's first memory and cache ports; when it reports
+ * finished, the next one is started. The test is finished once every
+ * regression has finished.
+ */
+class RegressionTest(implicit p: Parameters) extends GroundTest()(p) {
+  val regressions = p(GroundTestRegressions)(p)
+  // Index of the running regression; equals regressions.size when all are done.
+  val regress_idx = Reg(init = UInt(0, log2Up(regressions.size + 1)))
+  val cur_finished = Wire(init = Bool(false))
+  val all_done = (regress_idx === UInt(regressions.size))
+  // One-cycle pulse that starts the currently selected regression.
+  val start = Reg(init = Bool(true))
+
+  // default output values
+  io.mem.head.acquire.valid := Bool(false)
+  io.mem.head.acquire.bits := GetBlock(
+    client_xact_id = UInt(0),
+    addr_block = UInt(0))
+  io.mem.head.grant.ready := Bool(false)
+  io.cache.head.req.valid := Bool(false)
+  io.cache.head.req.bits.addr := UInt(0)
+  io.cache.head.req.bits.typ := UInt(log2Ceil(64 / 8))
+  io.cache.head.req.bits.cmd := M_XRD
+  io.cache.head.req.bits.tag := UInt(0)
+  io.cache.head.req.bits.phys := Bool(true)
+  io.cache.head.req.bits.data := UInt(0)
+  io.cache.head.invalidate_lr := Bool(false)
+  // The error status is otherwise only assigned inside when blocks; give it
+  // a default like every other output so it is driven in all cases.
+  io.status.error.valid := Bool(false)
+  io.status.error.bits := UInt(0)
+
+  regressions.zipWithIndex.foreach { case (regress, i) =>
+    // True while regression i is the selected one.
+    val me = regress_idx === UInt(i)
+    regress.io.start := me && start
+    // Handshake inputs are gated so unselected regressions see no activity.
+    regress.io.mem.acquire.ready := io.mem.head.acquire.ready && me
+    regress.io.mem.grant.valid   := io.mem.head.grant.valid && me
+    regress.io.mem.grant.bits    := io.mem.head.grant.bits
+    regress.io.cache.req.ready   := io.cache.head.req.ready && me
+    regress.io.cache.resp.valid  := io.cache.head.resp.valid && me
+    regress.io.cache.resp.bits   := io.cache.head.resp.bits
+
+    // The selected regression drives the tile outputs; the error code is
+    // the regression's index.
+    when (me) {
+      io.mem.head.acquire.valid := regress.io.mem.acquire.valid
+      io.mem.head.acquire.bits := regress.io.mem.acquire.bits
+      io.mem.head.grant.ready := regress.io.mem.grant.ready
+      io.cache.head.req.valid := regress.io.cache.req.valid
+      io.cache.head.req.bits := regress.io.cache.req.bits
+      io.cache.head.invalidate_lr := regress.io.cache.invalidate_lr
+      io.status.error.valid := regress.io.errored
+      io.status.error.bits := UInt(i)
+      cur_finished := regress.io.finished
+    }
+
+    when (regress.io.start) {
+      printf(s"Starting regression ${regress.getClass.getSimpleName}\n")
+    }
+  }
+
+  // Advance to the next regression and pulse start for it.
+  when (cur_finished && !all_done) {
+    start := Bool(true)
+    regress_idx := regress_idx + UInt(1)
+  }
+  // This later assignment takes priority, so start is high for one cycle.
+  when (start) { start := Bool(false) }
+
+  val timeout = Timer(5000, start, cur_finished)
+  assert(!timeout, "Regression timed out")
+
+  io.status.finished := all_done
+  io.status.timeout.valid := timeout
+  io.status.timeout.bits := UInt(0)
+
+  assert(!(all_done && io.mem.head.grant.valid),
+    "Getting grant after test completion")
+
+  // A grant arriving after completion is reported with error code
+  // regressions.size, one past the last regression index.
+  when (all_done) {
+    io.status.error.valid := io.mem.head.grant.valid
+    io.status.error.bits := UInt(regressions.size)
+  }
+}
--- a/src/main/scala/groundtest/Tile.scala
+++ b/src/main/scala/groundtest/Tile.scala
@ -0,0 +1,139 @@
+package groundtest
+
+import Chisel._
+import rocket._
+import uncore.tilelink._
+import junctions._
+import scala.util.Random
+import scala.collection.mutable.ListBuffer
+import cde.{Parameters, Field}
+
+// Configuration key: function that builds the ground test for a tile.
+case object BuildGroundTest extends Field[Parameters => GroundTest]
+
+// Per-tile settings: number of uncached, cached and PTW ports, plus
+// maxXacts (not used in the code visible in this file).
+case class GroundTestTileSettings(
+  uncached: Int = 0, cached: Int = 0, ptw: Int = 0, maxXacts: Int = 1)
+// Configuration key: the settings of every ground test tile, indexed by tile id.
+case object GroundTestKey extends Field[Seq[GroundTestTileSettings]]
+// Configuration key: id of the tile being built; used as an index into GroundTestKey.
+case object GroundTestId extends Field[Int]
+
+// Bit widths of the timeout and error codes carried by GroundTestStatus.
+trait HasGroundTestConstants {
+  val timeoutCodeBits = 4
+  val errorCodeBits = 4
+}
+
+// Values derived from the configuration for the current ground test tile:
+// its id, its settings and port counts, and the start of the "mem" region
+// both as a byte address and as a block address.
+trait HasGroundTestParameters extends HasAddrMapParameters {
+  implicit val p: Parameters
+  val tileId = p(GroundTestId)
+  val tileSettings = p(GroundTestKey)(tileId)
+  val nUncached = tileSettings.uncached
+  val nCached = tileSettings.cached
+  val nPTW = tileSettings.ptw
+  val memStart = addrMap("mem").start
+  val memStartBlock = memStart >> p(CacheBlockOffsetBits)
+}
+
+/**
+ * Stand-in page table walker for n requestors. Requests are arbitrated
+ * round-robin and always accepted. One cycle later the chosen requestor
+ * gets a response whose ppn is the low ppnBits of the requested address and
+ * whose PTE flag bits are fixed constants.
+ */
+class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
+  val io = new Bundle {
+    val requestors = Vec(n, new TLBPTWIO).flip
+  }
+
+  val req_arb = Module(new RRArbiter(new PTWReq, n))
+  req_arb.io.in <> io.requestors.map(_.req)
+  // Requests are never back-pressured.
+  req_arb.io.out.ready := Bool(true)
+
+  // Translation used for every request: keep the low ppnBits of the vpn.
+  def vpn_to_ppn(vpn: UInt): UInt = vpn(ppnBits - 1, 0)
+
+  // NOTE(review): QueueChannel is not referenced anywhere else in this class.
+  class QueueChannel extends ParameterizedBundle()(p) {
+    val ppn = UInt(width = ppnBits)
+    val chosen = UInt(width = log2Up(n))
+  }
+
+  // The s2_* registers hold the request for one cycle so that the response
+  // goes out the cycle after the request was accepted.
+  val s1_ppn = vpn_to_ppn(req_arb.io.out.bits.addr)
+  val s2_ppn = RegEnable(s1_ppn, req_arb.io.out.valid)
+  val s2_chosen = RegEnable(req_arb.io.chosen, req_arb.io.out.valid)
+  val s2_valid = Reg(next = req_arb.io.out.valid)
+
+  // Response PTE: the translated ppn with constant flag bits.
+  val s2_resp = Wire(new PTWResp)
+  s2_resp.pte.ppn := s2_ppn
+  s2_resp.pte.reserved_for_software := UInt(0)
+  s2_resp.pte.d := Bool(true)
+  s2_resp.pte.a := Bool(false)
+  s2_resp.pte.g := Bool(false)
+  s2_resp.pte.u := Bool(true)
+  s2_resp.pte.r := Bool(true)
+  s2_resp.pte.w := Bool(true)
+  s2_resp.pte.x := Bool(false)
+  s2_resp.pte.v := Bool(true)
+
+  // All requestors see the same response bits; only the one that won
+  // arbitration sees valid. Status fields are constants.
+  io.requestors.zipWithIndex.foreach { case (requestor, i) =>
+    requestor.resp.valid := s2_valid && s2_chosen === UInt(i)
+    requestor.resp.bits := s2_resp
+    requestor.status.vm := UInt("b01000")
+    requestor.status.prv := UInt(PRV.S)
+    requestor.invalidate := Bool(false)
+  }
+}
+
+// Status reported by a ground test: a finished flag plus valid-qualified
+// timeout and error codes of the widths given by HasGroundTestConstants.
+class GroundTestStatus extends Bundle with HasGroundTestConstants {
+  val finished = Bool(OUTPUT)
+  val timeout = Valid(UInt(width = timeoutCodeBits))
+  val error = Valid(UInt(width = errorCodeBits))
+}
+
+// IO of a ground test: one vector per port kind, sized from the tile's
+// settings (nCached, nUncached, nPTW), plus the test status.
+class GroundTestIO(implicit val p: Parameters) extends ParameterizedBundle()(p)
+    with HasGroundTestParameters {
+  val cache = Vec(nCached, new HellaCacheIO)
+  val mem = Vec(nUncached, new ClientUncachedTileLinkIO)
+  val ptw = Vec(nPTW, new TLBPTWIO)
+  val status = new GroundTestStatus
+}
+
+// Base class of all ground tests; subclasses drive the ports of GroundTestIO.
+abstract class GroundTest(implicit val p: Parameters) extends Module
+    with HasGroundTestParameters {
+  val io = new GroundTestIO
+}
+
+/**
+ * Tile that hosts a ground test in place of a core. The test built by
+ * BuildGroundTest has its cache ports arbitrated into one data cache, its
+ * PTW ports (plus the cache's own, if any) served by a DummyPTW, and its
+ * uncached ports connected straight to the tile's uncached outputs.
+ */
+class GroundTestTile(resetSignal: Bool)
+                    (implicit val p: Parameters)
+                    extends Tile(resetSignal = resetSignal)(p)
+                    with HasGroundTestParameters {
+
+  override val io = new TileIO {
+    val success = Bool(OUTPUT)
+  }
+
+  val test = p(BuildGroundTest)(dcacheParams)
+
+  // Ports still to be connected; the data cache may add its PTW port below.
+  val ptwPorts = ListBuffer.empty ++= test.io.ptw
+  val memPorts = ListBuffer.empty ++= test.io.mem
+
+  if (nCached > 0) {
+    val dcache_io = HellaCache(p(DCacheKey))(dcacheParams)
+    val dcacheArb = Module(new HellaCacheArbiter(nCached)(dcacheParams))
+
+    // Each cache port of the test reaches the arbiter through its own
+    // SimpleHellaCacheIF.
+    dcacheArb.io.requestor.zip(test.io.cache).foreach {
+      case (arbPort, testPort) =>
+        val dcacheIF = Module(new SimpleHellaCacheIF()(dcacheParams))
+        dcacheIF.io.requestor <> testPort
+        arbPort <> dcacheIF.io.cache
+    }
+    dcache_io.cpu <> dcacheArb.io.mem
+    io.cached.head <> dcache_io.mem
+
+    // SimpleHellaCacheIF leaves invalidate_lr dangling, so we wire it to false
+    dcache_io.cpu.invalidate_lr := Bool(false)
+
+    ptwPorts += dcache_io.ptw
+  }
+
+  // A DummyPTW is only built when something needs one.
+  if (ptwPorts.nonEmpty) {
+    val ptw = Module(new DummyPTW(ptwPorts.size))
+    ptw.io.requestors <> ptwPorts
+  }
+
+  require(memPorts.size == io.uncached.size)
+  if (memPorts.nonEmpty) {
+    io.uncached <> memPorts
+  }
+
+  io.success := test.io.status.finished
+}
--- a/src/main/scala/groundtest/TraceGen.scala
+++ b/src/main/scala/groundtest/TraceGen.scala
@ -0,0 +1,629 @@
+// This file was originally written by Matthew Naylor, University of
+// Cambridge, based on code already present in the groundtest repo.
+//
+// This software was partly developed by the University of Cambridge
+// Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+// ("CTSRD"), as part of the DARPA CRASH research programme.
+// 
+// This software was partly developed by the University of Cambridge
+// Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
+// ("MRC2"), as part of the DARPA MRC research programme.
+// 
+// This software was partly developed by the University of Cambridge
+// Computer Laboratory as part of the Rigorous Engineering of
+// Mainstream Systems (REMS) project, funded by EPSRC grant
+// EP/K008528/1.
+
+package groundtest
+ 
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.devices.NTiles
+import junctions._
+import rocket._
+import scala.util.Random
+import cde.{Parameters, Field}
+
+// =======
+// Outline
+// =======
+
+// Generate memory traces that result from random sequences of memory
+// operations.  These traces can then be validated by an external
+// tool.  A trace is simply a sequence of memory requests and
+// responses.
+
+// ==========================
+// Trace-generator parameters
+// ==========================
+
+// Compile-time parameters:
+//
+//   * The id of the generator (there may be more than one in a
+//     multi-core system).
+//
+//   * The total number of generators present in the system.
+//
+//   * The desired number of requests to be sent by each generator.
+//
+//   * A bag of physical addresses, shared by all cores, from which an
+//     address can be drawn when generating a fresh request.
+//
+//   * A number of random 'extra addresses', local to each core, from
+//     which an address can be drawn when generating a fresh request.
+//     (This is a way to generate a wider range of addresses without having
+//     to repeatedly recompile with a different address bag.)
+
+// Configuration key for the bag of physical addresses from which the
+// generators draw the address of each fresh request.
+case object AddressBag extends Field[List[BigInt]]
+
+// Elaboration-time parameters shared by the trace-generator modules.
+// Everything is derived from the implicit Parameters object `p`, apart
+// from a handful of fixed constants.
+trait HasTraceGenParams {
+  implicit val p: Parameters
+  // Total number of generators in the system
+  val numGens             = p(NTiles)
+  // Bits needed to encode a generator id
+  val numBitsInId         = log2Up(numGens)
+  // Number of requests each generator sends before it is done
+  val numReqsPerGen       = p(GeneratorKey).maxRequests
+  // Timeout value for memory responses
+  val memRespTimeout      = 8192
+  // Word geometry
+  val numBitsInWord       = p(XLen)
+  val numBytesInWord      = numBitsInWord / 8
+  val numBitsInWordOffset = log2Up(numBytesInWord)
+  // Addresses shared by all generators
+  val addressBag          = p(AddressBag)
+  val addressBagLen       = addressBag.length
+  val logAddressBagLen    = log2Up(addressBagLen)
+  // Optional per-generator random extra addresses (disabled by default)
+  val genExtraAddrs       = false
+  val logNumExtraAddrs    = 1
+  val numExtraAddrs       = 1 << logNumExtraAddrs
+  // Maximum number of request tags in flight
+  val maxTags             = 8
+
+  // The word size must be a whole number of bytes
+  require(numBytesInWord * 8 == numBitsInWord,
+    s"XLen ($numBitsInWord) must be a multiple of 8")
+  // A random index of logAddressBagLen bits must always hit a valid entry.
+  // NOTE(review): if log2Up never returns less than 1, a single-entry bag
+  // is rejected here as well -- confirm against the Chisel version in use.
+  require((1 << logAddressBagLen) == addressBagLen,
+    s"AddressBag length ($addressBagLen) must be a power of two")
+}
+
+// ============
+// Trace format
+// ============
+
+// Let <id>   denote a generator id;
+//     <addr> denote an address (in hex);
+//     <data> denote a value that is stored at an address;
+//     <tag>  denote a unique request/response id;
+// and <time> denote an integer representing a cycle-count.
+
+// Each line in the trace takes one of the following formats.
+//
+//   <id>: load-req                <addr> #<tag> @<time>
+//   <id>: load-reserve-req        <addr> #<tag> @<time>
+//   <id>: store-req        <data> <addr> #<tag> @<time>
+//   <id>: store-cond-req   <data> <addr> #<tag> @<time>
+//   <id>: swap-req         <data> <addr> #<tag> @<time>
+//   <id>: resp             <data>        #<tag> @<time>
+//   <id>: fence-req                             @<time>
+//   <id>: fence-resp                            @<time>
+
+// NOTE: The (address, value) pair of every generated store is unique,
+// i.e. the same value is never written to the same address twice.
+// This aids trace validation.
+
+// ============
+// Random seeds
+// ============
+
+// The generator employs "uninitialised registers" to seed its PRNGs;
+// these are randomly initialised by the C++ backend.  This means that
+// the "-s" command-line argument to the Rocket emulator can be used
+// to generate new traces, or to replay specific ones.
+
+// ===========
+// Tag manager
+// ===========
+
+//  This is used to obtain unique tags for memory requests: each
+//  request must carry a unique tag since responses can come back
+//  out-of-order.
+//
+//  The tag manager can be viewed as a set of tags.  The user can take
+//  a tag out of the set (if there is one available) and later put it
+//  back.
+
+// A pool of 2^logNumTags tags.  The tag in `tagOut` is offered whenever
+// `available` is high; pulsing `take` claims it and advances to the next
+// candidate, pulsing `put` releases the tag given in `tagIn`.
+class TagMan(val logNumTags : Int) extends Module {
+  val io = new Bundle {
+    // High when the tag in 'tagOut' is free
+    val available = Bool(OUTPUT)
+    // The tag currently on offer
+    val tagOut    = UInt(OUTPUT, logNumTags)
+    // Pulse to claim the tag on offer
+    val take      = Bool(INPUT)
+    // Pulse to release a tag
+    val put       = Bool(INPUT)
+    // The tag being released
+    val tagIn     = UInt(INPUT, logNumTags)
+  }
+
+  // Size of the tag pool
+  val numTags = 1 << logNumTags
+
+  // One 'busy' flag per tag, all clear after reset
+  val inUse = List.fill(numTags)(Reg(init = Bool(false)))
+
+  // Pairs each tag value with its 'busy' flag
+  val inUseMap = (0 until numTags).map(tagId => UInt(tagId)).zip(inUse)
+
+  // Candidate tag; steps forward by one on every take
+  val nextTag = Reg(init = UInt(0, logNumTags))
+  io.tagOut := nextTag
+
+  // The candidate is available when its 'busy' flag is clear
+  io.available := ~MuxLookup(nextTag, Bool(true), inUseMap)
+
+  // Claiming: mark the candidate busy and move on to the next one
+  when (io.take) {
+    inUseMap.foreach { case (tag, busy) =>
+      when (tag === nextTag) { busy := Bool(true) }
+    }
+    nextTag := nextTag + UInt(1)
+  }
+
+  // Releasing: clear the flag of the returned tag.  This block comes
+  // last, so a release wins over a claim of the same tag in one cycle.
+  when (io.put) {
+    inUseMap.foreach { case (tag, busy) =>
+      when (tag === io.tagIn) { busy := Bool(false) }
+    }
+  }
+}
+
+// ===============
+// Trace generator
+// ===============
+
+// Issues a random sequence of memory operations (loads, stores, swaps,
+// LR/SC pairs, fences and delays) on a HellaCache port and prints every
+// request and response in the trace format described at the top of this
+// file.  `id` is the generator id printed at the start of each trace line.
+class TraceGenerator(id: Int)
+    (implicit p: Parameters) extends L1HellaCacheModule()(p)
+                                with HasTraceGenParams {
+  val io = new Bundle {
+    val finished = Bool(OUTPUT)
+    val timeout = Bool(OUTPUT)
+    val mem = new HellaCacheIO
+  }
+
+  // Watchdog: flags any request whose response does not arrive within
+  // memRespTimeout cycles (tracked per tag).
+  val reqTimer = Module(new Timer(memRespTimeout, maxTags))
+  reqTimer.io.start.valid := io.mem.req.fire()
+  reqTimer.io.start.bits := io.mem.req.bits.tag
+  reqTimer.io.stop.valid := io.mem.resp.valid
+  reqTimer.io.stop.bits := io.mem.resp.bits.tag
+
+  assert(!reqTimer.io.timeout.valid, s"TraceGen core ${id}: request timed out")
+
+  // Random addresses
+  // ----------------
+  
+  // Address bag, shared by all cores, taken from module parameters.
+  // In addition, there is a per-core random selection of extra addresses.
+
+  val addrHashMap = p(GlobalAddrMap)
+  val baseAddr = addrHashMap("mem").start + 0x01000000
+
+  val bagOfAddrs = addressBag.map(x => UInt(x, numBitsInWord))
+
+  val extraAddrs = (0 to numExtraAddrs-1).
+                   map(i => Reg(UInt(width = 16)))
+
+  // A random index into the address bag.
+
+  val randAddrBagIndex = LCG(logAddressBagLen)
+
+  // A random address from the address bag.
+
+  val addrBagIndices = (0 to addressBagLen-1).
+                    map(i => UInt(i, logAddressBagLen))
+  
+  val randAddrFromBag = MuxLookup(randAddrBagIndex, UInt(0),
+                          addrBagIndices.zip(bagOfAddrs))
+
+  // Random address from the address bag or the extra addresses.
+
+  val randAddr =
+        if (! genExtraAddrs) {
+          randAddrFromBag
+        }
+        else {
+          // A random index into the extra addresses.
+
+          val randExtraAddrIndex = LCG(logNumExtraAddrs)
+
+          // A random address from the extra addresses.
+
+          val extraAddrIndices = (0 to numExtraAddrs-1).
+                                 map(i => UInt(i, logNumExtraAddrs))
+  
+          val randAddrFromExtra = Cat(UInt(0),
+                MuxLookup(randExtraAddrIndex, UInt(0),
+                  extraAddrIndices.zip(extraAddrs)), UInt(0, 3))
+
+          Frequency(List(
+            (1, randAddrFromBag),
+            (1, randAddrFromExtra)))
+        }
+
+  // Random opcodes
+  // --------------
+ 
+  // Generate random opcodes for memory operations according to the
+  // given frequency distribution.
+
+  // Opcodes
+  val (opNop   :: opLoad :: opStore ::
+       opFence :: opLRSC :: opSwap  ::
+       opDelay :: Nil) = Enum(Bits(), 7)
+
+  // Distribution specified as a list of (frequency,value) pairs.
+  // NOTE: frequencies must sum to a power of two.
+
+  val randOp = Frequency(List(
+    (10, opLoad),
+    (10, opStore),
+    (4,  opFence),
+    (3,  opLRSC),
+    (3,  opSwap),
+    (2,  opDelay)))
+
+  // Request/response tags
+  // ---------------------
+
+  // Responses may come back out-of-order.  Each request and response
+  // therefore carries a unique identifier, referred to as a "tag", used
+  // to match each response with its corresponding request.  The tag is
+  // held in a 7-bit register (reqTag below).
+
+  // Create a tag manager giving out unique tags, log2Ceil(maxTags) bits each
+  val tagMan = Module(new TagMan(log2Ceil(maxTags)))
+
+  // Default inputs
+  tagMan.io.take  := Bool(false)
+  tagMan.io.put   := Bool(false)
+  tagMan.io.tagIn := UInt(0)
+
+  // Cycle counter
+  // -------------
+
+  // 32-bit cycle count used to record send-times of requests and
+  // receive-times of responses.
+
+  val cycleCount = Reg(init = UInt(0, 32))
+  cycleCount := cycleCount + UInt(1)
+
+  // Delay timer
+  // -----------
+
+  // Used to implement the delay operation and to insert random
+  // delays between load-reserve and store-conditional commands.
+
+  // A 16-bit timer is plenty
+  val delayTimer = Module(new DynamicTimer(16))
+
+  // Used to generate a random delay period
+  val randDelayBase = LCG16()
+
+  // Random delay period: usually small, occasionally big
+  val randDelay = Frequency(List(
+    (14, UInt(0, 13) ## randDelayBase(2, 0)),
+    (2,  UInt(0, 11) ## randDelayBase(5, 0))))
+
+  // Default inputs
+  delayTimer.io.start  := Bool(false)
+  delayTimer.io.period := randDelay
+  delayTimer.io.stop   := Bool(false)
+
+  // Operation dispatch
+  // ------------------
+
+  // Hardware thread id
+  val tid = UInt(id, numBitsInId)
+
+  // Request & response count
+  val reqCount  = Reg(init = UInt(0, 32))
+  val respCount = Reg(init = UInt(0, 32))
+
+  // Current operation being executed
+  val currentOp = Reg(init = opNop)
+
+  // If larger than 0, a multi-cycle operation is in progress.
+  // Value indicates stage of progress.
+  val opInProgress = Reg(init = UInt(0, 2))
+
+  // Indicate when a fresh request is to be sent
+  val sendFreshReq = Wire(Bool())
+  sendFreshReq := Bool(false)
+
+  // Used to generate unique data values
+  val nextData = Reg(init = UInt(1, numBitsInWord-numBitsInId))
+
+  // Registers for all the interesting parts of a request
+  val reqValid = Reg(init = Bool(false))
+  val reqAddr  = Reg(init = UInt(0, numBitsInWord))
+  val reqData  = Reg(init = UInt(0, numBitsInWord))
+  val reqCmd   = Reg(init = UInt(0, 5))
+  val reqTag   = Reg(init = UInt(0, 7))
+
+  // Condition on being allowed to send a fresh request: the request
+  // registers are free (or are being drained this cycle) and a tag is
+  // available.
+  val canSendFreshReq = (!reqValid || io.mem.req.fire()) &&
+                          tagMan.io.available
+
+  // Operation dispatch
+  when (reqCount < UInt(numReqsPerGen)) {
+
+    // No-op
+    when (currentOp === opNop) {
+      // Move on to a new operation
+      currentOp := randOp
+    }
+
+    // Fence
+    when (currentOp === opFence) {
+      when (opInProgress === UInt(0) && !reqValid) {
+        // Emit fence request
+        printf("%d: fence-req @%d\n", tid, cycleCount)
+        // Multi-cycle operation now in progress
+        opInProgress := UInt(1)
+      }
+      // Wait until all requests have had a response
+      .elsewhen (reqCount === respCount) {
+        // Emit fence response
+        printf("%d: fence-resp @%d\n", tid, cycleCount)
+        // Move on to a new operation
+        currentOp := randOp
+        // Operation finished
+        opInProgress := UInt(0)
+      }
+    }
+
+    // Delay
+    when (currentOp === opDelay) {
+      when (opInProgress === UInt(0)) {
+        // Start timer
+        delayTimer.io.start := Bool(true)
+        // Multi-cycle operation now in progress
+        opInProgress := UInt(1)
+      }
+      .elsewhen (delayTimer.io.timeout) {
+        // Move on to a new operation
+        currentOp := randOp
+        // Operation finished
+        opInProgress := UInt(0)
+      }
+    }
+  
+    // Load, store, or atomic swap
+    when (currentOp === opLoad  ||
+          currentOp === opStore ||
+          currentOp === opSwap) {
+      when (canSendFreshReq) {
+        // Set address
+        reqAddr := randAddr
+        // Set command
+        when (currentOp === opLoad) {
+          reqCmd := M_XRD
+        } .elsewhen (currentOp === opStore) {
+          reqCmd := M_XWR
+        } .elsewhen (currentOp === opSwap) {
+          reqCmd := M_XA_SWAP
+        }
+        // Send request
+        sendFreshReq := Bool(true)
+        // Move on to a new operation
+        currentOp := randOp
+      }
+    }
+  
+    // Load-reserve and store-conditional
+    // First issue an LR, then delay, then issue an SC
+    when (currentOp === opLRSC) {
+      // LR request has not yet been sent
+      when (opInProgress === UInt(0)) {
+        when (canSendFreshReq) {
+          // Set address and command
+          reqAddr := randAddr
+          reqCmd  := M_XLR
+          // Send request
+          sendFreshReq := Bool(true)
+          // Multi-cycle operation now in progress
+          opInProgress := UInt(1)
+        }
+      }
+      // LR request has been sent, start delay timer
+      when (opInProgress === UInt(1)) {
+        // Start timer
+        delayTimer.io.start := Bool(true)
+        // Indicate that delay has started
+        opInProgress := UInt(2)
+      }
+      // Delay in progress
+      when (opInProgress === UInt(2)) {
+        when (delayTimer.io.timeout) {
+          // Delay finished
+          opInProgress := UInt(3)
+        }
+      }
+      // Delay finished, send SC request
+      when (opInProgress === UInt(3)) {
+        when (canSendFreshReq) {
+          // Set command, but leave address
+          // i.e. use same address as LR did
+          reqCmd  := M_XSC
+          // Send request
+          sendFreshReq := Bool(true)
+          // Multi-cycle operation finished
+          opInProgress := UInt(0)
+          // Move on to a new operation
+          currentOp := randOp
+        }
+      }
+    }
+  }
+
+  // Sending of requests
+  // -------------------
+
+  when (sendFreshReq) {
+    // Grab a unique tag for the request
+    reqTag := tagMan.io.tagOut
+    tagMan.io.take := Bool(true)
+    // Fill in unique data: a per-generator counter with the generator id
+    // in the low bits, so no two generators produce the same value
+    reqData := Cat(nextData, tid)
+    nextData := nextData + UInt(1)
+    // Request is good to go!
+    reqValid := Bool(true)
+    // Increment request count
+    reqCount := reqCount + UInt(1)
+  }
+  .elsewhen (io.mem.req.fire()) {
+    // Request has been sent and there is no new request ready
+    reqValid := Bool(false)
+  }
+
+  // Wire up interface to memory
+  io.mem.req.valid     := reqValid
+  io.mem.req.bits.addr := reqAddr
+  io.mem.req.bits.data := reqData
+  io.mem.req.bits.typ  := UInt(log2Ceil(numBytesInWord))
+  io.mem.req.bits.cmd  := reqCmd
+  io.mem.req.bits.tag  := reqTag
+
+  // On cycle when request is actually sent, print it
+  when (io.mem.req.fire()) {
+    // Short-hand for address
+    val addr = io.mem.req.bits.addr
+    // Print thread id
+    printf("%d:", tid)
+    // Print command
+    when (reqCmd === M_XRD) {
+      printf(" load-req 0x%x", addr)
+    }
+    when (reqCmd === M_XLR) {
+      printf(" load-reserve-req 0x%x", addr)
+    }
+    when (reqCmd === M_XWR) {
+      printf(" store-req %d 0x%x", reqData, addr)
+    }
+    when (reqCmd === M_XSC) {
+      printf(" store-cond-req %d 0x%x", reqData, addr)
+    }
+    when (reqCmd === M_XA_SWAP) {
+      printf(" swap-req %d 0x%x", reqData, addr)
+    }
+    // Print tag
+    printf(" #%d", reqTag)
+    // Print time
+    printf(" @%d\n", cycleCount)
+  }
+
+  // Handling of responses
+  // ---------------------
+
+  // When a response is received
+  when (io.mem.resp.valid) {
+    // Put tag back in tag set
+    tagMan.io.tagIn := io.mem.resp.bits.tag
+    tagMan.io.put   := Bool(true)
+    // Print response
+    printf("%d: resp %d #%d @%d\n", tid,
+      io.mem.resp.bits.data, io.mem.resp.bits.tag, cycleCount)
+    // Increment response count
+    respCount := respCount + UInt(1)
+  }
+
+  // Termination condition
+  // ---------------------
+
+  // All requests have been sent and all responses received
+  val done = reqCount  === UInt(numReqsPerGen) &&
+             respCount === UInt(numReqsPerGen)
+
+  // High for exactly one cycle, when 'done' first becomes true
+  val donePulse = done && !Reg(init = Bool(false), next = done)
+
+  // Emit that this thread has completed
+  when (donePulse) {
+    printf(s"FINISHED ${numGens}\n")
+  }
+
+  // NOTE(review): 'finished' is tied low even though 'done' is computed
+  // above; completion is only reported through the FINISHED trace line.
+  // Confirm that this is intended before wiring 'done' here.
+  io.finished := Bool(false)
+  io.timeout := reqTimer.io.timeout.valid
+}
+
+// Background-traffic generator: issues uncached TileLink Get and Put
+// acquires to randomly chosen blocks of the address bag, with a random
+// pause after each one, until `finished` is asserted.  The Puts carry
+// zero data and an all-zero write mask (presumably so that no byte is
+// actually modified -- confirm against the TileLink Put semantics).
+class NoiseGenerator(implicit val p: Parameters) extends Module
+    with HasTraceGenParams
+    with HasTileLinkParameters {
+  val io = new Bundle {
+    val mem = new ClientUncachedTileLinkIO
+    val finished = Bool(INPUT)
+  }
+
+  // Free-list of transaction ids, one bit per id, set = free; all ids are
+  // free after reset.  The mask is idBits wide, so ids 0 until idBits are
+  // the ones handed out.
+  val idBits = tlClientXactIdBits
+  val xact_id_free = Reg(UInt(width = idBits), init = ~UInt(0, idBits))
+  // Lowest-numbered free id, one-hot
+  val xact_id_onehot = PriorityEncoderOH(xact_id_free)
+
+  // Pause after each acquire; both the timer and the random period are
+  // (re)started/advanced when an acquire fires.
+  val timer = Module(new DynamicTimer(8))
+  timer.io.start := io.mem.acquire.fire()
+  timer.io.period := LCG(8, io.mem.acquire.fire())
+  timer.io.stop := Bool(false)
+
+  val s_start :: s_send :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
+  val state = Reg(init = s_start)
+
+  // State machine.  Later assignments override earlier ones, so in s_wait
+  // 'finished' takes priority over the timer expiring.
+  when (state === s_start) { state := s_send }
+  when (io.mem.acquire.fire()) { state := s_wait }
+  when (state === s_wait) {
+    when (timer.io.timeout) { state := s_send }
+    when (io.finished) { state := s_done }
+  }
+
+  // Id used for the next acquire, and id returned by the current grant
+  val acq_id = OHToUInt(xact_id_onehot)
+  val gnt_id = io.mem.grant.bits.client_xact_id
+
+  // Clear the bit of an id when its acquire fires, set it again when the
+  // matching grant fires.
+  xact_id_free := (xact_id_free &
+                    ~Mux(io.mem.acquire.fire(), xact_id_onehot, UInt(0))) |
+                    Mux(io.mem.grant.fire(), UIntToOH(gnt_id), UInt(0))
+
+  // Random target: a block from the address bag (addresses converted to
+  // block addresses at elaboration time), a random beat, and a random
+  // choice between Get and Put.  Every PRNG advances when an acquire fires.
+  val tlBlockOffset = tlBeatAddrBits + tlByteAddrBits
+  val addr_idx = LCG(logAddressBagLen, io.mem.acquire.fire())
+  val addr_bag = Vec(addressBag.map(
+    addr => UInt(addr >> tlBlockOffset, tlBlockAddrBits)))
+  val addr_block = addr_bag(addr_idx)
+  val addr_beat = LCG(tlBeatAddrBits, io.mem.acquire.fire())
+  val acq_select = LCG(1, io.mem.acquire.fire())
+
+  val get_acquire = Get(
+    client_xact_id = acq_id,
+    addr_block = addr_block,
+    addr_beat = addr_beat)
+  val put_acquire = Put(
+    client_xact_id = acq_id,
+    addr_block = addr_block,
+    addr_beat = addr_beat,
+    data = UInt(0),
+    wmask = Some(UInt(0)))
+
+  // Send only in s_send and only while at least one id is free
+  io.mem.acquire.valid := (state === s_send) && xact_id_free.orR
+  io.mem.acquire.bits := Mux(acq_select(0), get_acquire, put_acquire)
+  // Accept a grant only for an id that is currently outstanding
+  io.mem.grant.ready := !xact_id_free(gnt_id)
+}
+
+// =======================
+// Trace-generator wrapper
+// =======================
+
+// Ground test that runs one TraceGenerator on the single cached port and,
+// if an uncached port is configured, one NoiseGenerator on it.
+class GroundTestTraceGenerator(implicit p: Parameters)
+    extends GroundTest()(p) with HasTraceGenParams {
+
+  require(io.mem.size <= 1,
+    s"trace generator supports at most one uncached port, got ${io.mem.size}")
+  require(io.cache.size == 1,
+    s"trace generator needs exactly one cached port, got ${io.cache.size}")
+
+  val traceGen = Module(new TraceGenerator(p(GroundTestId)))
+  io.cache.head <> traceGen.io.mem
+
+  if (io.mem.size == 1) {
+    // Background traffic, stopped once the trace generator has finished
+    val noiseGen = Module(new NoiseGenerator)
+    io.mem.head <> noiseGen.io.mem
+    noiseGen.io.finished := traceGen.io.finished
+  }
+
+  io.status.finished := traceGen.io.finished
+  io.status.timeout.valid := traceGen.io.timeout
+  io.status.timeout.bits := UInt(0)
+  // No error is ever reported; drive the code as well so that the output
+  // is not left unconnected (mirrors timeout.bits above).
+  io.status.error.valid := Bool(false)
+  io.status.error.bits := UInt(0)
+}
--- a/src/main/scala/groundtest/Util.scala
+++ b/src/main/scala/groundtest/Util.scala
@ -0,0 +1,194 @@
+package groundtest
+
+import Chisel._
+
+// =============
+// Dynamic timer
+// =============
+
+// Timer with a dynamically-settable period.
+
+// Countdown timer with a period that is sampled when it is started.
+// 'start' loads 'period' and activates the timer, 'stop' deactivates it,
+// and 'timeout' is high while the timer is active with the count at zero.
+class DynamicTimer(w: Int) extends Module {
+  val io = new Bundle {
+    val start   = Bool(INPUT)
+    val period  = UInt(INPUT, w)
+    val stop    = Bool(INPUT)
+    val timeout = Bool(OUTPUT)
+  }
+
+  val countdown = Reg(init = UInt(0, w))
+  val active = Reg(init = Bool(false))
+
+  // The count has run down to zero
+  val expired = countdown === UInt(0)
+
+  when (io.start) {
+    // Starting has priority over everything else
+    active := Bool(true)
+    countdown := io.period
+  } .otherwise {
+    when (io.stop || expired) {
+      // Stopped by the user, or run out
+      active := Bool(false)
+    } .elsewhen (active) {
+      countdown := countdown - UInt(1)
+    }
+  }
+
+  io.timeout := active && expired
+}
+
+// ============
+// LCG16 module
+// ============
+
+// A 16-bit pseudo-random generator based on a linear congruential
+// generator (LCG).  The state is stored in an uninitialised register.
+// When using the C++ backend, it is straightforward to arrange a
+// random initial value for each uninitialised register, effectively
+// seeding each LCG16 instance with a different seed.
+
+// Module form of the 16-bit generator.  While 'inc' is high the 32-bit
+// state advances by one LCG step per cycle; 'out' is bits 30..15 of the
+// state.
+class LCG16 extends Module { 
+  val io = new Bundle { 
+    val out = UInt(OUTPUT, 16) 
+    val inc = Bool(INPUT)
+  } 
+  // No reset value on purpose: the initial contents act as the seed
+  val state = Reg(UInt(width = 32))
+  when (io.inc) {
+    // state' = state * 1103515245 + 12345, stored back into 32 bits
+    state := state * UInt(1103515245, 32) + UInt(12345, 32)
+  }
+  // Output 16 of the upper state bits
+  io.out := state(30, 15)
+} 
+ 
+// ==========
+// LCG module
+// ==========
+
+// An n-bit pseudo-random generator made from many instances of a
+// 16-bit LCG.  Parameter 'w' must be larger than 0.
+
+// Module form of the w-bit generator: instantiates ceil(w / 16) LCG16s
+// that all advance on 'inc' and drives 'out' with their concatenated
+// outputs.
+class LCG(val w: Int) extends Module {
+  val io = new Bundle { 
+    val out = UInt(OUTPUT, w) 
+    val inc = Bool(INPUT)
+  } 
+  require(w > 0)
+  // Number of 16-bit generators needed to cover w bits
+  val numLCG16s : Int = (w+15)/16
+  val outs = Seq.fill(numLCG16s) { LCG16(io.inc) }
+  // The concatenation is numLCG16s * 16 bits wide; 'out' is w bits
+  io.out := Cat(outs)
+}
+
+// Factory: instantiates an LCG16, connects its 'inc' input (always
+// advancing by default) and returns its 16-bit output.
+object LCG16 {
+  def apply(inc: Bool = Bool(true)): UInt = {
+    val gen = Module(new LCG16)
+    gen.io.inc := inc
+    gen.io.out
+  }
+}
+
+// Factory: instantiates a w-bit LCG, connects its 'inc' input (always
+// advancing by default) and returns its output.
+object LCG {
+  def apply(w: Int, inc: Bool = Bool(true)): UInt = {
+    val gen = Module(new LCG(w))
+    gen.io.inc := inc
+    gen.io.out
+  }
+}
+
+// ======================
+// Frequency distribution
+// ======================
+
+// Given a list of (frequency, value) pairs, return a random value
+// according to the frequency distribution.  The sum of the
+// frequencies in the distribution must be a power of two.
+
+object Frequency {
+  // Builds hardware that outputs one of the given values, chosen afresh
+  // from a random number according to the relative frequencies.
+  //
+  //   dist: non-empty list of (frequency, value) pairs; the frequencies
+  //         must sum to a power of two.
+  //
+  // Returns a wire as wide as the first value in the list.
+  def apply(dist : List[(Int, Bits)]) : Bits = {
+    // Distribution must be non-empty
+    require(dist.nonEmpty, "Frequency: distribution must not be empty")
+
+    // Require that the frequencies sum to a power of two
+    val freqs = dist.map(_._1)
+    val total = freqs.sum
+    require(isPow2(total),
+      s"Frequency: frequencies sum to $total, which is not a power of two")
+
+    // First item in the distribution
+    val (firstFreq, firstVal) = dist.head
+
+    // Result wire, zero unless one of the conditions below matches
+    val result = Wire(Bits(width = firstVal.getWidth))
+    result := UInt(0)
+
+    // Random value.
+    // NOTE(review): if log2Up never returns less than 1, a distribution
+    // whose frequencies sum to 1 draws from two values and yields the
+    // zero default half of the time -- confirm if that case is ever used.
+    val randVal = LCG(log2Up(total))
+
+    // Exclusive upper bound of each entry's slice of the random range
+    val bounds = freqs.scanLeft(0)(_ + _).tail
+
+    // Pick return value: a when/elsewhen chain with one arm per entry
+    val firstArm = when (randVal < UInt(firstFreq)) { result := firstVal }
+    dist.zip(bounds).tail.foldLeft(firstArm) {
+      case (chain, ((_, value), bound)) =>
+        chain.elsewhen (randVal < UInt(bound)) { result := value }
+    }
+
+    result
+  }
+}
+
+// Merges several Valid interfaces into one: the result is valid when any
+// input is valid and carries the bits of the first valid input (the bits
+// of the first input when none is valid).
+object ValidMux {
+  // Varargs convenience form
+  def apply[T <: Data](v1: ValidIO[T], v2: ValidIO[T]*): ValidIO[T] =
+    apply(v1 +: v2.toSeq)
+
+  def apply[T <: Data](valids: Seq[ValidIO[T]]): ValidIO[T] = {
+    val first = valids.head
+    val merged = Wire(Valid(first.bits))
+    val anyValid = valids.map(_.valid).reduce(_ || _)
+    // (condition, value) pairs in priority order
+    val choices = valids.map(v => (v.valid, v.bits))
+    merged.valid := anyValid
+    merged.bits := MuxCase(first.bits, choices)
+    merged
+  }
+}
+
+// Combines the status bundles of several tests into one: finished when
+// all are finished; timeout and error are merged with ValidMux.
+object DebugCombiner {
+  def apply(debugs: Seq[GroundTestStatus]): GroundTestStatus = {
+    val combined = Wire(new GroundTestStatus)
+    val allFinished = debugs.map(_.finished).reduce(_ && _)
+    combined.finished := allFinished
+    combined.timeout  := ValidMux(debugs.map(_.timeout))
+    combined.error    := ValidMux(debugs.map(_.error))
+    combined
+  }
+}
+
+/**
+ * Takes in data on one decoupled interface and broadcasts it to
+ * N decoupled output interfaces
+ */
+/**
+ * Accepts an item on one decoupled input and delivers it to each of the
+ * n decoupled outputs in turn, starting with output 0.  A new item is
+ * only accepted once the previous one has gone to all outputs.
+ */
+class Broadcaster[T <: Data](typ: T, n: Int) extends Module {
+  val io = new Bundle {
+    val in = Decoupled(typ).flip
+    val out = Vec(n, Decoupled(typ))
+  }
+
+  require (n > 0)
+
+  if (n == 1) {
+    // A single output is just a pass-through
+    io.out.head <> io.in
+  } else {
+    // Index of the output currently being served
+    val idx = Reg(init = UInt(0, log2Up(n)))
+    // Copy of the item for outputs 1 until n
+    val save = Reg(typ)
+    val onFirst = idx === UInt(0)
+
+    // Output 0 takes the item straight from the input
+    io.in.ready := io.out.head.ready && onFirst
+    io.out.head.valid := onFirst && io.in.valid
+    io.out.head.bits := io.in.bits
+
+    // The remaining outputs are served from the saved copy
+    for (i <- 1 until n) {
+      val port = io.out(i)
+      port.valid := idx === UInt(i)
+      port.bits := save
+    }
+
+    when (io.in.fire()) { save := io.in.bits }
+
+    // Step to the next output, wrapping around after the last one
+    when (io.out(idx).fire()) {
+      idx := Mux(idx === UInt(n - 1), UInt(0), idx + UInt(1))
+    }
+  }
+}
+
+// Factory: instantiates a Broadcaster for the given decoupled source and
+// returns its n outputs.
+object Broadcaster {
+  def apply[T <: Data](in: DecoupledIO[T], n: Int): Vec[DecoupledIO[T]] = {
+    val fanout = Module(new Broadcaster(in.bits, n))
+    fanout.io.in <> in
+    fanout.io.out
+  }
+}