Merge remote-tracking branch 'junctions/master' into mono-repo

2016-07-28 11:27:47 -07:00
parent 373fd427dc 59d700bf66
commit a5b88d0bdc
17 changed files with 3246 additions and 0 deletions
--- a/junctions/.gitignore
+++ b/junctions/.gitignore
@@ -0,0 +1,17 @@
 *.class
 *.log
 # sbt specific
 .cache
 .history
 .lib/
 dist/*
 target/
 lib_managed/
 src_managed/
 project/boot/
 project/plugins/project/
 # Scala-IDE specific
 .scala_dependencies
 .worksheet
--- a/junctions/LICENSE
+++ b/junctions/LICENSE
@@ -0,0 +1,28 @@
 Copyright (c) 2015, The Regents of the University of California (Regents)
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 * Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.
 * Neither the name of junctions nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.
 IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
 SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
 OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
 BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
 HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
 MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
--- a/junctions/README.md
+++ b/junctions/README.md
@@ -0,0 +1,6 @@
 # junctions
 A repository for peripheral components and IO devices associated with the RocketChip project.
 To use these modules, include this repo as a git submodule within your chip repository and add it as a Project in your chip's build.scala. These components are only dependent on Chisel, i.e.
    lazy val junctions = project.dependsOn(chisel)
--- a/junctions/build.sbt
+++ b/junctions/build.sbt
@@ -0,0 +1,19 @@
 organization := "edu.berkeley.cs"
 version := "1.0"
 name := "junctions"
 scalaVersion := "2.11.6"
 // Provide managed dependencies on chisel and cde if -DchiselVersion="" and/or -DcdeVersion="" is supplied on the command line.
 libraryDependencies ++= (Seq("chisel","cde").map {
  dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten
 site.settings
 site.includeScaladoc()
 ghpages.settings
 git.remoteRepo := "git@github.com:ucb-bar/junctions.git"
--- a/junctions/project/plugins.sbt
+++ b/junctions/project/plugins.sbt
@@ -0,0 +1,5 @@
 resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven"
 addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.3")
 addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1")
--- a/junctions/src/main/scala/addrmap.scala
+++ b/junctions/src/main/scala/addrmap.scala
@@ -0,0 +1,150 @@
 // See LICENSE for license details.
 package junctions
 import Chisel._
 import cde.{Parameters, Field}
 import scala.collection.mutable.HashMap
 // Configuration keys looked up through cde Parameters. The first eight carry Int
 // values; GlobalAddrMap carries the AddrMap object itself.
 // NOTE(review): the meaning of each Int key is only implied by its name here —
 // confirm against the configuration that defines them.
 case object PAddrBits extends Field[Int]
 case object VAddrBits extends Field[Int]
 case object PgIdxBits extends Field[Int]
 case object PgLevels extends Field[Int]
 case object PgLevelBits extends Field[Int]
 case object ASIdBits extends Field[Int]
 case object PPNBits extends Field[Int]
 case object VPNBits extends Field[Int]
 case object GlobalAddrMap extends Field[AddrMap]
 // Mix-in that reads the address-related configuration keys from the implicit
 // Parameters `p` and exposes each one as a plain val.
 trait HasAddrMapParameters {
  // Supplied by the class that mixes this trait in.
  implicit val p: Parameters
  val paddrBits = p(PAddrBits)
  val vaddrBits = p(VAddrBits)
  val pgIdxBits = p(PgIdxBits)
  val ppnBits = p(PPNBits)
  val vpnBits = p(VPNBits)
  val pgLevels = p(PgLevels)
  val pgLevelBits = p(PgLevelBits)
  val asIdBits = p(ASIdBits)
  // Unlike the Int-valued entries above, this is the AddrMap object itself.
  val addrMap = p(GlobalAddrMap)
 }
 // Attributes of a memory region. `prot` is an integer permission value that
 // AddrMap.getProt converts to an AddrMapProt bundle (presumably a mask of the
 // R/W/X bits in object AddrMapProt — confirm); `cacheable` defaults to false.
 case class MemAttr(prot: Int, cacheable: Boolean = false)
 /** A region of the address space, described by a base address and an extent. */
 sealed abstract class MemRegion {
  /** Base address of the region. */
  def start: BigInt
  /** Extent of the region, in the same units as `start`. */
  def size: BigInt
  /** Number of slave ports this region accounts for. */
  def numSlaves: Int
  /** Protection / cacheability attributes. */
  def attr: MemAttr

  /** Hardware predicate that is true when `x` lies in [start, start + size). */
  def containsAddress(x: UInt) = {
    val lowerBound = UInt(start)
    val upperBound = UInt(start + size)
    lowerBound <= x && x < upperBound
  }
 }
 /** A region given only by its size. Its `start` is 0, which AddrMap treats as
  * "no fixed base": the region is placed at the next suitably aligned address.
  */
 case class MemSize(size: BigInt, attr: MemAttr) extends MemRegion {
  def start: BigInt = BigInt(0)
  def numSlaves: Int = 1
 }
 /** A region with an explicit base address and size; always a single slave. */
 case class MemRange(start: BigInt, size: BigInt, attr: MemAttr) extends MemRegion {
  def numSlaves: Int = 1
 }
 /** Integer encodings of the protection bits, plus their total width. */
 object AddrMapProt {
  // One bit per permission.
  val R: Int = 1 << 0
  val W: Int = 1 << 1
  val X: Int = 1 << 2
  // Common combinations of the bits above.
  val RW: Int = R | W
  val RX: Int = R | X
  val RWX: Int = R | W | X
  // Number of protection bits.
  val SZ: Int = 3
 }
 // Hardware bundle with one Bool per permission. AddrMap.getProt builds it with
 // fromBits from an AddrMapProt.SZ-bit value, so the field order here determines
 // which bit feeds which flag — do not reorder.
 // NOTE(review): presumably x/w/r line up with the X/W/R bit values in the
 // companion object; confirm against Chisel's Bundle bit ordering.
 class AddrMapProt extends Bundle {
  val x = Bool()
  val w = Bool()
  val r = Bool()
 }
 // A named entry of an AddrMap; `region` may be a leaf region or a nested AddrMap.
 case class AddrMapEntry(name: String, region: MemRegion)
 /** Factory for building an [[AddrMap]] from a varargs list of entries. */
 object AddrMap {
  /** Creates an address map from `elems`, using the default base address. */
  def apply(elems: AddrMapEntry*): AddrMap =
    new AddrMap(entriesIn = elems)
 }
 /** A hierarchical address map.
  *
  * On construction the input entries are laid out starting at `start`:
  *  - an entry whose region has a non-zero `start` is kept at that address; it
  *    must not lie below the running base and must be aligned to its size
  *    rounded up to a power of two;
  *  - an entry whose region has `start == 0` is placed at the running base
  *    rounded up to a multiple of its size.
  * Nested AddrMaps are rebuilt at their assigned base, and their contents are
  * also reachable under "parent:child" names.
  *
  * @param entriesIn entries in the order they should be laid out
  * @param start     base address of this map
  */
 class AddrMap(entriesIn: Seq[AddrMapEntry], val start: BigInt = BigInt(0)) extends MemRegion {
  def isEmpty = entries.isEmpty
  def length = entries.size
  /** Total number of slave ports across all (rebased) entries. */
  def numSlaves = entries.map(_.region.numSlaves).sum
  /** Not defined for a map as a whole; evaluating it throws NotImplementedError. */
  def attr = ???

  // Both tables are filled in while `size`/`entries` below are being computed,
  // so they must be declared before that initializer runs.
  // Leaf name (possibly "a:b" qualified) -> slave port index.
  private val slavePorts = HashMap[String, Int]()
  // Name (possibly "a:b" qualified) -> rebased region (leaf or sub-map).
  private val mapping = HashMap[String, MemRegion]()

  val (size: BigInt, entries: Seq[AddrMapEntry]) = {
    var ind = 0       // next slave port index to hand out
    var base = start  // running allocation address
    val rebasedEntries = collection.mutable.ArrayBuffer[AddrMapEntry]()
    for (AddrMapEntry(name, r) <- entriesIn) {
      if (r.start != 0) {
        // Explicitly placed region: validate it and move the running base there.
        val align = BigInt(1) << log2Ceil(r.size)
        require(r.start >= base, s"region $name base address 0x${r.start.toString(16)} overlaps previous base 0x${base.toString(16)}")
        require(r.start % align == 0, s"region $name base address 0x${r.start.toString(16)} not aligned to 0x${align.toString(16)}")
        base = r.start
      } else {
        // Unplaced region: round the running base up to a multiple of its size.
        base = (base + r.size - 1) / r.size * r.size
      }

      r match {
        case r: AddrMap =>
          // Rebuild the nested map at its assigned base and import its tables
          // under qualified names; its port indices are offset by `ind`.
          val subMap = new AddrMap(r.entries, base)
          rebasedEntries += AddrMapEntry(name, subMap)
          mapping += name -> subMap
          mapping ++= subMap.mapping.map { case (k, v) => s"$name:$k" -> v }
          slavePorts ++= subMap.slavePorts.map { case (k, v) => s"$name:$k" -> (ind + v) }
        case _ =>
          val e = MemRange(base, r.size, r.attr)
          rebasedEntries += AddrMapEntry(name, e)
          mapping += name -> e
          slavePorts += name -> ind
      }

      ind += r.numSlaves
      base += r.size
    }
    (base - start, rebasedEntries)
  }

  /** All leaf regions with their qualified names, indexed by slave port number. */
  val flatten: Seq[(String, MemRange)] = {
    val arr = new Array[(String, MemRange)](slavePorts.size)
    for ((name, port) <- slavePorts)
      arr(port) = (name, mapping(name).asInstanceOf[MemRange])
    arr
  }

  /** Looks up a region by (possibly qualified) name; throws if the name is unknown. */
  def apply(name: String): MemRegion = mapping(name)
  /** Slave port index of the named leaf region; throws if the name is unknown. */
  def port(name: String): Int = slavePorts(name)
  /** The named nested map; throws if the name is unknown or not an AddrMap. */
  def subMap(name: String): AddrMap = mapping(name).asInstanceOf[AddrMap]
  /** Hardware predicate: `addr` lies inside the named region. */
  def isInRegion(name: String, addr: UInt): Bool = mapping(name).containsAddress(addr)

  /** Hardware predicate: `addr` lies inside some cacheable leaf region. */
  def isCacheable(addr: UInt): Bool = {
    flatten.filter(_._2.attr.cacheable).map { case (_, region) =>
      region.containsAddress(addr)
    }.foldLeft(Bool(false))(_ || _)
  }

  /** Hardware predicate: `addr` lies inside some leaf region. */
  def isValid(addr: UInt): Bool = {
    flatten.map { case (_, region) =>
      region.containsAddress(addr)
    }.foldLeft(Bool(false))(_ || _)
  }

  /** Protection bits of the leaf region containing `addr`; all bits are clear
    * when no region matches.
    */
  def getProt(addr: UInt): AddrMapProt = {
    val protForRegion = flatten.map { case (_, region) =>
      Mux(region.containsAddress(addr), UInt(region.attr.prot, AddrMapProt.SZ), UInt(0))
    }
    // Fold from an explicit zero (as isCacheable/isValid do) instead of reduce,
    // so an address map with no leaf regions elaborates instead of throwing.
    new AddrMapProt().fromBits(protForRegion.foldLeft(UInt(0, AddrMapProt.SZ))(_ | _))
  }
 }
--- a/junctions/src/main/scala/atos.scala
+++ b/junctions/src/main/scala/atos.scala
@@ -0,0 +1,333 @@
 package junctions
 import Chisel._
 import scala.math.max
 import cde.{Parameters, Field}
 // Derived widths and bit offsets for the ATOS request/response encoding of
 // NASTI channels. All values are computed from the NASTI parameters.
 trait HasAtosParameters extends HasNastiParameters {
  // round up to a multiple of 32
  def roundup(n: Int) = 32 * ((n - 1) / 32 + 1)
  // The request payload must hold either a write-data beat
  // (id + data + strobe + 1 bit for last) or an address beat
  // (id + burst + size + len + addr), whichever is wider.
  val atosUnionBits = max(
    nastiXIdBits + nastiXDataBits + nastiWStrobeBits + 1,
    nastiXIdBits + nastiXBurstBits +
    nastiXSizeBits + nastiXLenBits + nastiXAddrBits)
  val atosIdBits = nastiXIdBits
  val atosTypBits = 2
  val atosRespBits = nastiXRespBits
  val atosDataBits = nastiXDataBits
  // Address-payload layout, from bit 0 upward: id, addr, len, size, burst.
  val atosAddrOffset = atosIdBits
  val atosLenOffset = atosIdBits + nastiXAddrBits
  val atosSizeOffset = atosLenOffset + nastiXLenBits
  val atosBurstOffset = atosSizeOffset + nastiXSizeBits
  // Data-payload layout, from bit 0 upward: id, data, strobe, last.
  val atosDataOffset = atosIdBits
  val atosStrobeOffset = nastiXDataBits + atosIdBits
  val atosLastOffset = atosStrobeOffset + nastiWStrobeBits
  // Message sizes are padded up to a whole number of 32-bit words.
  val atosRequestBits = roundup(atosTypBits + atosUnionBits)
  val atosResponseBits = roundup(atosTypBits + atosIdBits + atosRespBits + atosDataBits + 1)
  val atosRequestBytes = atosRequestBits / 8
  val atosResponseBytes = atosResponseBits / 8
  val atosRequestWords = atosRequestBytes / 4
  val atosResponseWords = atosResponseBytes / 4
 }
 // Base class for ATOS hardware modules: a Module with the ATOS parameters in scope.
 abstract class AtosModule(implicit val p: Parameters)
  extends Module with HasAtosParameters
 // Base class for ATOS bundles: a ParameterizedBundle with the ATOS parameters in scope.
 abstract class AtosBundle(implicit val p: Parameters)
  extends ParameterizedBundle()(p) with HasAtosParameters
 /** Type tags and constructors for [[AtosRequest]]. */
 object AtosRequest {
  // 2-bit request type tags.
  def arType = UInt("b00")
  def awType = UInt("b01")
  def wType  = UInt("b10")

  /** Builds a request wire from a raw type tag and payload. */
  def apply(typ: UInt, union: UInt)(implicit p: Parameters): AtosRequest = {
    val request = Wire(new AtosRequest)
    request.typ := typ
    request.union := union
    request
  }

  /** Packs a read-address beat; `id` ends up in the low bits of the payload. */
  def apply(ar: NastiReadAddressChannel)(implicit p: Parameters): AtosRequest =
    apply(
      typ = arType,
      union = Cat(ar.burst, ar.size, ar.len, ar.addr, ar.id))

  /** Packs a write-address beat, using the same layout as a read-address beat. */
  def apply(aw: NastiWriteAddressChannel)(implicit p: Parameters): AtosRequest =
    apply(
      typ = awType,
      union = Cat(aw.burst, aw.size, aw.len, aw.addr, aw.id))

  /** Packs a write-data beat; `id` ends up in the low bits of the payload. */
  def apply(w: NastiWriteDataChannel)(implicit p: Parameters): AtosRequest =
    apply(
      typ = wType,
      union = Cat(w.last, w.strb, w.data, w.id))
 }
 /** One ATOS request: a type tag plus a payload (`union`) whose interpretation
  * depends on the tag. The accessors below slice the payload; which of them are
  * meaningful depends on `typ` (see has_addr / has_data).
  */
 class AtosRequest(implicit p: Parameters)
    extends AtosBundle()(p) with Serializable {
  val typ = UInt(width = atosTypBits)
  val union = UInt(width = atosUnionBits)

  // ---- Address-payload views -------------------------------------------
  // burst takes everything from its offset up to the top of the payload.
  def burst(dummy: Int = 0) = union(atosUnionBits - 1, atosBurstOffset)
  def size(dummy: Int = 0) = union(atosBurstOffset - 1, atosSizeOffset)
  def len(dummy: Int = 0) = union(atosSizeOffset - 1, atosLenOffset)
  def addr(dummy: Int = 0) = union(atosLenOffset - 1, atosAddrOffset)

  // ---- Shared view: id occupies the low bits of every payload ----------
  def id(dummy: Int = 0) = union(atosIdBits - 1, 0)

  // ---- Data-payload views ----------------------------------------------
  def data(dummy: Int = 0) = union(atosStrobeOffset - 1, atosDataOffset)
  def strb(dummy: Int = 0) = union(atosLastOffset - 1, atosStrobeOffset)
  def last(dummy: Int = 0) = union(atosLastOffset)

  /** True for read-address and write-address requests. */
  def has_addr(dummy: Int = 0) =
    typ === AtosRequest.arType || typ === AtosRequest.awType

  /** True for write-data requests. */
  def has_data(dummy: Int = 0) = typ === AtosRequest.wType

  /** True for a read-address request or the final beat of write data. */
  def is_last(dummy: Int = 0) = {
    val isRead = typ === AtosRequest.arType
    val isFinalWrite = typ === AtosRequest.wType && last()
    isRead || isFinalWrite
  }

  /** Padded size of a request in bits. */
  def nbits: Int = atosRequestBits

  /** len + 1 for a read-address request, 1 for a write-address request,
    * and 0 for any other type.
    */
  def resp_len(dummy: Int = 0) =
    MuxLookup(typ, UInt(0), Seq(
      AtosRequest.arType -> (len() + UInt(1)),
      AtosRequest.awType -> UInt(1)))
 }
 /** Type tags and constructors for [[AtosResponse]]. */
 object AtosResponse {
  // Response type tags.
  def rType = UInt("b00")
  def bType = UInt("b01")

  /** Builds a response wire from its individual fields. */
  def apply(typ: UInt, id: UInt, resp: UInt, data: UInt, last: Bool)
      (implicit p: Parameters): AtosResponse = {
    val response = Wire(new AtosResponse)
    response.typ := typ
    response.id := id
    response.resp := resp
    response.data := data
    response.last := last
    response
  }

  /** Wraps a read-data beat. */
  def apply(r: NastiReadDataChannel)(implicit p: Parameters): AtosResponse =
    apply(typ = rType, id = r.id, resp = r.resp, data = r.data, last = r.last)

  /** Wraps a write response; data is zero and last is false. */
  def apply(b: NastiWriteResponseChannel)(implicit p: Parameters): AtosResponse =
    apply(typ = bType, id = b.id, resp = b.resp, data = UInt(0), last = Bool(false))
 }
 // One ATOS response: either a read-data beat (rType) or a write response (bType).
 // NOTE(review): instances are passed through Serializer/Deserializer elsewhere in
 // this file, so the field order below presumably fixes the wire format — do not
 // reorder without checking both ends.
 class AtosResponse(implicit p: Parameters)
    extends AtosBundle()(p) with Serializable {
  val typ = UInt(width = atosTypBits)
  val id = UInt(width = atosIdBits)
  val resp = UInt(width = atosRespBits)
  val last = Bool()
  val data = UInt(width = atosDataBits)
  // Only read responses carry data.
  def has_data(dummy: Int = 0) = typ === AtosResponse.rType
  // Any non-read response counts as last; a read response is last when `last` is set.
  def is_last(dummy: Int = 0) = !has_data() || last
  // Padded size of a response in bits.
  def nbits: Int = atosResponseBits
 }
 // Client-side ATOS port: requests flow out, responses flow in (flipped).
 class AtosIO(implicit p: Parameters) extends AtosBundle()(p) {
  val req = Decoupled(new AtosRequest)
  val resp = Decoupled(new AtosResponse).flip
 }
 // Merges the three NASTI request channels (AR, AW, W) into a single AtosRequest
 // stream. AR has priority over AW; once an AW is accepted only W beats are
 // forwarded until the last beat of that write has gone through.
 class AtosRequestEncoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val ar = Decoupled(new NastiReadAddressChannel).flip
    val aw = Decoupled(new NastiWriteAddressChannel).flip
    val w  = Decoupled(new NastiWriteDataChannel).flip
    val req = Decoupled(new AtosRequest)
  }
  // True while a write burst is in progress (after AW fired, until last W fires).
  val writing = Reg(init = Bool(false))
  io.ar.ready := !writing && io.req.ready
  // AW is only accepted when no AR is pending, giving AR priority.
  io.aw.ready := !writing && !io.ar.valid && io.req.ready
  io.w.ready  := writing && io.req.ready
  io.req.valid := Mux(writing, io.w.valid, io.ar.valid || io.aw.valid)
  io.req.bits := Mux(writing, AtosRequest(io.w.bits),
    Mux(io.ar.valid, AtosRequest(io.ar.bits), AtosRequest(io.aw.bits)))
  when (io.aw.fire()) { writing := Bool(true) }
  when (io.w.fire() && io.w.bits.last) { writing := Bool(false) }
 }
 // Splits an AtosResponse stream back into the NASTI B and R channels,
 // steering each response by its type tag.
 class AtosResponseDecoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val resp = Decoupled(new AtosResponse).flip
    val b = Decoupled(new NastiWriteResponseChannel)
    val r = Decoupled(new NastiReadDataChannel)
  }
  val is_b = io.resp.bits.typ === AtosResponse.bType
  val is_r = io.resp.bits.typ === AtosResponse.rType
  io.b.valid := io.resp.valid && is_b
  io.b.bits := NastiWriteResponseChannel(
    id = io.resp.bits.id,
    resp = io.resp.bits.resp)
  io.r.valid := io.resp.valid && is_r
  io.r.bits := NastiReadDataChannel(
    id = io.resp.bits.id,
    data = io.resp.bits.data,
    last = io.resp.bits.last,
    resp = io.resp.bits.resp)
  // The response is consumed only when the channel selected by its type is ready;
  // a response of any other type is never accepted.
  io.resp.ready := (is_b && io.b.ready) || (is_r && io.r.ready)
 }
 // NASTI-to-ATOS bridge on the client side: accepts a NASTI port (flipped) and
 // drives an ATOS port, using a request encoder and a response decoder.
 class AtosClientConverter(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val atos = new AtosIO
  }
  // Request path: NASTI AR/AW/W -> ATOS req.
  val req_enc = Module(new AtosRequestEncoder)
  req_enc.io.ar <> io.nasti.ar
  req_enc.io.aw <> io.nasti.aw
  req_enc.io.w  <> io.nasti.w
  io.atos.req <> req_enc.io.req
  // Response path: ATOS resp -> NASTI B/R.
  val resp_dec = Module(new AtosResponseDecoder)
  resp_dec.io.resp <> io.atos.resp
  io.nasti.b <> resp_dec.io.b
  io.nasti.r <> resp_dec.io.r
 }
 // Splits an AtosRequest stream back into the NASTI AR, AW and W channels,
 // steering each request by its type tag.
 class AtosRequestDecoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val req = Decoupled(new AtosRequest).flip
    val ar = Decoupled(new NastiReadAddressChannel)
    val aw = Decoupled(new NastiWriteAddressChannel)
    val w  = Decoupled(new NastiWriteDataChannel)
  }
  val is_ar = io.req.bits.typ === AtosRequest.arType
  val is_aw = io.req.bits.typ === AtosRequest.awType
  val is_w  = io.req.bits.typ === AtosRequest.wType
  // All three channels are driven from the same payload slices; only the
  // channel whose valid is raised carries meaningful fields.
  io.ar.valid := io.req.valid && is_ar
  io.ar.bits := NastiReadAddressChannel(
    id = io.req.bits.id(),
    addr = io.req.bits.addr(),
    size = io.req.bits.size(),
    len = io.req.bits.len(),
    burst = io.req.bits.burst())
  io.aw.valid := io.req.valid && is_aw
  io.aw.bits := NastiWriteAddressChannel(
    id = io.req.bits.id(),
    addr = io.req.bits.addr(),
    size = io.req.bits.size(),
    len = io.req.bits.len(),
    burst = io.req.bits.burst())
  io.w.valid := io.req.valid && is_w
  io.w.bits := NastiWriteDataChannel(
    id = io.req.bits.id(),
    data = io.req.bits.data(),
    strb = Some(io.req.bits.strb()),
    last = io.req.bits.last())
  // The request is consumed only when the channel selected by its type is ready.
  io.req.ready := (io.ar.ready && is_ar) ||
                  (io.aw.ready && is_aw) ||
                  (io.w.ready  && is_w)
 }
 // Merges the NASTI B and R channels into a single AtosResponse stream.
 // R has priority over B, and B is held off while a read burst is part-way
 // through so that the beats of one burst are not interleaved with a B.
 class AtosResponseEncoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val b = Decoupled(new NastiWriteResponseChannel).flip
    val r = Decoupled(new NastiReadDataChannel).flip
    val resp = Decoupled(new AtosResponse)
  }
  // True between a non-last R beat being accepted and the last R beat being accepted.
  val locked = Reg(init = Bool(false))
  io.resp.valid := (io.b.valid && !locked) || io.r.valid
  io.resp.bits := Mux(io.r.valid,
    AtosResponse(io.r.bits), AtosResponse(io.b.bits))
  io.b.ready := !locked && !io.r.valid && io.resp.ready
  io.r.ready := io.resp.ready
  when (io.r.fire() && !io.r.bits.last) { locked := Bool(true) }
  when (io.r.fire() && io.r.bits.last) { locked := Bool(false) }
 }
 // ATOS-to-NASTI bridge on the manager side: accepts an ATOS port (flipped) and
 // drives a NASTI port, using a request decoder and a response encoder.
 class AtosManagerConverter(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val atos = (new AtosIO).flip
    val nasti = new NastiIO
  }
  val req_dec = Module(new AtosRequestDecoder)
  val resp_enc = Module(new AtosResponseEncoder)
  req_dec.io.req <> io.atos.req
  io.atos.resp <> resp_enc.io.resp
  // Request path: ATOS req -> NASTI AR/AW/W.
  io.nasti.ar <> req_dec.io.ar
  io.nasti.aw <> req_dec.io.aw
  io.nasti.w  <> req_dec.io.w
  // Response path: NASTI B/R -> ATOS resp.
  resp_enc.io.b <> io.nasti.b
  resp_enc.io.r <> io.nasti.r
 }
 // Narrow ATOS link: requests and responses are carried as w-bit words.
 // NOTE(review): clk and clk_edge are declared as outputs here but are not
 // assigned by AtosSerdes/AtosDesser in this file — presumably driven by the
 // enclosing design; confirm.
 class AtosSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req = Decoupled(Bits(width = w))
  val resp = Decoupled(Bits(width = w)).flip
  val clk = Bool(OUTPUT)
  val clk_edge = Bool(OUTPUT)
  // Explicit cloneType so the non-implicit constructor argument `w` is carried over.
  override def cloneType = new AtosSerializedIO(w)(p).asInstanceOf[this.type]
 }
 // Client-side width adapter: serializes wide ATOS requests onto the w-bit link
 // and deserializes w-bit words from the link into wide ATOS responses.
 // Serializer/Deserializer are defined elsewhere in the project.
 class AtosSerdes(w: Int)(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val wide = (new AtosIO).flip
    val narrow = new AtosSerializedIO(w)
  }
  val ser = Module(new Serializer(w, new AtosRequest))
  ser.io.in <> io.wide.req
  io.narrow.req <> ser.io.out
  val des = Module(new Deserializer(w, new AtosResponse))
  des.io.in <> io.narrow.resp
  io.wide.resp <> des.io.out
 }
 // Manager-side counterpart of AtosSerdes: deserializes w-bit words from the link
 // into wide ATOS requests and serializes wide ATOS responses onto the link.
 class AtosDesser(w: Int)(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val narrow = new AtosSerializedIO(w).flip
    val wide = new AtosIO
  }
  val des = Module(new Deserializer(w, new AtosRequest))
  des.io.in <> io.narrow.req
  io.wide.req <> des.io.out
  val ser = Module(new Serializer(w, new AtosResponse))
  ser.io.in <> io.wide.resp
  io.narrow.resp <> ser.io.out
 }
--- a/junctions/src/main/scala/crossing.scala
+++ b/junctions/src/main/scala/crossing.scala
@@ -0,0 +1,150 @@
 package junctions
 import Chisel._
 // IO bundle for a clock-domain crossing of a Decoupled stream of `gen`.
 // When enq_sync (resp. deq_sync) is true the bundle also carries an explicit
 // clock and reset input for that side; otherwise those ports are absent (None).
 class Crossing[T <: Data](gen: T, enq_sync: Boolean, deq_sync: Boolean) extends Bundle {
    val enq = Decoupled(gen).flip()
    val deq = Decoupled(gen)
    val enq_clock = if (enq_sync) Some(Clock(INPUT)) else None
    val deq_clock = if (deq_sync) Some(Clock(INPUT)) else None
    val enq_reset = if (enq_sync) Some(Bool(INPUT))  else None
    val deq_reset = if (deq_sync) Some(Bool(INPUT))  else None
 }
 // Passes 'in' through a register chain and reports edges:
 // the output is 1 for one cycle after any edge (rising or falling) of 'in'.
 object AsyncHandshakePulse {
  def apply(in: Bool, sync: Int): Bool = {
    // Chain of sync+1 registers, all reset to false. 'in' is captured at the
    // highest index and shifts toward index 0 by one stage per cycle.
    val stages = RegInit(Vec.fill(sync+1){Bool(false)})
    stages.last := in
    for (i <- 0 until sync) {
      stages(i) := stages(i + 1)
    }
    // The two oldest stages differ exactly when an edge has just reached them.
    stages(0) =/= stages(1)
  }
 }
 // Source (enqueue) half of a toggle-based handshake crossing. It runs on the
 // supplied clock/reset. Each accepted element is latched into `bits` and
 // announced by toggling `push`; the source then stays not-ready until an edge
 // is seen on `pop`.
 class AsyncHandshakeSource[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool)
    extends Module(_clock = clock, _reset = reset) {
  val io = new Bundle {
    // These come from the source clock domain
    val enq  = Decoupled(gen).flip()
    // These cross to the sink clock domain
    val bits = gen.cloneType.asOutput
    val push = Bool(OUTPUT)
    val pop  = Bool(INPUT)
  }
  // ready: able to accept a new element (true out of reset).
  val ready = RegInit(Bool(true))
  // bits: holding register; only rewritten when a new element is accepted.
  val bits = Reg(gen)
  // push: toggles once per accepted element.
  val push = RegInit(Bool(false))
  io.enq.ready := ready
  io.bits := bits
  io.push := push
  // One-cycle pulse for every edge on io.pop, after `sync` register stages.
  val pop = AsyncHandshakePulse(io.pop, sync)
  // A pop is only expected while an element is outstanding.
  assert (!pop || !ready)
  when (pop) {
    ready := Bool(true)
  }
  when (io.enq.fire()) {
    ready := Bool(false)
    bits := io.enq.bits
    push := !push
  }
 }
 // Sink (dequeue) half of a toggle-based handshake crossing. It runs on the
 // supplied clock/reset. An edge on `push` captures io.bits and marks the output
 // valid; when the element is dequeued, `pop` is toggled.
 class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool) 
    extends Module(_clock = clock, _reset = reset) {
  val io = new Bundle {
    // These cross to the source clock domain
    val bits = gen.cloneType.asInput
    val push = Bool(INPUT)
    val pop  = Bool(OUTPUT)
    // These go to the sink clock domain
    val deq = Decoupled(gen)
  }
  // valid: an element is held in `bits` and not yet dequeued.
  val valid = RegInit(Bool(false))
  val bits  = Reg(gen)
  // pop: toggles once per dequeued element.
  val pop   = RegInit(Bool(false))
  io.deq.valid := valid
  io.deq.bits  := bits
  io.pop := pop
  // One-cycle pulse for every edge on io.push, after `sync` register stages.
  val push = AsyncHandshakePulse(io.push, sync)
  // A push is only expected while the sink is empty.
  assert (!push || !valid)
  when (push) {
    valid := Bool(true)
    bits  := io.bits
  }
  when (io.deq.fire()) {
    valid := Bool(false)
    pop := !pop
  }
 }
 // Complete handshake crossing: a source in the enq clock domain wired to a sink
 // in the deq clock domain. Both clock/reset pairs come in through the IO bundle.
 class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module {
  val io = new Crossing(gen, true, true)
  // At least two synchronizer stages are required.
  require (sync >= 2)
  val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock.get, io.enq_reset.get))
  val sink   = Module(new AsyncHandshakeSink  (gen, sync, io.deq_clock.get, io.deq_reset.get))
  source.io.enq <> io.enq
  io.deq <> sink.io.deq
  // The data wires connect straight across; only push/pop pass through the
  // register chains inside each half.
  sink.io.bits := source.io.bits
  sink.io.push := source.io.push
  source.io.pop := sink.io.pop
 }
 // Crossing whose enqueue side uses this module's own clock/reset and whose
 // dequeue side uses the clock/reset supplied on the IO bundle.
 // `depth` is accepted but not used yet: a handshake crossing is always built
 // (see the !!! note below).
 class AsyncDecoupledTo[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module {
  val io = new Crossing(gen, false, true)
  // !!! if depth == 0 { use Handshake } else { use AsyncFIFO }
  val crossing = Module(new AsyncHandshake(gen, sync)).io
  crossing.enq_clock.get := clock
  crossing.enq_reset.get := reset
  crossing.enq <> io.enq
  crossing.deq_clock.get := io.deq_clock.get
  crossing.deq_reset.get := io.deq_reset.get
  io.deq <> crossing.deq
 }
 /** Convenience constructor for [[AsyncDecoupledTo]]. */
 object AsyncDecoupledTo {
  /** Moves `source` (in the caller's clock domain) into the `to_clock` domain.
    *
    * @return the dequeue port, valid in the `to_clock` domain
    */
  def apply[T <: Data](
      to_clock: Clock,
      to_reset: Bool,
      source: DecoupledIO[T],
      depth: Int = 0,
      sync: Int = 2): DecoupledIO[T] = {
    val crosser = Module(new AsyncDecoupledTo(source.bits, depth, sync))
    // Far-side clock and reset.
    crosser.io.deq_clock.get := to_clock
    crosser.io.deq_reset.get := to_reset
    // Near side is fed from the caller's stream.
    crosser.io.enq <> source
    crosser.io.deq
  }
 }
 // Crossing whose enqueue side uses the clock/reset supplied on the IO bundle and
 // whose dequeue side uses this module's own clock/reset.
 // `depth` is accepted but not used yet: a handshake crossing is always built
 // (see the !!! note below).
 class AsyncDecoupledFrom[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module {
  val io = new Crossing(gen, true, false)
  // !!! if depth == 0 { use Handshake } else { use AsyncFIFO }
  val crossing = Module(new AsyncHandshake(gen, sync)).io
  crossing.enq_clock.get := io.enq_clock.get
  crossing.enq_reset.get := io.enq_reset.get
  crossing.enq <> io.enq
  crossing.deq_clock.get := clock
  crossing.deq_reset.get := reset
  io.deq <> crossing.deq
 }
 /** Convenience constructor for [[AsyncDecoupledFrom]]. */
 object AsyncDecoupledFrom {
  /** Brings `source` (in the `from_clock` domain) into the caller's clock domain.
    *
    * @return the dequeue port, valid in the caller's clock domain
    */
  def apply[T <: Data](
      from_clock: Clock,
      from_reset: Bool,
      source: DecoupledIO[T],
      depth: Int = 0,
      sync: Int = 2): DecoupledIO[T] = {
    val crosser = Module(new AsyncDecoupledFrom(source.bits, depth, sync))
    // Far-side clock and reset.
    crosser.io.enq_clock.get := from_clock
    crosser.io.enq_reset.get := from_reset
    // Far side is fed from the foreign-domain stream.
    crosser.io.enq <> source
    crosser.io.deq
  }
 }
--- a/junctions/src/main/scala/hasti.scala
+++ b/junctions/src/main/scala/hasti.scala
@@ -0,0 +1,549 @@
 package junctions
 import Chisel._
 import cde.{Parameters, Field}
 // Field widths and encodings for the AHB-lite ("Hasti") bus signals.
 object HastiConstants
 {
  // Values for htrans
  val SZ_HTRANS     = 2
  val HTRANS_IDLE   = UInt(0, SZ_HTRANS) // No transfer requested, not in a burst
  val HTRANS_BUSY   = UInt(1, SZ_HTRANS) // No transfer requested, in a burst
  val HTRANS_NONSEQ = UInt(2, SZ_HTRANS) // First (potentially only) request in a burst
  val HTRANS_SEQ    = UInt(3, SZ_HTRANS) // Following requests in a burst
  // Values for hburst
  val SZ_HBURST     = 3
  val HBURST_SINGLE = UInt(0, SZ_HBURST) // Single access (no burst)
  val HBURST_INCR   = UInt(1, SZ_HBURST) // Incrementing burst of arbitrary length, not crossing 1KB
  val HBURST_WRAP4  = UInt(2, SZ_HBURST) // 4-beat wrapping burst
  val HBURST_INCR4  = UInt(3, SZ_HBURST) // 4-beat incrementing burst
  val HBURST_WRAP8  = UInt(4, SZ_HBURST) // 8-beat wrapping burst
  val HBURST_INCR8  = UInt(5, SZ_HBURST) // 8-beat incrementing burst
  val HBURST_WRAP16 = UInt(6, SZ_HBURST) // 16-beat wrapping burst
  val HBURST_INCR16 = UInt(7, SZ_HBURST) // 16-beat incrementing burst
  // Values for hresp
  val SZ_HRESP      = 1
  val HRESP_OKAY    = UInt(0, SZ_HRESP)
  val HRESP_ERROR   = UInt(1, SZ_HRESP)
  // Values for hsize are identical to TileLink MT_SZ
  // ie: an hsize value of n requests an 8*2^n bit transfer.
  // SZ_HSIZE is the width of the hsize field itself.
  val SZ_HSIZE = 3
  // Values for hprot (a bitmask)
  val SZ_HPROT = 4
  def HPROT_DATA       = UInt("b0001") // Data access or Opcode fetch
  def HPROT_PRIVILEGED = UInt("b0010") // Privileged or User access
  def HPROT_BUFFERABLE = UInt("b0100") // Bufferable or non-bufferable
  def HPROT_CACHEABLE  = UInt("b1000") // Cacheable or non-cacheable
  // Data gate: yields b when valid is true and all zeros otherwise, by ANDing b
  // with valid replicated across b's width.
  def dgate(valid: Bool, b: UInt) = Fill(b.getWidth, valid) & b
 }
 import HastiConstants._
 // Data and address widths (in bits) of one Hasti bus.
 case class HastiParameters(dataBits: Int, addrBits: Int)
 // Configuration key holding the id string of the Hasti bus currently in scope.
 case object HastiId extends Field[String]
 // Configuration key, parameterized by bus id, holding that bus's HastiParameters.
 case class HastiKey(id: String) extends Field[HastiParameters]
 // Mix-in that resolves the Hasti parameters for the bus named by HastiId and
 // exposes the derived widths as vals.
 trait HasHastiParameters {
  // Supplied by the class that mixes this trait in.
  implicit val p: Parameters
  // Two-step lookup: first the bus id, then the parameters keyed by that id.
  val hastiParams = p(HastiKey(p(HastiId)))
  val hastiAddrBits = hastiParams.addrBits
  val hastiDataBits = hastiParams.dataBits
  val hastiDataBytes = hastiDataBits/8
  // log2Ceil of the data bus width in bytes.
  val hastiAlignment = log2Ceil(hastiDataBytes)
 }
 // Base class for Hasti hardware modules: a Module with the Hasti parameters in scope.
 abstract class HastiModule(implicit val p: Parameters) extends Module
  with HasHastiParameters
 // Base class for Hasti bundles: a ParameterizedBundle with the Hasti parameters in scope.
 abstract class HastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasHastiParameters
 // Hasti port as seen by a master: request signals are outputs; hrdata, hready
 // and hresp come back as inputs.
 class HastiMasterIO(implicit p: Parameters) extends HastiBundle()(p) {
  val htrans    = UInt(OUTPUT, SZ_HTRANS)
  val hmastlock = Bool(OUTPUT)
  val haddr     = UInt(OUTPUT, hastiAddrBits)
  val hwrite    = Bool(OUTPUT)
  val hburst    = UInt(OUTPUT, SZ_HBURST)
  val hsize     = UInt(OUTPUT, SZ_HSIZE)
  val hprot     = UInt(OUTPUT, SZ_HPROT)
  val hwdata = Bits(OUTPUT, hastiDataBits)
  val hrdata = Bits(INPUT,  hastiDataBits)
  val hready = Bool(INPUT)
  val hresp  = UInt(INPUT, SZ_HRESP)
  // True only for the first request of a burst.
  def isNSeq(dummy:Int=0) = htrans === HTRANS_NONSEQ // SEQ does not start a NEW request
  // True while the master is mid-burst (BUSY or SEQ).
  def isHold(dummy:Int=0) = htrans === HTRANS_BUSY || htrans === HTRANS_SEQ
  // True when no transfer is requested this cycle (IDLE or BUSY).
  def isIdle(dummy:Int=0) = htrans === HTRANS_IDLE || htrans === HTRANS_BUSY
 }
 // Hasti port as seen by a slave: request signals and hsel are inputs; hrdata,
 // hready and hresp are outputs. Unlike HastiMasterIO it has an hsel signal.
 class HastiSlaveIO(implicit p: Parameters) extends HastiBundle()(p) {
  val htrans    = UInt(INPUT, SZ_HTRANS)
  val hmastlock = Bool(INPUT)
  val haddr     = UInt(INPUT, hastiAddrBits)
  val hwrite    = Bool(INPUT)
  val hburst    = UInt(INPUT, SZ_HBURST)
  val hsize     = UInt(INPUT, SZ_HSIZE)
  val hprot     = UInt(INPUT, SZ_HPROT)
  val hwdata = Bits(INPUT,  hastiDataBits)
  val hrdata = Bits(OUTPUT, hastiDataBits)
  val hsel   = Bool(INPUT)
  val hready = Bool(OUTPUT)
  val hresp  = UInt(OUTPUT, SZ_HRESP)
 }
 /* A diverted master is told hready when its address phase goes nowhere.
 * In this case, we buffer its address phase request and replay it later.
 * While the buffer is full, the buffered request is presented downstream and
 * the master itself is stalled (hready held low).
 * NOTE: this must optimize to nothing when divert is constantly false.
 */
 class MasterDiversion(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val in     = (new HastiMasterIO).flip
    val out    = (new HastiMasterIO)
    val divert = Bool(INPUT)
  }
  // full: the buffer holds a diverted request that has not been replayed yet.
  val full   = Reg(init = Bool(false))
  val buffer = Reg(new HastiMasterIO)
  // Order matters: the divert assignment comes last, so if both conditions hold
  // in the same cycle the buffer ends up full.
  when (io.out.hready) {
    full := Bool(false)
  }
  when (io.divert) {
    full := Bool(true)
    buffer := io.in
  }
  // If the master is diverted, he must also have been told hready
  assert (!io.divert || io.in.hready,
    "Diverted but not ready");
  // Replay the request we diverted
  io.out.htrans    := Mux(full, buffer.htrans,    io.in.htrans)
  io.out.hmastlock := Mux(full, buffer.hmastlock, io.in.hmastlock)
  io.out.haddr     := Mux(full, buffer.haddr,     io.in.haddr)
  io.out.hwrite    := Mux(full, buffer.hwrite,    io.in.hwrite)
  io.out.hburst    := Mux(full, buffer.hburst,    io.in.hburst)
  io.out.hsize     := Mux(full, buffer.hsize,     io.in.hsize)
  io.out.hprot     := Mux(full, buffer.hprot,     io.in.hprot)
  io.out.hwdata    := Mux(full, buffer.hwdata,    io.in.hwdata)
  // Pass slave response back
  io.in.hrdata := io.out.hrdata
  io.in.hresp  := io.out.hresp
  io.in.hready := io.out.hready && !full // Block master while we steal his address phase
 }
 /* Masters with lower index have priority over higher index masters.
 * However, a lower priority master will retain control of a slave when EITHER:
 *   1. a burst is in progress (switching slaves mid-burst violates AHB-lite at slave)
 *   2. a transfer was waited (the standard forbids changing requests in this case)
 *
 * If a master raises hmastlock, it will be waited until no other master has inflight
 * requests; then, it acquires exclusive control of the crossbar until hmastlock is low.
 *
 * To implement an AHB-lite crossbar, it is important to realize that requests and
 * responses are coupled. Unlike modern bus protocols where the response data has flow
 * control independent of the request data, in AHB-lite, both flow at the same time at
 * the sole discretion of the slave via the hready signal. The address and data are
 * delivered on two back-to-back cycles, the so-called address and data phases.
 *
 * Masters can only be connected to a single slave at a time. If a master had two different
 * slave connections on the address and data phases, there would be two independent hready
 * signals. An AHB-lite slave can assume that data flows when it asserts hready. If the data
 * slave deasserts hready while the address slave asserts hready, the master is put in the
 * impossible position of being in data phase on two slaves at once. For this reason, when
 * a master issues back-to-back accesses to distinct slaves, we inject a pipeline bubble
 * between the two requests to limit the master to just a single slave at a time.
 *
 * Conversely, a slave CAN have two masters attached to it. This is unproblematic, because
 * the only signal which governs data flow is hready. Thus, both masters can be stalled
 * safely by the single slave.
 */
 class HastiXbar(nMasters: Int, addressMap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val masters = Vec(nMasters,        new HastiMasterIO).flip
    val slaves  = Vec(addressMap.size, new HastiSlaveIO).flip
  }
  val nSlaves = addressMap.size
  // Set up diversions in front of each master
  val diversions = Seq.tabulate(nMasters) { m => Module(new MasterDiversion) }
  (io.masters zip diversions) foreach { case (m, d) => d.io.in <> m }
  // Handy short-hand
  val masters = diversions map (_.io.out)
  val slaves  = io.slaves
  // Lock status of the crossbar
  val lockedM = Reg(init = Vec.fill(nMasters)(Bool(false)))
  val isLocked = lockedM.reduce(_ || _)
  // This matrix governs the master-slave connections in the address phase
  // It is indexed by addressPhaseGrantSM(slave)(master)
  // It is guaranteed to have at most one 'true' per column and per row
  val addressPhaseGrantSM = Wire(Vec(nSlaves, Vec(nMasters, Bool())))
  // This matrix governs the master-slave connections in the data phase
  // It is guaranteed to have at most one 'true' per column and per row
  val dataPhaseGrantSM    = Reg (init = Vec.fill(nSlaves)(Vec.fill(nMasters)(Bool(false))))
  // This matrix is the union of the address and data phases.
  // It is transposed with respect to the two previous matrices.
  // It is guaranteed to contain at most one 'true' per master row.
  // However, two 'true's per slave column are permitted.
  val unionGrantMS = Vec.tabulate(nMasters) { m => Vec.tabulate(nSlaves) { s => 
                       addressPhaseGrantSM(s)(m) || dataPhaseGrantSM(s)(m) } }
  // Confirm the guarantees made above
  def justOnce(v: Vec[Bool]) = v.fold(Bool(false)) { case (p, v) =>
    assert (!p || !v)
    p || v
  }
  addressPhaseGrantSM foreach { s => justOnce(s) }
  unionGrantMS        foreach { s => justOnce(s) }
  // Data phase follows address phase whenever the slave is ready
  (slaves zip (dataPhaseGrantSM zip addressPhaseGrantSM)) foreach { case (s, (d, a)) =>
    when (s.hready) { d := a }
  }
  // Record the grant state from the previous cycle; needed in case we hold access
  val priorAddressPhaseGrantSM = RegNext(addressPhaseGrantSM)
  // If a master says BUSY or SEQ, it is in the middle of a burst.
  // In this case, it MUST stay attached to the same slave as before.
  // Otherwise, it would violate the AHB-lite specification as seen by
  // the slave, which is guaranteed a complete burst of the promised length.
  // One case where this matters is preventing preemption of low-prio masters.
  // NOTE: this exposes a slave to bad addresses when a master is buggy
  val holdBurstM = Vec(masters map { _.isHold() })
  // Transform the burst hold requirement from master indexing to slave indexing
  // We use the previous cycle's binding because the master continues the prior burst
  val holdBurstS = Vec(priorAddressPhaseGrantSM map { m => Mux1H(m, holdBurstM) })
  // If a slave says !hready to a request, it must retain the same master next cycle.
  // The AHB-lite specification requires that a waited transfer remain unchanged.
  // If we preempted a waited master, the new master's request could potentially differ.
  val holdBusyS = RegNext(Vec(slaves map { s => !s.hready && s.hsel }))
  // Combine the above two grounds to determine if the slave retains its prior master
  val holdS = Vec((holdBurstS zip holdBusyS) map ({ case (a,b) => a||b }))
  // Determine which master addresses match which slaves
  val matchMS = Vec(masters map { m => Vec(addressMap map { afn => afn(m.haddr) }) })
  // Detect requests to nowhere; we need to allow progress in this case
  val nowhereM = Vec(matchMS map { s => !s.reduce(_ || _) })
  // Detect if we need to inject a pipeline bubble between the master requests.
  // Divert masters already granted a data phase different from next request.
  // NOTE: if only one slave, matchMS is always true => bubble always false
  //       => the diversion registers are optimized away as they are unread
  // NOTE: bubble => dataPhase => have an hready signal
  val bubbleM =
    Vec.tabulate(nMasters) { m =>
      Vec.tabulate(nSlaves) { s => dataPhaseGrantSM(s)(m) && !matchMS(m)(s) }
      .reduce(_ || _) }
  // Block any request that requires bus ownership or conflicts with isLocked
  val blockedM = 
    Vec((lockedM zip masters) map { case(l, m) => !l && (isLocked || m.hmastlock) })
  // Requested access to slaves from masters (pre-arbitration)
  // NOTE: isNSeq does NOT include SEQ; thus, masters who are midburst do not
  // request access to a new slave. They stay tied to the old and do not get two.
  // NOTE: if a master was waited, it must repeat the same request as last cycle;
  // thus, it will request the same slave and not end up with two (unless buggy).
  val NSeq = masters.map(_.isNSeq())
  val requestSM = Vec.tabulate(nSlaves) { s => Vec.tabulate(nMasters) { m => 
    matchMS(m)(s) && NSeq(m) && !bubbleM(m) && !blockedM(m) } }
  // Select at most one master request per slave (lowest index = highest priority)
  val selectedRequestSM = Vec(requestSM map { m => Vec(PriorityEncoderOH(m)) })
  // Calculate new crossbar interconnect state
  addressPhaseGrantSM := Vec((holdS zip (priorAddressPhaseGrantSM zip selectedRequestSM))
                             map { case (h, (p, r)) => Mux(h, p, r) })
  for (m <- 0 until nMasters) {
    // If the master is connected to a slave, the slave determines hready.
    // However, if no slave is connected, for progress report ready anyway, if:
    //   bad address (swallow request) OR idle (permit stupid masters to move FSM)
    val autoready = nowhereM(m) || masters(m).isIdle()
    val hready = Mux1H(unionGrantMS(m), slaves.map(_.hready ^ autoready)) ^ autoready
    masters(m).hready := hready
    // If we diverted a master, we need to absorb his address phase to replay later
    diversions(m).io.divert := (bubbleM(m) || blockedM(m)) && NSeq(m) && hready
  }
  // Master muxes (address and data phase are the same)
  (masters zip unionGrantMS) foreach { case (m, g) => {
    m.hrdata := Mux1H(g, slaves.map(_.hrdata))
    m.hresp  := Mux1H(g, slaves.map(_.hresp))
  } }
  // Slave address phase muxes
  (slaves zip addressPhaseGrantSM) foreach { case (s, g) => {
    s.htrans    := Mux1H(g, masters.map(_.htrans))
    s.haddr     := Mux1H(g, masters.map(_.haddr))
    s.hmastlock := isLocked
    s.hwrite    := Mux1H(g, masters.map(_.hwrite))
    s.hsize     := Mux1H(g, masters.map(_.hsize))
    s.hburst    := Mux1H(g, masters.map(_.hburst))
    s.hprot     := Mux1H(g, masters.map(_.hprot))
    s.hsel      := g.reduce(_ || _)
  } }
  // Slave data phase muxes
  (slaves zip dataPhaseGrantSM) foreach { case (s, g) => {
    s.hwdata := Mux1H(g, masters.map(_.hwdata))
  } }
  // When no master-slave connections are active, a master can take-over the bus
  val canLock = !addressPhaseGrantSM.map({ v => v.reduce(_ || _) }).reduce(_ || _)
  // Lowest index highest priority for lock arbitration
  val reqLock = masters.map(_.hmastlock)
  val winLock = PriorityEncoderOH(reqLock)
  // Lock arbitration
  when (isLocked) {
    lockedM := (lockedM zip reqLock) map { case (a,b) => a && b }
  } .elsewhen (canLock) {
    lockedM := winLock
  }
 }
 /** Single-master HASTI bus: connects one master to `amap.size` slaves.
   *
   * The work is delegated to a HastiXbar instantiated with exactly one master
   * port; `amap` carries one address-match predicate per slave port.
   */
 class HastiBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val master = new HastiMasterIO().flip
    val slaves = Vec(amap.length, new HastiSlaveIO).flip
  }
  // One-master crossbar; its slave side is exposed directly as io.slaves
  val bar = Module(new HastiXbar(1, amap))
  bar.io.slaves <> io.slaves
  bar.io.masters.head <> io.master
 }
 /** Merges `n` slave-side inputs (`io.ins`) onto one slave-side output (`io.out`).
   *
   * Implemented with an `n`-master HastiXbar that has a single slave port whose
   * address predicate accepts every address.
   */
 class HastiSlaveMux(n: Int)(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val ins = Vec(n, new HastiSlaveIO)
    val out = new HastiSlaveIO().flip
  }
  // Catch-all address map: the only slave matches regardless of the address
  val amap = Seq[UInt => Bool](_ => Bool(true))
  val bar = Module(new HastiXbar(n, amap))
  io.out <> bar.io.slaves.head
  io.ins <> bar.io.masters
 }
 /** Adapts a slave-side HASTI port (`io.in`) into a master-side port (`io.out`).
   *
   * All request signals are forwarded unchanged except `htrans`, which is forced
   * to HTRANS_IDLE whenever `io.in.hsel` is low. Response signals are passed
   * straight back.
   */
 class HastiSlaveToMaster(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val in  = new HastiSlaveIO
    val out = new HastiMasterIO
  }
  // Response path: out -> in
  io.in.hresp  := io.out.hresp
  io.in.hready := io.out.hready
  io.in.hrdata := io.out.hrdata
  // Request path: in -> out, forwarded as-is
  io.out.haddr     := io.in.haddr
  io.out.hwdata    := io.in.hwdata
  io.out.hwrite    := io.in.hwrite
  io.out.hsize     := io.in.hsize
  io.out.hburst    := io.in.hburst
  io.out.hprot     := io.in.hprot
  io.out.hmastlock := io.in.hmastlock
  // The select line is folded into the transfer type: not selected => IDLE
  io.out.htrans    := Mux(!io.in.hsel, HTRANS_IDLE, io.in.htrans)
 }
 /** Drives a HASTI master port (`io.hasti`) from a NASTI port (`io.nasti`).
   *
   * One NASTI transaction is handled at a time by a four-state machine:
   * s_idle (accept AW or AR), s_read (issue read beats), s_write (issue write
   * beats as W data arrives) and s_write_resp (return the B response).
   * The HASTI and NASTI address and data widths must be equal.
   */
 class HastiMasterIONastiIOConverter(implicit p: Parameters) extends HastiModule()(p)
    with HasNastiParameters {
  val io = new Bundle {
    val nasti = new NastiIO().flip
    val hasti = new HastiMasterIO
  }
  require(hastiAddrBits == nastiXAddrBits)
  require(hastiDataBits == nastiXDataBits)
  // Two-entry queue holding read data on its way to the NASTI R channel
  val r_queue = Module(new Queue(new NastiReadDataChannel, 2, pipe = true))
  val s_idle :: s_read :: s_write :: s_write_resp :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)
  // Per-transaction registers, loaded when AW/AR is accepted
  val addr = Reg(UInt(width = hastiAddrBits))
  val id = Reg(UInt(width = nastiXIdBits))
  val size = Reg(UInt(width = nastiXSizeBits))
  // Remaining read beats; only loaded from AR (writes end on w.bits.last instead)
  val len = Reg(UInt(width = nastiXLenBits))
  // Write data captured from the W channel; drives hwdata
  val data = Reg(UInt(width = nastiXDataBits))
  // True from acceptance of AW/AR until the first beat has been accepted
  val first = Reg(init = Bool(false))
  // A read transfer (NONSEQ or SEQ) is being presented in the address phase this cycle
  val is_rtrans = (state === s_read) &&
                  (io.hasti.htrans === HTRANS_SEQ ||
                   io.hasti.htrans === HTRANS_NONSEQ)
  // is_rtrans captured whenever hready is high: read data is expected with the next hready
  val rvalid = RegEnable(is_rtrans, Bool(false), io.hasti.hready)
  // AW and AR are only accepted when idle; AR additionally yields to a valid AW
  io.nasti.aw.ready := (state === s_idle)
  io.nasti.ar.ready := (state === s_idle) && !io.nasti.aw.valid
  io.nasti.w.ready := (state === s_write) && io.hasti.hready
  io.nasti.b.valid := (state === s_write_resp)
  io.nasti.b.bits := NastiWriteResponseChannel(id = id)
  io.nasti.r <> r_queue.io.deq
  r_queue.io.enq.valid := io.hasti.hready && rvalid
  // NOTE(review): `len` is decremented when a read address phase is accepted,
  // while `last` is evaluated when the corresponding data is enqueued (one
  // hready later) -- confirm that len === 0 coincides with the final data beat.
  r_queue.io.enq.bits := NastiReadDataChannel(
    id = id,
    data = io.hasti.hrdata,
    last = (len === UInt(0)))
  // The queue has no way to stall HASTI read data, so it must never be full here
  assert(!r_queue.io.enq.valid || r_queue.io.enq.ready,
    "NASTI -> HASTI converter queue overflow")
  // How many read requests have we not delivered a response for yet?
  val pending_count = r_queue.io.count + rvalid
  io.hasti.haddr := addr
  io.hasti.hsize := size
  io.hasti.hwrite := (state === s_write)
  io.hasti.hburst := HBURST_INCR
  io.hasti.hprot := UInt(0)
  // hwdata comes from the register written when W fires, i.e. the cycle after the beat is accepted
  io.hasti.hwdata := data
  io.hasti.hmastlock := Bool(false)
  // Transfer type per state (IDLE in s_idle and s_write_resp):
  //   s_write: with W data available NONSEQ on the first beat, SEQ afterwards;
  //            without data IDLE before the first beat, BUSY inside the burst
  //   s_read:  NONSEQ on the first beat, SEQ while at most one response is
  //            outstanding, otherwise BUSY to throttle the slave
  io.hasti.htrans := MuxLookup(state, HTRANS_IDLE, Seq(
    s_write -> Mux(io.nasti.w.valid,
      Mux(first, HTRANS_NONSEQ, HTRANS_SEQ),
      Mux(first, HTRANS_IDLE, HTRANS_BUSY)),
    s_read -> MuxCase(HTRANS_BUSY, Seq(
      first -> HTRANS_NONSEQ,
      (pending_count <= UInt(1)) -> HTRANS_SEQ))))
  // Accept a write address: latch the request and start the write burst
  when (io.nasti.aw.fire()) {
    first := Bool(true)
    addr := io.nasti.aw.bits.addr
    id := io.nasti.aw.bits.id
    size := io.nasti.aw.bits.size
    state := s_write
  }
  // Accept a read address: latch the request (including len) and start reading
  when (io.nasti.ar.fire()) {
    first := Bool(true)
    addr := io.nasti.ar.bits.addr
    id := io.nasti.ar.bits.id
    size := io.nasti.ar.bits.size
    len := io.nasti.ar.bits.len
    state := s_read
  }
  // A write beat was accepted: advance the address by the beat size and capture its data
  when (io.nasti.w.fire()) {
    first := Bool(false)
    addr := addr + (UInt(1) << size)
    data := io.nasti.w.bits.data
    when (io.nasti.w.bits.last) { state := s_write_resp }
  }
  when (io.nasti.b.fire()) { state := s_idle }
  // A read address phase was accepted: advance the address and count down the beats
  when (is_rtrans && io.hasti.hready) {
    first := Bool(false)
    addr := addr + (UInt(1) << size)
    len := len - UInt(1)
    when (len === UInt(0)) { state := s_idle }
  }
 }
 /** HASTI slave test SRAM that inserts wait states driven by an LFSR.
   *
   * `depth` is the number of byte-address bits decoded: the word address is
   * haddr(depth-1, hastiAlignment) and the memory holds
   * 1 << (depth - hastiAlignment) words of hastiDataBytes bytes each.
   * Writes are buffered for one pending entry and flushed on any cycle that is
   * not used by a read; reads of the pending address are bypassed.
   */
 class HastiTestSRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) {
  val io = new HastiSlaveIO
  // This is a test SRAM with random delays
  val ready = LFSR16(Bool(true))(0) // Bool(true)
  // Calculate the bitmask of which bytes are being accessed
  // mask_decode(k) is true when the transfer covers at least 2^k bytes
  val mask_decode = Vec.tabulate(hastiAlignment+1) (UInt(_) <= io.hsize)
  // Byte i is covered (before shifting) when the transfer is at least i+1 bytes wide
  val mask_wide   = Vec.tabulate(hastiDataBytes) { i => mask_decode(log2Up(i+1)) }
  // Move the mask to the byte lanes selected by the low address bits
  val mask_shift  = if (hastiAlignment == 0) UInt(1) else
                    mask_wide.toBits().asUInt() << io.haddr(hastiAlignment-1,0)
  // The request had better have been aligned! (AHB-lite requires this)
  if (hastiAlignment >= 1) {
    assert (io.htrans === HTRANS_IDLE || io.htrans === HTRANS_BUSY ||
      (io.haddr & mask_decode.toBits()(hastiAlignment,1).asUInt) === UInt(0),
      "HASTI request not aligned")
  }
  // The mask and address during the address phase
  val a_request   = io.hsel && (io.htrans === HTRANS_NONSEQ || io.htrans === HTRANS_SEQ)
  val a_mask      = Wire(UInt(width = hastiDataBytes))
  val a_address   = io.haddr(depth-1, hastiAlignment)
  val a_write     = io.hwrite
  // for backwards compatibility with chisel2, we needed a static width in definition
  a_mask := mask_shift(hastiDataBytes-1, 0)
  // The data phase signals
  // d_read: the transfer now in its data phase is a read (updated only when ready)
  val d_read  = RegEnable(a_request && !a_write, Bool(false), ready)
  // d_mask: byte mask of the most recently accepted request
  val d_mask  = RegEnable(a_mask, ready && a_request)
  // Write data split into bytes, lowest byte first
  val d_wdata = Vec.tabulate(hastiDataBytes) { i => io.hwdata(8*(i+1)-1, 8*i) }
  // AHB writes must occur during the data phase; this poses a structural
  // hazard with reads which must occur during the address phase. To solve
  // this problem, we delay the writes until there is a free cycle.
  //
  // The idea is to record the address information from address phase and
  // then as soon as possible flush the pending write. This cannot be done
  // on a cycle when there is an address phase read, but on any other cycle
  // the write will execute. In the case of reads following a write, the
  // result must bypass data from the pending write into the read if they
  // happen to have matching address.
  // Remove this once HoldUnless is in chisel3
  // Passes `in` through while `enable` is high, otherwise returns the value
  // registered the last time `enable` was high
  def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in)
  // Pending write?
  val p_valid     = RegInit(Bool(false))
  val p_address   = Reg(a_address)
  val p_mask      = Reg(a_mask)
  // High in the cycle after a write address phase was accepted, i.e. when its data is on hwdata
  val p_latch_d   = RegNext(ready && a_request && a_write, Bool(false))
  val p_wdata     = holdUnless(d_wdata, p_latch_d)
  // Use single-ported memory with byte-write enable
  val mem = SeqMem(1 << (depth-hastiAlignment), Vec(hastiDataBytes, Bits(width = 8)))
  // Decide if the SRAM port is used for reading or (potentially) writing
  val read = ready && a_request && !a_write
  // In case we are stalled, we need to hold the read data
  val d_rdata = holdUnless(mem.read(a_address, read), RegNext(read))
  // Whenever the port is not needed for reading, execute pending writes
  when (!read) {
    when (p_valid) { mem.write(p_address, p_wdata, p_mask.toBools) }
    p_valid := Bool(false)
  }
  // Record the request for later?
  // (placed after the flush above so that a newly accepted write wins over the clear of p_valid)
  when (ready && a_request && a_write) {
    p_valid   := Bool(true)
    p_address := a_address
    p_mask    := a_mask
  }
  // Does the read need to be muxed with the previous write?
  val a_bypass = a_address === p_address && p_valid
  val d_bypass = RegEnable(a_bypass, ready && a_request)
  // Mux in data from the pending write
  val muxdata = Vec((p_mask.toBools zip (p_wdata zip d_rdata))
                    map { case (m, (p, r)) => Mux(d_bypass && m, p, r) })
  // Wipe out any data the master should not see (for testing)
  val outdata = Vec((d_mask.toBools zip muxdata)
                    map { case (m, p) => Mux(d_read && ready && m, p, Bits(0)) })
  // Finally, the outputs
  io.hrdata := outdata.toBits()
  io.hready := ready
  io.hresp  := HRESP_OKAY
 }
--- a/junctions/src/main/scala/memserdes.scala
+++ b/junctions/src/main/scala/memserdes.scala
@@ -0,0 +1,317 @@
 // See LICENSE for license details.
 package junctions
 import Chisel._
 import scala.math._
 import cde.{Parameters, Field}
 // Configuration keys for the memory interface (MIF); all are Int-valued and
 // are read into vals by HasMIFParameters.
 // Width of the request address field
 case object MIFAddrBits extends Field[Int]
 // Width of the data field
 case object MIFDataBits extends Field[Int]
 // Width of the request/response tag field
 case object MIFTagBits extends Field[Int]
 // Number of data beats per request
 case object MIFDataBeats extends Field[Int]
 /** Mixin that looks up the MIF configuration keys from the implicit
   * Parameters `p` and exposes them as plain Scala values.
   */
 trait HasMIFParameters {
  implicit val p: Parameters
  val mifTagBits = p(MIFTagBits)
  val mifAddrBits = p(MIFAddrBits)
  val mifDataBits = p(MIFDataBits)
  val mifDataBeats = p(MIFDataBeats)
 }
 // Base class for modules that need the MIF parameters in scope
 abstract class MIFModule(implicit val p: Parameters) extends Module with HasMIFParameters
 // Base class for bundles that need the MIF parameters in scope
 abstract class MIFBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasMIFParameters
 // Mixin adding a `data` field of mifDataBits bits
 trait HasMemData extends HasMIFParameters {
  val data = Bits(width = mifDataBits)
 }
 // Mixin adding an `addr` field of mifAddrBits bits
 trait HasMemAddr extends HasMIFParameters {
  val addr = UInt(width = mifAddrBits)
 }
 // Mixin adding a `tag` field of mifTagBits bits
 trait HasMemTag extends HasMIFParameters {
  val tag = UInt(width = mifTagBits)
 }
 /** Memory request command: address and tag (from the mixins) plus a direction flag. */
 class MemReqCmd(implicit p: Parameters) extends MIFBundle()(p) with HasMemAddr with HasMemTag {
  // Direction flag; users in this file treat `true` as a write (write data
  // follows) and `false` as a read
  val rw = Bool()
 }
 // Tag-only payload
 class MemTag(implicit p: Parameters) extends MIFBundle()(p) with HasMemTag
 // Data-only payload (one beat)
 class MemData(implicit p: Parameters) extends MIFBundle()(p) with HasMemData
 // Response payload: one data beat together with its tag
 class MemResp(implicit p: Parameters) extends MIFBundle()(p) with HasMemData with HasMemTag
 /** Memory port seen from the requester: command and write-data channels go
   * out, the response channel comes back. All three use Decoupled
   * (ready/valid) handshakes.
   */
 class MemIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req_cmd  = Decoupled(new MemReqCmd)
  val req_data = Decoupled(new MemData)
  val resp     = Decoupled(new MemResp).flip
 }
 /** Variant of MemIO whose response channel is a Valid rather than a
   * Decoupled, so the requester has no `ready` with which to stall responses.
   */
 class MemPipeIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req_cmd  = Decoupled(new MemReqCmd)
  val req_data = Decoupled(new MemData)
  val resp     = Valid(new MemResp).flip
 }
 /** Narrow, `w`-bit-wide serialized memory link: a Decoupled request stream
   * going out and a Valid response stream coming back.
   */
 class MemSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req = Decoupled(Bits(width = w))
  val resp = Valid(Bits(width = w)).flip
  // Overridden so that a clone is constructed with the same width `w`
  override def cloneType = new MemSerializedIO(w)(p).asInstanceOf[this.type]
 }
 /** Serializes a wide MemIO onto a `w`-bit MemSerializedIO link.
   *
   * Commands and write-data beats taken from `io.wide` are sent as consecutive
   * `w`-bit chunks on `io.narrow.req`; `w`-bit chunks arriving on
   * `io.narrow.resp` are reassembled into wide responses.
   */
 class MemSerdes(w: Int)(implicit p: Parameters) extends MIFModule
 {
  val io = new Bundle {
    val wide = new MemIO().flip
    val narrow = new MemSerializedIO(w)
  }
  // Flattened widths of the command, write-data and response bundles
  val abits = io.wide.req_cmd.bits.toBits.getWidth
  val dbits = io.wide.req_data.bits.toBits.getWidth
  val rbits = io.wide.resp.bits.getWidth
  // Shift registers for the outgoing and incoming bit streams
  val out_buf = Reg(Bits())
  val in_buf = Reg(Bits())
  val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5)
  val state = Reg(init=s_idle)
  // Chunks sent so far for the current command or data beat
  val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w)))
  // Data beats sent so far for the current write
  val data_send_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
  // The final chunk of the command / data beat is being accepted by the link
  val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w)
  val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w)
  // Each accepted chunk shifts the buffer right by w, so the low bits go out first
  when (io.narrow.req.valid && io.narrow.req.ready) {
    send_cnt := send_cnt + UInt(1)
    out_buf := out_buf >> UInt(w)
  }
  // Load the buffer with a newly accepted command or data beat
  when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) {
    out_buf := io.wide.req_cmd.bits.toBits
  }
  when (io.wide.req_data.valid && io.wide.req_data.ready) {
    out_buf := io.wide.req_data.bits.toBits
  }
  io.wide.req_cmd.ready := state === s_idle
  io.wide.req_data.ready := state === s_write_idle
  io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data
  io.narrow.req.bits := out_buf
  // Send-side state machine
  // idle: a command was accepted; rw selects the write or read path
  when (state === s_idle && io.wide.req_cmd.valid) {
    state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr)
  }
  // read command fully sent
  when (state === s_read_addr && adone) {
    state := s_idle
    send_cnt := UInt(0)
  }
  // write command fully sent: wait for the first data beat
  when (state === s_write_addr && adone) {
    state := s_write_idle
    send_cnt := UInt(0)
  }
  when (state === s_write_idle && io.wide.req_data.valid) {
    state := s_write_data
  }
  // data beat fully sent: go back for the next beat, or finish after mifDataBeats beats
  when (state === s_write_data && ddone) {
    data_send_cnt := data_send_cnt + UInt(1)
    state := Mux(data_send_cnt === UInt(mifDataBeats-1), s_idle, s_write_idle)
    send_cnt := UInt(0)
  }
  // Receive side
  // Chunks received so far for the current response
  val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
  // Incremented once per completed response; not read elsewhere in this module
  val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
  // Registered valid: set for the cycle after the final chunk of a response arrives
  val resp_val = Reg(init=Bool(false))
  resp_val := Bool(false)
  when (io.narrow.resp.valid) {
    recv_cnt := recv_cnt + UInt(1)
    when (recv_cnt === UInt((rbits-1)/w)) {
      recv_cnt := UInt(0)
      data_recv_cnt := data_recv_cnt + UInt(1)
      resp_val := Bool(true)
    }
    // New chunk enters at the top; earlier chunks move down by w bits
    in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w))
  }
  io.wide.resp.valid := resp_val
  io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf)
 }
 /** IO of MemDesser: the `w`-bit serialized link comes in (flipped) and a
   * wide MemIO goes out.
   */
 class MemDesserIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val narrow = new MemSerializedIO(w).flip
  val wide = new MemIO
 }
 /** Deserializer for the `w`-bit link (counterpart of MemSerdes).
   *
   * Reassembles commands and write data arriving on `io.narrow.req` into wide
   * MemIO requests and serializes wide responses back onto `io.narrow.resp`.
   * Requires dbits >= abits and rbits >= dbits.
   */
 class MemDesser(w: Int)(implicit p: Parameters) extends Module // test rig side
 {
  val io = new MemDesserIO(w)
  // Flattened widths of the command, write-data and response bundles
  val abits = io.wide.req_cmd.bits.toBits.getWidth
  val dbits = io.wide.req_data.bits.toBits.getWidth
  val rbits = io.wide.resp.bits.getWidth
  val mifDataBeats = p(MIFDataBeats)
  require(dbits >= abits && rbits >= dbits)
  // Chunk counter, shared by request reception and response transmission
  val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
  // Beat counter, advanced both by write-data beats and by response beats
  val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
  // Final chunk of a command / data beat / response is being transferred
  val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w)
  val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w)
  val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w)
  val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5)
  val state = Reg(init=s_cmd_recv)
  val in_buf = Reg(Bits())
  // Count a chunk on every accepted request chunk or outgoing response chunk;
  // the request bits are shifted into the top of in_buf in both cases
  when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) {
    recv_cnt := recv_cnt + UInt(1)
    in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w))
  }
  io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv
  // Command completely received: present it on the wide side
  when (state === s_cmd_recv && adone) {
    state := s_cmd
    recv_cnt := UInt(0)
  }
  // Command accepted: writes continue with data reception, reads wait for the reply
  when (state === s_cmd && io.wide.req_cmd.ready) {
    state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply)
  }
  // Data beat completely received: present it on the wide side
  when (state === s_data_recv && ddone) {
    state := s_data
    recv_cnt := UInt(0)
  }
  // Data beat accepted: fetch the next one, or return to commands after the last beat
  when (state === s_data && io.wide.req_data.ready) {
    state := s_data_recv
    when (data_recv_cnt === UInt(mifDataBeats-1)) {
      state := s_cmd_recv
    }
    data_recv_cnt := data_recv_cnt + UInt(1)
  }
  // One response beat completely sent; after mifDataBeats beats go back to commands
  when (rdone) { // state === s_reply
    when (data_recv_cnt === UInt(mifDataBeats-1)) {
      state := s_cmd_recv
    }
    recv_cnt := UInt(0)
    data_recv_cnt := data_recv_cnt + UInt(1)
  }
  // in_buf is sized in whole chunks for a response; a command or data beat
  // fills only the top chunks, so shift it down to bit 0 before decoding
  val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w)
  io.wide.req_cmd.valid := state === s_cmd
  io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd)
  io.wide.req_data.valid := state === s_data
  io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w)
  // Buffer wide responses; the head entry is serialized chunk by chunk and
  // dequeued when the chunk counter reaches the final chunk
  val dataq = Module(new Queue(new MemResp, mifDataBeats))
  dataq.io.enq <> io.wide.resp
  dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w)
  io.narrow.resp.valid := dataq.io.deq.valid
  io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UInt(w))
 }
 /** Arbitrates `arbN` inner MemIO ports onto a single outer MemIO port.
   *
   * Commands are selected round-robin. The index of the winning port is
   * appended to the request tag as its low `log2Up(arbN)` bits so that
   * responses can be routed back; the same bits are stripped again before the
   * response is handed to the inner port. Write data is forwarded from the
   * port whose write command was issued earliest (tracked in `choice_q`).
   * With `arbN == 1` the single inner port is connected straight through.
   *
   * NOTE(review): the extended tag is assigned to a field of mifTagBits bits,
   * so inner tags presumably must leave `log2Up(arbN)` upper bits unused --
   * confirm against the configuration.
   */
 class MemIOArbiter(val arbN: Int)(implicit p: Parameters) extends MIFModule {
  val io = new Bundle {
    val inner = Vec(arbN, new MemIO).flip
    val outer = new MemIO
  }
  if(arbN > 1) {
    // Number of low tag bits that carry the index of the requesting port
    val arbIdBits = log2Up(arbN)
    val cmd_arb = Module(new RRArbiter(new MemReqCmd, arbN))
    // Port indices of issued write commands whose data has not been fully forwarded
    val choice_q = Module(new Queue(cmd_arb.io.chosen, 4))
    // data_done pulses on the last of mifDataBeats forwarded data beats
    val (data_cnt, data_done) = Counter(io.outer.req_data.fire(), mifDataBeats)
    io.inner.map(_.req_cmd).zipWithIndex.zip(cmd_arb.io.in).foreach { case ((req, id), arb) =>
      arb.valid := req.valid
      arb.bits := req.bits
      // The port index must always occupy exactly arbIdBits bits. A bare
      // UInt(id) literal is only as wide as its value needs, which would
      // shift the tag by a different amount per port and break the fixed
      // arbIdBits-wide decode on the response path.
      arb.bits.tag := Cat(req.bits.tag, UInt(id, arbIdBits))
      req.ready := arb.ready
    }
    // A command is only issued when choice_q can record it
    io.outer.req_cmd.bits := cmd_arb.io.out.bits
    io.outer.req_cmd.valid := cmd_arb.io.out.valid && choice_q.io.enq.ready
    cmd_arb.io.out.ready := io.outer.req_cmd.ready && choice_q.io.enq.ready
    // Only write commands (rw set) are remembered: only they are followed by data
    choice_q.io.enq.bits := cmd_arb.io.chosen
    choice_q.io.enq.valid := cmd_arb.io.out.fire() && cmd_arb.io.out.bits.rw
    // Forward write data from the port at the head of choice_q
    io.outer.req_data.bits := io.inner(choice_q.io.deq.bits).req_data.bits
    io.outer.req_data.valid := io.inner(choice_q.io.deq.bits).req_data.valid && choice_q.io.deq.valid
    io.inner.map(_.req_data.ready).zipWithIndex.foreach {
      case(r, i) => r := UInt(i) === choice_q.io.deq.bits && choice_q.io.deq.valid
    }
    choice_q.io.deq.ready := data_done
    // Route each response to the port named by the low tag bits; the default
    // of false below is overridden by the matching port
    io.outer.resp.ready := Bool(false)
    for (i <- 0 until arbN) {
      io.inner(i).resp.valid := Bool(false)
      when(io.outer.resp.bits.tag(arbIdBits-1,0).toUInt === UInt(i)) {
        io.inner(i).resp.valid := io.outer.resp.valid
        io.outer.resp.ready := io.inner(i).resp.ready
      }
      io.inner(i).resp.bits := io.outer.resp.bits
      // Strip the port index to restore the tag the inner port sent
      io.inner(i).resp.bits.tag := io.outer.resp.bits.tag >> UInt(arbIdBits)
    }
  } else { io.outer <> io.inner.head }
 }
 /** Presents a MemPipeIO as a MemIO.
   *
   * `apply` creates a MemIO wire, connects both request channels straight
   * through (handshake included) and forwards responses from the wire into
   * `in`. As the MemPipeIO response has no `ready`, the wire's response
   * channel is always ready.
   */
 object MemIOMemPipeIOConverter {
  def apply(in: MemPipeIO)(implicit p: Parameters): MemIO = {
    val mem = Wire(new MemIO())
    // Command channel: in -> mem
    mem.req_cmd.bits := in.req_cmd.bits
    mem.req_cmd.valid := in.req_cmd.valid
    in.req_cmd.ready := mem.req_cmd.ready
    // Write-data channel: in -> mem
    mem.req_data.bits := in.req_data.bits
    mem.req_data.valid := in.req_data.valid
    in.req_data.ready := mem.req_data.ready
    // Response channel: mem -> in, never back-pressured
    mem.resp.ready := Bool(true)
    in.resp.bits := mem.resp.bits
    in.resp.valid := mem.resp.valid
    mem
  }
 }
 /** Connects a MemIO requester (`io.cpu`) to a MemPipeIO memory (`io.mem`).
   *
   * The memory side cannot be stalled on responses, so responses are buffered
   * in queues of `numRequests * mifDataBeats` entries and a credit counter
   * holds back read commands until a full burst of buffer space is reserved.
   */
 class MemPipeIOMemIOConverter(numRequests: Int)(implicit p: Parameters) extends MIFModule {
  val io = new Bundle {
    val cpu = new MemIO().flip
    val mem = new MemPipeIO
  }
  val numEntries = numRequests * mifDataBeats
  val size = log2Down(numEntries) + 1
  // inc: one response beat left the queues; dec: a read command was issued
  val inc = Wire(Bool())
  val dec = Wire(Bool())
  // Free response-buffer entries not yet reserved by an issued read
  val count = Reg(init=UInt(numEntries, size))
  // Enough free entries for one more complete read burst
  val watermark = count >= UInt(mifDataBeats)
  // A read reserves mifDataBeats entries; each delivered beat returns one
  when (inc && !dec) {
    count := count + UInt(1)
  }
  when (!inc && dec) {
    count := count - UInt(mifDataBeats)
  }
  when (inc && dec) {
    count := count - UInt(mifDataBeats-1)
  }
  // Commands with rw set always pass; others (reads) need the watermark
  val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark
  io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask
  io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask
  io.mem.req_cmd.bits := io.cpu.req_cmd.bits
  io.mem.req_data <> io.cpu.req_data
  // Have separate queues to allow for different mem implementations
  // The enqueue-side ready signals are not consulted: io.mem.resp is a Valid
  // and space is accounted for by `count` instead
  val resp_data_q = Module((new HellaQueue(numEntries)) { new MemData })
  resp_data_q.io.enq.valid := io.mem.resp.valid
  resp_data_q.io.enq.bits.data := io.mem.resp.bits.data
  val resp_tag_q = Module((new HellaQueue(numEntries)) { new MemTag })
  resp_tag_q.io.enq.valid := io.mem.resp.valid
  resp_tag_q.io.enq.bits.tag := io.mem.resp.bits.tag
  // A response is delivered when both queues have their half available
  io.cpu.resp.valid := resp_data_q.io.deq.valid && resp_tag_q.io.deq.valid
  io.cpu.resp.bits.data := resp_data_q.io.deq.bits.data
  io.cpu.resp.bits.tag := resp_tag_q.io.deq.bits.tag
  resp_data_q.io.deq.ready := io.cpu.resp.ready
  resp_tag_q.io.deq.ready := io.cpu.resp.ready
  inc := resp_data_q.io.deq.fire() && resp_tag_q.io.deq.fire()
  dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw
 }
--- a/junctions/src/main/scala/nasti.scala
+++ b/junctions/src/main/scala/nasti.scala
@@ -0,0 +1,737 @@
 /// See LICENSE for license details.
 package junctions
 import Chisel._
 import scala.math.max
 import scala.collection.mutable.ArraySeq
 import cde.{Parameters, Field}
 // Configuration key under which the NastiParameters are stored
 case object NastiKey extends Field[NastiParameters]
 // Configurable NASTI widths, in bits: data bus, address bus and transaction id
 case class NastiParameters(dataBits: Int, addrBits: Int, idBits: Int)
 /** Mixin exposing the NASTI field widths.
   *
   * Data, address and id widths come from the NastiParameters stored under
   * NastiKey in the implicit Parameters `p`; all other widths are fixed
   * constants defined here.
   */
 trait HasNastiParameters {
  implicit val p: Parameters
  val nastiExternal = p(NastiKey)
  val nastiXDataBits = nastiExternal.dataBits
  // One write-strobe bit per data byte
  val nastiWStrobeBits = nastiXDataBits / 8
  val nastiXAddrBits = nastiExternal.addrBits
  // Read and write ids share the same configured width
  val nastiWIdBits = nastiExternal.idBits
  val nastiRIdBits = nastiExternal.idBits
  val nastiXIdBits = max(nastiWIdBits, nastiRIdBits)
  // All user fields are a single bit wide
  val nastiXUserBits = 1
  val nastiAWUserBits = nastiXUserBits
  val nastiWUserBits = nastiXUserBits
  val nastiBUserBits = nastiXUserBits
  val nastiARUserBits = nastiXUserBits
  val nastiRUserBits = nastiXUserBits
  // Fixed widths of the remaining fields
  val nastiXLenBits = 8
  val nastiXSizeBits = 3
  val nastiXBurstBits = 2
  val nastiXCacheBits = 4
  val nastiXProtBits = 3
  val nastiXQosBits = 4
  val nastiXRegionBits = 4
  val nastiXRespBits = 2
  // Converts a byte count (a power of two from 1 to 128) into the size
  // encoding log2(bytes); any other value yields b111
  def bytesToXSize(bytes: UInt) = MuxLookup(bytes, UInt("b111"), Array(
    UInt(1) -> UInt(0),
    UInt(2) -> UInt(1),
    UInt(4) -> UInt(2),
    UInt(8) -> UInt(3),
    UInt(16) -> UInt(4),
    UInt(32) -> UInt(5),
    UInt(64) -> UInt(6),
    UInt(128) -> UInt(7)))
 }
 // Base class for modules that need the NASTI parameters in scope
 abstract class NastiModule(implicit val p: Parameters) extends Module
  with HasNastiParameters
 // Base class for bundles that need the NASTI parameters in scope
 abstract class NastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasNastiParameters
 // Common root of all NASTI channel payloads, split below by direction of travel
 abstract class NastiChannel(implicit p: Parameters) extends NastiBundle()(p)
 abstract class NastiMasterToSlaveChannel(implicit p: Parameters) extends NastiChannel()(p)
 abstract class NastiSlaveToMasterChannel(implicit p: Parameters) extends NastiChannel()(p)
 // Fields shared by the address channels (mixed into NastiAddressChannel);
 // widths come from HasNastiParameters
 trait HasNastiMetadata extends HasNastiParameters {
  val addr   = UInt(width = nastiXAddrBits)
  val len    = UInt(width = nastiXLenBits)
  val size   = UInt(width = nastiXSizeBits)
  val burst  = UInt(width = nastiXBurstBits)
  val lock   = Bool()
  val cache  = UInt(width = nastiXCacheBits)
  val prot   = UInt(width = nastiXProtBits)
  val qos    = UInt(width = nastiXQosBits)
  val region = UInt(width = nastiXRegionBits)
 }
 // Fields shared by the write-data and read-data channels: one data beat and
 // a flag marking the last beat
 trait HasNastiData extends HasNastiParameters {
  val data = UInt(width = nastiXDataBits)
  val last = Bool()
 }
 // Read half of a NASTI port, master view: AR goes out, R comes back
 class NastiReadIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val ar = Decoupled(new NastiReadAddressChannel)
  val r  = Decoupled(new NastiReadDataChannel).flip
 }
 // Write half of a NASTI port, master view: AW and W go out, B comes back
 class NastiWriteIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val aw = Decoupled(new NastiWriteAddressChannel)
  val w  = Decoupled(new NastiWriteDataChannel)
  val b  = Decoupled(new NastiWriteResponseChannel).flip
 }
 // Complete NASTI port, master view: all five channels
 class NastiIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val aw = Decoupled(new NastiWriteAddressChannel)
  val w  = Decoupled(new NastiWriteDataChannel)
  val b  = Decoupled(new NastiWriteResponseChannel).flip
  val ar = Decoupled(new NastiReadAddressChannel)
  val r  = Decoupled(new NastiReadDataChannel).flip
 }
 // Base of the AW and AR payloads: carries the shared HasNastiMetadata fields
 class NastiAddressChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p)
    with HasNastiMetadata
 // Base of the B and R payloads: carries the response code
 class NastiResponseChannel(implicit p: Parameters) extends NastiSlaveToMasterChannel()(p) {
  val resp = UInt(width = nastiXRespBits)
 }
 // AW payload: address metadata plus write id and user bits
 class NastiWriteAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) {
  val id   = UInt(width = nastiWIdBits)
  val user = UInt(width = nastiAWUserBits)
 }
 // W payload: data beat with last flag, plus id, byte strobes and user bits
 class NastiWriteDataChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p)
    with HasNastiData {
  val id   = UInt(width = nastiWIdBits)
  val strb = UInt(width = nastiWStrobeBits)
  val user = UInt(width = nastiWUserBits)
 }
 // B payload: response code plus write id and user bits
 class NastiWriteResponseChannel(implicit p: Parameters) extends NastiResponseChannel()(p) {
  val id   = UInt(width = nastiWIdBits)
  val user = UInt(width = nastiBUserBits)
 }
 // AR payload: address metadata plus read id and user bits
 class NastiReadAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) {
  val id   = UInt(width = nastiRIdBits)
  val user = UInt(width = nastiARUserBits)
 }
 // R payload: response code and data beat with last flag, plus read id and user bits
 class NastiReadDataChannel(implicit p: Parameters) extends NastiResponseChannel()(p)
    with HasNastiData {
  val id   = UInt(width = nastiRIdBits)
  val user = UInt(width = nastiRUserBits)
 }
 // Literal encodings for the two-bit `burst` and `resp` fields
 object NastiConstants {
  // Burst types
  val BURST_FIXED = UInt("b00")
  val BURST_INCR  = UInt("b01")
  val BURST_WRAP  = UInt("b10")
  // Response codes
  val RESP_OKAY = UInt("b00")
  val RESP_EXOKAY = UInt("b01")
  val RESP_SLVERR = UInt("b10")
  val RESP_DECERR = UInt("b11")
 }
 import NastiConstants._
 /** Factory for write-address (AW) payloads.
   *
   * Returns a wire with `id`, `addr`, `size`, `len` and `burst` taken from the
   * arguments (`len` defaults to 0, `burst` to BURST_INCR) and every other
   * field tied to zero / false.
   */
 object NastiWriteAddressChannel {
  def apply(id: UInt, addr: UInt, size: UInt,
      len: UInt = UInt(0), burst: UInt = BURST_INCR)
      (implicit p: Parameters) = {
    val chan = Wire(new NastiWriteAddressChannel)
    // Fields supplied by the caller
    chan.id := id
    chan.addr := addr
    chan.size := size
    chan.len := len
    chan.burst := burst
    // Everything else is tied off
    chan.lock := Bool(false)
    chan.user := UInt(0)
    chan.prot := UInt("b000")
    chan.cache := UInt("b0000")
    chan.qos := UInt("b0000")
    chan.region := UInt("b0000")
    chan
  }
 }
 /** Factory for read-address (AR) payloads.
   *
   * Returns a wire with `id`, `addr`, `size`, `len` and `burst` taken from the
   * arguments (`len` defaults to 0, `burst` to BURST_INCR) and every other
   * field tied to zero / false.
   */
 object NastiReadAddressChannel {
  def apply(id: UInt, addr: UInt, size: UInt,
      len: UInt = UInt(0), burst: UInt = BURST_INCR)
      (implicit p: Parameters) = {
    val chan = Wire(new NastiReadAddressChannel)
    // Fields supplied by the caller
    chan.id := id
    chan.addr := addr
    chan.size := size
    chan.len := len
    chan.burst := burst
    // Everything else is tied off
    chan.lock := Bool(false)
    chan.user := UInt(0)
    chan.prot := UInt(0)
    chan.cache := UInt(0)
    chan.qos := UInt(0)
    chan.region := UInt(0)
    chan
  }
 }
 /** Factory for write-data (W) payloads.
   *
   * `strb` defaults to all ones (every byte written) when not supplied, `last`
   * to true and `id` to 0; `user` is always zero.
   */
 object NastiWriteDataChannel {
  def apply(data: UInt, strb: Option[UInt] = None,
            last: Bool = Bool(true), id: UInt = UInt(0))
           (implicit p: Parameters): NastiWriteDataChannel = {
    val chan = Wire(new NastiWriteDataChannel)
    chan.data := data
    chan.last := last
    chan.id   := id
    chan.user := UInt(0)
    // Without an explicit strobe, enable all byte lanes
    chan.strb := (strb match {
      case Some(mask) => mask
      case None       => Fill(chan.nastiWStrobeBits, UInt(1, 1))
    })
    chan
  }
 }
 /** Factory for read-data (R) payloads.
   *
   * `last` defaults to true and `resp` to 0; `user` is always zero.
   */
 object NastiReadDataChannel {
  def apply(id: UInt, data: UInt, last: Bool = Bool(true), resp: UInt = UInt(0))(
      implicit p: Parameters) = {
    val chan = Wire(new NastiReadDataChannel)
    chan.data := data
    chan.last := last
    chan.resp := resp
    chan.id := id
    chan.user := UInt(0)
    chan
  }
 }
 /** Factory for write-response (B) payloads.
   *
   * `resp` defaults to 0; `user` is always zero.
   */
 object NastiWriteResponseChannel {
  def apply(id: UInt, resp: UInt = UInt(0))(implicit p: Parameters) = {
    val chan = Wire(new NastiWriteResponseChannel)
    chan.resp := resp
    chan.id := id
    chan.user := UInt(0)
    chan
  }
 }
 /** Converts a NASTI port (`io.nasti`) into a MemIO port (`io.mem`).
   *
   * AW and AR share the single MemIO command channel, with AW taking
   * priority. Every burst must be exactly `mifDataBeats` beats of full-width,
   * fully-strobed data (checked by assertions). The command address is the
   * NASTI address shifted right by `cacheBlockOffsetBits`.
   */
 class MemIONastiIOConverter(cacheBlockOffsetBits: Int)(implicit p: Parameters) extends MIFModule
    with HasNastiParameters {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val mem = new MemIO
  }
  require(mifDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree")
  // Counts response beats; the wrap flag marks the last beat of each read burst
  // (mif_cnt_out itself is not used further in this module)
  val (mif_cnt_out, mif_wrap_out) = Counter(io.mem.resp.fire(), mifDataBeats)
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === UInt(log2Up(mifDataBits/8)),
    "Nasti data size does not match MemIO data size")
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === UInt(log2Up(mifDataBits/8)),
    "Nasti data size does not match MemIO data size")
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(mifDataBeats - 1),
    "Nasti length does not match number of MemIO beats")
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(mifDataBeats - 1),
    "Nasti length does not match number of MemIO beats")
  // according to the spec, we can't send b until the last transfer on w
  val b_ok = Reg(init = Bool(true))
  when (io.nasti.aw.fire()) { b_ok := Bool(false) }
  when (io.nasti.w.fire() && io.nasti.w.bits.last) { b_ok := Bool(true) }
  // Ids of accepted writes, waiting to be returned on the B channel
  val id_q = Module(new Queue(UInt(width = nastiWIdBits), 2))
  id_q.io.enq.valid := io.nasti.aw.valid && io.mem.req_cmd.ready
  id_q.io.enq.bits := io.nasti.aw.bits.id
  id_q.io.deq.ready := io.nasti.b.ready && b_ok
  // Command channel: a valid AW is selected over AR
  io.mem.req_cmd.bits.addr := Mux(io.nasti.aw.valid, io.nasti.aw.bits.addr, io.nasti.ar.bits.addr) >>
                                UInt(cacheBlockOffsetBits)
  io.mem.req_cmd.bits.tag := Mux(io.nasti.aw.valid, io.nasti.aw.bits.id, io.nasti.ar.bits.id)
  io.mem.req_cmd.bits.rw := io.nasti.aw.valid
  // A write command additionally needs room in id_q
  io.mem.req_cmd.valid := (io.nasti.aw.valid && id_q.io.enq.ready) || io.nasti.ar.valid
  io.nasti.ar.ready := io.mem.req_cmd.ready && !io.nasti.aw.valid
  io.nasti.aw.ready := io.mem.req_cmd.ready && id_q.io.enq.ready
  // B channel: return the oldest queued write id once b_ok allows it
  io.nasti.b.valid := id_q.io.deq.valid && b_ok
  io.nasti.b.bits.id := id_q.io.deq.bits
  io.nasti.b.bits.resp := UInt(0)
  // W channel maps directly onto the MemIO write-data channel
  io.nasti.w.ready := io.mem.req_data.ready
  io.mem.req_data.valid := io.nasti.w.valid
  io.mem.req_data.bits.data := io.nasti.w.bits.data
  assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR, "MemIO must write full cache line")
  // R channel maps directly from the MemIO response channel; the tag becomes the id
  io.nasti.r.valid := io.mem.resp.valid
  io.nasti.r.bits.data := io.mem.resp.bits.data
  io.nasti.r.bits.last := mif_wrap_out
  io.nasti.r.bits.id := io.mem.resp.bits.tag
  io.nasti.r.bits.resp := UInt(0)
  io.mem.resp.ready := io.nasti.r.ready
 }
 /** Ports of a Nasti arbiter: `arbN` flipped master-facing ports and one
  *  slave-facing port. */
 class NastiArbiterIO(arbN: Int)(implicit p: Parameters) extends Bundle {
  val master = Vec(arbN, new NastiIO).flip
  val slave = new NastiIO

  // Rebuild with the same constructor argument so the bundle can be cloned.
  override def cloneType: this.type = new NastiArbiterIO(arbN).asInstanceOf[this.type]
 }
 /** Arbitrate among arbN masters requesting to a single slave.
  *
  *  Read and write addresses go through separate round-robin arbiters. The
  *  index of the issuing master is placed in the low bits of the request ID,
  *  and those bits steer each response back to its master. Write data follows
  *  the master whose write address was most recently granted. With a single
  *  master the ports are connected directly.
  *
  *  @param arbN the number of masters */
 class NastiArbiter(val arbN: Int)(implicit p: Parameters) extends NastiModule {
  val io = new NastiArbiterIO(arbN)
  if (arbN > 1) {
    // Number of low ID bits used to carry the master index.
    val arbIdBits = log2Up(arbN)
    val ar_arb = Module(new RRArbiter(new NastiReadAddressChannel, arbN))
    val aw_arb = Module(new RRArbiter(new NastiWriteAddressChannel, arbN))
    // Master index recovered from the low bits of each response ID.
    val slave_r_arb_id = io.slave.r.bits.id(arbIdBits - 1, 0)
    val slave_b_arb_id = io.slave.b.bits.id(arbIdBits - 1, 0)
    // w_chosen: master that owns the write data channel; w_done: no write burst
    // is outstanding on the data channel.
    val w_chosen = Reg(UInt(width = arbIdBits))
    val w_done = Reg(init = Bool(true))
    when (aw_arb.io.out.fire()) {
      w_chosen := aw_arb.io.chosen
      w_done := Bool(false)
    }
    // Declared after the block above, so this assignment wins if both fire together.
    when (io.slave.w.fire() && io.slave.w.bits.last) {
      w_done := Bool(true)
    }
    for (i <- 0 until arbN) {
      val m_ar = io.master(i).ar
      val m_aw = io.master(i).aw
      val m_r = io.master(i).r
      val m_b = io.master(i).b
      val a_ar = ar_arb.io.in(i)
      val a_aw = aw_arb.io.in(i)
      val m_w = io.master(i).w
      // Bulk-connect, then override the ID with the master index appended.
      // NOTE(review): the concatenated ID is wider than the master's ID by
      // arbIdBits — confirm the upper bits are not lost on assignment.
      a_ar <> m_ar
      a_ar.bits.id := Cat(m_ar.bits.id, UInt(i, arbIdBits))
      a_aw <> m_aw
      a_aw.bits.id := Cat(m_aw.bits.id, UInt(i, arbIdBits))
      // Responses are presented only to the master whose index matches, with
      // the index bits shifted back out of the ID.
      m_r.valid := io.slave.r.valid && slave_r_arb_id === UInt(i)
      m_r.bits := io.slave.r.bits
      m_r.bits.id := io.slave.r.bits.id >> UInt(arbIdBits)
      m_b.valid := io.slave.b.valid && slave_b_arb_id === UInt(i)
      m_b.bits := io.slave.b.bits
      m_b.bits.id := io.slave.b.bits.id >> UInt(arbIdBits)
      // Only the chosen master may send write data, and only during a burst.
      m_w.ready := io.slave.w.ready && w_chosen === UInt(i) && !w_done
    }
    io.slave.r.ready := io.master(slave_r_arb_id).r.ready
    io.slave.b.ready := io.master(slave_b_arb_id).b.ready
    io.slave.w.bits := io.master(w_chosen).w.bits
    io.slave.w.valid := io.master(w_chosen).w.valid && !w_done
    io.slave.ar <> ar_arb.io.out
    // A new write address is held back until the previous write burst finishes.
    io.slave.aw.bits <> aw_arb.io.out.bits
    io.slave.aw.valid := aw_arb.io.out.valid && w_done
    aw_arb.io.out.ready := io.slave.aw.ready && w_done
  } else { io.slave <> io.master.head }
 }
 /** A slave that sends a decode error for every request it receives.
  *
  *  Reads: each accepted read address is answered with `len + 1` beats of zero
  *  data carrying the request ID and a `RESP_DECERR` response.
  *  Writes: once a write address is accepted, write data is drained up to and
  *  including the beat marked `last`; a `RESP_DECERR` write response carrying
  *  the request ID is then returned.
  *
  *  Each accepted address is also reported with a printf. */
 class NastiErrorSlave(implicit p: Parameters) extends NastiModule {
  val io = (new NastiIO).flip
  when (io.ar.fire()) { printf("Invalid read address %x\n", io.ar.bits.addr) }
  when (io.aw.fire()) { printf("Invalid write address %x\n", io.aw.bits.addr) }

  // Holds the read request being answered until its last beat has been sent.
  val r_queue = Module(new Queue(new NastiReadAddressChannel, 1))
  r_queue.io.enq <> io.ar
  val responding = Reg(init = Bool(false))
  val beats_left = Reg(init = UInt(0, nastiXLenBits))
  // Start a response burst when a request is waiting and none is in progress.
  when (!responding && r_queue.io.deq.valid) {
    responding := Bool(true)
    beats_left := r_queue.io.deq.bits.len
  }
  io.r.valid := r_queue.io.deq.valid && responding
  io.r.bits.id := r_queue.io.deq.bits.id
  io.r.bits.data := UInt(0)
  io.r.bits.resp := RESP_DECERR
  io.r.bits.last := beats_left === UInt(0)
  // Tie off the user field so no response field is left undriven, matching
  // what the NastiReadDataChannel factory does.
  io.r.bits.user := UInt(0)
  // The request is retired together with its final beat.
  r_queue.io.deq.ready := io.r.fire() && io.r.bits.last
  when (io.r.fire()) {
    when (beats_left === UInt(0)) {
      responding := Bool(false)
    } .otherwise {
      beats_left := beats_left - UInt(1)
    }
  }

  // While draining, write data is accepted and discarded.
  val draining = Reg(init = Bool(false))
  io.w.ready := draining
  when (io.aw.fire()) { draining := Bool(true) }
  // Declared later, so it wins if it coincides with the assignment above.
  when (io.w.fire() && io.w.bits.last) { draining := Bool(false) }

  // Holds the write ID until the response can be sent. New write addresses and
  // the response itself are both held back while data is still draining.
  val b_queue = Module(new Queue(UInt(width = nastiWIdBits), 1))
  b_queue.io.enq.valid := io.aw.valid && !draining
  b_queue.io.enq.bits := io.aw.bits.id
  io.aw.ready := b_queue.io.enq.ready && !draining
  io.b.valid := b_queue.io.deq.valid && !draining
  io.b.bits.id := b_queue.io.deq.bits
  // Use the named constant, as the read path does, rather than a raw literal.
  io.b.bits.resp := RESP_DECERR
  // Tie off the user field, matching the NastiWriteResponseChannel factory.
  io.b.bits.user := UInt(0)
  b_queue.io.deq.ready := io.b.ready && !draining
 }
 /** Ports of a Nasti router: one flipped master-facing port and `nSlaves`
  *  slave-facing ports. */
 class NastiRouterIO(nSlaves: Int)(implicit p: Parameters) extends Bundle {
  val master = (new NastiIO).flip
  val slave = Vec(nSlaves, new NastiIO)

  // Rebuild with the same constructor argument so the bundle can be cloned.
  override def cloneType: this.type = new NastiRouterIO(nSlaves).asInstanceOf[this.type]
 }
 /** Take a single Nasti master and route its requests to various slaves.
 *
 *  Requests whose address selects no slave are sent to an internal
 *  NastiErrorSlave. Responses from all slaves and the error slave are merged
 *  back onto the master port through arbiters.
 *
 *  @param nSlaves the number of slaves
 *  @param routeSel a function which takes an address and produces
 *  a one-hot encoded selection of the slave to write to */
 class NastiRouter(nSlaves: Int, routeSel: UInt => UInt)(implicit p: Parameters)
    extends NastiModule {
  val io = new NastiRouterIO(nSlaves)
  // One selection bit per slave, for the read and write address respectively.
  val ar_route = routeSel(io.master.ar.bits.addr)
  val aw_route = routeSel(io.master.aw.bits.addr)
  // Accumulators: each loop iteration ORs in the ready of the selected slave.
  var ar_ready = Bool(false)
  var aw_ready = Bool(false)
  var w_ready = Bool(false)
  io.slave.zipWithIndex.foreach { case (s, i) =>
    s.ar.valid := io.master.ar.valid && ar_route(i)
    s.ar.bits := io.master.ar.bits
    ar_ready = ar_ready || (s.ar.ready && ar_route(i))
    s.aw.valid := io.master.aw.valid && aw_route(i)
    s.aw.bits := io.master.aw.bits
    aw_ready = aw_ready || (s.aw.ready && aw_route(i))
    // Per-slave flag: set when this slave accepts a write address, cleared on
    // the last write data beat. The set is declared later, so it wins if both
    // happen in the same cycle.
    val chosen = Reg(init = Bool(false))
    when (s.w.fire() && s.w.bits.last) { chosen := Bool(false) }
    when (s.aw.fire()) { chosen := Bool(true) }
    // Write data is only offered to a slave that holds the flag.
    s.w.valid := io.master.w.valid && chosen
    s.w.bits := io.master.w.bits
    w_ready = w_ready || (s.w.ready && chosen)
  }
  // No selection bit set means the address maps to no slave.
  val r_invalid = !ar_route.orR
  val w_invalid = !aw_route.orR
  val err_slave = Module(new NastiErrorSlave)
  err_slave.io.ar.valid := r_invalid && io.master.ar.valid
  err_slave.io.ar.bits := io.master.ar.bits
  err_slave.io.aw.valid := w_invalid && io.master.aw.valid
  err_slave.io.aw.bits := io.master.aw.bits
  // Write data is always presented to the error slave; its ready is ORed into
  // the master's w.ready below.
  err_slave.io.w.valid := io.master.w.valid
  err_slave.io.w.bits := io.master.w.bits
  io.master.ar.ready := ar_ready || (r_invalid && err_slave.io.ar.ready)
  io.master.aw.ready := aw_ready || (w_invalid && err_slave.io.aw.ready)
  io.master.w.ready := w_ready || err_slave.io.w.ready
  // Response arbiters have one extra input for the error slave.
  val b_arb = Module(new RRArbiter(new NastiWriteResponseChannel, nSlaves + 1))
  val r_arb = Module(new JunctionsPeekingArbiter(
    new NastiReadDataChannel, nSlaves + 1,
    // we can unlock if it's the last beat
    (r: NastiReadDataChannel) => r.last))
  for (i <- 0 until nSlaves) {
    b_arb.io.in(i) <> io.slave(i).b
    r_arb.io.in(i) <> io.slave(i).r
  }
  b_arb.io.in(nSlaves) <> err_slave.io.b
  r_arb.io.in(nSlaves) <> err_slave.io.r
  io.master.b <> b_arb.io.out
  io.master.r <> r_arb.io.out
 }
 /** Crossbar between multiple Nasti masters and slaves.
 *
 *  A single master gets one router wired straight to the slave ports.
 *  Otherwise every master gets its own router and every slave gets an
 *  arbiter that merges the matching output of each router.
 *
 *  @param nMasters the number of Nasti masters
 *  @param nSlaves the number of Nasti slaves
 *  @param routeSel a function selecting the slave to route an address to */
 class NastiCrossbar(nMasters: Int, nSlaves: Int, routeSel: UInt => UInt)
                    (implicit p: Parameters) extends NastiModule {
  val io = new Bundle {
    val masters = Vec(nMasters, new NastiIO).flip
    val slaves = Vec(nSlaves, new NastiIO)
  }

  if (nMasters != 1) {
    val routers = Vec.fill(nMasters) { Module(new NastiRouter(nSlaves, routeSel)).io }
    val arbiters = Vec.fill(nSlaves) { Module(new NastiArbiter(nMasters)).io }

    // Each master drives its own router.
    io.masters.zipWithIndex.foreach { case (master, idx) =>
      routers(idx).master <> master
    }

    // Each slave is fed by an arbiter over that slave's port on every router.
    io.slaves.zipWithIndex.foreach { case (slave, idx) =>
      val arbiter = arbiters(idx)
      arbiter.master <> Vec(routers.map(r => r.slave(idx)))
      slave <> arbiter.slave
    }
  } else {
    // No arbitration is needed with a single master.
    val router = Module(new NastiRouter(nSlaves, routeSel))
    router.io.master <> io.masters.head
    io.slaves <> router.io.slave
  }
 }
 /** Ports of a Nasti interconnect.
  *
  *  @param nMasters number of master-facing ports
  *  @param nSlaves  number of slave-facing ports */
 class NastiInterconnectIO(val nMasters: Int, val nSlaves: Int)
                          (implicit p: Parameters) extends Bundle {
  /* The directions look backwards at first glance: the interconnect acts as a
   * slave towards the masters and as a master towards the slaves, so the
   * master-facing ports are the flipped ones. */
  val masters = Vec(nMasters, new NastiIO).flip
  val slaves = Vec(nSlaves, new NastiIO)

  // Rebuild with the same constructor arguments so the bundle can be cloned.
  override def cloneType: this.type =
    new NastiInterconnectIO(nMasters, nSlaves).asInstanceOf[this.type]
 }
 /** Base class for Nasti interconnects. Subclasses supply the port counts;
  *  the IO bundle is built from them on first use. */
 abstract class NastiInterconnect(implicit p: Parameters) extends NastiModule()(p) {
  // Number of master-facing ports, defined by the subclass.
  val nMasters: Int
  // Number of slave-facing ports, defined by the subclass.
  val nSlaves: Int
  // Lazy so that it is constructed after the subclass has set the counts above.
  lazy val io = new NastiInterconnectIO(nMasters, nSlaves)
 }
 /** Interconnect built recursively from an address map.
  *
  *  A crossbar routes each request to the top-level map entry containing its
  *  address. Entries that are themselves address maps get a nested
  *  single-master interconnect (or an error slave when the submap is empty);
  *  memory ranges are exposed directly as slave ports.
  *
  *  @param nMasters number of master-facing ports
  *  @param addrMap  the address map describing the slaves */
 class NastiRecursiveInterconnect(val nMasters: Int, addrMap: AddrMap)
    (implicit p: Parameters) extends NastiInterconnect()(p) {
  // Looks up the slave port for a named address-map entry.
  def port(name: String) = io.slaves(addrMap.port(name))
  val nSlaves = addrMap.numSlaves
  // One bit per top-level entry, set when the entry contains the address. The
  // list is reversed before concatenation so that entry i lands in bit i.
  val routeSel = (addr: UInt) =>
    Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse)
  // NOTE(review): the crossbar is sized with addrMap.length while routeSel
  // iterates addrMap.entries — presumably the same count; confirm.
  val xbar = Module(new NastiCrossbar(nMasters, addrMap.length, routeSel))
  xbar.io.masters <> io.masters
  // Flatten the per-entry results into the list of outward-facing slave ports.
  io.slaves <> addrMap.entries.zip(xbar.io.slaves).flatMap {
    case (entry, xbarSlave) => {
      entry.region match {
        // Empty submap: terminate the port with an error slave; no outward port.
        case submap: AddrMap if submap.entries.isEmpty =>
          val err_slave = Module(new NastiErrorSlave)
          err_slave.io <> xbarSlave
          None
        // Non-empty submap: recurse, exposing the nested interconnect's slaves.
        case submap: AddrMap =>
          val ic = Module(new NastiRecursiveInterconnect(1, submap))
          ic.io.masters.head <> xbarSlave
          ic.io.slaves
        // Memory range: the crossbar port is itself the outward port.
        case r: MemRange =>
          Some(xbarSlave)
      }
    }
  }
 }
 /** Address arithmetic for interleaving blocks across `nChannels` memory
  *  channels. The channel-select field sits directly above the offset bits of
  *  one block of `dataBytes` bytes. */
 class ChannelHelper(nChannels: Int)
    (implicit val p: Parameters) extends HasNastiParameters {
  val dataBytes = p(MIFDataBits) * p(MIFDataBeats) / 8
  val chanSelBits = log2Ceil(nChannels)
  val selOffset = log2Up(dataBytes)
  val blockOffset = selOffset + chanSelBits

  /** Channel-select field of `addr`, or zero when there is only one channel. */
  def getSelect(addr: UInt) =
    if (nChannels <= 1) UInt(0)
    else addr(blockOffset - 1, selOffset)

  /** `addr` with the channel-select field removed; unchanged when there is
   *  only one channel. */
  def getAddr(addr: UInt) =
    if (nChannels <= 1) addr
    else {
      val aboveSelect = addr(nastiXAddrBits - 1, blockOffset)
      val belowSelect = addr(selOffset - 1, 0)
      Cat(aboveSelect, belowSelect)
    }
 }
 /** Connects memory banks to memory channels, one arbiter per channel.
  *
  *  @param nBanksPerChannel number of banks arbitrated onto each channel
  *  @param nChannels        number of memory channels */
 class NastiMemoryInterconnect(
    nBanksPerChannel: Int, nChannels: Int)
    (implicit p: Parameters) extends NastiInterconnect()(p) {
  val nBanks = nBanksPerChannel * nChannels
  val nMasters = nBanks
  val nSlaves = nChannels
  val chanHelper = new ChannelHelper(nChannels)
  // Bulk-connects a channel, then overrides both request addresses with the
  // versions that have the channel-select bits removed. The overrides must
  // come after the bulk connection to take effect.
  def connectChannel(outer: NastiIO, inner: NastiIO) {
    outer <> inner
    outer.ar.bits.addr := chanHelper.getAddr(inner.ar.bits.addr)
    outer.aw.bits.addr := chanHelper.getAddr(inner.aw.bits.addr)
  }
  for (i <- 0 until nChannels) {
    /* Bank assignments to channels are strided so that consecutive banks
     * map to different channels. That way, consecutive cache lines also
     * map to different channels */
    val banks = (i until nBanks by nChannels).map(j => io.masters(j))
    val channelArb = Module(new NastiArbiter(nBanksPerChannel))
    channelArb.io.master <> banks
    connectChannel(io.slaves(i), channelArb.io.slave)
  }
 }
 /** Ports of the memory selector: the usual interconnect ports plus a
  * `select` input that picks one of `nConfigs` memory configurations.
  *
  * Changing `select` is a dangerous operation: not only does it violate
  * Nasti, it also causes the memory of the machine to become garbled. It's
  * expected that select only changes at boot time, as part of the memory
  * controller configuration. */
 class NastiMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int)
                            (implicit p: Parameters)
                            extends NastiInterconnectIO(nBanks, maxMemChannels) {
  val select  = UInt(INPUT, width = log2Up(nConfigs))

  // nBanks and maxMemChannels are the values passed up as nMasters and nSlaves.
  override def cloneType: this.type =
    new NastiMemorySelectorIO(nBanks, maxMemChannels, nConfigs).asInstanceOf[this.type]
 }
 /** Switches the memory system between several bank-to-channel layouts.
  *
  * One NastiMemoryInterconnect is built per entry of `configs`; the `select`
  * input chooses which of them is connected to the outer ports. All other
  * interconnects, and any outer ports the chosen one does not use, are
  * driven with inactive defaults.
  *
  * @param nBanks         number of master-facing (bank) ports
  * @param maxMemChannels number of slave-facing (channel) ports
  * @param configs        channel count of each selectable layout; every
  *                       entry must be positive, at most `maxMemChannels`,
  *                       and divide `nBanks` evenly */
 class NastiMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int])
                          (implicit p: Parameters)
                          extends NastiInterconnect()(p) {
  val nMasters = nBanks
  val nSlaves  = maxMemChannels
  val nConfigs = configs.size

  /* Reject unusable layouts up front. Without these checks a bad entry only
   * surfaces later as a division by zero or an out-of-range Vec index while
   * the ports are being wired together. */
  configs.foreach { nChannels =>
    require(nChannels > 0 && nChannels <= maxMemChannels,
      s"Memory configuration with $nChannels channels must use between 1 and $maxMemChannels channels")
    require(nBanks % nChannels == 0,
      s"Number of banks ($nBanks) must be a multiple of the number of channels ($nChannels)")
  }

  override lazy val io = new NastiMemorySelectorIO(nBanks, maxMemChannels, nConfigs)

  /* Connects one decoupled channel from `up` to `dn`, but only while `active`
   * is set; otherwise the previously assigned defaults remain in force. */
  def muxOnSelect(up: DecoupledIO[Bundle], dn: DecoupledIO[Bundle], active: Bool): Unit = {
    when (active) { dn.bits  := up.bits  }
    when (active) { up.ready := dn.ready }
    when (active) { dn.valid := up.valid }
  }

  /* Connects all five Nasti channels. Requests flow from `up` to `dn`;
   * responses (b and r) flow the other way, hence the swapped arguments. */
  def muxOnSelect(up: NastiIO, dn: NastiIO, active: Bool): Unit = {
    muxOnSelect(up.aw, dn.aw, active)
    muxOnSelect(up.w,  dn.w,  active)
    muxOnSelect(dn.b,  up.b,  active)
    muxOnSelect(up.ar, dn.ar, active)
    muxOnSelect(dn.r,  up.r,  active)
  }

  /* Connects the first `up.size` ports of `dn` to the ports of `up`. */
  def muxOnSelect(up: Vec[NastiIO], dn: Vec[NastiIO], active: Bool) : Unit = {
    for (i <- 0 until up.size)
      muxOnSelect(up(i), dn(i), active)
  }

  /* Disconnects a vector of Nasti ports, which involves setting them to
   * invalid.  Due to Chisel reasons, we need to also set the bits to 0 (since
   * there can't be any unconnected inputs). */
  def disconnectSlave(slave: Vec[NastiIO]) = {
    slave.foreach{ m =>
      m.aw.valid := Bool(false)
      m.aw.bits  := m.aw.bits.fromBits( UInt(0) )
      m.w.valid  := Bool(false)
      m.w.bits   := m.w.bits.fromBits( UInt(0) )
      m.b.ready  := Bool(false)
      m.ar.valid := Bool(false)
      m.ar.bits  := m.ar.bits.fromBits( UInt(0) )
      m.r.ready  := Bool(false)
    }
  }

  /* The counterpart of disconnectSlave for the opposite side of a port:
   * never ready for requests, never presenting a response. */
  def disconnectMaster(master: Vec[NastiIO]) = {
    master.foreach{ m =>
      m.aw.ready := Bool(false)
      m.w.ready  := Bool(false)
      m.b.valid  := Bool(false)
      m.b.bits   := m.b.bits.fromBits( UInt(0) )
      m.ar.ready := Bool(false)
      m.r.valid  := Bool(false)
      m.r.bits   := m.r.bits.fromBits( UInt(0) )
    }
  }

  /* Provides default wires on all our outputs. */
  disconnectMaster(io.masters)
  disconnectSlave(io.slaves)

  /* Constructs interconnects for each of the layouts suggested by the
   * configuration and switches between them based on the select input. */
  configs.zipWithIndex.foreach{ case (nChannels, select) =>
    val nBanksPerChannel = nBanks / nChannels
    val ic = Module(new NastiMemoryInterconnect(nBanksPerChannel, nChannels))
    /* Defaults first; the conditional connections below override them while
     * this layout is the selected one. */
    disconnectMaster(ic.io.slaves)
    disconnectSlave(ic.io.masters)
    muxOnSelect(   io.masters, ic.io.masters, io.select === UInt(select))
    muxOnSelect(ic.io.slaves,     io.slaves,  io.select === UInt(select))
  }
 }
 /** Connects one Nasti master to one of `nRoutes` slaves, chosen by the
  *  `select` input. Slaves that are not selected see no valid requests and
  *  their responses are not accepted.
  *
  *  @param nRoutes the number of slave ports */
 class NastiMemoryDemux(nRoutes: Int)(implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val master = (new NastiIO).flip
    val slaves = Vec(nRoutes, new NastiIO)
    val select = UInt(INPUT, log2Up(nRoutes))
  }
  // Request direction: bits are always forwarded, valid is gated by the
  // selection, and ready is only taken from the selected slave.
  def connectReqChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) {
    out.valid := in.valid && io.select === UInt(idx)
    out.bits := in.bits
    when (io.select === UInt(idx)) { in.ready := out.ready }
  }
  // Response direction: valid and bits are only taken from the selected
  // slave, and ready is gated by the selection.
  def connectRespChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) {
    when (io.select === UInt(idx)) { out.valid := in.valid }
    when (io.select === UInt(idx)) { out.bits := in.bits }
    in.ready := out.ready && io.select === UInt(idx)
  }
  // Defaults for the master-facing signals. They must be assigned before the
  // conditional connections below so that those can override them.
  io.master.ar.ready := Bool(false)
  io.master.aw.ready := Bool(false)
  io.master.w.ready := Bool(false)
  io.master.r.valid := Bool(false)
  io.master.r.bits := NastiReadDataChannel(id = UInt(0), data = UInt(0))
  io.master.b.valid := Bool(false)
  io.master.b.bits := NastiWriteResponseChannel(id = UInt(0))
  io.slaves.zipWithIndex.foreach { case (slave, i) =>
    connectReqChannel(i, slave.ar, io.master.ar)
    connectReqChannel(i, slave.aw, io.master.aw)
    connectReqChannel(i, slave.w, io.master.w)
    connectRespChannel(i, io.master.r, slave.r)
    connectRespChannel(i, io.master.b, slave.b)
  }
 }
 /** Carries a Nasti port from the current clock domain into another one. */
 object AsyncNastiTo {
  // source(master) is in our clock domain, output is in the 'to' clock domain
  // The three request channels (aw, ar, w) cross towards the 'to' domain and
  // the two response channels (b, r) cross back from it. depth and sync are
  // passed unchanged to every crossing.
  // NOTE(review): the type parameter T is not used in the body — confirm it
  // can be dropped.
  def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = {
    val sink = Wire(new NastiIO)
    sink.aw <> AsyncDecoupledTo(to_clock, to_reset, source.aw, depth, sync)
    sink.ar <> AsyncDecoupledTo(to_clock, to_reset, source.ar, depth, sync)
    sink.w  <> AsyncDecoupledTo(to_clock, to_reset, source.w,  depth, sync)
    source.b <> AsyncDecoupledFrom(to_clock, to_reset, sink.b, depth, sync)
    source.r <> AsyncDecoupledFrom(to_clock, to_reset, sink.r, depth, sync)
    sink
  }
 }
 /** Carries a Nasti port from another clock domain into the current one. */
 object AsyncNastiFrom {
  // source(master) is in the 'from' clock domain, output is in our clock domain
  // The three request channels (aw, ar, w) cross in from the 'from' domain and
  // the two response channels (b, r) cross back towards it. depth and sync are
  // passed unchanged to every crossing.
  // NOTE(review): the type parameter T is not used in the body — confirm it
  // can be dropped.
  def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = {
    val sink = Wire(new NastiIO)
    sink.aw <> AsyncDecoupledFrom(from_clock, from_reset, source.aw, depth, sync)
    sink.ar <> AsyncDecoupledFrom(from_clock, from_reset, source.ar, depth, sync)
    sink.w  <> AsyncDecoupledFrom(from_clock, from_reset, source.w,  depth, sync)
    source.b <> AsyncDecoupledTo(from_clock, from_reset, sink.b, depth, sync)
    source.r <> AsyncDecoupledTo(from_clock, from_reset, sink.r, depth, sync)
    sink
  }
 }
--- a/junctions/src/main/scala/package.scala
+++ b/junctions/src/main/scala/package.scala
@@ -0,0 +1 @@
 package object junctions
--- a/junctions/src/main/scala/poci.scala
+++ b/junctions/src/main/scala/poci.scala
@@ -0,0 +1,82 @@
 package junctions
 import Chisel._
 import cde.{Parameters, Field}
 /** Poci port as seen from the requesting side. Address and data widths are
  *  taken from the Hasti parameters. */
 class PociIO(implicit p: Parameters) extends HastiBundle()(p) {
  // Driven by the requester.
  val paddr = UInt(OUTPUT, hastiAddrBits)
  val pwrite = Bool(OUTPUT)
  val psel = Bool(OUTPUT)
  val penable = Bool(OUTPUT)
  val pwdata = UInt(OUTPUT, hastiDataBits)
  // Driven by the responder.
  val prdata = UInt(INPUT, hastiDataBits)
  val pready = Bool(INPUT)
  val pslverr = Bool(INPUT)
 }
 /** Bridges a Hasti slave port onto a Poci port using a three-state machine
  *  (idle, setup, access). */
 class HastiToPociBridge(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val in = new HastiSlaveIO
    val out = new PociIO
  }

  val s_idle :: s_setup :: s_access :: Nil = Enum(UInt(), 3)
  val state = Reg(init = s_idle)
  // A transfer is requested when this slave is selected and bit 1 of htrans is set.
  val transfer = io.in.hsel & io.in.htrans(1)

  switch (state) {
    is (s_idle) {
      when (transfer) { state := s_setup }
    }
    is (s_setup) {
      state := s_access
    }
    is (s_access) {
      // Stay here until pready; then start the next transfer if one is
      // requested, otherwise return to idle.
      when (io.out.pready) { state := Mux(transfer, s_setup, s_idle) }
    }
  }

  // Address and direction are captured whenever a transfer is requested.
  val haddr_reg = Reg(UInt(width = hastiAddrBits))
  val hwrite_reg = Reg(UInt(width = 1))
  when (transfer) {
    hwrite_reg := io.in.hwrite
    haddr_reg  := io.in.haddr
  }

  // Poci side: select is held for the whole transfer, enable only during access.
  io.out.psel := (state =/= s_idle)
  io.out.penable := (state === s_access)
  io.out.paddr := haddr_reg
  io.out.pwrite := hwrite_reg(0)
  io.out.pwdata := io.in.hwdata

  // Hasti side: ready when idle, or when the access completes.
  io.in.hready := (state === s_idle) | ((state === s_access) & io.out.pready)
  io.in.hrdata := io.out.prdata
  io.in.hresp := io.out.pslverr
 }
 /** Poci bus with one master-facing port and one slave port per entry of
  *  `amap`. Each entry is a predicate deciding whether an address belongs to
  *  the corresponding slave; the lowest-indexed match is selected.
  *
  *  @param amap address predicates, one per slave */
 class PociBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val master = new PociIO().flip
    val slaves = Vec(amap.size, new PociIO)
  }

  // Address, direction and write data are broadcast to every slave.
  io.slaves.foreach { s =>
    s.paddr := io.master.paddr
    s.pwrite := io.master.pwrite
    s.pwdata := io.master.pwdata
  }

  // One-hot select: a slave is a candidate when its predicate matches the
  // address and the master asserts psel.
  val psels = PriorityEncoderOH(
    amap.map(afn => afn(io.master.paddr) && io.master.psel))

  // Only the selected slave sees psel and penable.
  for ((s, psel) <- io.slaves zip psels) {
    s.psel := psel
    s.penable := io.master.penable && psel
  }

  // The selected slave's response is returned to the master.
  io.master.prdata := Mux1H(psels, io.slaves.map(_.prdata))
  io.master.pready := Mux1H(psels, io.slaves.map(_.pready))
  io.master.pslverr := Mux1H(psels, io.slaves.map(_.pslverr))
 }
--- a/junctions/src/main/scala/slowio.scala
+++ b/junctions/src/main/scala/slowio.scala
@@ -0,0 +1,70 @@
 // See LICENSE for license details.
 package junctions
 import Chisel._
 /** Moves decoupled data between the module's own clock and a slower clock
  *  that it generates by counting.
  *
  *  @param divisor_max upper bound for the clock divisor; must be a power of
  *         two between 8 and 65536
  *  @param data generator for the payload type carried in both directions */
 class SlowIO[T <: Data](val divisor_max: Int)(data: => T) extends Module
 {
  val io = new Bundle {
    val out_fast = Decoupled(data).flip
    val out_slow = Decoupled(data)
    val in_fast = Decoupled(data)
    val in_slow = Decoupled(data).flip
    val clk_slow = Bool(OUTPUT)
    val set_divisor = Valid(Bits(width = 32)).flip
    val divisor = Bits(OUTPUT, 32)
  }
  require(divisor_max >= 8 && divisor_max <= 65536 && isPow2(divisor_max))
  // Active divisor and hold values, plus shadow copies that receive new
  // settings and are applied at the next falling edge.
  val divisor = Reg(init=UInt(divisor_max-1))
  val d_shadow = Reg(init=UInt(divisor_max-1))
  val hold = Reg(init=UInt(divisor_max/4-1))
  val h_shadow = Reg(init=UInt(divisor_max/4-1))
  // The divisor is taken from the low bits of the setting, the hold value
  // from the bits starting at position 16.
  when (io.set_divisor.valid) {
    d_shadow := io.set_divisor.bits(log2Up(divisor_max)-1, 0).toUInt
    h_shadow := io.set_divisor.bits(log2Up(divisor_max)-1+16, 16).toUInt
  }
  // Read-back uses the same layout as the setting.
  io.divisor := (hold << 16) | divisor
  val count = Reg{UInt(width = log2Up(divisor_max))}
  val myclock = Reg{Bool()}
  // Default increment; overridden by the reset to zero in the falling block below.
  count := count + UInt(1)
  // Event points within one slow-clock period.
  val rising = count === (divisor >> 1)
  val falling = count === divisor
  val held = count === (divisor >> 1) + hold
  when (falling) {
    divisor := d_shadow
    hold := h_shadow
    count := UInt(0)
    myclock := Bool(false)
  }
  when (rising) {
    myclock := Bool(true)
  }
  // Registered versions of the slow-side handshake signals; updated at the
  // held point below.
  val in_slow_rdy = Reg(init=Bool(false))
  val out_slow_val = Reg(init=Bool(false))
  val out_slow_bits = Reg(data)
  // Slow-to-fast direction: data is captured at the rising point.
  val fromhost_q = Module(new Queue(data,1))
  fromhost_q.io.enq.valid := rising && (io.in_slow.valid && in_slow_rdy || this.reset)
  fromhost_q.io.enq.bits := io.in_slow.bits
  io.in_fast <> fromhost_q.io.deq
  // Fast-to-slow direction: an entry is released at the rising point once the
  // slow side has accepted it.
  val tohost_q = Module(new Queue(data,1))
  tohost_q.io.enq <> io.out_fast
  tohost_q.io.deq.ready := rising && io.out_slow.ready && out_slow_val
  when (held) {
    in_slow_rdy := fromhost_q.io.enq.ready
    out_slow_val := tohost_q.io.deq.valid
    // During reset the slow output carries the incoming queue's data instead.
    out_slow_bits := Mux(this.reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits)
  }
  io.in_slow.ready := in_slow_rdy
  io.out_slow.valid := out_slow_val
  io.out_slow.bits := out_slow_bits
  io.clk_slow := myclock
 }
--- a/junctions/src/main/scala/smi.scala
+++ b/junctions/src/main/scala/smi.scala
@@ -0,0 +1,281 @@
 package junctions
 import Chisel._
 import cde.Parameters
 /** Request carried on the Smi request channel.
  *
  *  @param dataWidth the width in bits of the data field
  *  @param addrWidth the width in bits of the addr field */
 class SmiReq(val dataWidth: Int, val addrWidth: Int) extends Bundle {
  val rw = Bool()
  val addr = UInt(width = addrWidth)
  val data = Bits(width = dataWidth)

  // Rebuild with the same widths so the bundle can be cloned.
  override def cloneType: this.type = new SmiReq(dataWidth, addrWidth).asInstanceOf[this.type]
 }
 /** Simple Memory Interface IO, used to communicate with PCR and SCR. It has
 *  an outgoing request channel and an incoming response channel that carries
 *  data only.
 *  @param dataWidth the width in bits of the data field
 *  @param addrWidth the width in bits of the addr field */
 class SmiIO(val dataWidth: Int, val addrWidth: Int) extends Bundle {
  val req = Decoupled(new SmiReq(dataWidth, addrWidth))
  val resp = Decoupled(Bits(width = dataWidth)).flip

  // Rebuild with the same widths so the bundle can be cloned.
  override def cloneType: this.type = new SmiIO(dataWidth, addrWidth).asInstanceOf[this.type]
 }
 /** Base class for modules that sit on the responding side of an Smi port.
  *  Subclasses supply the data and address widths. */
 abstract class SmiPeripheral extends Module {
  // Width in bits of the Smi data field, defined by the subclass.
  val dataWidth: Int
  // Width in bits of the Smi address field, defined by the subclass.
  val addrWidth: Int
  // Lazy so that it is constructed after the subclass has set the widths above.
  lazy val io = new SmiIO(dataWidth, addrWidth).flip
 }
 /** A simple sequential memory accessed through Smi. One request is handled
 *  at a time: a new request is only accepted once the previous response has
 *  been taken.
 *  @param dataWidth the width in bits of each memory word
 *  @param memDepth the number of words in the memory */
 class SmiMem(val dataWidth: Int, val memDepth: Int) extends SmiPeripheral {
  // Implements the abstract member; must be set before io is first used.
  val addrWidth = log2Up(memDepth)

  val mem = SeqMem(memDepth, Bits(width = dataWidth))

  val wen = io.req.fire() && io.req.bits.rw
  val ren = io.req.fire() && !io.req.bits.rw

  // Set by an accepted request, cleared when the response is taken. An
  // accepted request has priority if both happen in the same cycle.
  val resp_valid = Reg(init = Bool(false))
  when (io.req.fire()) {
    resp_valid := Bool(true)
  } .elsewhen (io.resp.fire()) {
    resp_valid := Bool(false)
  }

  when (wen) { mem.write(io.req.bits.addr, io.req.bits.data) }

  io.req.ready := !resp_valid
  io.resp.valid := resp_valid
  io.resp.bits := mem.read(io.req.bits.addr, ren)
 }
 /** Arbitrate among several Smi clients.
 *
 *  Only one request is outstanding at a time: after a request is issued, no
 *  further request is forwarded until its response has been delivered to the
 *  client that issued it.
 *
 *  @param n the number of clients
 *  @param dataWidth Smi data width
 *  @param addrWidth Smi address width */
 class SmiArbiter(val n: Int, val dataWidth: Int, val addrWidth: Int)
    extends Module {
  val io = new Bundle {
    val in = Vec(n, new SmiIO(dataWidth, addrWidth)).flip
    val out = new SmiIO(dataWidth, addrWidth)
  }
  // wait_resp: a request has been issued and its response is still pending.
  val wait_resp = Reg(init = Bool(false))
  // choice: index of the client whose request is outstanding.
  val choice = Reg(UInt(width = log2Up(n)))
  val req_arb = Module(new RRArbiter(new SmiReq(dataWidth, addrWidth), n))
  req_arb.io.in <> io.in.map(_.req)
  // Requests are blocked while a response is pending.
  req_arb.io.out.ready := io.out.req.ready && !wait_resp
  io.out.req.bits := req_arb.io.out.bits
  io.out.req.valid := req_arb.io.out.valid && !wait_resp
  when (io.out.req.fire()) {
    choice := req_arb.io.chosen
    wait_resp := Bool(true)
  }
  // Declared after the block above, so this assignment wins if both fire together.
  when (io.out.resp.fire()) { wait_resp := Bool(false) }
  // The response data goes to every client; only the chosen one sees valid.
  for ((resp, i) <- io.in.map(_.resp).zipWithIndex) {
    resp.bits := io.out.resp.bits
    resp.valid := io.out.resp.valid && choice === UInt(i)
  }
  io.out.resp.ready := io.in(choice).resp.ready
 }
 /** Serves Nasti read requests by issuing Smi reads.
  *
  *  For every Nasti data beat the module issues one or more Smi read
  *  requests, collects the returned words into a buffer, and then presents
  *  the buffer as the beat's data.
  *
  *  @param dataWidth Smi data width in bits
  *  @param addrWidth Smi address width in bits */
 class SmiIONastiReadIOConverter(val dataWidth: Int, val addrWidth: Int)
                                (implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = new NastiReadIO().flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }
  private val maxWordsPerBeat = nastiXDataBits / dataWidth
  private val wordCountBits = log2Up(maxWordsPerBeat)
  private val byteOffBits = log2Up(dataWidth / 8)
  private val addrOffBits = addrWidth + byteOffBits
  // Number of Smi words covered by a transfer of the given Nasti size, minus one.
  private def calcWordCount(size: UInt): UInt =
    (UInt(1) << (size - UInt(byteOffBits))) - UInt(1)
  val (s_idle :: s_read :: s_resp :: Nil) = Enum(Bits(), 3)
  val state = Reg(init = s_idle)
  // nWords: Smi words still to be received for the current beat, minus one.
  val nWords = Reg(UInt(width = wordCountBits))
  // nBeats: Nasti beats still to be sent, minus one.
  val nBeats = Reg(UInt(width = nastiXLenBits))
  val addr = Reg(UInt(width = addrWidth))
  val id = Reg(UInt(width = nastiRIdBits))
  val byteOff = Reg(UInt(width = byteOffBits))
  // recvInd: buffer slot that the next Smi response is written into.
  val recvInd = Reg(init = UInt(0, wordCountBits))
  // sendDone: all Smi requests for the current beat have been issued.
  val sendDone = Reg(init = Bool(false))
  val buffer = Reg(init = Vec.fill(maxWordsPerBeat) { Bits(0, dataWidth) })
  io.nasti.ar.ready := (state === s_idle)
  io.smi.req.valid := (state === s_read) && !sendDone
  io.smi.req.bits.rw := Bool(false)
  io.smi.req.bits.addr := addr
  // NOTE(review): io.smi.req.bits.data is not assigned in this module —
  // confirm leaving it undriven for reads is acceptable.
  io.smi.resp.ready := (state === s_read)
  io.nasti.r.valid := (state === s_resp)
  io.nasti.r.bits := NastiReadDataChannel(
    id = id,
    data = buffer.toBits,
    last = (nBeats === UInt(0)))
  // Accept a read address: latch the burst parameters and start reading.
  when (io.nasti.ar.fire()) {
    // A transfer narrower than one Smi word still reads a single word.
    when (io.nasti.ar.bits.size < UInt(byteOffBits)) {
      nWords := UInt(0)
    } .otherwise {
      nWords := calcWordCount(io.nasti.ar.bits.size)
    }
    nBeats := io.nasti.ar.bits.len
    addr := io.nasti.ar.bits.addr(addrOffBits - 1, byteOffBits)
    // Start filling the buffer at the word position given by the address.
    if (maxWordsPerBeat > 1)
      recvInd := io.nasti.ar.bits.addr(wordCountBits + byteOffBits - 1, byteOffBits)
    else
      recvInd := UInt(0)
    id := io.nasti.ar.bits.id
    state := s_read
  }
  when (io.smi.req.fire()) {
    addr := addr + UInt(1)
    sendDone := (nWords === UInt(0))
  }
  when (io.smi.resp.fire()) {
    recvInd := recvInd + UInt(1)
    nWords := nWords - UInt(1)
    buffer(recvInd) := io.smi.resp.bits
    when (nWords === UInt(0)) { state := s_resp }
  }
  // NOTE(review): nWords is loaded only when ar fires and is not reloaded
  // between beats — confirm the word count is right on later beats of a burst.
  when (io.nasti.r.fire()) {
    recvInd := UInt(0)
    sendDone := Bool(false)
    // clear all the registers in the buffer
    buffer.foreach(_ := Bits(0))
    nBeats := nBeats - UInt(1)
    state := Mux(io.nasti.r.bits.last, s_idle, s_read)
  }
 }
 /** Serves Nasti write requests by issuing Smi writes.
  *
  *  Each Nasti data beat is latched and then sent out as one Smi write per
  *  strobed word, waiting for the Smi response after each write. The Nasti
  *  write response is sent after the last beat has been written.
  *
  *  @param dataWidth Smi data width in bits
  *  @param addrWidth Smi address width in bits */
 class SmiIONastiWriteIOConverter(val dataWidth: Int, val addrWidth: Int)
                                 (implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = new NastiWriteIO().flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }
  private val dataBytes = dataWidth / 8
  private val maxWordsPerBeat = nastiXDataBits / dataWidth
  private val byteOffBits = log2Floor(dataBytes)
  private val addrOffBits = addrWidth + byteOffBits
  private val nastiByteOffBits = log2Ceil(nastiXDataBits / 8)
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.size >= UInt(byteOffBits),
    "Nasti size must be >= Smi size")
  val id = Reg(UInt(width = nastiWIdBits))
  val addr = Reg(UInt(width = addrWidth))
  val offset = Reg(UInt(width = nastiByteOffBits))
  // Reduces the Nasti byte strobe to one bit per Smi word. The strobe is
  // first masked to the 2^size bytes starting at the byte offset; the bit for
  // word i is then the strobe of that word's first byte.
  def makeStrobe(offset: UInt, size: UInt, strb: UInt) = {
    val sizemask = (UInt(1) << (UInt(1) << size)) - UInt(1)
    val bytemask = strb & (sizemask << offset)
    Vec.tabulate(maxWordsPerBeat){i => bytemask(dataBytes * i)}.toBits
  }
  val size = Reg(UInt(width = nastiXSizeBits))
  // strb/data hold the words still to be sent; the current word is at the bottom.
  val strb = Reg(UInt(width = maxWordsPerBeat))
  val data = Reg(UInt(width = nastiXDataBits))
  val last = Reg(Bool())
  val s_idle :: s_data :: s_send :: s_ack :: s_resp :: Nil = Enum(Bits(), 5)
  val state = Reg(init = s_idle)
  io.nasti.aw.ready := (state === s_idle)
  io.nasti.w.ready := (state === s_data)
  // A request is only issued when the current word is strobed.
  io.smi.req.valid := (state === s_send) && strb(0)
  io.smi.req.bits.rw := Bool(true)
  io.smi.req.bits.addr := addr
  io.smi.req.bits.data := data(dataWidth - 1, 0)
  io.smi.resp.ready := (state === s_ack)
  io.nasti.b.valid := (state === s_resp)
  io.nasti.b.bits := NastiWriteResponseChannel(id)
  // Number of words to advance: chosen by priority from the strobe bits above
  // the current word, so that unstrobed words are skipped.
  val jump = if (maxWordsPerBeat > 1)
    PriorityMux(strb(maxWordsPerBeat - 1, 1),
      (1 until maxWordsPerBeat).map(UInt(_)))
    else UInt(1)
  // Accept a write address: latch the burst parameters and wait for data.
  when (io.nasti.aw.fire()) {
    if (dataWidth == nastiXDataBits) {
      addr := io.nasti.aw.bits.addr(addrOffBits - 1, byteOffBits)
    } else {
      // Start at the first Smi word of the Nasti beat containing the address.
      addr := Cat(io.nasti.aw.bits.addr(addrOffBits - 1, nastiByteOffBits),
                  UInt(0, nastiByteOffBits - byteOffBits))
    }
    offset := io.nasti.aw.bits.addr(nastiByteOffBits - 1, 0)
    id := io.nasti.aw.bits.id
    size := io.nasti.aw.bits.size
    last := Bool(false)
    state := s_data
  }
  // Latch one data beat and start sending its words.
  when (io.nasti.w.fire()) {
    last := io.nasti.w.bits.last
    strb := makeStrobe(offset, size, io.nasti.w.bits.strb)
    data := io.nasti.w.bits.data
    state := s_send
  }
  when (state === s_send) {
    // Advance once the current word has been accepted, or immediately when it
    // is not strobed. Only an actually issued write waits for a response.
    when (io.smi.req.ready || !strb(0)) {
      strb := strb >> jump
      data := data >> Cat(jump, UInt(0, log2Up(dataWidth)))
      addr := addr + jump
      when (strb(0)) { state := s_ack }
    }
  }
  // After a response: continue with the remaining words, or move on to the
  // next beat or the write response when no strobed words are left.
  when (io.smi.resp.fire()) {
    state := Mux(strb === UInt(0),
              Mux(last, s_resp, s_data), s_send)
  }
  when (io.nasti.b.fire()) { state := s_idle }
 }
 /** Convert Nasti protocol to Smi protocol.
 *
 *  Reads and writes are handled by separate converter modules whose Smi
 *  ports are merged onto the single outgoing Smi port by an arbiter.
 *
 *  @param dataWidth Smi data width in bits; must be a power of two and no
 *         wider than the Nasti data width
 *  @param addrWidth Smi address width in bits */
 class SmiIONastiIOConverter(val dataWidth: Int, val addrWidth: Int)
                            (implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }
  require(isPow2(dataWidth), "SMI data width must be power of 2")
  require(dataWidth <= nastiXDataBits,
    "SMI data width must be less than or equal to NASTI data width")
  // Each converter is bulk-connected to the full Nasti port.
  // NOTE(review): presumably each connection only picks up the channels the
  // converter's port declares — confirm against the bulk-connect rules.
  val reader = Module(new SmiIONastiReadIOConverter(dataWidth, addrWidth))
  reader.io.nasti <> io.nasti
  val writer = Module(new SmiIONastiWriteIOConverter(dataWidth, addrWidth))
  writer.io.nasti <> io.nasti
  // Input 0 carries reads, input 1 carries writes.
  val arb = Module(new SmiArbiter(2, dataWidth, addrWidth))
  arb.io.in(0) <> reader.io.smi
  arb.io.in(1) <> writer.io.smi
  io.smi <> arb.io.out
 }
--- a/junctions/src/main/scala/stream.scala
+++ b/junctions/src/main/scala/stream.scala
@@ -0,0 +1,187 @@
 package junctions
 import Chisel._
 import NastiConstants._
 import cde.Parameters
 /** One beat of a stream: a `w`-bit data word plus a `last` marker. */
 class StreamChannel(w: Int) extends Bundle {
  val data = UInt(width = w)
  val last = Bool()
  // Needed because the constructor takes a parameter.
  override def cloneType: this.type = {
    val copy = new StreamChannel(w)
    copy.asInstanceOf[this.type]
  }
 }
 /** Bidirectional stream port: a decoupled `out` channel and a flipped
  * decoupled `in` channel, both carrying `w`-bit StreamChannel beats.
  */
 class StreamIO(w: Int) extends Bundle {
  val out = Decoupled(new StreamChannel(w))
  val in = Decoupled(new StreamChannel(w)).flip
  // Needed because the constructor takes a parameter.
  override def cloneType: this.type = {
    val copy = new StreamIO(w)
    copy.asInstanceOf[this.type]
  }
 }
 /** Bridges a (flipped) NASTI port onto a StreamIO port.
  *
  * NASTI reads are served with beats taken from `io.stream.in`; NASTI write
  * data beats are forwarded to `io.stream.out`. Addresses are not used.
  * Simulation assertions check that every request uses the stream beat size,
  * that multi-beat requests use BURST_FIXED, and that write strobes are all
  * ones.
  *
  * @param w stream data width in bits
  */
 class NastiIOStreamIOConverter(w: Int)(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val stream = new StreamIO(w)
  }
  // Expected value of the NASTI size field: log2 of the stream width in bytes.
  val streamSize = UInt(log2Up(w / 8))
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === streamSize,
         "read channel wrong size on stream")
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(0) ||
         io.nasti.ar.bits.burst === BURST_FIXED,
         "read channel wrong burst type on stream")
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === streamSize,
         "write channel wrong size on stream")
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(0) ||
         io.nasti.aw.bits.burst === BURST_FIXED,
         "write channel wrong burst type on stream")
  assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR,
         "write channel cannot take partial writes")
  // ---- Read path: one outstanding read burst at a time ----
  val read_id = Reg(io.nasti.ar.bits.id)
  val read_cnt = Reg(io.nasti.ar.bits.len)
  val reading = Reg(init = Bool(false))
  io.nasti.ar.ready := !reading
  io.nasti.r.valid := reading && io.stream.in.valid
  // Bulk assignment from the stream beat; resp and id are overridden below.
  // NOTE(review): presumably fields are matched by name (data, last) -- confirm.
  io.nasti.r.bits := io.stream.in.bits
  io.nasti.r.bits.resp := UInt(0)
  io.nasti.r.bits.id := read_id
  io.stream.in.ready := reading && io.nasti.r.ready
  // Latch the request id and length when a read address is accepted.
  when (io.nasti.ar.fire()) {
    read_id := io.nasti.ar.bits.id
    read_cnt := io.nasti.ar.bits.len
    reading := Bool(true)
  }
  // Count response beats down; the burst ends on the beat where the counter
  // is already zero.
  when (io.nasti.r.fire()) {
    when (read_cnt === UInt(0)) {
      reading := Bool(false)
    } .otherwise {
      read_cnt := read_cnt - UInt(1)
    }
  }
  // ---- Write path: address, then data beats, then one response ----
  val write_id = Reg(io.nasti.aw.bits.id)
  val writing = Reg(init = Bool(false))
  val write_resp = Reg(init = Bool(false))
  // A new write address is only taken once the previous response has been sent.
  io.nasti.aw.ready := !writing && !write_resp
  io.nasti.w.ready := writing && io.stream.out.ready
  io.stream.out.valid := writing && io.nasti.w.valid
  io.stream.out.bits := io.nasti.w.bits
  io.nasti.b.valid := write_resp
  io.nasti.b.bits.resp := UInt(0)
  io.nasti.b.bits.id := write_id
  when (io.nasti.aw.fire()) {
    write_id := io.nasti.aw.bits.id
    writing := Bool(true)
  }
  // The beat flagged `last` ends the data phase and raises the response.
  when (io.nasti.w.fire() && io.nasti.w.bits.last) {
    writing := Bool(false)
    write_resp := Bool(true)
  }
  when (io.nasti.b.fire()) { write_resp := Bool(false) }
 }
 /** Splits each wide input beat into several narrower output beats.
  *
  * An accepted input beat is held in `buffer` and emitted as `win / wout`
  * output beats, starting with the least-significant `wout` bits. No new
  * input is accepted while a beat is being sent. The output `last` flag is
  * raised only on the final piece of an input beat whose own `last` was set.
  *
  * @param win  input data width in bits; must be larger than `wout` and a
  *             multiple of it
  * @param wout output data width in bits
  */
 class StreamNarrower(win: Int, wout: Int) extends Module {
  require(win > wout, "Stream narrower input width must be larger than output width")
  require(win % wout == 0, "Stream narrower input width must be multiple of output width")
  val io = new Bundle {
    val in = Decoupled(new StreamChannel(win)).flip
    val out = Decoupled(new StreamChannel(wout))
  }
  val n_pieces = win / wout
  val buffer = Reg(Bits(width = win))
  // piece_idx selects the slice currently on the output; it advances on
  // every output transfer.
  val (piece_idx, pkt_done) = Counter(io.out.fire(), n_pieces)
  val pieces = Vec.tabulate(n_pieces) { i => buffer(wout * (i + 1) - 1, wout * i) }
  val last_piece = (piece_idx === UInt(n_pieces - 1))
  val sending = Reg(init = Bool(false))
  val in_last = Reg(Bool())
  // Capture a new wide beat and start sending it.
  when (io.in.fire()) {
    buffer := io.in.bits.data
    in_last := io.in.bits.last
    sending := Bool(true)
  }
  // All pieces of the current beat have been transferred.
  when (pkt_done) { sending := Bool(false) }
  io.out.valid := sending
  io.out.bits.data := pieces(piece_idx)
  io.out.bits.last := in_last && last_piece
  io.in.ready := !sending
 }
 /** Gathers several narrow input beats into one wider output beat.
  *
  * `wout / win` input beats are stored into consecutive elements of `buffer`
  * (first beat in element 0). Once the buffer is full the module stops
  * accepting input and presents the concatenated buffer on the output until
  * it is taken. The output `last` flag is the `last` flag of the final
  * input beat of the group.
  *
  * @param win  input data width in bits
  * @param wout output data width in bits; must be larger than `win` and a
  *             multiple of it
  */
 class StreamExpander(win: Int, wout: Int) extends Module {
  require(win < wout, "Stream expander input width must be smaller than output width")
  require(wout % win == 0, "Stream expander output width must be multiple of input width")
  val io = new Bundle {
    val in = Decoupled(new StreamChannel(win)).flip
    val out = Decoupled(new StreamChannel(wout))
  }
  val n_pieces = wout / win
  val buffer = Reg(Vec(n_pieces, UInt(width = win)))
  val last = Reg(Bool())
  val collecting = Reg(init = Bool(true))
  // piece_idx is the buffer slot the next input beat is written to.
  val (piece_idx, pkt_done) = Counter(io.in.fire(), n_pieces)
  when (io.in.fire()) { buffer(piece_idx) := io.in.bits.data }
  // The final piece has arrived: remember its `last` flag and switch to
  // presenting the assembled beat.
  when (pkt_done) { last := io.in.bits.last; collecting := Bool(false) }
  when (io.out.fire()) { collecting := Bool(true) }
  io.in.ready := collecting
  io.out.valid := !collecting
  io.out.bits.data := buffer.toBits
  io.out.bits.last := last
 }
 /** Helpers for wiring StreamIO ports together. */
 object StreamUtils {
  /** Cross-connects two stream ports: each side's `out` channel feeds the
   * other side's `in` channel.
   */
  def connectStreams(a: StreamIO, b: StreamIO): Unit = {
    b.in <> a.out
    a.in <> b.out
  }
 }
 /** Mixed into data types that report their total width via `nbits`, as
  * required by Serializer and Deserializer.
  */
 trait Serializable { def nbits: Int }
 /** Sends a `typ` value as a sequence of `w`-bit words.
  *
  * The value is flattened with `toBits` and pushed through a StreamNarrower
  * of input width `typ.nbits`; only the data field of the narrowed stream is
  * exposed on the output.
  *
  * @param w   output word width in bits
  * @param typ prototype of the value to serialize
  */
 class Serializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
  val io = new Bundle {
    val in = Decoupled(typ).flip
    val out = Decoupled(Bits(width = w))
  }
  val narrower = Module(new StreamNarrower(typ.nbits, w))

  // Input side: handshake first, then payload.
  io.in.ready := narrower.io.in.ready
  narrower.io.in.valid := io.in.valid
  narrower.io.in.bits.last := Bool(true)
  narrower.io.in.bits.data := io.in.bits.toBits

  // Output side: the `last` flag of the narrowed stream is not forwarded.
  narrower.io.out.ready := io.out.ready
  io.out.bits := narrower.io.out.bits.data
  io.out.valid := narrower.io.out.valid
 }
 /** Rebuilds a `typ` value from a sequence of `w`-bit words.
  *
  * Incoming words are collected by a StreamExpander of output width
  * `typ.nbits`; the assembled bits are reinterpreted as `typ` via `fromBits`.
  *
  * @param w   input word width in bits
  * @param typ prototype of the value to deserialize
  */
 class Deserializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
  val io = new Bundle {
    val in = Decoupled(Bits(width = w)).flip
    val out = Decoupled(typ)
  }
  val expander = Module(new StreamExpander(w, typ.nbits))

  // Input side: every word is tagged as `last`.
  io.in.ready := expander.io.in.ready
  expander.io.in.bits.last := Bool(true)
  expander.io.in.bits.data := io.in.bits
  expander.io.in.valid := io.in.valid

  // Output side: the `last` flag of the expanded stream is not forwarded.
  expander.io.out.ready := io.out.ready
  io.out.bits := typ.cloneType.fromBits(expander.io.out.bits.data)
  io.out.valid := expander.io.out.valid
 }
--- a/junctions/src/main/scala/util.scala
+++ b/junctions/src/main/scala/util.scala
@@ -0,0 +1,314 @@
 /// See LICENSE for license details.
 package junctions
 import Chisel._
 import cde.Parameters
 /** Bundle base class for bundles whose only constructor argument is the
  * implicit Parameters object.
  *
  * `cloneType` reflectively invokes the first constructor of the concrete
  * class with `p`. If that constructor does not accept exactly that argument
  * list, the resulting IllegalArgumentException is reported through
  * `throwException` with a hint to override `cloneType` in the subclass.
  */
 class ParameterizedBundle(implicit p: Parameters) extends Bundle {
  override def cloneType = {
    try {
      this.getClass.getConstructors.head.newInstance(p).asInstanceOf[this.type]
    } catch {
      case e: java.lang.IllegalArgumentException =>
        // Class name in the message spelled correctly so it can be searched for.
        throwException("Unable to use ParameterizedBundle.cloneType on " +
                       this.getClass + ", probably because " + this.getClass +
                       "() takes more than one argument.  Consider overriding " +
                       "cloneType() on " + this.getClass, e)
    }
  }
 }
 /** Flow-through queue backed by a SeqMem.
  *
  * When the queue is empty and the consumer is ready, the enqueued item is
  * passed straight to the dequeue port without being stored (`do_flow`).
  * Otherwise items are written to `ram` and read back with a registered
  * valid (`ram_out_valid`).
  *
  * @param entries number of storage entries; must be greater than 1
  * @param data    generator for the element type
  */
 class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module {
  val io = new QueueIO(data, entries)
  require(entries > 1)
  val do_flow = Wire(Bool())
  // Transfers that bypass the memory do not move the pointers.
  val do_enq = io.enq.fire() && !do_flow
  val do_deq = io.deq.fire() && !do_flow
  // Distinguishes full from empty when the two pointers are equal.
  val maybe_full = Reg(init=Bool(false))
  val enq_ptr = Counter(do_enq, entries)._1
  val (deq_ptr, deq_done) = Counter(do_deq, entries)
  when (do_enq =/= do_deq) { maybe_full := do_enq }
  val ptr_match = enq_ptr === deq_ptr
  val empty = ptr_match && !maybe_full
  val full = ptr_match && maybe_full
  // NOTE(review): the pointer difference presumably assumes wrap-around
  // arithmetic matches `entries` -- confirm for non-power-of-two sizes.
  val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2)
  do_flow := empty && io.deq.ready
  val ram = SeqMem(entries, data)
  when (do_enq) { ram.write(enq_ptr, io.enq.bits) }
  // Issue a read when the consumer is ready and either a second entry exists
  // behind the one being presented, or nothing is presented yet and the
  // queue is not empty.
  val ren = io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)
  // While an entry is being presented, read the one after it (wrapping to 0
  // when deq_done is set); otherwise read the entry at deq_ptr.
  val raddr = Mux(io.deq.valid, Mux(deq_done, UInt(0), deq_ptr + UInt(1)), deq_ptr)
  val ram_out_valid = Reg(next = ren)
  io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid)
  io.enq.ready := !full
  io.deq.bits := Mux(empty, io.enq.bits, ram.read(raddr, ren))
 }
 /** Queue built from a HellaFlowQueue followed by a single-entry pipelined
  * Queue on the dequeue side.
  *
  * @param entries number of entries of the inner HellaFlowQueue
  * @param data    generator for the element type
  */
 class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module {
  val io = new QueueIO(data, entries)
  val fq = Module(new HellaFlowQueue(entries)(data))
  fq.io.enq <> io.enq
  // One-entry output stage (pipe = true) between the flow queue and io.deq.
  io.deq <> Queue(fq.io.deq, 1, pipe = true)
 }
 object HellaQueue {
  /** Instantiates a HellaQueue with `entries` entries behind `enq` and
   * returns its dequeue port.
   */
  def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = {
    val queue = Module((new HellaQueue(entries)) { enq.bits })
    val sink = queue.io.enq
    // Signals are wired one by one instead of with <> so that override is allowed.
    enq.ready := sink.ready
    sink.bits := enq.bits
    sink.valid := enq.valid
    queue.io.deq
  }
 }
 /** A generalized locking RR arbiter that addresses the limitations of the
  * version in the Chisel standard library.
  *
  * This base class provides the selection datapath only: subclasses decide
  * when `locked` and `lockIdx` are updated. While `locked` is set the input
  * at `lockIdx` stays selected; otherwise the first valid input after
  * `lockIdx` (searching in rotated order) is selected.
  *
  * @param typ  prototype of the arbitrated data type
  * @param arbN number of inputs
  */
 abstract class JunctionsAbstractLockingArbiter[T <: Data](typ: T, arbN: Int)
    extends Module {
  val io = new Bundle {
    val in = Vec(arbN, Decoupled(typ.cloneType)).flip
    val out = Decoupled(typ.cloneType)
  }
  /** Returns a Vec whose element i is norm((i + rot) mod n). The Mux picks
   * between the non-wrapping index (i + rot) and the wrapped index
   * (rot - (n - i)).
   */
  def rotateLeft[T <: Data](norm: Vec[T], rot: UInt): Vec[T] = {
    val n = norm.size
    Vec.tabulate(n) { i =>
      Mux(rot < UInt(n - i), norm(UInt(i) + rot), norm(rot - UInt(n - i)))
    }
  }
  val lockIdx = Reg(init = UInt(0, log2Up(arbN)))
  val locked = Reg(init = Bool(false))
  // Both the valid bits and the input indices are rotated so that the search
  // starts at lockIdx + 1; PriorityMux then yields the index of the first
  // valid input in that order.
  val choice = PriorityMux(
    rotateLeft(Vec(io.in.map(_.valid)), lockIdx + UInt(1)),
    rotateLeft(Vec((0 until arbN).map(UInt(_))), lockIdx + UInt(1)))
  val chosen = Mux(locked, lockIdx, choice)
  // Only the chosen input sees the output's ready.
  for (i <- 0 until arbN) {
    io.in(i).ready := io.out.ready && chosen === UInt(i)
  }
  io.out.valid := io.in(chosen).valid
  io.out.bits := io.in(chosen).bits
 }
 /** This locking arbiter determines when it is safe to unlock
  * by peeking at the data.
  *
  * @param typ       prototype of the arbitrated data type
  * @param arbN      number of inputs
  * @param canUnlock predicate on the transferred data; when it holds on a
  *                  transfer, the lock is released
  * @param needsLock optional predicate on the transferred data deciding
  *                  whether a transfer taken while unlocked acquires the
  *                  lock; when absent every such transfer does
  */
 class JunctionsPeekingArbiter[T <: Data](
    typ: T, arbN: Int,
    canUnlock: T => Bool,
    needsLock: Option[T => Bool] = None)
    extends JunctionsAbstractLockingArbiter(typ, arbN) {
  // Falls back to "always lock" when no needsLock predicate was supplied.
  def realNeedsLock(data: T): Bool =
    needsLock.map(_(data)).getOrElse(Bool(true))
  when (io.out.fire()) {
    when (!locked && realNeedsLock(io.out.bits)) {
      lockIdx := choice
      locked := Bool(true)
    }
    // the unlock statement takes precedence: it comes last, so it wins when
    // the same transfer both requests and releases the lock
    when (canUnlock(io.out.bits)) {
      locked := Bool(false)
    }
  }
 }
 /** This arbiter determines when it is safe to unlock by counting transactions.
  *
  * @param typ       prototype of the arbitrated data type
  * @param arbN      number of inputs
  * @param count     size of the lock counter; must be greater than 1
  * @param needsLock optional predicate on the transferred data deciding
  *                  whether a transfer taken while unlocked acquires the
  *                  lock; when absent every such transfer does
  */
 class JunctionsCountingArbiter[T <: Data](
    typ: T, arbN: Int, count: Int,
    val needsLock: Option[T => Bool] = None)
    extends JunctionsAbstractLockingArbiter(typ, arbN) {
  // Falls back to "always lock" when no needsLock predicate was supplied.
  def realNeedsLock(data: T): Bool =
    needsLock.map(_(data)).getOrElse(Bool(true))
  // if count is 1, you should use a non-locking arbiter
  require(count > 1, "CountingArbiter cannot have count <= 1")
  val lock_ctr = Counter(count)
  when (io.out.fire()) {
    // First transfer of a locked sequence: take the lock and count it.
    when (!locked && realNeedsLock(io.out.bits)) {
      lockIdx := choice
      locked := Bool(true)
      lock_ctr.inc()
    }
    // Subsequent transfers: keep counting and release the lock when inc()
    // reports true.
    // NOTE(review): presumably inc() returns true when the counter wraps, so
    // the lock spans `count` transfers in total -- confirm against Counter.
    when (locked) {
      when (lock_ctr.inc()) { locked := Bool(false) }
    }
  }
 }
 /** Enqueue payload of a ReorderQueue: the data item and the tag it is
  * stored under.
  */
 class ReorderQueueWrite[T <: Data](dType: T, tagWidth: Int) extends Bundle {
  val data = dType.cloneType
  val tag = UInt(width = tagWidth)
  // Needed because the constructor takes parameters.
  override def cloneType: this.type = {
    val copy = new ReorderQueueWrite(dType, tagWidth)
    copy.asInstanceOf[this.type]
  }
 }
 /** Decoupled enqueue port of a ReorderQueue, carrying ReorderQueueWrite
  * payloads.
  */
 class ReorderEnqueueIO[T <: Data](dType: T, tagWidth: Int)
  extends DecoupledIO(new ReorderQueueWrite(dType, tagWidth)) {
  // Needed because the constructor takes parameters.
  override def cloneType: this.type = {
    val copy = new ReorderEnqueueIO(dType, tagWidth)
    copy.asInstanceOf[this.type]
  }
 }
 /** Dequeue port of a ReorderQueue. `valid` and `tag` are inputs; `data`
  * and `matches` are outputs.
  */
 class ReorderDequeueIO[T <: Data](dType: T, tagWidth: Int) extends Bundle {
  val valid = Bool(INPUT)
  val tag = UInt(INPUT, tagWidth)
  val data = dType.cloneType.asOutput
  val matches = Bool(OUTPUT)
  // Needed because the constructor takes parameters.
  override def cloneType: this.type = {
    val copy = new ReorderDequeueIO(dType, tagWidth)
    copy.asInstanceOf[this.type]
  }
 }
 /** Tag-addressed storage: items are enqueued together with a tag and later
  * looked up and released by presenting that tag on the dequeue port.
  *
  * Two implementations are generated depending on the parameters:
  *  - if `size` is smaller than the tag space (2^tagWidth), a register-based
  *    structure that stores tags next to the data and searches them
  *    associatively;
  *  - otherwise a Mem indexed directly by the tag.
  *
  * @param dType    prototype of the stored data type
  * @param tagWidth width of the tag in bits
  * @param size     number of entries; defaults to the full tag space
  */
 class ReorderQueue[T <: Data](dType: T, tagWidth: Int, size: Option[Int] = None)
    extends Module {
  val io = new Bundle {
    val enq = new ReorderEnqueueIO(dType, tagWidth).flip
    val deq = new ReorderDequeueIO(dType, tagWidth)
  }
  val tagSpaceSize = 1 << tagWidth
  val actualSize = size.getOrElse(tagSpaceSize)
  if (tagSpaceSize > actualSize) {
    // Fewer entries than tags: associative lookup.
    val roq_data = Reg(Vec(actualSize, dType))
    val roq_tags = Reg(Vec(actualSize, UInt(width = tagWidth)))
    val roq_free = Reg(init = Vec.fill(actualSize)(Bool(true)))
    // New items go into the lowest-numbered free entry.
    val roq_enq_addr = PriorityEncoder(roq_free)
    // An entry matches when it is occupied and holds the requested tag.
    val roq_matches = roq_tags.zip(roq_free)
      .map { case (tag, free) => tag === io.deq.tag && !free }
    val roq_deq_onehot = PriorityEncoderOH(roq_matches)
    io.enq.ready := roq_free.reduce(_ || _)
    io.deq.data := Mux1H(roq_deq_onehot, roq_data)
    io.deq.matches := roq_matches.reduce(_ || _)
    when (io.enq.valid && io.enq.ready) {
      roq_data(roq_enq_addr) := io.enq.bits.data
      roq_tags(roq_enq_addr) := io.enq.bits.tag
      roq_free(roq_enq_addr) := Bool(false)
    }
    // NOTE(review): the release is not qualified by io.deq.matches;
    // presumably callers only assert deq.valid for a tag that is present --
    // confirm.
    when (io.deq.valid) {
      roq_free(OHToUInt(roq_deq_onehot)) := Bool(true)
    }
    println(s"Warning - using a CAM for ReorderQueue, tagBits: ${tagWidth} size: ${actualSize}")
  } else {
    // One entry per possible tag: the tag is used directly as the address.
    val roq_data = Mem(tagSpaceSize, dType)
    val roq_free = Reg(init = Vec.fill(tagSpaceSize)(Bool(true)))
    io.enq.ready := roq_free(io.enq.bits.tag)
    io.deq.data := roq_data(io.deq.tag)
    io.deq.matches := !roq_free(io.deq.tag)
    when (io.enq.valid && io.enq.ready) {
      roq_data(io.enq.bits.tag) := io.enq.bits.data
      roq_free(io.enq.bits.tag) := Bool(false)
    }
    when (io.deq.valid) {
      roq_free(io.deq.tag) := Bool(true)
    }
  }
 }
 /** Factory for DecoupledHelper taking the ready/valid signals as varargs. */
 object DecoupledHelper {
  def apply(rvs: Bool*): DecoupledHelper = {
    new DecoupledHelper(rvs)
  }
 }
 /** Holds a set of ready/valid signals and builds "fire" conditions from
  * them.
  */
 class DecoupledHelper(val rvs: Seq[Bool]) {
  /** ANDs together every signal in `rvs` except `exclude` (compared by
   * reference), followed by all of `includes`.
   */
  def fire(exclude: Bool, includes: Bool*) = {
    val remaining = rvs.filterNot(_ eq exclude)
    val terms = remaining ++ includes
    terms.reduce(_ && _)
  }
 }
 /** FIFO whose enqueue and dequeue ports may have different bit widths.
  *
  * Three implementations are generated depending on the widths:
  *  - equal widths: a plain Queue with `n` entries;
  *  - input wider than output: each input word is stored whole and read out
  *    as `inW / outW` output beats, least-significant beat first;
  *  - input narrower than output: `outW / inW` input words are concatenated
  *    into one output word, first word in the least-significant bits.
  *
  * `io.count` reports the number of output words currently available.
  *
  * @param inW  input width in bits
  * @param outW output width in bits; the wider of the two widths must be a
  *             multiple of the narrower one
  * @param n    capacity in output words; when the input is wider it must be
  *             a multiple of `inW / outW`
  */
 class MultiWidthFifo(inW: Int, outW: Int, n: Int) extends Module {
  val io = new Bundle {
    val in = Decoupled(Bits(width = inW)).flip
    val out = Decoupled(Bits(width = outW))
    val count = UInt(OUTPUT, log2Up(n + 1))
  }
  if (inW == outW) {
    val q = Module(new Queue(Bits(width = inW), n))
    q.io.enq <> io.in
    io.out <> q.io.deq
    io.count := q.io.count
  } else if (inW > outW) {
    val nBeats = inW / outW
    require(inW % outW == 0, s"MultiWidthFifo: in: $inW not divisible by out: $outW")
    require(n % nBeats == 0, s"Cannot store $n output words when output beats is $nBeats")
    val wdata = Reg(Vec(n / nBeats, Bits(width = inW)))
    // Flat view of the storage as n output-width beats.
    val rdata = Vec(wdata.flatMap { indat =>
      (0 until nBeats).map(i => indat(outW * (i + 1) - 1, outW * i)) })
    // head indexes input words, tail indexes output beats; size counts beats.
    // NOTE(review): both pointers wrap by register overflow, so n / nBeats
    // and n are presumably expected to be powers of two -- confirm.
    val head = Reg(init = UInt(0, log2Up(n / nBeats)))
    val tail = Reg(init = UInt(0, log2Up(n)))
    val size = Reg(init = UInt(0, log2Up(n + 1)))
    when (io.in.fire()) {
      wdata(head) := io.in.bits
      head := head + UInt(1)
    }
    when (io.out.fire()) { tail := tail + UInt(1) }
    size := MuxCase(size, Seq(
      (io.in.fire() && io.out.fire()) -> (size + UInt(nBeats - 1)),
      io.in.fire() -> (size + UInt(nBeats)),
      io.out.fire() -> (size - UInt(1))))
    io.out.valid := size > UInt(0)
    io.out.bits := rdata(tail)
    // One input word occupies nBeats beats, so it may only be accepted when
    // at least nBeats beats are free; accepting on `size < n` would overwrite
    // a partially read word and push size past n.
    io.in.ready := size <= UInt(n - nBeats)
    io.count := size
  } else {
    val nBeats = outW / inW
    require(outW % inW == 0, s"MultiWidthFifo: out: $outW not divisible by in: $inW")
    val wdata = Reg(Vec(n * nBeats, Bits(width = inW)))
    // Each output word is the concatenation of nBeats consecutive input
    // words, earliest word in the low bits.
    val rdata = Vec.tabulate(n) { i =>
      Cat(wdata.slice(i * nBeats, (i + 1) * nBeats).reverse)}
    // head indexes input words, tail indexes output words; size counts
    // input words.
    // NOTE(review): pointer wrap-around and the shift used for io.count
    // presumably assume n and nBeats are powers of two -- confirm.
    val head = Reg(init = UInt(0, log2Up(n * nBeats)))
    val tail = Reg(init = UInt(0, log2Up(n)))
    val size = Reg(init = UInt(0, log2Up(n * nBeats + 1)))
    when (io.in.fire()) {
      wdata(head) := io.in.bits
      head := head + UInt(1)
    }
    when (io.out.fire()) { tail := tail + UInt(1) }
    size := MuxCase(size, Seq(
      (io.in.fire() && io.out.fire()) -> (size - UInt(nBeats - 1)),
      io.in.fire() -> (size + UInt(1)),
      io.out.fire() -> (size - UInt(nBeats))))
    // Number of complete output words available.
    io.count := size >> UInt(log2Up(nBeats))
    io.out.valid := io.count > UInt(0)
    io.out.bits := rdata(tail)
    io.in.ready := size < UInt(n * nBeats)
  }
 }