diff --git a/junctions/.gitignore b/junctions/.gitignore
new file mode 100644
index 00000000..c58d83b3
--- /dev/null
+++ b/junctions/.gitignore
@@ -0,0 +1,17 @@
+*.class
+*.log
+
+# sbt specific
+.cache
+.history
+.lib/
+dist/*
+target/
+lib_managed/
+src_managed/
+project/boot/
+project/plugins/project/
+
+# Scala-IDE specific
+.scala_dependencies
+.worksheet
diff --git a/junctions/LICENSE b/junctions/LICENSE
new file mode 100644
index 00000000..b226e9d5
--- /dev/null
+++ b/junctions/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2015, The Regents of the University of California (Regents)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of junctions nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
diff --git a/junctions/README.md b/junctions/README.md
new file mode 100644
index 00000000..f88be666
--- /dev/null
+++ b/junctions/README.md
@@ -0,0 +1,6 @@
+# junctions
+A repository for peripheral components and IO devices associated with the RocketChip project.
+
+To use these modules, include this repo as a git submodule within your chip repository and add it as a Project in your chip's build.scala. These components depend only on Chisel, i.e.
+
+    lazy val junctions = project.dependsOn(chisel)
diff --git a/junctions/build.sbt b/junctions/build.sbt
new file mode 100644
index 00000000..bae39ed9
--- /dev/null
+++ b/junctions/build.sbt
@@ -0,0 +1,19 @@
+organization := "edu.berkeley.cs"
+
+version := "1.0"
+
+name := "junctions"
+
+scalaVersion := "2.11.6"
+
+// Provide a managed dependency on chisel if -DchiselVersion="" is supplied on the command line.
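To illustrate the README's instruction, here is a minimal project/Build.scala sketch for an enclosing chip repository. The project names and directory layout are assumptions; the real rocket-chip build definition is more involved.

    // project/Build.scala in the enclosing chip repo (illustrative only)
    import sbt._
    import Keys._

    object ChipBuild extends Build {
      // Assumed layout: the chisel and junctions submodules sit at the repo root.
      lazy val chisel    = Project("chisel", file("chisel"))
      lazy val junctions = Project("junctions", file("junctions")).dependsOn(chisel)
      lazy val chip      = Project("chip", file(".")).dependsOn(junctions)
    }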
+libraryDependencies ++= (Seq("chisel","cde").map { + dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten + +site.settings + +site.includeScaladoc() + +ghpages.settings + +git.remoteRepo := "git@github.com:ucb-bar/junctions.git" diff --git a/junctions/project/plugins.sbt b/junctions/project/plugins.sbt new file mode 100644 index 00000000..4f4825c4 --- /dev/null +++ b/junctions/project/plugins.sbt @@ -0,0 +1,5 @@ +resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven" + +addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.3") + +addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1") diff --git a/junctions/src/main/scala/addrmap.scala b/junctions/src/main/scala/addrmap.scala new file mode 100644 index 00000000..a223f63a --- /dev/null +++ b/junctions/src/main/scala/addrmap.scala @@ -0,0 +1,150 @@ +// See LICENSE for license details. + +package junctions + +import Chisel._ +import cde.{Parameters, Field} +import scala.collection.mutable.HashMap + +case object PAddrBits extends Field[Int] +case object VAddrBits extends Field[Int] +case object PgIdxBits extends Field[Int] +case object PgLevels extends Field[Int] +case object PgLevelBits extends Field[Int] +case object ASIdBits extends Field[Int] +case object PPNBits extends Field[Int] +case object VPNBits extends Field[Int] + +case object GlobalAddrMap extends Field[AddrMap] + +trait HasAddrMapParameters { + implicit val p: Parameters + + val paddrBits = p(PAddrBits) + val vaddrBits = p(VAddrBits) + val pgIdxBits = p(PgIdxBits) + val ppnBits = p(PPNBits) + val vpnBits = p(VPNBits) + val pgLevels = p(PgLevels) + val pgLevelBits = p(PgLevelBits) + val asIdBits = p(ASIdBits) + + val addrMap = p(GlobalAddrMap) +} + +case class MemAttr(prot: Int, cacheable: Boolean = false) + +sealed abstract class MemRegion { + def start: BigInt + def size: BigInt + def numSlaves: Int + def attr: MemAttr + + def containsAddress(x: UInt) = UInt(start) <= x && x < UInt(start + size) +} + +case class MemSize(size: BigInt, attr: MemAttr) extends MemRegion { + def start = 0 + def numSlaves = 1 +} + +case class MemRange(start: BigInt, size: BigInt, attr: MemAttr) extends MemRegion { + def numSlaves = 1 +} + +object AddrMapProt { + val R = 0x1 + val W = 0x2 + val X = 0x4 + val RW = R | W + val RX = R | X + val RWX = R | W | X + val SZ = 3 +} + +class AddrMapProt extends Bundle { + val x = Bool() + val w = Bool() + val r = Bool() +} + +case class AddrMapEntry(name: String, region: MemRegion) + +object AddrMap { + def apply(elems: AddrMapEntry*): AddrMap = new AddrMap(elems) +} + +class AddrMap(entriesIn: Seq[AddrMapEntry], val start: BigInt = BigInt(0)) extends MemRegion { + def isEmpty = entries.isEmpty + def length = entries.size + def numSlaves = entries.map(_.region.numSlaves).foldLeft(0)(_ + _) + def attr = ??? 
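Since AddrMap is the central data structure of this file, a small usage sketch may help: entries with start == 0 are packed upward to the next multiple of their size, entries with an explicit start keep it (subject to the alignment checks below), and nested maps flatten into colon-separated names with slave ports numbered in entry order. The devices and sizes here are invented, and the snippet assumes the junctions classes are on the classpath.

    // Hypothetical address map (not from rocket-chip)
    val testMap = AddrMap(
      AddrMapEntry("io", AddrMap(
        AddrMapEntry("uart", MemSize(0x1000, MemAttr(AddrMapProt.RW))),
        AddrMapEntry("spi",  MemSize(0x1000, MemAttr(AddrMapProt.RW))))),
      AddrMapEntry("mem", MemRange(0x80000000L, 0x10000000L,
        MemAttr(AddrMapProt.RWX, cacheable = true))))

    println(testMap.port("io:uart"))  // 0 -- flattened slave ports follow entry order
    println(testMap.port("mem"))      // 2
    println(testMap("io:spi").start)  // 0x1000 -- zero-start regions are packed and aligned
    println(testMap("mem").start)     // 0x80000000 -- explicit bases are preserved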
+ + private val slavePorts = HashMap[String, Int]() + private val mapping = HashMap[String, MemRegion]() + + val (size: BigInt, entries: Seq[AddrMapEntry]) = { + var ind = 0 + var base = start + var rebasedEntries = collection.mutable.ArrayBuffer[AddrMapEntry]() + for (AddrMapEntry(name, r) <- entriesIn) { + if (r.start != 0) { + val align = BigInt(1) << log2Ceil(r.size) + require(r.start >= base, s"region $name base address 0x${r.start.toString(16)} overlaps previous base 0x${base.toString(16)}") + require(r.start % align == 0, s"region $name base address 0x${r.start.toString(16)} not aligned to 0x${align.toString(16)}") + base = r.start + } else { + base = (base + r.size - 1) / r.size * r.size + } + + r match { + case r: AddrMap => + val subMap = new AddrMap(r.entries, base) + rebasedEntries += AddrMapEntry(name, subMap) + mapping += name -> subMap + mapping ++= subMap.mapping.map { case (k, v) => s"$name:$k" -> v } + slavePorts ++= subMap.slavePorts.map { case (k, v) => s"$name:$k" -> (ind + v) } + case _ => + val e = MemRange(base, r.size, r.attr) + rebasedEntries += AddrMapEntry(name, e) + mapping += name -> e + slavePorts += name -> ind + } + + ind += r.numSlaves + base += r.size + } + (base - start, rebasedEntries) + } + + val flatten: Seq[(String, MemRange)] = { + val arr = new Array[(String, MemRange)](slavePorts.size) + for ((name, port) <- slavePorts) + arr(port) = (name, mapping(name).asInstanceOf[MemRange]) + arr + } + + def apply(name: String): MemRegion = mapping(name) + def port(name: String): Int = slavePorts(name) + def subMap(name: String): AddrMap = mapping(name).asInstanceOf[AddrMap] + def isInRegion(name: String, addr: UInt): Bool = mapping(name).containsAddress(addr) + + def isCacheable(addr: UInt): Bool = { + flatten.filter(_._2.attr.cacheable).map { case (_, region) => + region.containsAddress(addr) + }.foldLeft(Bool(false))(_ || _) + } + + def isValid(addr: UInt): Bool = { + flatten.map { case (_, region) => + region.containsAddress(addr) + }.foldLeft(Bool(false))(_ || _) + } + + def getProt(addr: UInt): AddrMapProt = { + val protForRegion = flatten.map { case (_, region) => + Mux(region.containsAddress(addr), UInt(region.attr.prot, AddrMapProt.SZ), UInt(0)) + } + new AddrMapProt().fromBits(protForRegion.reduce(_|_)) + } +} diff --git a/junctions/src/main/scala/atos.scala b/junctions/src/main/scala/atos.scala new file mode 100644 index 00000000..970a1f5c --- /dev/null +++ b/junctions/src/main/scala/atos.scala @@ -0,0 +1,333 @@ +package junctions + +import Chisel._ +import scala.math.max +import cde.{Parameters, Field} + +trait HasAtosParameters extends HasNastiParameters { + // round up to a multiple of 32 + def roundup(n: Int) = 32 * ((n - 1) / 32 + 1) + + val atosUnionBits = max( + nastiXIdBits + nastiXDataBits + nastiWStrobeBits + 1, + nastiXIdBits + nastiXBurstBits + + nastiXSizeBits + nastiXLenBits + nastiXAddrBits) + val atosIdBits = nastiXIdBits + val atosTypBits = 2 + val atosRespBits = nastiXRespBits + val atosDataBits = nastiXDataBits + + val atosAddrOffset = atosIdBits + val atosLenOffset = atosIdBits + nastiXAddrBits + val atosSizeOffset = atosLenOffset + nastiXLenBits + val atosBurstOffset = atosSizeOffset + nastiXSizeBits + + val atosDataOffset = atosIdBits + val atosStrobeOffset = nastiXDataBits + atosIdBits + val atosLastOffset = atosStrobeOffset + nastiWStrobeBits + + val atosRequestBits = roundup(atosTypBits + atosUnionBits) + val atosResponseBits = roundup(atosTypBits + atosIdBits + atosRespBits + atosDataBits + 1) + val atosRequestBytes = 
atosRequestBits / 8 + val atosResponseBytes = atosResponseBits / 8 + val atosRequestWords = atosRequestBytes / 4 + val atosResponseWords = atosResponseBytes / 4 +} + +abstract class AtosModule(implicit val p: Parameters) + extends Module with HasAtosParameters +abstract class AtosBundle(implicit val p: Parameters) + extends ParameterizedBundle()(p) with HasAtosParameters + +object AtosRequest { + def arType = UInt("b00") + def awType = UInt("b01") + def wType = UInt("b10") + + def apply(typ: UInt, union: UInt)(implicit p: Parameters): AtosRequest = { + val areq = Wire(new AtosRequest) + areq.typ := typ + areq.union := union + areq + } + + def apply(ar: NastiReadAddressChannel)(implicit p: Parameters): AtosRequest = + apply(arType, Cat(ar.burst, ar.size, ar.len, ar.addr, ar.id)) + + def apply(aw: NastiWriteAddressChannel)(implicit p: Parameters): AtosRequest = + apply(awType, Cat(aw.burst, aw.size, aw.len, aw.addr, aw.id)) + + def apply(w: NastiWriteDataChannel)(implicit p: Parameters): AtosRequest = + apply(wType, Cat(w.last, w.strb, w.data, w.id)) +} + +class AtosRequest(implicit p: Parameters) + extends AtosBundle()(p) with Serializable { + val typ = UInt(width = atosTypBits) + val union = UInt(width = atosUnionBits) + + def burst(dummy: Int = 0) = + union(atosUnionBits - 1, atosBurstOffset) + + def size(dummy: Int = 0) = + union(atosBurstOffset - 1, atosSizeOffset) + + def len(dummy: Int = 0) = + union(atosSizeOffset - 1, atosLenOffset) + + def addr(dummy: Int = 0) = + union(atosLenOffset - 1, atosAddrOffset) + + def id(dummy: Int = 0) = + union(atosIdBits - 1, 0) + + def data(dummy: Int = 0) = + union(atosStrobeOffset - 1, atosDataOffset) + + def strb(dummy: Int = 0) = + union(atosLastOffset - 1, atosStrobeOffset) + + def last(dummy: Int = 0) = + union(atosLastOffset) + + def has_addr(dummy: Int = 0) = + typ === AtosRequest.arType || typ === AtosRequest.awType + + def has_data(dummy: Int = 0) = + typ === AtosRequest.wType + + def is_last(dummy: Int = 0) = + typ === AtosRequest.arType || (typ === AtosRequest.wType && last()) + + def nbits: Int = atosRequestBits + + def resp_len(dummy: Int = 0) = + MuxLookup(typ, UInt(0), Seq( + AtosRequest.arType -> (len() + UInt(1)), + AtosRequest.awType -> UInt(1))) +} + +object AtosResponse { + def rType = UInt("b00") + def bType = UInt("b01") + + def apply(typ: UInt, id: UInt, resp: UInt, data: UInt, last: Bool) + (implicit p: Parameters): AtosResponse = { + val aresp = Wire(new AtosResponse) + aresp.typ := typ + aresp.id := id + aresp.resp := resp + aresp.data := data + aresp.last := last + aresp + } + + def apply(r: NastiReadDataChannel)(implicit p: Parameters): AtosResponse = + apply(rType, r.id, r.resp, r.data, r.last) + + def apply(b: NastiWriteResponseChannel)(implicit p: Parameters): AtosResponse = + apply(bType, b.id, b.resp, UInt(0), Bool(false)) +} + +class AtosResponse(implicit p: Parameters) + extends AtosBundle()(p) with Serializable { + val typ = UInt(width = atosTypBits) + val id = UInt(width = atosIdBits) + val resp = UInt(width = atosRespBits) + val last = Bool() + val data = UInt(width = atosDataBits) + + def has_data(dummy: Int = 0) = typ === AtosResponse.rType + + def is_last(dummy: Int = 0) = !has_data() || last + + def nbits: Int = atosResponseBits +} + +class AtosIO(implicit p: Parameters) extends AtosBundle()(p) { + val req = Decoupled(new AtosRequest) + val resp = Decoupled(new AtosResponse).flip +} + +class AtosRequestEncoder(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val ar = Decoupled(new 
NastiReadAddressChannel).flip + val aw = Decoupled(new NastiWriteAddressChannel).flip + val w = Decoupled(new NastiWriteDataChannel).flip + val req = Decoupled(new AtosRequest) + } + + val writing = Reg(init = Bool(false)) + + io.ar.ready := !writing && io.req.ready + io.aw.ready := !writing && !io.ar.valid && io.req.ready + io.w.ready := writing && io.req.ready + + io.req.valid := Mux(writing, io.w.valid, io.ar.valid || io.aw.valid) + io.req.bits := Mux(writing, AtosRequest(io.w.bits), + Mux(io.ar.valid, AtosRequest(io.ar.bits), AtosRequest(io.aw.bits))) + + when (io.aw.fire()) { writing := Bool(true) } + when (io.w.fire() && io.w.bits.last) { writing := Bool(false) } +} + +class AtosResponseDecoder(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val resp = Decoupled(new AtosResponse).flip + val b = Decoupled(new NastiWriteResponseChannel) + val r = Decoupled(new NastiReadDataChannel) + } + + val is_b = io.resp.bits.typ === AtosResponse.bType + val is_r = io.resp.bits.typ === AtosResponse.rType + + io.b.valid := io.resp.valid && is_b + io.b.bits := NastiWriteResponseChannel( + id = io.resp.bits.id, + resp = io.resp.bits.resp) + + io.r.valid := io.resp.valid && is_r + io.r.bits := NastiReadDataChannel( + id = io.resp.bits.id, + data = io.resp.bits.data, + last = io.resp.bits.last, + resp = io.resp.bits.resp) + + io.resp.ready := (is_b && io.b.ready) || (is_r && io.r.ready) +} + +class AtosClientConverter(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val nasti = (new NastiIO).flip + val atos = new AtosIO + } + + val req_enc = Module(new AtosRequestEncoder) + req_enc.io.ar <> io.nasti.ar + req_enc.io.aw <> io.nasti.aw + req_enc.io.w <> io.nasti.w + io.atos.req <> req_enc.io.req + + val resp_dec = Module(new AtosResponseDecoder) + resp_dec.io.resp <> io.atos.resp + io.nasti.b <> resp_dec.io.b + io.nasti.r <> resp_dec.io.r +} + +class AtosRequestDecoder(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val req = Decoupled(new AtosRequest).flip + val ar = Decoupled(new NastiReadAddressChannel) + val aw = Decoupled(new NastiWriteAddressChannel) + val w = Decoupled(new NastiWriteDataChannel) + } + + val is_ar = io.req.bits.typ === AtosRequest.arType + val is_aw = io.req.bits.typ === AtosRequest.awType + val is_w = io.req.bits.typ === AtosRequest.wType + + io.ar.valid := io.req.valid && is_ar + io.ar.bits := NastiReadAddressChannel( + id = io.req.bits.id(), + addr = io.req.bits.addr(), + size = io.req.bits.size(), + len = io.req.bits.len(), + burst = io.req.bits.burst()) + + io.aw.valid := io.req.valid && is_aw + io.aw.bits := NastiWriteAddressChannel( + id = io.req.bits.id(), + addr = io.req.bits.addr(), + size = io.req.bits.size(), + len = io.req.bits.len(), + burst = io.req.bits.burst()) + + io.w.valid := io.req.valid && is_w + io.w.bits := NastiWriteDataChannel( + id = io.req.bits.id(), + data = io.req.bits.data(), + strb = Some(io.req.bits.strb()), + last = io.req.bits.last()) + + io.req.ready := (io.ar.ready && is_ar) || + (io.aw.ready && is_aw) || + (io.w.ready && is_w) +} + +class AtosResponseEncoder(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val b = Decoupled(new NastiWriteResponseChannel).flip + val r = Decoupled(new NastiReadDataChannel).flip + val resp = Decoupled(new AtosResponse) + } + + val locked = Reg(init = Bool(false)) + + io.resp.valid := (io.b.valid && !locked) || io.r.valid + io.resp.bits := Mux(io.r.valid, + AtosResponse(io.r.bits), AtosResponse(io.b.bits)) + 
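+  // `locked` stays high while a multi-beat read response is in flight, so write
+  // responses are never interleaved into the middle of an R burst; when both a
+  // read beat and a write response are available, the read beat wins.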
+ io.b.ready := !locked && !io.r.valid && io.resp.ready + io.r.ready := io.resp.ready + + when (io.r.fire() && !io.r.bits.last) { locked := Bool(true) } + when (io.r.fire() && io.r.bits.last) { locked := Bool(false) } +} + +class AtosManagerConverter(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val atos = (new AtosIO).flip + val nasti = new NastiIO + } + + val req_dec = Module(new AtosRequestDecoder) + val resp_enc = Module(new AtosResponseEncoder) + + req_dec.io.req <> io.atos.req + io.atos.resp <> resp_enc.io.resp + + io.nasti.ar <> req_dec.io.ar + io.nasti.aw <> req_dec.io.aw + io.nasti.w <> req_dec.io.w + + resp_enc.io.b <> io.nasti.b + resp_enc.io.r <> io.nasti.r +} + +class AtosSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) { + val req = Decoupled(Bits(width = w)) + val resp = Decoupled(Bits(width = w)).flip + val clk = Bool(OUTPUT) + val clk_edge = Bool(OUTPUT) + override def cloneType = new AtosSerializedIO(w)(p).asInstanceOf[this.type] +} + +class AtosSerdes(w: Int)(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val wide = (new AtosIO).flip + val narrow = new AtosSerializedIO(w) + } + + val ser = Module(new Serializer(w, new AtosRequest)) + ser.io.in <> io.wide.req + io.narrow.req <> ser.io.out + + val des = Module(new Deserializer(w, new AtosResponse)) + des.io.in <> io.narrow.resp + io.wide.resp <> des.io.out +} + +class AtosDesser(w: Int)(implicit p: Parameters) extends AtosModule()(p) { + val io = new Bundle { + val narrow = new AtosSerializedIO(w).flip + val wide = new AtosIO + } + + val des = Module(new Deserializer(w, new AtosRequest)) + des.io.in <> io.narrow.req + io.wide.req <> des.io.out + + val ser = Module(new Serializer(w, new AtosResponse)) + ser.io.in <> io.wide.resp + io.narrow.resp <> ser.io.out +} diff --git a/junctions/src/main/scala/crossing.scala b/junctions/src/main/scala/crossing.scala new file mode 100644 index 00000000..0f71362e --- /dev/null +++ b/junctions/src/main/scala/crossing.scala @@ -0,0 +1,150 @@ +package junctions +import Chisel._ + +class Crossing[T <: Data](gen: T, enq_sync: Boolean, deq_sync: Boolean) extends Bundle { + val enq = Decoupled(gen).flip() + val deq = Decoupled(gen) + val enq_clock = if (enq_sync) Some(Clock(INPUT)) else None + val deq_clock = if (deq_sync) Some(Clock(INPUT)) else None + val enq_reset = if (enq_sync) Some(Bool(INPUT)) else None + val deq_reset = if (deq_sync) Some(Bool(INPUT)) else None +} + +// Output is 1 for one cycle after any edge of 'in' +object AsyncHandshakePulse { + def apply(in: Bool, sync: Int): Bool = { + val syncv = RegInit(Vec.fill(sync+1){Bool(false)}) + syncv.last := in + (syncv.init zip syncv.tail).foreach { case (sink, source) => sink := source } + syncv(0) =/= syncv(1) + } +} + +class AsyncHandshakeSource[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool) + extends Module(_clock = clock, _reset = reset) { + val io = new Bundle { + // These come from the source clock domain + val enq = Decoupled(gen).flip() + // These cross to the sink clock domain + val bits = gen.cloneType.asOutput + val push = Bool(OUTPUT) + val pop = Bool(INPUT) + } + + val ready = RegInit(Bool(true)) + val bits = Reg(gen) + val push = RegInit(Bool(false)) + + io.enq.ready := ready + io.bits := bits + io.push := push + + val pop = AsyncHandshakePulse(io.pop, sync) + assert (!pop || !ready) + + when (pop) { + ready := Bool(true) + } + + when (io.enq.fire()) { + ready := Bool(false) + bits := io.enq.bits + push := !push + } +} + 
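The source/sink pair above and below implements a classic toggle handshake: the source captures the enqueued data, flips push once, and waits; the sink sees the edge after the synchronizer flops, presents the data, and flips pop back once it is consumed, so throughput is roughly one beat per round trip. A hypothetical use of the wrappers defined later in this file (clock, reset, and port names are invented, and the code is assumed to sit in a module clocked by the core clock):

    // Move a Decoupled stream from the core clock domain into an I/O clock domain.
    val ioDomainDeq = AsyncDecoupledTo(io_clock, io_reset, coreQueue.io.deq)
    ioDevice.io.in <> ioDomainDeq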
+class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool) + extends Module(_clock = clock, _reset = reset) { + val io = new Bundle { + // These cross to the source clock domain + val bits = gen.cloneType.asInput + val push = Bool(INPUT) + val pop = Bool(OUTPUT) + // These go to the sink clock domain + val deq = Decoupled(gen) + } + + val valid = RegInit(Bool(false)) + val bits = Reg(gen) + val pop = RegInit(Bool(false)) + + io.deq.valid := valid + io.deq.bits := bits + io.pop := pop + + val push = AsyncHandshakePulse(io.push, sync) + assert (!push || !valid) + + when (push) { + valid := Bool(true) + bits := io.bits + } + + when (io.deq.fire()) { + valid := Bool(false) + pop := !pop + } +} + +class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module { + val io = new Crossing(gen, true, true) + require (sync >= 2) + + val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock.get, io.enq_reset.get)) + val sink = Module(new AsyncHandshakeSink (gen, sync, io.deq_clock.get, io.deq_reset.get)) + + source.io.enq <> io.enq + io.deq <> sink.io.deq + + sink.io.bits := source.io.bits + sink.io.push := source.io.push + source.io.pop := sink.io.pop +} + +class AsyncDecoupledTo[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module { + val io = new Crossing(gen, false, true) + + // !!! if depth == 0 { use Handshake } else { use AsyncFIFO } + val crossing = Module(new AsyncHandshake(gen, sync)).io + crossing.enq_clock.get := clock + crossing.enq_reset.get := reset + crossing.enq <> io.enq + crossing.deq_clock.get := io.deq_clock.get + crossing.deq_reset.get := io.deq_reset.get + io.deq <> crossing.deq +} + +object AsyncDecoupledTo { + // source is in our clock domain, output is in the 'to' clock domain + def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = { + val to = Module(new AsyncDecoupledTo(source.bits, depth, sync)) + to.io.deq_clock.get := to_clock + to.io.deq_reset.get := to_reset + to.io.enq <> source + to.io.deq + } +} + +class AsyncDecoupledFrom[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module { + val io = new Crossing(gen, true, false) + + // !!! 
if depth == 0 { use Handshake } else { use AsyncFIFO } + val crossing = Module(new AsyncHandshake(gen, sync)).io + crossing.enq_clock.get := io.enq_clock.get + crossing.enq_reset.get := io.enq_reset.get + crossing.enq <> io.enq + crossing.deq_clock.get := clock + crossing.deq_reset.get := reset + io.deq <> crossing.deq +} + +object AsyncDecoupledFrom { + // source is in the 'from' clock domain, output is in our clock domain + def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = { + val from = Module(new AsyncDecoupledFrom(source.bits, depth, sync)) + from.io.enq_clock.get := from_clock + from.io.enq_reset.get := from_reset + from.io.enq <> source + from.io.deq + } +} diff --git a/junctions/src/main/scala/hasti.scala b/junctions/src/main/scala/hasti.scala new file mode 100644 index 00000000..abb3ee81 --- /dev/null +++ b/junctions/src/main/scala/hasti.scala @@ -0,0 +1,549 @@ +package junctions + +import Chisel._ +import cde.{Parameters, Field} + +object HastiConstants +{ + // Values for htrans + val SZ_HTRANS = 2 + val HTRANS_IDLE = UInt(0, SZ_HTRANS) // No transfer requested, not in a burst + val HTRANS_BUSY = UInt(1, SZ_HTRANS) // No transfer requested, in a burst + val HTRANS_NONSEQ = UInt(2, SZ_HTRANS) // First (potentially only) request in a burst + val HTRANS_SEQ = UInt(3, SZ_HTRANS) // Following requests in a burst + + // Values for hburst + val SZ_HBURST = 3 + val HBURST_SINGLE = UInt(0, SZ_HBURST) // Single access (no burst) + val HBURST_INCR = UInt(1, SZ_HBURST) // Incrementing burst of arbitrary length, not crossing 1KB + val HBURST_WRAP4 = UInt(2, SZ_HBURST) // 4-beat wrapping burst + val HBURST_INCR4 = UInt(3, SZ_HBURST) // 4-beat incrementing burst + val HBURST_WRAP8 = UInt(4, SZ_HBURST) // 8-beat wrapping burst + val HBURST_INCR8 = UInt(5, SZ_HBURST) // 8-beat incrementing burst + val HBURST_WRAP16 = UInt(6, SZ_HBURST) // 16-beat wrapping burst + val HBURST_INCR16 = UInt(7, SZ_HBURST) // 16-beat incrementing burst + + // Values for hresp + val SZ_HRESP = 1 + val HRESP_OKAY = UInt(0, SZ_HRESP) + val HRESP_ERROR = UInt(1, SZ_HRESP) + + // Values for hsize are identical to TileLink MT_SZ + // ie: 8*2^SZ_HSIZE bit transfers + val SZ_HSIZE = 3 + + // Values for hprot (a bitmask) + val SZ_HPROT = 4 + def HPROT_DATA = UInt("b0001") // Data access or Opcode fetch + def HPROT_PRIVILEGED = UInt("b0010") // Privileged or User access + def HPROT_BUFFERABLE = UInt("b0100") // Bufferable or non-bufferable + def HPROT_CACHEABLE = UInt("b1000") // Cacheable or non-cacheable + + def dgate(valid: Bool, b: UInt) = Fill(b.getWidth, valid) & b +} + +import HastiConstants._ + +case class HastiParameters(dataBits: Int, addrBits: Int) +case object HastiId extends Field[String] +case class HastiKey(id: String) extends Field[HastiParameters] + +trait HasHastiParameters { + implicit val p: Parameters + val hastiParams = p(HastiKey(p(HastiId))) + val hastiAddrBits = hastiParams.addrBits + val hastiDataBits = hastiParams.dataBits + val hastiDataBytes = hastiDataBits/8 + val hastiAlignment = log2Ceil(hastiDataBytes) +} + +abstract class HastiModule(implicit val p: Parameters) extends Module + with HasHastiParameters +abstract class HastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasHastiParameters + +class HastiMasterIO(implicit p: Parameters) extends HastiBundle()(p) { + val htrans = UInt(OUTPUT, SZ_HTRANS) + val hmastlock = Bool(OUTPUT) + val haddr = UInt(OUTPUT, hastiAddrBits) + val hwrite = 
Bool(OUTPUT) + val hburst = UInt(OUTPUT, SZ_HBURST) + val hsize = UInt(OUTPUT, SZ_HSIZE) + val hprot = UInt(OUTPUT, SZ_HPROT) + + val hwdata = Bits(OUTPUT, hastiDataBits) + val hrdata = Bits(INPUT, hastiDataBits) + + val hready = Bool(INPUT) + val hresp = UInt(INPUT, SZ_HRESP) + + def isNSeq(dummy:Int=0) = htrans === HTRANS_NONSEQ // SEQ does not start a NEW request + def isHold(dummy:Int=0) = htrans === HTRANS_BUSY || htrans === HTRANS_SEQ + def isIdle(dummy:Int=0) = htrans === HTRANS_IDLE || htrans === HTRANS_BUSY +} + +class HastiSlaveIO(implicit p: Parameters) extends HastiBundle()(p) { + val htrans = UInt(INPUT, SZ_HTRANS) + val hmastlock = Bool(INPUT) + val haddr = UInt(INPUT, hastiAddrBits) + val hwrite = Bool(INPUT) + val hburst = UInt(INPUT, SZ_HBURST) + val hsize = UInt(INPUT, SZ_HSIZE) + val hprot = UInt(INPUT, SZ_HPROT) + + val hwdata = Bits(INPUT, hastiDataBits) + val hrdata = Bits(OUTPUT, hastiDataBits) + + val hsel = Bool(INPUT) + val hready = Bool(OUTPUT) + val hresp = UInt(OUTPUT, SZ_HRESP) +} + +/* A diverted master is told hready when his address phase goes nowhere. + * In this case, we buffer his address phase request and replay it later. + * NOTE: this must optimize to nothing when divert is constantly false. + */ +class MasterDiversion(implicit p: Parameters) extends HastiModule()(p) { + val io = new Bundle { + val in = (new HastiMasterIO).flip + val out = (new HastiMasterIO) + val divert = Bool(INPUT) + } + + val full = Reg(init = Bool(false)) + val buffer = Reg(new HastiMasterIO) + + when (io.out.hready) { + full := Bool(false) + } + when (io.divert) { + full := Bool(true) + buffer := io.in + } + + // If the master is diverted, he must also have been told hready + assert (!io.divert || io.in.hready, + "Diverted but not ready"); + + // Replay the request we diverted + io.out.htrans := Mux(full, buffer.htrans, io.in.htrans) + io.out.hmastlock := Mux(full, buffer.hmastlock, io.in.hmastlock) + io.out.haddr := Mux(full, buffer.haddr, io.in.haddr) + io.out.hwrite := Mux(full, buffer.hwrite, io.in.hwrite) + io.out.hburst := Mux(full, buffer.hburst, io.in.hburst) + io.out.hsize := Mux(full, buffer.hsize, io.in.hsize) + io.out.hprot := Mux(full, buffer.hprot, io.in.hprot) + io.out.hwdata := Mux(full, buffer.hwdata, io.in.hwdata) + + // Pass slave response back + io.in.hrdata := io.out.hrdata + io.in.hresp := io.out.hresp + io.in.hready := io.out.hready && !full // Block master while we steal his address phase +} + +/* Masters with lower index have priority over higher index masters. + * However, a lower priority master will retain control of a slave when EITHER: + * 1. a burst is in progress (switching slaves mid-burst violates AHB-lite at slave) + * 2. a transfer was waited (the standard forbids changing requests in this case) + * + * If a master raises hmastlock, it will be waited until no other master has inflight + * requests; then, it acquires exclusive control of the crossbar until hmastlock is low. + * + * To implement an AHB-lite crossbar, it is important to realize that requests and + * responses are coupled. Unlike modern bus protocols where the response data has flow + * control independent of the request data, in AHB-lite, both flow at the same time at + * the sole discretion of the slave via the hready signal. The address and data are + * delivered on two back-to-back cycles, the so-called address and data phases. + * + * Masters can only be connected to a single slave at a time. 
If a master had two different + * slave connections on the address and data phases, there would be two independent hready + * signals. An AHB-lite slave can assume that data flows when it asserts hready. If the data + * slave deasserts hready while the address slave asserts hready, the master is put in the + * impossible position of being in data phase on two slaves at once. For this reason, when + * a master issues back-to-back accesses to distinct slaves, we inject a pipeline bubble + * between the two requests to limit the master to just a single slave at a time. + * + * Conversely, a slave CAN have two masters attached to it. This is unproblematic, because + * the only signal which governs data flow is hready. Thus, both masters can be stalled + * safely by the single slave. + */ +class HastiXbar(nMasters: Int, addressMap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) { + val io = new Bundle { + val masters = Vec(nMasters, new HastiMasterIO).flip + val slaves = Vec(addressMap.size, new HastiSlaveIO).flip + } + + val nSlaves = addressMap.size + + // Setup diversions infront of each master + val diversions = Seq.tabulate(nMasters) { m => Module(new MasterDiversion) } + (io.masters zip diversions) foreach { case (m, d) => d.io.in <> m } + + // Handy short-hand + val masters = diversions map (_.io.out) + val slaves = io.slaves + + // Lock status of the crossbar + val lockedM = Reg(init = Vec.fill(nMasters)(Bool(false))) + val isLocked = lockedM.reduce(_ || _) + + // This matrix governs the master-slave connections in the address phase + // It is indexed by addressPhaseGrantSM(slave)(master) + // It is guaranteed to have at most one 'true' per column and per row + val addressPhaseGrantSM = Wire(Vec(nSlaves, Vec(nMasters, Bool()))) + // This matrix governs the master-slave connections in the data phase + // It is guaranteed to have at most one 'true' per column and per row + val dataPhaseGrantSM = Reg (init = Vec.fill(nSlaves)(Vec.fill(nMasters)(Bool(false)))) + // This matrix is the union of the address and data phases. + // It is transposed with respect to the two previous matrices. + // It is guaranteed to contain at most one 'true' per master row. + // However, two 'true's per slave column are permitted. + val unionGrantMS = Vec.tabulate(nMasters) { m => Vec.tabulate(nSlaves) { s => + addressPhaseGrantSM(s)(m) || dataPhaseGrantSM(s)(m) } } + + // Confirm the guarantees made above + def justOnce(v: Vec[Bool]) = v.fold(Bool(false)) { case (p, v) => + assert (!p || !v) + p || v + } + addressPhaseGrantSM foreach { s => justOnce(s) } + unionGrantMS foreach { s => justOnce(s) } + + // Data phase follows address phase whenever the slave is ready + (slaves zip (dataPhaseGrantSM zip addressPhaseGrantSM)) foreach { case (s, (d, a)) => + when (s.hready) { d := a } + } + + // Record the grant state from the previous cycle; needed in case we hold access + val priorAddressPhaseGrantSM = RegNext(addressPhaseGrantSM) + + // If a master says BUSY or SEQ, it is in the middle of a burst. + // In this case, it MUST stay attached to the same slave as before. + // Otherwise, it would violate the AHB-lite specification as seen by + // the slave, which is guaranteed a complete burst of the promised length. + // One case where this matters is preventing preemption of low-prio masters. 
+ // NOTE: this exposes a slave to bad addresses when a master is buggy + val holdBurstM = Vec(masters map { _.isHold() }) + + // Transform the burst hold requirement from master indexing to slave indexing + // We use the previous cycle's binding because the master continues the prior burst + val holdBurstS = Vec(priorAddressPhaseGrantSM map { m => Mux1H(m, holdBurstM) }) + + // If a slave says !hready to a request, it must retain the same master next cycle. + // The AHB-lite specification requires that a waited transfer remain unchanged. + // If we preempted a waited master, the new master's request could potentially differ. + val holdBusyS = RegNext(Vec(slaves map { s => !s.hready && s.hsel })) + + // Combine the above two grounds to determine if the slave retains its prior master + val holdS = Vec((holdBurstS zip holdBusyS) map ({ case (a,b) => a||b })) + + // Determine which master addresses match which slaves + val matchMS = Vec(masters map { m => Vec(addressMap map { afn => afn(m.haddr) }) }) + // Detect requests to nowhere; we need to allow progress in this case + val nowhereM = Vec(matchMS map { s => !s.reduce(_ || _) }) + + // Detect if we need to inject a pipeline bubble between the master requests. + // Divert masters already granted a data phase different from next request. + // NOTE: if only one slave, matchMS is always true => bubble always false + // => the diversion registers are optimized away as they are unread + // NOTE: bubble => dataPhase => have an hready signal + val bubbleM = + Vec.tabulate(nMasters) { m => + Vec.tabulate(nSlaves) { s => dataPhaseGrantSM(s)(m) && !matchMS(m)(s) } + .reduce(_ || _) } + + // Block any request that requires bus ownership or conflicts with isLocked + val blockedM = + Vec((lockedM zip masters) map { case(l, m) => !l && (isLocked || m.hmastlock) }) + + // Requested access to slaves from masters (pre-arbitration) + // NOTE: isNSeq does NOT include SEQ; thus, masters who are midburst do not + // request access to a new slave. They stay tied to the old and do not get two. + // NOTE: if a master was waited, it must repeat the same request as last cycle; + // thus, it will request the same slave and not end up with two (unless buggy). + val NSeq = masters.map(_.isNSeq()) + val requestSM = Vec.tabulate(nSlaves) { s => Vec.tabulate(nMasters) { m => + matchMS(m)(s) && NSeq(m) && !bubbleM(m) && !blockedM(m) } } + + // Select at most one master request per slave (lowest index = highest priority) + val selectedRequestSM = Vec(requestSM map { m => Vec(PriorityEncoderOH(m)) }) + + // Calculate new crossbar interconnect state + addressPhaseGrantSM := Vec((holdS zip (priorAddressPhaseGrantSM zip selectedRequestSM)) + map { case (h, (p, r)) => Mux(h, p, r) }) + + for (m <- 0 until nMasters) { + // If the master is connected to a slave, the slave determines hready. 
+ // However, if no slave is connected, for progress report ready anyway, if: + // bad address (swallow request) OR idle (permit stupid masters to move FSM) + val autoready = nowhereM(m) || masters(m).isIdle() + val hready = Mux1H(unionGrantMS(m), slaves.map(_.hready ^ autoready)) ^ autoready + masters(m).hready := hready + // If we diverted a master, we need to absorb his address phase to replay later + diversions(m).io.divert := (bubbleM(m) || blockedM(m)) && NSeq(m) && hready + } + + // Master muxes (address and data phase are the same) + (masters zip unionGrantMS) foreach { case (m, g) => { + m.hrdata := Mux1H(g, slaves.map(_.hrdata)) + m.hresp := Mux1H(g, slaves.map(_.hresp)) + } } + + // Slave address phase muxes + (slaves zip addressPhaseGrantSM) foreach { case (s, g) => { + s.htrans := Mux1H(g, masters.map(_.htrans)) + s.haddr := Mux1H(g, masters.map(_.haddr)) + s.hmastlock := isLocked + s.hwrite := Mux1H(g, masters.map(_.hwrite)) + s.hsize := Mux1H(g, masters.map(_.hsize)) + s.hburst := Mux1H(g, masters.map(_.hburst)) + s.hprot := Mux1H(g, masters.map(_.hprot)) + s.hsel := g.reduce(_ || _) + } } + + // Slave data phase muxes + (slaves zip dataPhaseGrantSM) foreach { case (s, g) => { + s.hwdata := Mux1H(g, masters.map(_.hwdata)) + } } + + // When no master-slave connections are active, a master can take-over the bus + val canLock = !addressPhaseGrantSM.map({ v => v.reduce(_ || _) }).reduce(_ || _) + + // Lowest index highest priority for lock arbitration + val reqLock = masters.map(_.hmastlock) + val winLock = PriorityEncoderOH(reqLock) + + // Lock arbitration + when (isLocked) { + lockedM := (lockedM zip reqLock) map { case (a,b) => a && b } + } .elsewhen (canLock) { + lockedM := winLock + } +} + +class HastiBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) { + val io = new Bundle { + val master = new HastiMasterIO().flip + val slaves = Vec(amap.size, new HastiSlaveIO).flip + } + + val bar = Module(new HastiXbar(1, amap)) + bar.io.masters(0) <> io.master + bar.io.slaves <> io.slaves +} + +class HastiSlaveMux(n: Int)(implicit p: Parameters) extends HastiModule()(p) { + val io = new Bundle { + val ins = Vec(n, new HastiSlaveIO) + val out = new HastiSlaveIO().flip + } + + val amap = Seq({ (_:UInt) => Bool(true)}) + val bar = Module(new HastiXbar(n, amap)) + io.ins <> bar.io.masters + io.out <> bar.io.slaves(0) +} + +class HastiSlaveToMaster(implicit p: Parameters) extends HastiModule()(p) { + val io = new Bundle { + val in = new HastiSlaveIO + val out = new HastiMasterIO + } + + io.out.htrans := Mux(io.in.hsel, io.in.htrans, HTRANS_IDLE) + io.out.hmastlock := io.in.hmastlock + io.out.haddr := io.in.haddr + io.out.hwrite := io.in.hwrite + io.out.hburst := io.in.hburst + io.out.hsize := io.in.hsize + io.out.hprot := io.in.hprot + io.out.hwdata := io.in.hwdata + io.in.hrdata := io.out.hrdata + io.in.hready := io.out.hready + io.in.hresp := io.out.hresp +} + +class HastiMasterIONastiIOConverter(implicit p: Parameters) extends HastiModule()(p) + with HasNastiParameters { + val io = new Bundle { + val nasti = new NastiIO().flip + val hasti = new HastiMasterIO + } + + require(hastiAddrBits == nastiXAddrBits) + require(hastiDataBits == nastiXDataBits) + + val r_queue = Module(new Queue(new NastiReadDataChannel, 2, pipe = true)) + + val s_idle :: s_read :: s_write :: s_write_resp :: Nil = Enum(Bits(), 4) + val state = Reg(init = s_idle) + + val addr = Reg(UInt(width = hastiAddrBits)) + val id = Reg(UInt(width = nastiXIdBits)) + val size = Reg(UInt(width = 
nastiXSizeBits)) + val len = Reg(UInt(width = nastiXLenBits)) + val data = Reg(UInt(width = nastiXDataBits)) + val first = Reg(init = Bool(false)) + val is_rtrans = (state === s_read) && + (io.hasti.htrans === HTRANS_SEQ || + io.hasti.htrans === HTRANS_NONSEQ) + val rvalid = RegEnable(is_rtrans, Bool(false), io.hasti.hready) + + io.nasti.aw.ready := (state === s_idle) + io.nasti.ar.ready := (state === s_idle) && !io.nasti.aw.valid + io.nasti.w.ready := (state === s_write) && io.hasti.hready + io.nasti.b.valid := (state === s_write_resp) + io.nasti.b.bits := NastiWriteResponseChannel(id = id) + io.nasti.r <> r_queue.io.deq + + r_queue.io.enq.valid := io.hasti.hready && rvalid + r_queue.io.enq.bits := NastiReadDataChannel( + id = id, + data = io.hasti.hrdata, + last = (len === UInt(0))) + + assert(!r_queue.io.enq.valid || r_queue.io.enq.ready, + "NASTI -> HASTI converter queue overflow") + + // How many read requests have we not delivered a response for yet? + val pending_count = r_queue.io.count + rvalid + + io.hasti.haddr := addr + io.hasti.hsize := size + io.hasti.hwrite := (state === s_write) + io.hasti.hburst := HBURST_INCR + io.hasti.hprot := UInt(0) + io.hasti.hwdata := data + io.hasti.hmastlock := Bool(false) + io.hasti.htrans := MuxLookup(state, HTRANS_IDLE, Seq( + s_write -> Mux(io.nasti.w.valid, + Mux(first, HTRANS_NONSEQ, HTRANS_SEQ), + Mux(first, HTRANS_IDLE, HTRANS_BUSY)), + s_read -> MuxCase(HTRANS_BUSY, Seq( + first -> HTRANS_NONSEQ, + (pending_count <= UInt(1)) -> HTRANS_SEQ)))) + + when (io.nasti.aw.fire()) { + first := Bool(true) + addr := io.nasti.aw.bits.addr + id := io.nasti.aw.bits.id + size := io.nasti.aw.bits.size + state := s_write + } + + when (io.nasti.ar.fire()) { + first := Bool(true) + addr := io.nasti.ar.bits.addr + id := io.nasti.ar.bits.id + size := io.nasti.ar.bits.size + len := io.nasti.ar.bits.len + state := s_read + } + + when (io.nasti.w.fire()) { + first := Bool(false) + addr := addr + (UInt(1) << size) + data := io.nasti.w.bits.data + when (io.nasti.w.bits.last) { state := s_write_resp } + } + + when (io.nasti.b.fire()) { state := s_idle } + + when (is_rtrans && io.hasti.hready) { + first := Bool(false) + addr := addr + (UInt(1) << size) + len := len - UInt(1) + when (len === UInt(0)) { state := s_idle } + } +} + +class HastiTestSRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) { + val io = new HastiSlaveIO + + // This is a test SRAM with random delays + val ready = LFSR16(Bool(true))(0) // Bool(true) + + // Calculate the bitmask of which bytes are being accessed + val mask_decode = Vec.tabulate(hastiAlignment+1) (UInt(_) <= io.hsize) + val mask_wide = Vec.tabulate(hastiDataBytes) { i => mask_decode(log2Up(i+1)) } + val mask_shift = if (hastiAlignment == 0) UInt(1) else + mask_wide.toBits().asUInt() << io.haddr(hastiAlignment-1,0) + + // The request had better have been aligned! 
(AHB-lite requires this) + if (hastiAlignment >= 1) { + assert (io.htrans === HTRANS_IDLE || io.htrans === HTRANS_BUSY || + (io.haddr & mask_decode.toBits()(hastiAlignment,1).asUInt) === UInt(0), + "HASTI request not aligned") + } + + // The mask and address during the address phase + val a_request = io.hsel && (io.htrans === HTRANS_NONSEQ || io.htrans === HTRANS_SEQ) + val a_mask = Wire(UInt(width = hastiDataBytes)) + val a_address = io.haddr(depth-1, hastiAlignment) + val a_write = io.hwrite + + // for backwards compatibility with chisel2, we needed a static width in definition + a_mask := mask_shift(hastiDataBytes-1, 0) + + // The data phase signals + val d_read = RegEnable(a_request && !a_write, Bool(false), ready) + val d_mask = RegEnable(a_mask, ready && a_request) + val d_wdata = Vec.tabulate(hastiDataBytes) { i => io.hwdata(8*(i+1)-1, 8*i) } + + // AHB writes must occur during the data phase; this poses a structural + // hazard with reads which must occur during the address phase. To solve + // this problem, we delay the writes until there is a free cycle. + // + // The idea is to record the address information from address phase and + // then as soon as possible flush the pending write. This cannot be done + // on a cycle when there is an address phase read, but on any other cycle + // the write will execute. In the case of reads following a write, the + // result must bypass data from the pending write into the read if they + // happen to have matching address. + + // Remove this once HoldUnless is in chisel3 + def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in) + + // Pending write? + val p_valid = RegInit(Bool(false)) + val p_address = Reg(a_address) + val p_mask = Reg(a_mask) + val p_latch_d = RegNext(ready && a_request && a_write, Bool(false)) + val p_wdata = holdUnless(d_wdata, p_latch_d) + + // Use single-ported memory with byte-write enable + val mem = SeqMem(1 << (depth-hastiAlignment), Vec(hastiDataBytes, Bits(width = 8))) + + // Decide is the SRAM port is used for reading or (potentially) writing + val read = ready && a_request && !a_write + // In case we are stalled, we need to hold the read data + val d_rdata = holdUnless(mem.read(a_address, read), RegNext(read)) + // Whenever the port is not needed for reading, execute pending writes + when (!read) { + when (p_valid) { mem.write(p_address, p_wdata, p_mask.toBools) } + p_valid := Bool(false) + } + + // Record the request for later? + when (ready && a_request && a_write) { + p_valid := Bool(true) + p_address := a_address + p_mask := a_mask + } + + // Does the read need to be muxed with the previous write? + val a_bypass = a_address === p_address && p_valid + val d_bypass = RegEnable(a_bypass, ready && a_request) + + // Mux in data from the pending write + val muxdata = Vec((p_mask.toBools zip (p_wdata zip d_rdata)) + map { case (m, (p, r)) => Mux(d_bypass && m, p, r) }) + // Wipe out any data the master should not see (for testing) + val outdata = Vec((d_mask.toBools zip muxdata) + map { case (m, p) => Mux(d_read && ready && m, p, Bits(0)) }) + + // Finally, the outputs + io.hrdata := outdata.toBits() + io.hready := ready + io.hresp := HRESP_OKAY +} diff --git a/junctions/src/main/scala/memserdes.scala b/junctions/src/main/scala/memserdes.scala new file mode 100644 index 00000000..b66e5150 --- /dev/null +++ b/junctions/src/main/scala/memserdes.scala @@ -0,0 +1,317 @@ +// See LICENSE for license details. 
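Before moving on to memserdes.scala, a quick plain-Scala check of the byte-lane mask computed in HastiTestSRAM above. The bus width and the access are illustrative assumptions (32-bit bus, halfword access at byte offset 2), and log2Up is re-derived here with the log2Up(1) == 0 convention; the point is that hsize selects how many low mask bits are set and haddr shifts them onto the addressed lanes.

    // Expected result: byte lanes 2 and 3 are enabled.
    val hastiDataBytes = 4
    val hastiAlignment = 2
    val hsize   = 1   // halfword
    val addrLow = 2   // haddr(1,0)
    def log2Up(x: Int) = if (x <= 1) 0 else 32 - Integer.numberOfLeadingZeros(x - 1)
    val maskDecode = (0 to hastiAlignment).map(_ <= hsize)                          // (T, T, F)
    val maskWide   = (0 until hastiDataBytes).map(i => maskDecode(log2Up(i + 1)))   // (T, T, F, F)
    val maskBits   = maskWide.zipWithIndex.collect { case (true, i) => 1 << i }.sum // 0x3
    val maskShift  = maskBits << addrLow                                            // 0xC -> lanes 2 and 3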
+ +package junctions +import Chisel._ +import scala.math._ +import cde.{Parameters, Field} + +case object MIFAddrBits extends Field[Int] +case object MIFDataBits extends Field[Int] +case object MIFTagBits extends Field[Int] +case object MIFDataBeats extends Field[Int] + +trait HasMIFParameters { + implicit val p: Parameters + val mifTagBits = p(MIFTagBits) + val mifAddrBits = p(MIFAddrBits) + val mifDataBits = p(MIFDataBits) + val mifDataBeats = p(MIFDataBeats) +} + +abstract class MIFModule(implicit val p: Parameters) extends Module with HasMIFParameters +abstract class MIFBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasMIFParameters + +trait HasMemData extends HasMIFParameters { + val data = Bits(width = mifDataBits) +} + +trait HasMemAddr extends HasMIFParameters { + val addr = UInt(width = mifAddrBits) +} + +trait HasMemTag extends HasMIFParameters { + val tag = UInt(width = mifTagBits) +} + +class MemReqCmd(implicit p: Parameters) extends MIFBundle()(p) with HasMemAddr with HasMemTag { + val rw = Bool() +} + +class MemTag(implicit p: Parameters) extends MIFBundle()(p) with HasMemTag +class MemData(implicit p: Parameters) extends MIFBundle()(p) with HasMemData +class MemResp(implicit p: Parameters) extends MIFBundle()(p) with HasMemData with HasMemTag + +class MemIO(implicit p: Parameters) extends ParameterizedBundle()(p) { + val req_cmd = Decoupled(new MemReqCmd) + val req_data = Decoupled(new MemData) + val resp = Decoupled(new MemResp).flip +} + +class MemPipeIO(implicit p: Parameters) extends ParameterizedBundle()(p) { + val req_cmd = Decoupled(new MemReqCmd) + val req_data = Decoupled(new MemData) + val resp = Valid(new MemResp).flip +} + +class MemSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) { + val req = Decoupled(Bits(width = w)) + val resp = Valid(Bits(width = w)).flip + override def cloneType = new MemSerializedIO(w)(p).asInstanceOf[this.type] +} + +class MemSerdes(w: Int)(implicit p: Parameters) extends MIFModule +{ + val io = new Bundle { + val wide = new MemIO().flip + val narrow = new MemSerializedIO(w) + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + val out_buf = Reg(Bits()) + val in_buf = Reg(Bits()) + + val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_idle) + val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w))) + val data_send_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w) + val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w) + + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := send_cnt + UInt(1) + out_buf := out_buf >> UInt(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + when (state === s_idle && io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) + } + when (state === s_read_addr && adone) { + state := s_idle + send_cnt := UInt(0) + } + when (state === s_write_addr && adone) { + state := 
s_write_idle + send_cnt := UInt(0) + } + when (state === s_write_idle && io.wide.req_data.valid) { + state := s_write_data + } + when (state === s_write_data && ddone) { + data_send_cnt := data_send_cnt + UInt(1) + state := Mux(data_send_cnt === UInt(mifDataBeats-1), s_idle, s_write_idle) + send_cnt := UInt(0) + } + + val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w))) + val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val resp_val = Reg(init=Bool(false)) + + resp_val := Bool(false) + when (io.narrow.resp.valid) { + recv_cnt := recv_cnt + UInt(1) + when (recv_cnt === UInt((rbits-1)/w)) { + recv_cnt := UInt(0) + data_recv_cnt := data_recv_cnt + UInt(1) + resp_val := Bool(true) + } + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + + io.wide.resp.valid := resp_val + io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf) +} + +class MemDesserIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) { + val narrow = new MemSerializedIO(w).flip + val wide = new MemIO +} + +class MemDesser(w: Int)(implicit p: Parameters) extends Module // test rig side +{ + val io = new MemDesserIO(w) + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + val mifDataBeats = p(MIFDataBeats) + + require(dbits >= abits && rbits >= dbits) + val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w))) + val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w) + val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w) + val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w) + + val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_cmd_recv) + + val in_buf = Reg(Bits()) + when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { + recv_cnt := recv_cnt + UInt(1) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv + + when (state === s_cmd_recv && adone) { + state := s_cmd + recv_cnt := UInt(0) + } + when (state === s_cmd && io.wide.req_cmd.ready) { + state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply) + } + when (state === s_data_recv && ddone) { + state := s_data + recv_cnt := UInt(0) + } + when (state === s_data && io.wide.req_data.ready) { + state := s_data_recv + when (data_recv_cnt === UInt(mifDataBeats-1)) { + state := s_cmd_recv + } + data_recv_cnt := data_recv_cnt + UInt(1) + } + when (rdone) { // state === s_reply + when (data_recv_cnt === UInt(mifDataBeats-1)) { + state := s_cmd_recv + } + recv_cnt := UInt(0) + data_recv_cnt := data_recv_cnt + UInt(1) + } + + val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w) + io.wide.req_cmd.valid := state === s_cmd + io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd) + + io.wide.req_data.valid := state === s_data + io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w) + + val dataq = Module(new Queue(new MemResp, mifDataBeats)) + dataq.io.enq <> io.wide.resp + dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w) + + io.narrow.resp.valid := dataq.io.deq.valid + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UInt(w)) +} + +class MemIOArbiter(val arbN: Int)(implicit p: Parameters) extends MIFModule { + val io = new Bundle { + val inner = Vec(arbN, new MemIO).flip + val outer = new MemIO + } + + if(arbN > 1) { + val 
cmd_arb = Module(new RRArbiter(new MemReqCmd, arbN)) + val choice_q = Module(new Queue(cmd_arb.io.chosen, 4)) + val (data_cnt, data_done) = Counter(io.outer.req_data.fire(), mifDataBeats) + + io.inner.map(_.req_cmd).zipWithIndex.zip(cmd_arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.tag := Cat(req.bits.tag, UInt(id)) + req.ready := arb.ready + }} + io.outer.req_cmd.bits := cmd_arb.io.out.bits + io.outer.req_cmd.valid := cmd_arb.io.out.valid && choice_q.io.enq.ready + cmd_arb.io.out.ready := io.outer.req_cmd.ready && choice_q.io.enq.ready + choice_q.io.enq.bits := cmd_arb.io.chosen + choice_q.io.enq.valid := cmd_arb.io.out.fire() && cmd_arb.io.out.bits.rw + + io.outer.req_data.bits := io.inner(choice_q.io.deq.bits).req_data.bits + io.outer.req_data.valid := io.inner(choice_q.io.deq.bits).req_data.valid && choice_q.io.deq.valid + io.inner.map(_.req_data.ready).zipWithIndex.foreach { + case(r, i) => r := UInt(i) === choice_q.io.deq.bits && choice_q.io.deq.valid + } + choice_q.io.deq.ready := data_done + + io.outer.resp.ready := Bool(false) + for (i <- 0 until arbN) { + io.inner(i).resp.valid := Bool(false) + when(io.outer.resp.bits.tag(log2Up(arbN)-1,0).toUInt === UInt(i)) { + io.inner(i).resp.valid := io.outer.resp.valid + io.outer.resp.ready := io.inner(i).resp.ready + } + io.inner(i).resp.bits := io.outer.resp.bits + io.inner(i).resp.bits.tag := io.outer.resp.bits.tag >> UInt(log2Up(arbN)) + } + } else { io.outer <> io.inner.head } +} + +object MemIOMemPipeIOConverter { + def apply(in: MemPipeIO)(implicit p: Parameters): MemIO = { + val out = Wire(new MemIO()) + in.resp.valid := out.resp.valid + in.resp.bits := out.resp.bits + out.resp.ready := Bool(true) + out.req_cmd.valid := in.req_cmd.valid + out.req_cmd.bits := in.req_cmd.bits + in.req_cmd.ready := out.req_cmd.ready + out.req_data.valid := in.req_data.valid + out.req_data.bits := in.req_data.bits + in.req_data.ready := out.req_data.ready + out + } +} + +class MemPipeIOMemIOConverter(numRequests: Int)(implicit p: Parameters) extends MIFModule { + val io = new Bundle { + val cpu = new MemIO().flip + val mem = new MemPipeIO + } + + val numEntries = numRequests * mifDataBeats + val size = log2Down(numEntries) + 1 + + val inc = Wire(Bool()) + val dec = Wire(Bool()) + val count = Reg(init=UInt(numEntries, size)) + val watermark = count >= UInt(mifDataBeats) + + when (inc && !dec) { + count := count + UInt(1) + } + when (!inc && dec) { + count := count - UInt(mifDataBeats) + } + when (inc && dec) { + count := count - UInt(mifDataBeats-1) + } + + val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark + + io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask + io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask + io.mem.req_cmd.bits := io.cpu.req_cmd.bits + + io.mem.req_data <> io.cpu.req_data + + // Have separate queues to allow for different mem implementations + val resp_data_q = Module((new HellaQueue(numEntries)) { new MemData }) + resp_data_q.io.enq.valid := io.mem.resp.valid + resp_data_q.io.enq.bits.data := io.mem.resp.bits.data + + val resp_tag_q = Module((new HellaQueue(numEntries)) { new MemTag }) + resp_tag_q.io.enq.valid := io.mem.resp.valid + resp_tag_q.io.enq.bits.tag := io.mem.resp.bits.tag + + io.cpu.resp.valid := resp_data_q.io.deq.valid && resp_tag_q.io.deq.valid + io.cpu.resp.bits.data := resp_data_q.io.deq.bits.data + io.cpu.resp.bits.tag := resp_tag_q.io.deq.bits.tag + resp_data_q.io.deq.ready := io.cpu.resp.ready + resp_tag_q.io.deq.ready := io.cpu.resp.ready 
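+  // Response-buffer credit tracking: sending a read command (dec) reserves
+  // mifDataBeats entries in the response queues up front, and each response
+  // beat the CPU consumes (inc) releases one entry; read commands are only
+  // forwarded to memory while `watermark` shows room for a full burst.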
+ + inc := resp_data_q.io.deq.fire() && resp_tag_q.io.deq.fire() + dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw +} diff --git a/junctions/src/main/scala/nasti.scala b/junctions/src/main/scala/nasti.scala new file mode 100644 index 00000000..41251ac3 --- /dev/null +++ b/junctions/src/main/scala/nasti.scala @@ -0,0 +1,737 @@ +/// See LICENSE for license details. + +package junctions +import Chisel._ +import scala.math.max +import scala.collection.mutable.ArraySeq +import cde.{Parameters, Field} + +case object NastiKey extends Field[NastiParameters] + +case class NastiParameters(dataBits: Int, addrBits: Int, idBits: Int) + +trait HasNastiParameters { + implicit val p: Parameters + val nastiExternal = p(NastiKey) + val nastiXDataBits = nastiExternal.dataBits + val nastiWStrobeBits = nastiXDataBits / 8 + val nastiXAddrBits = nastiExternal.addrBits + val nastiWIdBits = nastiExternal.idBits + val nastiRIdBits = nastiExternal.idBits + val nastiXIdBits = max(nastiWIdBits, nastiRIdBits) + val nastiXUserBits = 1 + val nastiAWUserBits = nastiXUserBits + val nastiWUserBits = nastiXUserBits + val nastiBUserBits = nastiXUserBits + val nastiARUserBits = nastiXUserBits + val nastiRUserBits = nastiXUserBits + val nastiXLenBits = 8 + val nastiXSizeBits = 3 + val nastiXBurstBits = 2 + val nastiXCacheBits = 4 + val nastiXProtBits = 3 + val nastiXQosBits = 4 + val nastiXRegionBits = 4 + val nastiXRespBits = 2 + + def bytesToXSize(bytes: UInt) = MuxLookup(bytes, UInt("b111"), Array( + UInt(1) -> UInt(0), + UInt(2) -> UInt(1), + UInt(4) -> UInt(2), + UInt(8) -> UInt(3), + UInt(16) -> UInt(4), + UInt(32) -> UInt(5), + UInt(64) -> UInt(6), + UInt(128) -> UInt(7))) +} + +abstract class NastiModule(implicit val p: Parameters) extends Module + with HasNastiParameters +abstract class NastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasNastiParameters + +abstract class NastiChannel(implicit p: Parameters) extends NastiBundle()(p) +abstract class NastiMasterToSlaveChannel(implicit p: Parameters) extends NastiChannel()(p) +abstract class NastiSlaveToMasterChannel(implicit p: Parameters) extends NastiChannel()(p) + +trait HasNastiMetadata extends HasNastiParameters { + val addr = UInt(width = nastiXAddrBits) + val len = UInt(width = nastiXLenBits) + val size = UInt(width = nastiXSizeBits) + val burst = UInt(width = nastiXBurstBits) + val lock = Bool() + val cache = UInt(width = nastiXCacheBits) + val prot = UInt(width = nastiXProtBits) + val qos = UInt(width = nastiXQosBits) + val region = UInt(width = nastiXRegionBits) +} + +trait HasNastiData extends HasNastiParameters { + val data = UInt(width = nastiXDataBits) + val last = Bool() +} + +class NastiReadIO(implicit val p: Parameters) extends ParameterizedBundle()(p) { + val ar = Decoupled(new NastiReadAddressChannel) + val r = Decoupled(new NastiReadDataChannel).flip +} + +class NastiWriteIO(implicit val p: Parameters) extends ParameterizedBundle()(p) { + val aw = Decoupled(new NastiWriteAddressChannel) + val w = Decoupled(new NastiWriteDataChannel) + val b = Decoupled(new NastiWriteResponseChannel).flip +} + +class NastiIO(implicit val p: Parameters) extends ParameterizedBundle()(p) { + val aw = Decoupled(new NastiWriteAddressChannel) + val w = Decoupled(new NastiWriteDataChannel) + val b = Decoupled(new NastiWriteResponseChannel).flip + val ar = Decoupled(new NastiReadAddressChannel) + val r = Decoupled(new NastiReadDataChannel).flip +} + +class NastiAddressChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p) + 
with HasNastiMetadata + +class NastiResponseChannel(implicit p: Parameters) extends NastiSlaveToMasterChannel()(p) { + val resp = UInt(width = nastiXRespBits) +} + +class NastiWriteAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) { + val id = UInt(width = nastiWIdBits) + val user = UInt(width = nastiAWUserBits) +} + +class NastiWriteDataChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p) + with HasNastiData { + val id = UInt(width = nastiWIdBits) + val strb = UInt(width = nastiWStrobeBits) + val user = UInt(width = nastiWUserBits) +} + +class NastiWriteResponseChannel(implicit p: Parameters) extends NastiResponseChannel()(p) { + val id = UInt(width = nastiWIdBits) + val user = UInt(width = nastiBUserBits) +} + +class NastiReadAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) { + val id = UInt(width = nastiRIdBits) + val user = UInt(width = nastiARUserBits) +} + +class NastiReadDataChannel(implicit p: Parameters) extends NastiResponseChannel()(p) + with HasNastiData { + val id = UInt(width = nastiRIdBits) + val user = UInt(width = nastiRUserBits) +} + +object NastiConstants { + val BURST_FIXED = UInt("b00") + val BURST_INCR = UInt("b01") + val BURST_WRAP = UInt("b10") + + val RESP_OKAY = UInt("b00") + val RESP_EXOKAY = UInt("b01") + val RESP_SLVERR = UInt("b10") + val RESP_DECERR = UInt("b11") +} + +import NastiConstants._ + +object NastiWriteAddressChannel { + def apply(id: UInt, addr: UInt, size: UInt, + len: UInt = UInt(0), burst: UInt = BURST_INCR) + (implicit p: Parameters) = { + val aw = Wire(new NastiWriteAddressChannel) + aw.id := id + aw.addr := addr + aw.len := len + aw.size := size + aw.burst := burst + aw.lock := Bool(false) + aw.cache := UInt("b0000") + aw.prot := UInt("b000") + aw.qos := UInt("b0000") + aw.region := UInt("b0000") + aw.user := UInt(0) + aw + } +} + +object NastiReadAddressChannel { + def apply(id: UInt, addr: UInt, size: UInt, + len: UInt = UInt(0), burst: UInt = BURST_INCR) + (implicit p: Parameters) = { + val ar = Wire(new NastiReadAddressChannel) + ar.id := id + ar.addr := addr + ar.len := len + ar.size := size + ar.burst := burst + ar.lock := Bool(false) + ar.cache := UInt(0) + ar.prot := UInt(0) + ar.qos := UInt(0) + ar.region := UInt(0) + ar.user := UInt(0) + ar + } +} + +object NastiWriteDataChannel { + def apply(data: UInt, strb: Option[UInt] = None, + last: Bool = Bool(true), id: UInt = UInt(0)) + (implicit p: Parameters): NastiWriteDataChannel = { + val w = Wire(new NastiWriteDataChannel) + w.strb := strb.getOrElse(Fill(w.nastiWStrobeBits, UInt(1, 1))) + w.data := data + w.last := last + w.id := id + w.user := UInt(0) + w + } +} + +object NastiReadDataChannel { + def apply(id: UInt, data: UInt, last: Bool = Bool(true), resp: UInt = UInt(0))( + implicit p: Parameters) = { + val r = Wire(new NastiReadDataChannel) + r.id := id + r.data := data + r.last := last + r.resp := resp + r.user := UInt(0) + r + } +} + +object NastiWriteResponseChannel { + def apply(id: UInt, resp: UInt = UInt(0))(implicit p: Parameters) = { + val b = Wire(new NastiWriteResponseChannel) + b.id := id + b.resp := resp + b.user := UInt(0) + b + } +} + +class MemIONastiIOConverter(cacheBlockOffsetBits: Int)(implicit p: Parameters) extends MIFModule + with HasNastiParameters { + val io = new Bundle { + val nasti = (new NastiIO).flip + val mem = new MemIO + } + + require(mifDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree") + val (mif_cnt_out, mif_wrap_out) = Counter(io.mem.resp.fire(), mifDataBeats) + 
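+  // The converter assumes each NASTI burst maps onto exactly one MemIO
+  // cache-line transaction: beats must be mifDataBits wide and bursts must be
+  // mifDataBeats long (checked by the assertions below). Write commands are
+  // only issued once the id queue has space, and the B response is withheld
+  // until the final W beat has been accepted.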
+ assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === UInt(log2Up(mifDataBits/8)), + "Nasti data size does not match MemIO data size") + assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === UInt(log2Up(mifDataBits/8)), + "Nasti data size does not match MemIO data size") + assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(mifDataBeats - 1), + "Nasti length does not match number of MemIO beats") + assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(mifDataBeats - 1), + "Nasti length does not match number of MemIO beats") + + // according to the spec, we can't send b until the last transfer on w + val b_ok = Reg(init = Bool(true)) + when (io.nasti.aw.fire()) { b_ok := Bool(false) } + when (io.nasti.w.fire() && io.nasti.w.bits.last) { b_ok := Bool(true) } + + val id_q = Module(new Queue(UInt(width = nastiWIdBits), 2)) + id_q.io.enq.valid := io.nasti.aw.valid && io.mem.req_cmd.ready + id_q.io.enq.bits := io.nasti.aw.bits.id + id_q.io.deq.ready := io.nasti.b.ready && b_ok + + io.mem.req_cmd.bits.addr := Mux(io.nasti.aw.valid, io.nasti.aw.bits.addr, io.nasti.ar.bits.addr) >> + UInt(cacheBlockOffsetBits) + io.mem.req_cmd.bits.tag := Mux(io.nasti.aw.valid, io.nasti.aw.bits.id, io.nasti.ar.bits.id) + io.mem.req_cmd.bits.rw := io.nasti.aw.valid + io.mem.req_cmd.valid := (io.nasti.aw.valid && id_q.io.enq.ready) || io.nasti.ar.valid + io.nasti.ar.ready := io.mem.req_cmd.ready && !io.nasti.aw.valid + io.nasti.aw.ready := io.mem.req_cmd.ready && id_q.io.enq.ready + + io.nasti.b.valid := id_q.io.deq.valid && b_ok + io.nasti.b.bits.id := id_q.io.deq.bits + io.nasti.b.bits.resp := UInt(0) + + io.nasti.w.ready := io.mem.req_data.ready + io.mem.req_data.valid := io.nasti.w.valid + io.mem.req_data.bits.data := io.nasti.w.bits.data + assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR, "MemIO must write full cache line") + + io.nasti.r.valid := io.mem.resp.valid + io.nasti.r.bits.data := io.mem.resp.bits.data + io.nasti.r.bits.last := mif_wrap_out + io.nasti.r.bits.id := io.mem.resp.bits.tag + io.nasti.r.bits.resp := UInt(0) + io.mem.resp.ready := io.nasti.r.ready +} + +class NastiArbiterIO(arbN: Int)(implicit p: Parameters) extends Bundle { + val master = Vec(arbN, new NastiIO).flip + val slave = new NastiIO + override def cloneType = + new NastiArbiterIO(arbN).asInstanceOf[this.type] +} + +/** Arbitrate among arbN masters requesting to a single slave */ +class NastiArbiter(val arbN: Int)(implicit p: Parameters) extends NastiModule { + val io = new NastiArbiterIO(arbN) + + if (arbN > 1) { + val arbIdBits = log2Up(arbN) + + val ar_arb = Module(new RRArbiter(new NastiReadAddressChannel, arbN)) + val aw_arb = Module(new RRArbiter(new NastiWriteAddressChannel, arbN)) + + val slave_r_arb_id = io.slave.r.bits.id(arbIdBits - 1, 0) + val slave_b_arb_id = io.slave.b.bits.id(arbIdBits - 1, 0) + + val w_chosen = Reg(UInt(width = arbIdBits)) + val w_done = Reg(init = Bool(true)) + + when (aw_arb.io.out.fire()) { + w_chosen := aw_arb.io.chosen + w_done := Bool(false) + } + + when (io.slave.w.fire() && io.slave.w.bits.last) { + w_done := Bool(true) + } + + for (i <- 0 until arbN) { + val m_ar = io.master(i).ar + val m_aw = io.master(i).aw + val m_r = io.master(i).r + val m_b = io.master(i).b + val a_ar = ar_arb.io.in(i) + val a_aw = aw_arb.io.in(i) + val m_w = io.master(i).w + + a_ar <> m_ar + a_ar.bits.id := Cat(m_ar.bits.id, UInt(i, arbIdBits)) + + a_aw <> m_aw + a_aw.bits.id := Cat(m_aw.bits.id, UInt(i, arbIdBits)) + + m_r.valid := io.slave.r.valid && slave_r_arb_id === UInt(i) + m_r.bits := 
io.slave.r.bits
+      m_r.bits.id := io.slave.r.bits.id >> UInt(arbIdBits)
+
+      m_b.valid := io.slave.b.valid && slave_b_arb_id === UInt(i)
+      m_b.bits := io.slave.b.bits
+      m_b.bits.id := io.slave.b.bits.id >> UInt(arbIdBits)
+
+      m_w.ready := io.slave.w.ready && w_chosen === UInt(i) && !w_done
+    }
+
+    io.slave.r.ready := io.master(slave_r_arb_id).r.ready
+    io.slave.b.ready := io.master(slave_b_arb_id).b.ready
+
+    io.slave.w.bits := io.master(w_chosen).w.bits
+    io.slave.w.valid := io.master(w_chosen).w.valid && !w_done
+
+    io.slave.ar <> ar_arb.io.out
+
+    io.slave.aw.bits <> aw_arb.io.out.bits
+    io.slave.aw.valid := aw_arb.io.out.valid && w_done
+    aw_arb.io.out.ready := io.slave.aw.ready && w_done
+
+  } else { io.slave <> io.master.head }
+}
+
+/** A slave that sends a decode error response for every request it receives */
+class NastiErrorSlave(implicit p: Parameters) extends NastiModule {
+  val io = (new NastiIO).flip
+
+  when (io.ar.fire()) { printf("Invalid read address %x\n", io.ar.bits.addr) }
+  when (io.aw.fire()) { printf("Invalid write address %x\n", io.aw.bits.addr) }
+
+  val r_queue = Module(new Queue(new NastiReadAddressChannel, 1))
+  r_queue.io.enq <> io.ar
+
+  val responding = Reg(init = Bool(false))
+  val beats_left = Reg(init = UInt(0, nastiXLenBits))
+
+  when (!responding && r_queue.io.deq.valid) {
+    responding := Bool(true)
+    beats_left := r_queue.io.deq.bits.len
+  }
+
+  io.r.valid := r_queue.io.deq.valid && responding
+  io.r.bits.id := r_queue.io.deq.bits.id
+  io.r.bits.data := UInt(0)
+  io.r.bits.resp := RESP_DECERR
+  io.r.bits.last := beats_left === UInt(0)
+
+  r_queue.io.deq.ready := io.r.fire() && io.r.bits.last
+
+  when (io.r.fire()) {
+    when (beats_left === UInt(0)) {
+      responding := Bool(false)
+    } .otherwise {
+      beats_left := beats_left - UInt(1)
+    }
+  }
+
+  val draining = Reg(init = Bool(false))
+  io.w.ready := draining
+
+  when (io.aw.fire()) { draining := Bool(true) }
+  when (io.w.fire() && io.w.bits.last) { draining := Bool(false) }
+
+  val b_queue = Module(new Queue(UInt(width = nastiWIdBits), 1))
+  b_queue.io.enq.valid := io.aw.valid && !draining
+  b_queue.io.enq.bits := io.aw.bits.id
+  io.aw.ready := b_queue.io.enq.ready && !draining
+  io.b.valid := b_queue.io.deq.valid && !draining
+  io.b.bits.id := b_queue.io.deq.bits
+  io.b.bits.resp := RESP_DECERR
+  b_queue.io.deq.ready := io.b.ready && !draining
+}
+
+class NastiRouterIO(nSlaves: Int)(implicit p: Parameters) extends Bundle {
+  val master = (new NastiIO).flip
+  val slave = Vec(nSlaves, new NastiIO)
+  override def cloneType =
+    new NastiRouterIO(nSlaves).asInstanceOf[this.type]
+}
+
+/** Take a single Nasti master and route its requests to various slaves
+ * @param nSlaves the number of slaves
+ * @param routeSel a function which takes an address and produces
+ * a one-hot encoded selection of the slave to access */
+class NastiRouter(nSlaves: Int, routeSel: UInt => UInt)(implicit p: Parameters)
+  extends NastiModule {
+
+  val io = new NastiRouterIO(nSlaves)
+
+  val ar_route = routeSel(io.master.ar.bits.addr)
+  val aw_route = routeSel(io.master.aw.bits.addr)
+
+  var ar_ready = Bool(false)
+  var aw_ready = Bool(false)
+  var w_ready = Bool(false)
+
+  io.slave.zipWithIndex.foreach { case (s, i) =>
+    s.ar.valid := io.master.ar.valid && ar_route(i)
+    s.ar.bits := io.master.ar.bits
+    ar_ready = ar_ready || (s.ar.ready && ar_route(i))
+
+    s.aw.valid := io.master.aw.valid && aw_route(i)
+    s.aw.bits := io.master.aw.bits
+    aw_ready = aw_ready || (s.aw.ready && aw_route(i))
+
+    val chosen = Reg(init = Bool(false))
+    when
(s.w.fire() && s.w.bits.last) { chosen := Bool(false) } + when (s.aw.fire()) { chosen := Bool(true) } + + s.w.valid := io.master.w.valid && chosen + s.w.bits := io.master.w.bits + w_ready = w_ready || (s.w.ready && chosen) + } + + val r_invalid = !ar_route.orR + val w_invalid = !aw_route.orR + + val err_slave = Module(new NastiErrorSlave) + err_slave.io.ar.valid := r_invalid && io.master.ar.valid + err_slave.io.ar.bits := io.master.ar.bits + err_slave.io.aw.valid := w_invalid && io.master.aw.valid + err_slave.io.aw.bits := io.master.aw.bits + err_slave.io.w.valid := io.master.w.valid + err_slave.io.w.bits := io.master.w.bits + + io.master.ar.ready := ar_ready || (r_invalid && err_slave.io.ar.ready) + io.master.aw.ready := aw_ready || (w_invalid && err_slave.io.aw.ready) + io.master.w.ready := w_ready || err_slave.io.w.ready + + val b_arb = Module(new RRArbiter(new NastiWriteResponseChannel, nSlaves + 1)) + val r_arb = Module(new JunctionsPeekingArbiter( + new NastiReadDataChannel, nSlaves + 1, + // we can unlock if it's the last beat + (r: NastiReadDataChannel) => r.last)) + + for (i <- 0 until nSlaves) { + b_arb.io.in(i) <> io.slave(i).b + r_arb.io.in(i) <> io.slave(i).r + } + + b_arb.io.in(nSlaves) <> err_slave.io.b + r_arb.io.in(nSlaves) <> err_slave.io.r + + io.master.b <> b_arb.io.out + io.master.r <> r_arb.io.out +} + +/** Crossbar between multiple Nasti masters and slaves + * @param nMasters the number of Nasti masters + * @param nSlaves the number of Nasti slaves + * @param routeSel a function selecting the slave to route an address to */ +class NastiCrossbar(nMasters: Int, nSlaves: Int, routeSel: UInt => UInt) + (implicit p: Parameters) extends NastiModule { + val io = new Bundle { + val masters = Vec(nMasters, new NastiIO).flip + val slaves = Vec(nSlaves, new NastiIO) + } + + if (nMasters == 1) { + val router = Module(new NastiRouter(nSlaves, routeSel)) + router.io.master <> io.masters.head + io.slaves <> router.io.slave + } else { + val routers = Vec.fill(nMasters) { Module(new NastiRouter(nSlaves, routeSel)).io } + val arbiters = Vec.fill(nSlaves) { Module(new NastiArbiter(nMasters)).io } + + for (i <- 0 until nMasters) { + routers(i).master <> io.masters(i) + } + + for (i <- 0 until nSlaves) { + arbiters(i).master <> Vec(routers.map(r => r.slave(i))) + io.slaves(i) <> arbiters(i).slave + } + } +} + +class NastiInterconnectIO(val nMasters: Int, val nSlaves: Int) + (implicit p: Parameters) extends Bundle { + /* This is a bit confusing. The interconnect is a slave to the masters and + * a master to the slaves. Hence why the declarations seem to be backwards. 
*/ + val masters = Vec(nMasters, new NastiIO).flip + val slaves = Vec(nSlaves, new NastiIO) + override def cloneType = + new NastiInterconnectIO(nMasters, nSlaves).asInstanceOf[this.type] +} + +abstract class NastiInterconnect(implicit p: Parameters) extends NastiModule()(p) { + val nMasters: Int + val nSlaves: Int + + lazy val io = new NastiInterconnectIO(nMasters, nSlaves) +} + +class NastiRecursiveInterconnect(val nMasters: Int, addrMap: AddrMap) + (implicit p: Parameters) extends NastiInterconnect()(p) { + def port(name: String) = io.slaves(addrMap.port(name)) + val nSlaves = addrMap.numSlaves + val routeSel = (addr: UInt) => + Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse) + + val xbar = Module(new NastiCrossbar(nMasters, addrMap.length, routeSel)) + xbar.io.masters <> io.masters + + io.slaves <> addrMap.entries.zip(xbar.io.slaves).flatMap { + case (entry, xbarSlave) => { + entry.region match { + case submap: AddrMap if submap.entries.isEmpty => + val err_slave = Module(new NastiErrorSlave) + err_slave.io <> xbarSlave + None + case submap: AddrMap => + val ic = Module(new NastiRecursiveInterconnect(1, submap)) + ic.io.masters.head <> xbarSlave + ic.io.slaves + case r: MemRange => + Some(xbarSlave) + } + } + } +} + +class ChannelHelper(nChannels: Int) + (implicit val p: Parameters) extends HasNastiParameters { + + val dataBytes = p(MIFDataBits) * p(MIFDataBeats) / 8 + val chanSelBits = log2Ceil(nChannels) + val selOffset = log2Up(dataBytes) + val blockOffset = selOffset + chanSelBits + + def getSelect(addr: UInt) = + if (nChannels > 1) addr(blockOffset - 1, selOffset) else UInt(0) + + def getAddr(addr: UInt) = + if (nChannels > 1) + Cat(addr(nastiXAddrBits - 1, blockOffset), addr(selOffset - 1, 0)) + else addr +} + +class NastiMemoryInterconnect( + nBanksPerChannel: Int, nChannels: Int) + (implicit p: Parameters) extends NastiInterconnect()(p) { + + val nBanks = nBanksPerChannel * nChannels + val nMasters = nBanks + val nSlaves = nChannels + + val chanHelper = new ChannelHelper(nChannels) + def connectChannel(outer: NastiIO, inner: NastiIO) { + outer <> inner + outer.ar.bits.addr := chanHelper.getAddr(inner.ar.bits.addr) + outer.aw.bits.addr := chanHelper.getAddr(inner.aw.bits.addr) + } + + for (i <- 0 until nChannels) { + /* Bank assignments to channels are strided so that consecutive banks + * map to different channels. That way, consecutive cache lines also + * map to different channels */ + val banks = (i until nBanks by nChannels).map(j => io.masters(j)) + + val channelArb = Module(new NastiArbiter(nBanksPerChannel)) + channelArb.io.master <> banks + connectChannel(io.slaves(i), channelArb.io.slave) + } +} + +/** Allows users to switch between various memory configurations. Note that + * this is a dangerous operation: not only does switching the select input to + * this module violate Nasti, it also causes the memory of the machine to + * become garbled. It's expected that select only changes at boot time, as + * part of the memory controller configuration. 
*/ +class NastiMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int) + (implicit p: Parameters) + extends NastiInterconnectIO(nBanks, maxMemChannels) { + val select = UInt(INPUT, width = log2Up(nConfigs)) + override def cloneType = + new NastiMemorySelectorIO(nMasters, nSlaves, nConfigs).asInstanceOf[this.type] +} + +class NastiMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int]) + (implicit p: Parameters) + extends NastiInterconnect()(p) { + val nMasters = nBanks + val nSlaves = maxMemChannels + val nConfigs = configs.size + + override lazy val io = new NastiMemorySelectorIO(nBanks, maxMemChannels, nConfigs) + + def muxOnSelect(up: DecoupledIO[Bundle], dn: DecoupledIO[Bundle], active: Bool): Unit = { + when (active) { dn.bits := up.bits } + when (active) { up.ready := dn.ready } + when (active) { dn.valid := up.valid } + } + + def muxOnSelect(up: NastiIO, dn: NastiIO, active: Bool): Unit = { + muxOnSelect(up.aw, dn.aw, active) + muxOnSelect(up.w, dn.w, active) + muxOnSelect(dn.b, up.b, active) + muxOnSelect(up.ar, dn.ar, active) + muxOnSelect(dn.r, up.r, active) + } + + def muxOnSelect(up: Vec[NastiIO], dn: Vec[NastiIO], active: Bool) : Unit = { + for (i <- 0 until up.size) + muxOnSelect(up(i), dn(i), active) + } + + /* Disconnects a vector of Nasti ports, which involves setting them to + * invalid. Due to Chisel reasons, we need to also set the bits to 0 (since + * there can't be any unconnected inputs). */ + def disconnectSlave(slave: Vec[NastiIO]) = { + slave.foreach{ m => + m.aw.valid := Bool(false) + m.aw.bits := m.aw.bits.fromBits( UInt(0) ) + m.w.valid := Bool(false) + m.w.bits := m.w.bits.fromBits( UInt(0) ) + m.b.ready := Bool(false) + m.ar.valid := Bool(false) + m.ar.bits := m.ar.bits.fromBits( UInt(0) ) + m.r.ready := Bool(false) + } + } + + def disconnectMaster(master: Vec[NastiIO]) = { + master.foreach{ m => + m.aw.ready := Bool(false) + m.w.ready := Bool(false) + m.b.valid := Bool(false) + m.b.bits := m.b.bits.fromBits( UInt(0) ) + m.ar.ready := Bool(false) + m.r.valid := Bool(false) + m.r.bits := m.r.bits.fromBits( UInt(0) ) + } + } + + /* Provides default wires on all our outputs. */ + disconnectMaster(io.masters) + disconnectSlave(io.slaves) + + /* Constructs interconnects for each of the layouts suggested by the + * configuration and switches between them based on the select input. 
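+   * Every candidate interconnect is fully elaborated in hardware; muxOnSelect
+   * only steers the shared master and slave ports to the instance whose index
+   * matches io.select, while the disconnect helpers keep the unselected
+   * instances driven with safe default values.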
*/ + configs.zipWithIndex.foreach{ case (nChannels, select) => + val nBanksPerChannel = nBanks / nChannels + val ic = Module(new NastiMemoryInterconnect(nBanksPerChannel, nChannels)) + disconnectMaster(ic.io.slaves) + disconnectSlave(ic.io.masters) + muxOnSelect( io.masters, ic.io.masters, io.select === UInt(select)) + muxOnSelect(ic.io.slaves, io.slaves, io.select === UInt(select)) + } +} + +class NastiMemoryDemux(nRoutes: Int)(implicit p: Parameters) extends NastiModule()(p) { + val io = new Bundle { + val master = (new NastiIO).flip + val slaves = Vec(nRoutes, new NastiIO) + val select = UInt(INPUT, log2Up(nRoutes)) + } + + def connectReqChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) { + out.valid := in.valid && io.select === UInt(idx) + out.bits := in.bits + when (io.select === UInt(idx)) { in.ready := out.ready } + } + + def connectRespChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) { + when (io.select === UInt(idx)) { out.valid := in.valid } + when (io.select === UInt(idx)) { out.bits := in.bits } + in.ready := out.ready && io.select === UInt(idx) + } + + io.master.ar.ready := Bool(false) + io.master.aw.ready := Bool(false) + io.master.w.ready := Bool(false) + io.master.r.valid := Bool(false) + io.master.r.bits := NastiReadDataChannel(id = UInt(0), data = UInt(0)) + io.master.b.valid := Bool(false) + io.master.b.bits := NastiWriteResponseChannel(id = UInt(0)) + + io.slaves.zipWithIndex.foreach { case (slave, i) => + connectReqChannel(i, slave.ar, io.master.ar) + connectReqChannel(i, slave.aw, io.master.aw) + connectReqChannel(i, slave.w, io.master.w) + connectRespChannel(i, io.master.r, slave.r) + connectRespChannel(i, io.master.b, slave.b) + } +} + +object AsyncNastiTo { + // source(master) is in our clock domain, output is in the 'to' clock domain + def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = { + val sink = Wire(new NastiIO) + + sink.aw <> AsyncDecoupledTo(to_clock, to_reset, source.aw, depth, sync) + sink.ar <> AsyncDecoupledTo(to_clock, to_reset, source.ar, depth, sync) + sink.w <> AsyncDecoupledTo(to_clock, to_reset, source.w, depth, sync) + source.b <> AsyncDecoupledFrom(to_clock, to_reset, sink.b, depth, sync) + source.r <> AsyncDecoupledFrom(to_clock, to_reset, sink.r, depth, sync) + + sink + } +} + +object AsyncNastiFrom { + // source(master) is in the 'from' clock domain, output is in our clock domain + def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = { + val sink = Wire(new NastiIO) + + sink.aw <> AsyncDecoupledFrom(from_clock, from_reset, source.aw, depth, sync) + sink.ar <> AsyncDecoupledFrom(from_clock, from_reset, source.ar, depth, sync) + sink.w <> AsyncDecoupledFrom(from_clock, from_reset, source.w, depth, sync) + source.b <> AsyncDecoupledTo(from_clock, from_reset, sink.b, depth, sync) + source.r <> AsyncDecoupledTo(from_clock, from_reset, sink.r, depth, sync) + + sink + } +} diff --git a/junctions/src/main/scala/package.scala b/junctions/src/main/scala/package.scala new file mode 100644 index 00000000..214a91e5 --- /dev/null +++ b/junctions/src/main/scala/package.scala @@ -0,0 +1 @@ +package object junctions diff --git a/junctions/src/main/scala/poci.scala b/junctions/src/main/scala/poci.scala new file mode 100644 index 00000000..ac089164 --- /dev/null +++ b/junctions/src/main/scala/poci.scala @@ -0,0 +1,82 @@ +package junctions + +import 
Chisel._ +import cde.{Parameters, Field} + +class PociIO(implicit p: Parameters) extends HastiBundle()(p) +{ + val paddr = UInt(OUTPUT, hastiAddrBits) + val pwrite = Bool(OUTPUT) + val psel = Bool(OUTPUT) + val penable = Bool(OUTPUT) + val pwdata = UInt(OUTPUT, hastiDataBits) + val prdata = UInt(INPUT, hastiDataBits) + val pready = Bool(INPUT) + val pslverr = Bool(INPUT) +} + +class HastiToPociBridge(implicit p: Parameters) extends HastiModule()(p) { + val io = new Bundle { + val in = new HastiSlaveIO + val out = new PociIO + } + + val s_idle :: s_setup :: s_access :: Nil = Enum(UInt(), 3) + val state = Reg(init = s_idle) + val transfer = io.in.hsel & io.in.htrans(1) + + switch (state) { + is (s_idle) { + when (transfer) { state := s_setup } + } + is (s_setup) { + state := s_access + } + is (s_access) { + when (io.out.pready & ~transfer) { state := s_idle } + when (io.out.pready & transfer) { state := s_setup } + when (~io.out.pready) { state := s_access } + } + } + + val haddr_reg = Reg(UInt(width = hastiAddrBits)) + val hwrite_reg = Reg(UInt(width = 1)) + when (transfer) { + haddr_reg := io.in.haddr + hwrite_reg := io.in.hwrite + } + + io.out.paddr := haddr_reg + io.out.pwrite := hwrite_reg(0) + io.out.psel := (state =/= s_idle) + io.out.penable := (state === s_access) + io.out.pwdata := io.in.hwdata + io.in.hrdata := io.out.prdata + io.in.hready := ((state === s_access) & io.out.pready) | (state === s_idle) + io.in.hresp := io.out.pslverr +} + +class PociBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) +{ + val io = new Bundle { + val master = new PociIO().flip + val slaves = Vec(amap.size, new PociIO) + } + + val psels = PriorityEncoderOH( + (io.slaves zip amap) map { case (s, afn) => { + s.paddr := io.master.paddr + s.pwrite := io.master.pwrite + s.pwdata := io.master.pwdata + afn(io.master.paddr) && io.master.psel + }}) + + (io.slaves zip psels) foreach { case (s, psel) => { + s.psel := psel + s.penable := io.master.penable && psel + } } + + io.master.prdata := Mux1H(psels, io.slaves.map(_.prdata)) + io.master.pready := Mux1H(psels, io.slaves.map(_.pready)) + io.master.pslverr := Mux1H(psels, io.slaves.map(_.pslverr)) +} diff --git a/junctions/src/main/scala/slowio.scala b/junctions/src/main/scala/slowio.scala new file mode 100644 index 00000000..7e926918 --- /dev/null +++ b/junctions/src/main/scala/slowio.scala @@ -0,0 +1,70 @@ +// See LICENSE for license details. 
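+// SlowIO moves data between the host/core clock domain and a slower,
+// software-programmable divided clock. The divided clock is driven out on
+// clk_slow, and set_divisor updates both the divider ratio and the hold time
+// used when sampling signals on the slow side.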
+ +package junctions +import Chisel._ + +class SlowIO[T <: Data](val divisor_max: Int)(data: => T) extends Module +{ + val io = new Bundle { + val out_fast = Decoupled(data).flip + val out_slow = Decoupled(data) + val in_fast = Decoupled(data) + val in_slow = Decoupled(data).flip + val clk_slow = Bool(OUTPUT) + val set_divisor = Valid(Bits(width = 32)).flip + val divisor = Bits(OUTPUT, 32) + } + + require(divisor_max >= 8 && divisor_max <= 65536 && isPow2(divisor_max)) + val divisor = Reg(init=UInt(divisor_max-1)) + val d_shadow = Reg(init=UInt(divisor_max-1)) + val hold = Reg(init=UInt(divisor_max/4-1)) + val h_shadow = Reg(init=UInt(divisor_max/4-1)) + when (io.set_divisor.valid) { + d_shadow := io.set_divisor.bits(log2Up(divisor_max)-1, 0).toUInt + h_shadow := io.set_divisor.bits(log2Up(divisor_max)-1+16, 16).toUInt + } + io.divisor := (hold << 16) | divisor + + val count = Reg{UInt(width = log2Up(divisor_max))} + val myclock = Reg{Bool()} + count := count + UInt(1) + + val rising = count === (divisor >> 1) + val falling = count === divisor + val held = count === (divisor >> 1) + hold + + when (falling) { + divisor := d_shadow + hold := h_shadow + count := UInt(0) + myclock := Bool(false) + } + when (rising) { + myclock := Bool(true) + } + + val in_slow_rdy = Reg(init=Bool(false)) + val out_slow_val = Reg(init=Bool(false)) + val out_slow_bits = Reg(data) + + val fromhost_q = Module(new Queue(data,1)) + fromhost_q.io.enq.valid := rising && (io.in_slow.valid && in_slow_rdy || this.reset) + fromhost_q.io.enq.bits := io.in_slow.bits + io.in_fast <> fromhost_q.io.deq + + val tohost_q = Module(new Queue(data,1)) + tohost_q.io.enq <> io.out_fast + tohost_q.io.deq.ready := rising && io.out_slow.ready && out_slow_val + + when (held) { + in_slow_rdy := fromhost_q.io.enq.ready + out_slow_val := tohost_q.io.deq.valid + out_slow_bits := Mux(this.reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits) + } + + io.in_slow.ready := in_slow_rdy + io.out_slow.valid := out_slow_val + io.out_slow.bits := out_slow_bits + io.clk_slow := myclock +} diff --git a/junctions/src/main/scala/smi.scala b/junctions/src/main/scala/smi.scala new file mode 100644 index 00000000..777dd075 --- /dev/null +++ b/junctions/src/main/scala/smi.scala @@ -0,0 +1,281 @@ +package junctions + +import Chisel._ +import cde.Parameters + +class SmiReq(val dataWidth: Int, val addrWidth: Int) extends Bundle { + val rw = Bool() + val addr = UInt(width = addrWidth) + val data = Bits(width = dataWidth) + + override def cloneType = + new SmiReq(dataWidth, addrWidth).asInstanceOf[this.type] +} + +/** Simple Memory Interface IO. 
Used to communicate with PCR and SCR + * @param dataWidth the width in bits of the data field + * @param addrWidth the width in bits of the addr field */ +class SmiIO(val dataWidth: Int, val addrWidth: Int) extends Bundle { + val req = Decoupled(new SmiReq(dataWidth, addrWidth)) + val resp = Decoupled(Bits(width = dataWidth)).flip + + override def cloneType = + new SmiIO(dataWidth, addrWidth).asInstanceOf[this.type] +} + +abstract class SmiPeripheral extends Module { + val dataWidth: Int + val addrWidth: Int + + lazy val io = new SmiIO(dataWidth, addrWidth).flip +} + +/** A simple sequential memory accessed through Smi */ +class SmiMem(val dataWidth: Int, val memDepth: Int) extends SmiPeripheral { + // override + val addrWidth = log2Up(memDepth) + + val mem = SeqMem(memDepth, Bits(width = dataWidth)) + + val ren = io.req.fire() && !io.req.bits.rw + val wen = io.req.fire() && io.req.bits.rw + + when (wen) { mem.write(io.req.bits.addr, io.req.bits.data) } + + val resp_valid = Reg(init = Bool(false)) + + when (io.resp.fire()) { resp_valid := Bool(false) } + when (io.req.fire()) { resp_valid := Bool(true) } + + io.resp.valid := resp_valid + io.resp.bits := mem.read(io.req.bits.addr, ren) + io.req.ready := !resp_valid +} + +/** Arbitrate among several Smi clients + * @param n the number of clients + * @param dataWidth Smi data width + * @param addrWidth Smi address width */ +class SmiArbiter(val n: Int, val dataWidth: Int, val addrWidth: Int) + extends Module { + val io = new Bundle { + val in = Vec(n, new SmiIO(dataWidth, addrWidth)).flip + val out = new SmiIO(dataWidth, addrWidth) + } + + val wait_resp = Reg(init = Bool(false)) + val choice = Reg(UInt(width = log2Up(n))) + + val req_arb = Module(new RRArbiter(new SmiReq(dataWidth, addrWidth), n)) + req_arb.io.in <> io.in.map(_.req) + req_arb.io.out.ready := io.out.req.ready && !wait_resp + + io.out.req.bits := req_arb.io.out.bits + io.out.req.valid := req_arb.io.out.valid && !wait_resp + + when (io.out.req.fire()) { + choice := req_arb.io.chosen + wait_resp := Bool(true) + } + + when (io.out.resp.fire()) { wait_resp := Bool(false) } + + for ((resp, i) <- io.in.map(_.resp).zipWithIndex) { + resp.bits := io.out.resp.bits + resp.valid := io.out.resp.valid && choice === UInt(i) + } + + io.out.resp.ready := io.in(choice).resp.ready +} + +class SmiIONastiReadIOConverter(val dataWidth: Int, val addrWidth: Int) + (implicit p: Parameters) extends NastiModule()(p) { + val io = new Bundle { + val nasti = new NastiReadIO().flip + val smi = new SmiIO(dataWidth, addrWidth) + } + + private val maxWordsPerBeat = nastiXDataBits / dataWidth + private val wordCountBits = log2Up(maxWordsPerBeat) + private val byteOffBits = log2Up(dataWidth / 8) + private val addrOffBits = addrWidth + byteOffBits + + private def calcWordCount(size: UInt): UInt = + (UInt(1) << (size - UInt(byteOffBits))) - UInt(1) + + val (s_idle :: s_read :: s_resp :: Nil) = Enum(Bits(), 3) + val state = Reg(init = s_idle) + + val nWords = Reg(UInt(width = wordCountBits)) + val nBeats = Reg(UInt(width = nastiXLenBits)) + val addr = Reg(UInt(width = addrWidth)) + val id = Reg(UInt(width = nastiRIdBits)) + + val byteOff = Reg(UInt(width = byteOffBits)) + val recvInd = Reg(init = UInt(0, wordCountBits)) + val sendDone = Reg(init = Bool(false)) + + val buffer = Reg(init = Vec.fill(maxWordsPerBeat) { Bits(0, dataWidth) }) + + io.nasti.ar.ready := (state === s_idle) + + io.smi.req.valid := (state === s_read) && !sendDone + io.smi.req.bits.rw := Bool(false) + io.smi.req.bits.addr := addr + + 
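+  // Read path: each NASTI beat is assembled from up to maxWordsPerBeat Smi
+  // reads. Requests are issued while in s_read, the responses are collected
+  // into the buffer register, and the completed beat is returned on the R
+  // channel in s_resp before moving to the next beat (or to s_idle after the
+  // last one).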
io.smi.resp.ready := (state === s_read) + + io.nasti.r.valid := (state === s_resp) + io.nasti.r.bits := NastiReadDataChannel( + id = id, + data = buffer.toBits, + last = (nBeats === UInt(0))) + + when (io.nasti.ar.fire()) { + when (io.nasti.ar.bits.size < UInt(byteOffBits)) { + nWords := UInt(0) + } .otherwise { + nWords := calcWordCount(io.nasti.ar.bits.size) + } + nBeats := io.nasti.ar.bits.len + addr := io.nasti.ar.bits.addr(addrOffBits - 1, byteOffBits) + if (maxWordsPerBeat > 1) + recvInd := io.nasti.ar.bits.addr(wordCountBits + byteOffBits - 1, byteOffBits) + else + recvInd := UInt(0) + id := io.nasti.ar.bits.id + state := s_read + } + + when (io.smi.req.fire()) { + addr := addr + UInt(1) + sendDone := (nWords === UInt(0)) + } + + when (io.smi.resp.fire()) { + recvInd := recvInd + UInt(1) + nWords := nWords - UInt(1) + buffer(recvInd) := io.smi.resp.bits + when (nWords === UInt(0)) { state := s_resp } + } + + when (io.nasti.r.fire()) { + recvInd := UInt(0) + sendDone := Bool(false) + // clear all the registers in the buffer + buffer.foreach(_ := Bits(0)) + nBeats := nBeats - UInt(1) + state := Mux(io.nasti.r.bits.last, s_idle, s_read) + } +} + +class SmiIONastiWriteIOConverter(val dataWidth: Int, val addrWidth: Int) + (implicit p: Parameters) extends NastiModule()(p) { + val io = new Bundle { + val nasti = new NastiWriteIO().flip + val smi = new SmiIO(dataWidth, addrWidth) + } + + private val dataBytes = dataWidth / 8 + private val maxWordsPerBeat = nastiXDataBits / dataWidth + private val byteOffBits = log2Floor(dataBytes) + private val addrOffBits = addrWidth + byteOffBits + private val nastiByteOffBits = log2Ceil(nastiXDataBits / 8) + + assert(!io.nasti.aw.valid || io.nasti.aw.bits.size >= UInt(byteOffBits), + "Nasti size must be >= Smi size") + + val id = Reg(UInt(width = nastiWIdBits)) + val addr = Reg(UInt(width = addrWidth)) + val offset = Reg(UInt(width = nastiByteOffBits)) + + def makeStrobe(offset: UInt, size: UInt, strb: UInt) = { + val sizemask = (UInt(1) << (UInt(1) << size)) - UInt(1) + val bytemask = strb & (sizemask << offset) + Vec.tabulate(maxWordsPerBeat){i => bytemask(dataBytes * i)}.toBits + } + + val size = Reg(UInt(width = nastiXSizeBits)) + val strb = Reg(UInt(width = maxWordsPerBeat)) + val data = Reg(UInt(width = nastiXDataBits)) + val last = Reg(Bool()) + + val s_idle :: s_data :: s_send :: s_ack :: s_resp :: Nil = Enum(Bits(), 5) + val state = Reg(init = s_idle) + + io.nasti.aw.ready := (state === s_idle) + io.nasti.w.ready := (state === s_data) + io.smi.req.valid := (state === s_send) && strb(0) + io.smi.req.bits.rw := Bool(true) + io.smi.req.bits.addr := addr + io.smi.req.bits.data := data(dataWidth - 1, 0) + io.smi.resp.ready := (state === s_ack) + io.nasti.b.valid := (state === s_resp) + io.nasti.b.bits := NastiWriteResponseChannel(id) + + val jump = if (maxWordsPerBeat > 1) + PriorityMux(strb(maxWordsPerBeat - 1, 1), + (1 until maxWordsPerBeat).map(UInt(_))) + else UInt(1) + + when (io.nasti.aw.fire()) { + if (dataWidth == nastiXDataBits) { + addr := io.nasti.aw.bits.addr(addrOffBits - 1, byteOffBits) + } else { + addr := Cat(io.nasti.aw.bits.addr(addrOffBits - 1, nastiByteOffBits), + UInt(0, nastiByteOffBits - byteOffBits)) + } + offset := io.nasti.aw.bits.addr(nastiByteOffBits - 1, 0) + id := io.nasti.aw.bits.id + size := io.nasti.aw.bits.size + last := Bool(false) + state := s_data + } + + when (io.nasti.w.fire()) { + last := io.nasti.w.bits.last + strb := makeStrobe(offset, size, io.nasti.w.bits.strb) + data := io.nasti.w.bits.data + state := 
s_send + } + + when (state === s_send) { + when (io.smi.req.ready || !strb(0)) { + strb := strb >> jump + data := data >> Cat(jump, UInt(0, log2Up(dataWidth))) + addr := addr + jump + when (strb(0)) { state := s_ack } + } + } + + when (io.smi.resp.fire()) { + state := Mux(strb === UInt(0), + Mux(last, s_resp, s_data), s_send) + } + + when (io.nasti.b.fire()) { state := s_idle } +} + +/** Convert Nasti protocol to Smi protocol */ +class SmiIONastiIOConverter(val dataWidth: Int, val addrWidth: Int) + (implicit p: Parameters) extends NastiModule()(p) { + val io = new Bundle { + val nasti = (new NastiIO).flip + val smi = new SmiIO(dataWidth, addrWidth) + } + + require(isPow2(dataWidth), "SMI data width must be power of 2") + require(dataWidth <= nastiXDataBits, + "SMI data width must be less than or equal to NASTI data width") + + val reader = Module(new SmiIONastiReadIOConverter(dataWidth, addrWidth)) + reader.io.nasti <> io.nasti + + val writer = Module(new SmiIONastiWriteIOConverter(dataWidth, addrWidth)) + writer.io.nasti <> io.nasti + + val arb = Module(new SmiArbiter(2, dataWidth, addrWidth)) + arb.io.in(0) <> reader.io.smi + arb.io.in(1) <> writer.io.smi + io.smi <> arb.io.out +} diff --git a/junctions/src/main/scala/stream.scala b/junctions/src/main/scala/stream.scala new file mode 100644 index 00000000..5ee14c5e --- /dev/null +++ b/junctions/src/main/scala/stream.scala @@ -0,0 +1,187 @@ +package junctions + +import Chisel._ +import NastiConstants._ +import cde.Parameters + +class StreamChannel(w: Int) extends Bundle { + val data = UInt(width = w) + val last = Bool() + + override def cloneType = new StreamChannel(w).asInstanceOf[this.type] +} + +class StreamIO(w: Int) extends Bundle { + val out = Decoupled(new StreamChannel(w)) + val in = Decoupled(new StreamChannel(w)).flip + + override def cloneType = new StreamIO(w).asInstanceOf[this.type] +} + +class NastiIOStreamIOConverter(w: Int)(implicit p: Parameters) extends Module { + val io = new Bundle { + val nasti = (new NastiIO).flip + val stream = new StreamIO(w) + } + + val streamSize = UInt(log2Up(w / 8)) + assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === streamSize, + "read channel wrong size on stream") + assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(0) || + io.nasti.ar.bits.burst === BURST_FIXED, + "read channel wrong burst type on stream") + assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === streamSize, + "write channel wrong size on stream") + assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(0) || + io.nasti.aw.bits.burst === BURST_FIXED, + "write channel wrong burst type on stream") + assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR, + "write channel cannot take partial writes") + + val read_id = Reg(io.nasti.ar.bits.id) + val read_cnt = Reg(io.nasti.ar.bits.len) + val reading = Reg(init = Bool(false)) + + io.nasti.ar.ready := !reading + io.nasti.r.valid := reading && io.stream.in.valid + io.nasti.r.bits := io.stream.in.bits + io.nasti.r.bits.resp := UInt(0) + io.nasti.r.bits.id := read_id + io.stream.in.ready := reading && io.nasti.r.ready + + when (io.nasti.ar.fire()) { + read_id := io.nasti.ar.bits.id + read_cnt := io.nasti.ar.bits.len + reading := Bool(true) + } + + when (io.nasti.r.fire()) { + when (read_cnt === UInt(0)) { + reading := Bool(false) + } .otherwise { + read_cnt := read_cnt - UInt(1) + } + } + + val write_id = Reg(io.nasti.aw.bits.id) + val writing = Reg(init = Bool(false)) + val write_resp = Reg(init = Bool(false)) + + io.nasti.aw.ready := !writing && !write_resp + 
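+  // Write path: once AW has been accepted, W beats are forwarded directly to
+  // the stream output; after the last beat, a single B response carrying the
+  // captured write id is returned.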
+  io.nasti.w.ready := writing && io.stream.out.ready
+  io.stream.out.valid := writing && io.nasti.w.valid
+  io.stream.out.bits := io.nasti.w.bits
+  io.nasti.b.valid := write_resp
+  io.nasti.b.bits.resp := UInt(0)
+  io.nasti.b.bits.id := write_id
+
+  when (io.nasti.aw.fire()) {
+    write_id := io.nasti.aw.bits.id
+    writing := Bool(true)
+  }
+
+  when (io.nasti.w.fire() && io.nasti.w.bits.last) {
+    writing := Bool(false)
+    write_resp := Bool(true)
+  }
+
+  when (io.nasti.b.fire()) { write_resp := Bool(false) }
+}
+
+class StreamNarrower(win: Int, wout: Int) extends Module {
+  require(win > wout, "Stream narrower input width must be larger than output width")
+  require(win % wout == 0, "Stream narrower input width must be multiple of output width")
+
+  val io = new Bundle {
+    val in = Decoupled(new StreamChannel(win)).flip
+    val out = Decoupled(new StreamChannel(wout))
+  }
+
+  val n_pieces = win / wout
+  val buffer = Reg(Bits(width = win))
+  val (piece_idx, pkt_done) = Counter(io.out.fire(), n_pieces)
+  val pieces = Vec.tabulate(n_pieces) { i => buffer(wout * (i + 1) - 1, wout * i) }
+  val last_piece = (piece_idx === UInt(n_pieces - 1))
+  val sending = Reg(init = Bool(false))
+  val in_last = Reg(Bool())
+
+  when (io.in.fire()) {
+    buffer := io.in.bits.data
+    in_last := io.in.bits.last
+    sending := Bool(true)
+  }
+  when (pkt_done) { sending := Bool(false) }
+
+  io.out.valid := sending
+  io.out.bits.data := pieces(piece_idx)
+  io.out.bits.last := in_last && last_piece
+  io.in.ready := !sending
+}
+
+class StreamExpander(win: Int, wout: Int) extends Module {
+  require(win < wout, "Stream expander input width must be smaller than output width")
+  require(wout % win == 0, "Stream expander output width must be multiple of input width")
+
+  val io = new Bundle {
+    val in = Decoupled(new StreamChannel(win)).flip
+    val out = Decoupled(new StreamChannel(wout))
+  }
+
+  val n_pieces = wout / win
+  val buffer = Reg(Vec(n_pieces, UInt(width = win)))
+  val last = Reg(Bool())
+  val collecting = Reg(init = Bool(true))
+  val (piece_idx, pkt_done) = Counter(io.in.fire(), n_pieces)
+
+  when (io.in.fire()) { buffer(piece_idx) := io.in.bits.data }
+  when (pkt_done) { last := io.in.bits.last; collecting := Bool(false) }
+  when (io.out.fire()) { collecting := Bool(true) }
+
+  io.in.ready := collecting
+  io.out.valid := !collecting
+  io.out.bits.data := buffer.toBits
+  io.out.bits.last := last
+}
+
+object StreamUtils {
+  def connectStreams(a: StreamIO, b: StreamIO) {
+    a.in <> b.out
+    b.in <> a.out
+  }
+}
+
+trait Serializable {
+  def nbits: Int
+}
+
+class Serializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
+  val io = new Bundle {
+    val in = Decoupled(typ).flip
+    val out = Decoupled(Bits(width = w))
+  }
+
+  val narrower = Module(new StreamNarrower(typ.nbits, w))
+  narrower.io.in.bits.data := io.in.bits.toBits
+  narrower.io.in.bits.last := Bool(true)
+  narrower.io.in.valid := io.in.valid
+  io.in.ready := narrower.io.in.ready
+  io.out.valid := narrower.io.out.valid
+  io.out.bits := narrower.io.out.bits.data
+  narrower.io.out.ready := io.out.ready
+}
+
+class Deserializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
+  val io = new Bundle {
+    val in = Decoupled(Bits(width = w)).flip
+    val out = Decoupled(typ)
+  }
+
+  val expander = Module(new StreamExpander(w, typ.nbits))
+  expander.io.in.valid := io.in.valid
+  expander.io.in.bits.data := io.in.bits
+  expander.io.in.bits.last := Bool(true)
+  io.in.ready := expander.io.in.ready
+  io.out.valid := expander.io.out.valid
+  io.out.bits :=
typ.cloneType.fromBits(expander.io.out.bits.data) + expander.io.out.ready := io.out.ready +} diff --git a/junctions/src/main/scala/util.scala b/junctions/src/main/scala/util.scala new file mode 100644 index 00000000..f7a19ec8 --- /dev/null +++ b/junctions/src/main/scala/util.scala @@ -0,0 +1,314 @@ +/// See LICENSE for license details. +package junctions +import Chisel._ +import cde.Parameters + +class ParameterizedBundle(implicit p: Parameters) extends Bundle { + override def cloneType = { + try { + this.getClass.getConstructors.head.newInstance(p).asInstanceOf[this.type] + } catch { + case e: java.lang.IllegalArgumentException => + throwException("Unable to use ParamaterizedBundle.cloneType on " + + this.getClass + ", probably because " + this.getClass + + "() takes more than one argument. Consider overriding " + + "cloneType() on " + this.getClass, e) + } + } +} + +class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module { + val io = new QueueIO(data, entries) + require(entries > 1) + + val do_flow = Wire(Bool()) + val do_enq = io.enq.fire() && !do_flow + val do_deq = io.deq.fire() && !do_flow + + val maybe_full = Reg(init=Bool(false)) + val enq_ptr = Counter(do_enq, entries)._1 + val (deq_ptr, deq_done) = Counter(do_deq, entries) + when (do_enq =/= do_deq) { maybe_full := do_enq } + + val ptr_match = enq_ptr === deq_ptr + val empty = ptr_match && !maybe_full + val full = ptr_match && maybe_full + val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2) + do_flow := empty && io.deq.ready + + val ram = SeqMem(entries, data) + when (do_enq) { ram.write(enq_ptr, io.enq.bits) } + + val ren = io.deq.ready && (atLeastTwo || !io.deq.valid && !empty) + val raddr = Mux(io.deq.valid, Mux(deq_done, UInt(0), deq_ptr + UInt(1)), deq_ptr) + val ram_out_valid = Reg(next = ren) + + io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) + io.enq.ready := !full + io.deq.bits := Mux(empty, io.enq.bits, ram.read(raddr, ren)) +} + +class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module { + val io = new QueueIO(data, entries) + + val fq = Module(new HellaFlowQueue(entries)(data)) + fq.io.enq <> io.enq + io.deq <> Queue(fq.io.deq, 1, pipe = true) +} + +object HellaQueue { + def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = { + val q = Module((new HellaQueue(entries)) { enq.bits }) + q.io.enq.valid := enq.valid // not using <> so that override is allowed + q.io.enq.bits := enq.bits + enq.ready := q.io.enq.ready + q.io.deq + } +} + +/** A generalized locking RR arbiter that addresses the limitations of the + * version in the Chisel standard library */ +abstract class JunctionsAbstractLockingArbiter[T <: Data](typ: T, arbN: Int) + extends Module { + + val io = new Bundle { + val in = Vec(arbN, Decoupled(typ.cloneType)).flip + val out = Decoupled(typ.cloneType) + } + + def rotateLeft[T <: Data](norm: Vec[T], rot: UInt): Vec[T] = { + val n = norm.size + Vec.tabulate(n) { i => + Mux(rot < UInt(n - i), norm(UInt(i) + rot), norm(rot - UInt(n - i))) + } + } + + val lockIdx = Reg(init = UInt(0, log2Up(arbN))) + val locked = Reg(init = Bool(false)) + + val choice = PriorityMux( + rotateLeft(Vec(io.in.map(_.valid)), lockIdx + UInt(1)), + rotateLeft(Vec((0 until arbN).map(UInt(_))), lockIdx + UInt(1))) + + val chosen = Mux(locked, lockIdx, choice) + + for (i <- 0 until arbN) { + io.in(i).ready := io.out.ready && chosen === UInt(i) + } + + io.out.valid := io.in(chosen).valid + io.out.bits := io.in(chosen).bits +} + +/** This locking arbiter determines when it is safe to unlock + 
* by peeking at the data */ +class JunctionsPeekingArbiter[T <: Data]( + typ: T, arbN: Int, + canUnlock: T => Bool, + needsLock: Option[T => Bool] = None) + extends JunctionsAbstractLockingArbiter(typ, arbN) { + + def realNeedsLock(data: T): Bool = + needsLock.map(_(data)).getOrElse(Bool(true)) + + when (io.out.fire()) { + when (!locked && realNeedsLock(io.out.bits)) { + lockIdx := choice + locked := Bool(true) + } + // the unlock statement takes precedent + when (canUnlock(io.out.bits)) { + locked := Bool(false) + } + } +} + +/** This arbiter determines when it is safe to unlock by counting transactions */ +class JunctionsCountingArbiter[T <: Data]( + typ: T, arbN: Int, count: Int, + val needsLock: Option[T => Bool] = None) + extends JunctionsAbstractLockingArbiter(typ, arbN) { + + def realNeedsLock(data: T): Bool = + needsLock.map(_(data)).getOrElse(Bool(true)) + + // if count is 1, you should use a non-locking arbiter + require(count > 1, "CountingArbiter cannot have count <= 1") + + val lock_ctr = Counter(count) + + when (io.out.fire()) { + when (!locked && realNeedsLock(io.out.bits)) { + lockIdx := choice + locked := Bool(true) + lock_ctr.inc() + } + + when (locked) { + when (lock_ctr.inc()) { locked := Bool(false) } + } + } +} + +class ReorderQueueWrite[T <: Data](dType: T, tagWidth: Int) extends Bundle { + val data = dType.cloneType + val tag = UInt(width = tagWidth) + + override def cloneType = + new ReorderQueueWrite(dType, tagWidth).asInstanceOf[this.type] +} + +class ReorderEnqueueIO[T <: Data](dType: T, tagWidth: Int) + extends DecoupledIO(new ReorderQueueWrite(dType, tagWidth)) { + + override def cloneType = + new ReorderEnqueueIO(dType, tagWidth).asInstanceOf[this.type] +} + +class ReorderDequeueIO[T <: Data](dType: T, tagWidth: Int) extends Bundle { + val valid = Bool(INPUT) + val tag = UInt(INPUT, tagWidth) + val data = dType.cloneType.asOutput + val matches = Bool(OUTPUT) + + override def cloneType = + new ReorderDequeueIO(dType, tagWidth).asInstanceOf[this.type] +} + +class ReorderQueue[T <: Data](dType: T, tagWidth: Int, size: Option[Int] = None) + extends Module { + val io = new Bundle { + val enq = new ReorderEnqueueIO(dType, tagWidth).flip + val deq = new ReorderDequeueIO(dType, tagWidth) + } + + val tagSpaceSize = 1 << tagWidth + val actualSize = size.getOrElse(tagSpaceSize) + + if (tagSpaceSize > actualSize) { + val roq_data = Reg(Vec(actualSize, dType)) + val roq_tags = Reg(Vec(actualSize, UInt(width = tagWidth))) + val roq_free = Reg(init = Vec.fill(actualSize)(Bool(true))) + + val roq_enq_addr = PriorityEncoder(roq_free) + val roq_matches = roq_tags.zip(roq_free) + .map { case (tag, free) => tag === io.deq.tag && !free } + val roq_deq_onehot = PriorityEncoderOH(roq_matches) + + io.enq.ready := roq_free.reduce(_ || _) + io.deq.data := Mux1H(roq_deq_onehot, roq_data) + io.deq.matches := roq_matches.reduce(_ || _) + + when (io.enq.valid && io.enq.ready) { + roq_data(roq_enq_addr) := io.enq.bits.data + roq_tags(roq_enq_addr) := io.enq.bits.tag + roq_free(roq_enq_addr) := Bool(false) + } + + when (io.deq.valid) { + roq_free(OHToUInt(roq_deq_onehot)) := Bool(true) + } + + println(s"Warning - using a CAM for ReorderQueue, tagBits: ${tagWidth} size: ${actualSize}") + } else { + val roq_data = Mem(tagSpaceSize, dType) + val roq_free = Reg(init = Vec.fill(tagSpaceSize)(Bool(true))) + + io.enq.ready := roq_free(io.enq.bits.tag) + io.deq.data := roq_data(io.deq.tag) + io.deq.matches := !roq_free(io.deq.tag) + + when (io.enq.valid && io.enq.ready) { + 
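+      // The tag space fits within the storage, so the tag itself indexes the
+      // memory directly and no CAM lookup is required.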
roq_data(io.enq.bits.tag) := io.enq.bits.data + roq_free(io.enq.bits.tag) := Bool(false) + } + + when (io.deq.valid) { + roq_free(io.deq.tag) := Bool(true) + } + } +} + +object DecoupledHelper { + def apply(rvs: Bool*) = new DecoupledHelper(rvs) +} + +class DecoupledHelper(val rvs: Seq[Bool]) { + def fire(exclude: Bool, includes: Bool*) = { + (rvs.filter(_ ne exclude) ++ includes).reduce(_ && _) + } +} + +class MultiWidthFifo(inW: Int, outW: Int, n: Int) extends Module { + val io = new Bundle { + val in = Decoupled(Bits(width = inW)).flip + val out = Decoupled(Bits(width = outW)) + val count = UInt(OUTPUT, log2Up(n + 1)) + } + + if (inW == outW) { + val q = Module(new Queue(Bits(width = inW), n)) + q.io.enq <> io.in + io.out <> q.io.deq + io.count := q.io.count + } else if (inW > outW) { + val nBeats = inW / outW + + require(inW % outW == 0, s"MultiWidthFifo: in: $inW not divisible by out: $outW") + require(n % nBeats == 0, s"Cannot store $n output words when output beats is $nBeats") + + val wdata = Reg(Vec(n / nBeats, Bits(width = inW))) + val rdata = Vec(wdata.flatMap { indat => + (0 until nBeats).map(i => indat(outW * (i + 1) - 1, outW * i)) }) + + val head = Reg(init = UInt(0, log2Up(n / nBeats))) + val tail = Reg(init = UInt(0, log2Up(n))) + val size = Reg(init = UInt(0, log2Up(n + 1))) + + when (io.in.fire()) { + wdata(head) := io.in.bits + head := head + UInt(1) + } + + when (io.out.fire()) { tail := tail + UInt(1) } + + size := MuxCase(size, Seq( + (io.in.fire() && io.out.fire()) -> (size + UInt(nBeats - 1)), + io.in.fire() -> (size + UInt(nBeats)), + io.out.fire() -> (size - UInt(1)))) + + io.out.valid := size > UInt(0) + io.out.bits := rdata(tail) + io.in.ready := size < UInt(n) + io.count := size + } else { + val nBeats = outW / inW + + require(outW % inW == 0, s"MultiWidthFifo: out: $outW not divisible by in: $inW") + + val wdata = Reg(Vec(n * nBeats, Bits(width = inW))) + val rdata = Vec.tabulate(n) { i => + Cat(wdata.slice(i * nBeats, (i + 1) * nBeats).reverse)} + + val head = Reg(init = UInt(0, log2Up(n * nBeats))) + val tail = Reg(init = UInt(0, log2Up(n))) + val size = Reg(init = UInt(0, log2Up(n * nBeats + 1))) + + when (io.in.fire()) { + wdata(head) := io.in.bits + head := head + UInt(1) + } + + when (io.out.fire()) { tail := tail + UInt(1) } + + size := MuxCase(size, Seq( + (io.in.fire() && io.out.fire()) -> (size - UInt(nBeats - 1)), + io.in.fire() -> (size + UInt(1)), + io.out.fire() -> (size - UInt(nBeats)))) + + io.count := size >> UInt(log2Up(nBeats)) + io.out.valid := io.count > UInt(0) + io.out.bits := rdata(tail) + io.in.ready := size < UInt(n * nBeats) + } +}
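The DecoupledHelper defined above is easiest to see in a small usage sketch. The module below is illustrative only and not part of junctions (the JoinTwo name and port layout are invented): each handshake output is derived from all of the conditions except its own, which is exactly what fire(exclude) computes, so a transfer happens only in a cycle where both inputs are valid and the output is ready.

    import Chisel._
    import junctions._

    // Hypothetical example: combine two Decoupled inputs into one output word.
    class JoinTwo(w: Int) extends Module {
      val io = new Bundle {
        val a   = Decoupled(UInt(width = w)).flip
        val b   = Decoupled(UInt(width = w)).flip
        val out = Decoupled(UInt(width = 2 * w))
      }

      // Every condition that must hold for the joint transfer to fire.
      val xfer = DecoupledHelper(io.a.valid, io.b.valid, io.out.ready)

      // Drive each handshake signal from all the other conditions.
      io.a.ready   := xfer.fire(io.a.valid)
      io.b.ready   := xfer.fire(io.b.valid)
      io.out.valid := xfer.fire(io.out.ready)
      io.out.bits  := Cat(io.a.bits, io.b.bits)
    }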