diff --git a/uncore/.gitignore b/uncore/.gitignore
new file mode 100644
index 00000000..555feb41
--- /dev/null
+++ b/uncore/.gitignore
@@ -0,0 +1,2 @@
+target/
+project/target/
diff --git a/uncore/LICENSE b/uncore/LICENSE
new file mode 100644
index 00000000..7cff15e4
--- /dev/null
+++ b/uncore/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2012-2014, The Regents of the University of California
+(Regents). All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. Neither the name of the Regents nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/uncore/README.md b/uncore/README.md
new file mode 100644
index 00000000..3a628a24
--- /dev/null
+++ b/uncore/README.md
@@ -0,0 +1,12 @@
+Uncore Library
+==============
+
+This is the repository for uncore components associated with the Rocket chip
+project. To use these modules, include this repo as a git submodule within
+your chip's repository and add it as a project in your chip's build.scala.
+These components are only dependent on the ucb-bar/chisel repo, i.e.
+
+ lazy val uncore = project.dependsOn(chisel)
+
+ScalaDoc for the uncore library is available here
+and an overview of the TileLink Protocol is available here, with associated CoherencePolicy documentation here.
diff --git a/uncore/build.sbt b/uncore/build.sbt
new file mode 100644
index 00000000..120670b5
--- /dev/null
+++ b/uncore/build.sbt
@@ -0,0 +1,19 @@
+organization := "edu.berkeley.cs"
+
+version := "2.0"
+
+name := "uncore"
+
+scalaVersion := "2.11.6"
+
+// Provide a managed dependency on X if -DXVersion="" is supplied on the command line.
+libraryDependencies ++= (Seq("chisel","junctions","cde").map {
+ dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten
+
+site.settings
+
+site.includeScaladoc()
+
+ghpages.settings
+
+git.remoteRepo := "git@github.com:ucb-bar/uncore.git"
diff --git a/uncore/project/plugins.sbt b/uncore/project/plugins.sbt
new file mode 100644
index 00000000..4f4825c4
--- /dev/null
+++ b/uncore/project/plugins.sbt
@@ -0,0 +1,5 @@
+resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven"
+
+addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.3")
+
+addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1")
diff --git a/uncore/src/main/scala/Builder.scala b/uncore/src/main/scala/Builder.scala
new file mode 100644
index 00000000..ba2c296a
--- /dev/null
+++ b/uncore/src/main/scala/Builder.scala
@@ -0,0 +1,117 @@
+package uncore
+
+import Chisel._
+import cde.{Config, Parameters, ParameterDump, Knob, Dump}
+import junctions.PAddrBits
+import uncore.tilelink._
+import uncore.agents._
+import uncore.coherence._
+
+object UncoreBuilder extends App with FileSystemUtilities {
+ val topModuleName = args(0)
+ val configClassName = args(1)
+ val config = try {
+ Class.forName(s"uncore.$configClassName").newInstance.asInstanceOf[Config]
+ } catch {
+ case e: java.lang.ClassNotFoundException =>
+ throwException("Unable to find configClassName \"" + configClassName +
+ "\", did you misspell it?", e)
+ }
+ val world = config.toInstance
+ val paramsFromConfig: Parameters = Parameters.root(world)
+
+ val gen = () =>
+ Class.forName(s"uncore.$topModuleName")
+ .getConstructor(classOf[cde.Parameters])
+ .newInstance(paramsFromConfig)
+ .asInstanceOf[Module]
+
+ chiselMain.run(args.drop(2), gen)
+
+ val pdFile = createOutputFile(s"$topModuleName.prm")
+ pdFile.write(ParameterDump.getDump)
+ pdFile.close
+
+}
+
+class DefaultL2Config extends Config (
+ topDefinitions = { (pname,site,here) =>
+ pname match {
+ case PAddrBits => 32
+ case CacheId => 0
+ case CacheName => "L2Bank"
+ case TLId => "L1toL2"
+ case InnerTLId => "L1toL2"
+ case OuterTLId => "L2toMC"
+ case "N_CACHED" => Dump("N_CACHED",here[Int]("CACHED_CLIENTS_PER_PORT"))
+ case "N_UNCACHED" => Dump("N_UNCACHED",here[Int]("MAX_CLIENTS_PER_PORT") - here[Int]("N_CACHED"))
+ case "MAX_CLIENT_XACTS" => 4
+ case "MAX_CLIENTS_PER_PORT" => Knob("NTILES")
+ case "CACHED_CLIENTS_PER_PORT" => Knob("N_CACHED_TILES")
+ case TLKey("L1toL2") =>
+ TileLinkParameters(
+ coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
+ nManagers = 1,
+ nCachingClients = here[Int]("N_CACHED"),
+ nCachelessClients = here[Int]("N_UNCACHED"),
+ maxClientXacts = here[Int]("MAX_CLIENT_XACTS"),
+ maxClientsPerPort = here[Int]("MAX_CLIENTS_PER_PORT"),
+ maxManagerXacts = site(NAcquireTransactors) + 2,
+ dataBits = site(CacheBlockBytes)*8,
+ dataBeats = 2)
+ case TLKey("L2toMC") =>
+ TileLinkParameters(
+ coherencePolicy = new MEICoherence(new NullRepresentation(1)),
+ nManagers = 1,
+ nCachingClients = 1,
+ nCachelessClients = 0,
+ maxClientXacts = 1,
+ maxClientsPerPort = site(NAcquireTransactors) + 2,
+ maxManagerXacts = 1,
+ dataBits = site(CacheBlockBytes)*8,
+ dataBeats = 2)
+ case CacheBlockBytes => 64
+ case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
+ case "L2_SETS" => Knob("L2_SETS")
+ case NSets => Dump("L2_SETS",here[Int]("L2_SETS"))
+ case NWays => Knob("L2_WAYS")
+ case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
+ case CacheIdBits => Dump("CACHE_ID_BITS",1)
+ case L2StoreDataQueueDepth => 1
+ case NAcquireTransactors => Dump("N_ACQUIRE_TRANSACTORS",2)
+ case NSecondaryMisses => 4
+ case L2DirectoryRepresentation => new FullRepresentation(here[Int]("N_CACHED"))
+ case L2Replacer => () => new SeqRandom(site(NWays))
+ case ECCCode => None
+ case AmoAluOperandBits => 64
+ case SplitMetadata => false
+ // case XLen => 128
+ }},
+ knobValues = {
+ case "L2_WAYS" => 1
+ case "L2_SETS" => 1024
+ case "NTILES" => 2
+ case "N_CACHED_TILES" => 2
+ case "L2_CAPACITY_IN_KB" => 256
+ }
+)
+
+class WithPLRU extends Config(
+ (pname, site, here) => pname match {
+ case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays))
+ })
+
+class PLRUL2Config extends Config(new WithPLRU ++ new DefaultL2Config)
+
+class With1L2Ways extends Config(knobValues = { case "L2_WAYS" => 1 })
+class With2L2Ways extends Config(knobValues = { case "L2_WAYS" => 2 })
+class With4L2Ways extends Config(knobValues = { case "L2_WAYS" => 4 })
+
+class With1Cached extends Config(knobValues = { case "N_CACHED_TILES" => 1 })
+class With2Cached extends Config(knobValues = { case "N_CACHED_TILES" => 2 })
+
+
+class W1Cached1WaysConfig extends Config(new With1L2Ways ++ new With1Cached ++ new DefaultL2Config)
+class W1Cached2WaysConfig extends Config(new With2L2Ways ++ new With1Cached ++ new DefaultL2Config)
+class W2Cached1WaysConfig extends Config(new With1L2Ways ++ new With2Cached ++ new DefaultL2Config)
+class W2Cached2WaysConfig extends Config(new With2L2Ways ++ new With2Cached ++ new DefaultL2Config)
diff --git a/uncore/src/main/scala/Consts.scala b/uncore/src/main/scala/Consts.scala
new file mode 100644
index 00000000..a4a4e93b
--- /dev/null
+++ b/uncore/src/main/scala/Consts.scala
@@ -0,0 +1,50 @@
+// See LICENSE for license details.
+
+package uncore
+package constants
+
+import Chisel._
+
+object MemoryOpConstants extends MemoryOpConstants
+trait MemoryOpConstants {
+ val MT_SZ = 3
+ val MT_X = BitPat("b???")
+ val MT_B = UInt("b000")
+ val MT_H = UInt("b001")
+ val MT_W = UInt("b010")
+ val MT_D = UInt("b011")
+ val MT_BU = UInt("b100")
+ val MT_HU = UInt("b101")
+ val MT_WU = UInt("b110")
+ val MT_Q = UInt("b111")
+
+ val NUM_XA_OPS = 9
+ val M_SZ = 5
+ val M_X = BitPat("b?????");
+ val M_XRD = UInt("b00000"); // int load
+ val M_XWR = UInt("b00001"); // int store
+ val M_PFR = UInt("b00010"); // prefetch with intent to read
+ val M_PFW = UInt("b00011"); // prefetch with intent to write
+ val M_XA_SWAP = UInt("b00100");
+ val M_FLUSH_ALL = UInt("b00101") // flush all lines
+ val M_XLR = UInt("b00110");
+ val M_XSC = UInt("b00111");
+ val M_XA_ADD = UInt("b01000");
+ val M_XA_XOR = UInt("b01001");
+ val M_XA_OR = UInt("b01010");
+ val M_XA_AND = UInt("b01011");
+ val M_XA_MIN = UInt("b01100");
+ val M_XA_MAX = UInt("b01101");
+ val M_XA_MINU = UInt("b01110");
+ val M_XA_MAXU = UInt("b01111");
+ val M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
+ val M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
+ val M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
+
+ def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
+ def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
+ def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd)
+ def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd)
+ def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
+}
+
diff --git a/uncore/src/main/scala/Package.scala b/uncore/src/main/scala/Package.scala
new file mode 100644
index 00000000..c9a35dbb
--- /dev/null
+++ b/uncore/src/main/scala/Package.scala
@@ -0,0 +1,4 @@
+// See LICENSE for license details.
+package uncore
+
+package object constants extends uncore.constants.MemoryOpConstants
diff --git a/uncore/src/main/scala/Util.scala b/uncore/src/main/scala/Util.scala
new file mode 100644
index 00000000..aceee5c2
--- /dev/null
+++ b/uncore/src/main/scala/Util.scala
@@ -0,0 +1,11 @@
+package uncore
+
+import Chisel._
+
+package object Util {
+ implicit class UIntIsOneOf(val x: UInt) extends AnyVal {
+ def isOneOf(s: Seq[UInt]): Bool = s.map(x === _).reduce(_||_)
+
+ def isOneOf(u1: UInt, u2: UInt*): Bool = isOneOf(u1 +: u2.toSeq)
+ }
+}
diff --git a/uncore/src/main/scala/agents/Agents.scala b/uncore/src/main/scala/agents/Agents.scala
new file mode 100644
index 00000000..b7519633
--- /dev/null
+++ b/uncore/src/main/scala/agents/Agents.scala
@@ -0,0 +1,161 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+import cde.{Parameters, Field}
+import junctions._
+import uncore.tilelink._
+import uncore.converters._
+import uncore.coherence._
+
+case object NReleaseTransactors extends Field[Int]
+case object NProbeTransactors extends Field[Int]
+case object NAcquireTransactors extends Field[Int]
+
+trait HasCoherenceAgentParameters {
+ implicit val p: Parameters
+ val nReleaseTransactors = 1
+ val nAcquireTransactors = p(NAcquireTransactors)
+ val nTransactors = nReleaseTransactors + nAcquireTransactors
+ val blockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits)
+ val outerTLId = p(OuterTLId)
+ val outerTLParams = p(TLKey(outerTLId))
+ val outerDataBeats = outerTLParams.dataBeats
+ val outerDataBits = outerTLParams.dataBitsPerBeat
+ val outerBeatAddrBits = log2Up(outerDataBeats)
+ val outerByteAddrBits = log2Up(outerDataBits/8)
+ val outerWriteMaskBits = outerTLParams.writeMaskBits
+ val innerTLId = p(InnerTLId)
+ val innerTLParams = p(TLKey(innerTLId))
+ val innerDataBeats = innerTLParams.dataBeats
+ val innerDataBits = innerTLParams.dataBitsPerBeat
+ val innerWriteMaskBits = innerTLParams.writeMaskBits
+ val innerBeatAddrBits = log2Up(innerDataBeats)
+ val innerByteAddrBits = log2Up(innerDataBits/8)
+ val innerNCachingClients = innerTLParams.nCachingClients
+ val maxManagerXacts = innerTLParams.maxManagerXacts
+ require(outerDataBeats == innerDataBeats) //TODO: fix all xact_data Vecs to remove this requirement
+}
+
+abstract class CoherenceAgentModule(implicit val p: Parameters) extends Module
+ with HasCoherenceAgentParameters
+abstract class CoherenceAgentBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
+ with HasCoherenceAgentParameters
+
+trait HasCoherenceAgentWiringHelpers {
+ def doOutputArbitration[T <: TileLinkChannel](
+ out: DecoupledIO[T],
+ ins: Seq[DecoupledIO[T]]) {
+ def lock(o: T) = o.hasMultibeatData()
+ val arb = Module(new LockingRRArbiter(out.bits, ins.size, out.bits.tlDataBeats, Some(lock _)))
+ out <> arb.io.out
+ arb.io.in <> ins
+ }
+
+ def doInputRouting[T <: Bundle with HasManagerTransactionId](
+ in: DecoupledIO[T],
+ outs: Seq[DecoupledIO[T]]) {
+ val idx = in.bits.manager_xact_id
+ outs.map(_.bits := in.bits)
+ outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) }
+ in.ready := Vec(outs.map(_.ready)).read(idx)
+ }
+
+ /** Broadcasts valid messages on this channel to all trackers,
+ * but includes logic to allocate a new tracker in the case where
+ * no previously allocated tracker matches the new req's addr.
+ *
+ * When a match is reported, if ready is high the new transaction
+ * is merged; when ready is low the transaction is being blocked.
+ * When no match is reported, any high idles are presumed to be
+ * from trackers that are available for allocation, and one is
+ * assigned via alloc based on priority; if no idles are high then
+ * all trackers are busy with other transactions. If idle is high
+ * but ready is low, the tracker will be allocated but does not
+ * have sufficient buffering for the data.
+ */
+ def doInputRoutingWithAllocation[T <: TileLinkChannel with HasTileLinkData](
+ in: DecoupledIO[T],
+ outs: Seq[DecoupledIO[T]],
+ allocs: Seq[TrackerAllocation],
+ dataOverrides: Option[Seq[UInt]] = None,
+ allocOverride: Option[Bool] = None,
+ matchOverride: Option[Bool] = None) {
+ val ready_bits = Vec(outs.map(_.ready)).toBits
+ val can_alloc_bits = Vec(allocs.map(_.can)).toBits
+ val should_alloc_bits = PriorityEncoderOH(can_alloc_bits)
+ val match_bits = Vec(allocs.map(_.matches)).toBits
+ val no_matches = !match_bits.orR
+ val alloc_ok = allocOverride.getOrElse(Bool(true))
+ val match_ok = matchOverride.getOrElse(Bool(true))
+ in.ready := (Mux(no_matches, can_alloc_bits, match_bits) & ready_bits).orR && alloc_ok && match_ok
+ outs.zip(allocs).zipWithIndex.foreach { case((out, alloc), i) =>
+ out.valid := in.valid && match_ok && alloc_ok
+ out.bits := in.bits
+ dataOverrides foreach { d => out.bits.data := d(i) }
+ alloc.should := should_alloc_bits(i) && no_matches && alloc_ok
+ }
+ }
+}
+
+trait HasInnerTLIO extends HasCoherenceAgentParameters {
+ val inner = new ManagerTileLinkIO()(p.alterPartial({case TLId => p(InnerTLId)}))
+ val incoherent = Vec(inner.tlNCachingClients, Bool()).asInput
+ def iacq(dummy: Int = 0) = inner.acquire.bits
+ def iprb(dummy: Int = 0) = inner.probe.bits
+ def irel(dummy: Int = 0) = inner.release.bits
+ def ignt(dummy: Int = 0) = inner.grant.bits
+ def ifin(dummy: Int = 0) = inner.finish.bits
+}
+
+trait HasUncachedOuterTLIO extends HasCoherenceAgentParameters {
+ val outer = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)}))
+ def oacq(dummy: Int = 0) = outer.acquire.bits
+ def ognt(dummy: Int = 0) = outer.grant.bits
+}
+
+trait HasCachedOuterTLIO extends HasCoherenceAgentParameters {
+ val outer = new ClientTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)}))
+ def oacq(dummy: Int = 0) = outer.acquire.bits
+ def oprb(dummy: Int = 0) = outer.probe.bits
+ def orel(dummy: Int = 0) = outer.release.bits
+ def ognt(dummy: Int = 0) = outer.grant.bits
+}
+
+class ManagerTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p)
+ with HasInnerTLIO
+ with HasUncachedOuterTLIO
+
+abstract class CoherenceAgent(implicit p: Parameters) extends CoherenceAgentModule()(p) {
+ def innerTL: ManagerTileLinkIO
+ def outerTL: ClientTileLinkIO
+ def incoherent: Vec[Bool]
+}
+
+abstract class ManagerCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p)
+ with HasCoherenceAgentWiringHelpers {
+ val io = new ManagerTLIO
+ def innerTL = io.inner
+ def outerTL = TileLinkIOWrapper(io.outer)(p.alterPartial({case TLId => p(OuterTLId)}))
+ def incoherent = io.incoherent
+}
+
+class HierarchicalTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p)
+ with HasInnerTLIO
+ with HasCachedOuterTLIO
+
+abstract class HierarchicalCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p)
+ with HasCoherenceAgentWiringHelpers {
+ val io = new HierarchicalTLIO
+ def innerTL = io.inner
+ def outerTL = io.outer
+ def incoherent = io.incoherent
+
+ // TODO: Remove this function (and all its calls) when we support probing the L2
+ def disconnectOuterProbeAndFinish() {
+ io.outer.probe.ready := Bool(false)
+ io.outer.finish.valid := Bool(false)
+ assert(!io.outer.probe.valid, "L2 agent got illegal probe")
+ }
+}
diff --git a/uncore/src/main/scala/agents/Broadcast.scala b/uncore/src/main/scala/agents/Broadcast.scala
new file mode 100644
index 00000000..9845342e
--- /dev/null
+++ b/uncore/src/main/scala/agents/Broadcast.scala
@@ -0,0 +1,204 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+import uncore.coherence._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.Util._
+import cde.Parameters
+
+class L2BroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
+
+ // Create TSHRs for outstanding transactions
+ val irelTrackerList =
+ (0 until nReleaseTransactors).map(id =>
+ Module(new BufferedBroadcastVoluntaryReleaseTracker(id)))
+ val iacqTrackerList =
+ (nReleaseTransactors until nTransactors).map(id =>
+ Module(new BufferedBroadcastAcquireTracker(id)))
+ val trackerList = irelTrackerList ++ iacqTrackerList
+
+ // Propagate incoherence flags
+ trackerList.map(_.io.incoherent) foreach { _ := io.incoherent }
+
+ // Create an arbiter for the one memory port
+ val outerList = trackerList.map(_.io.outer)
+ val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
+ (p.alterPartial({ case TLId => p(OuterTLId) })))
+ outer_arb.io.in <> outerList
+ io.outer <> outer_arb.io.out
+
+ // Handle acquire transaction initiation
+ val irel_vs_iacq_conflict =
+ io.inner.acquire.valid &&
+ io.inner.release.valid &&
+ io.irel().conflicts(io.iacq())
+
+ doInputRoutingWithAllocation(
+ in = io.inner.acquire,
+ outs = trackerList.map(_.io.inner.acquire),
+ allocs = trackerList.map(_.io.alloc.iacq),
+ allocOverride = Some(!irel_vs_iacq_conflict))
+
+ // Handle releases, which might be voluntary and might have data
+ doInputRoutingWithAllocation(
+ in = io.inner.release,
+ outs = trackerList.map(_.io.inner.release),
+ allocs = trackerList.map(_.io.alloc.irel))
+
+ // Wire probe requests and grant reply to clients, finish acks from clients
+ doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
+
+ doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
+
+ doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+ disconnectOuterProbeAndFinish()
+}
+
+class BroadcastXactTracker(implicit p: Parameters) extends XactTracker()(p) {
+ val io = new HierarchicalXactTrackerIO
+ pinAllReadyValidLow(io)
+}
+
+trait BroadcastsToAllClients extends HasCoherenceAgentParameters {
+ val coh = HierarchicalMetadata.onReset
+ val inner_coh = coh.inner
+ val outer_coh = coh.outer
+ def full_representation = ~UInt(0, width = innerNCachingClients)
+}
+
+abstract class BroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
+ extends VoluntaryReleaseTracker(trackerId)(p)
+ with EmitsVoluntaryReleases
+ with BroadcastsToAllClients {
+ val io = new HierarchicalXactTrackerIO
+ pinAllReadyValidLow(io)
+
+ // Checks for illegal behavior
+ assert(!(state === s_idle && io.inner.release.fire() && io.alloc.irel.should && !io.irel().isVoluntary()),
+ "VoluntaryReleaseTracker accepted Release that wasn't voluntary!")
+}
+
+abstract class BroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
+ extends AcquireTracker(trackerId)(p)
+ with EmitsVoluntaryReleases
+ with BroadcastsToAllClients {
+ val io = new HierarchicalXactTrackerIO
+ pinAllReadyValidLow(io)
+
+ val alwaysWriteFullBeat = false
+ val nSecondaryMisses = 1
+ def iacq_can_merge = Bool(false)
+
+ // Checks for illegal behavior
+ // TODO: this could be allowed, but is a useful check against allocation gone wild
+ assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc.iacq.should &&
+ io.iacq().hasMultibeatData() && !io.iacq().first()),
+ "AcquireTracker initialized with a tail data beat.")
+
+ assert(!(state =/= s_idle && pending_ignt && xact_iacq.isPrefetch()),
+ "Broadcast Hub does not support Prefetches.")
+
+ assert(!(state =/= s_idle && pending_ignt && xact_iacq.isAtomic()),
+ "Broadcast Hub does not support PutAtomics.")
+}
+
+class BufferedBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
+ extends BroadcastVoluntaryReleaseTracker(trackerId)(p)
+ with HasDataBuffer {
+
+ // Tell the parent if any incoming messages conflict with the ongoing transaction
+ routeInParent(irelCanAlloc = Bool(true))
+
+ // Start transaction by accepting inner release
+ innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
+
+  // A release beat can be accepted if we are idle, if it's a mergeable transaction, or if it's a tail beat
+ io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
+
+ when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
+
+ // Dispatch outer release
+ outerRelease(
+ coh = outer_coh.onHit(M_XWR),
+ data = data_buffer(vol_ognt_counter.up.idx),
+ add_pending_send_bit = irel_is_allocating)
+
+ quiesce() {}
+}
+
+class BufferedBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
+ extends BroadcastAcquireTracker(trackerId)(p)
+ with HasByteWriteMaskBuffer {
+
+ // Setup IOs used for routing in the parent
+ routeInParent(iacqCanAlloc = Bool(true))
+
+  // First, take care of accepting new acquires or secondary misses
+ // Handling of primary and secondary misses' data and write mask merging
+ innerAcquire(
+ can_alloc = Bool(false),
+ next = s_inner_probe)
+
+ io.inner.acquire.ready := state === s_idle || iacq_can_merge || iacq_same_xact_multibeat
+
+ // Track which clients yet need to be probed and make Probe message
+ // If a writeback occurs, we can forward its data via the buffer,
+ // and skip having to go outwards
+ val skip_outer_acquire = pending_ignt_data.andR
+
+ innerProbe(
+ inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
+ Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
+
+ // Handle incoming releases from clients, which may reduce sharer counts
+ // and/or write back dirty data, and may be unexpected voluntary releases
+ def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+ io.irel().isVoluntary() &&
+ !state.isOneOf(s_idle, s_meta_write) &&
+ !all_pending_done &&
+ !io.outer.grant.fire() &&
+ !io.inner.grant.fire() &&
+ !vol_ignt_counter.pending &&
+ !blockInnerRelease()
+
+ innerRelease(block_vol_ignt = vol_ognt_counter.pending)
+
+ //TODO: accept vol irels when state === s_idle, operate like the VolRelTracker
+ io.inner.release.ready := irel_can_merge || irel_same_xact
+
+ mergeDataInner(io.inner.release)
+
+ // If there was a writeback, forward it outwards
+ outerRelease(
+ coh = outer_coh.onHit(M_XWR),
+ data = data_buffer(vol_ognt_counter.up.idx))
+
+ // Send outer request for miss
+ outerAcquire(
+ caching = !xact_iacq.isBuiltInType(),
+ coh = outer_coh,
+ data = data_buffer(ognt_counter.up.idx),
+ wmask = wmask_buffer(ognt_counter.up.idx),
+ next = s_busy)
+
+ // Handle the response from outer memory
+ mergeDataOuter(io.outer.grant)
+
+ // Acknowledge or respond with data
+ innerGrant(
+ data = data_buffer(ignt_data_idx),
+ external_pending = pending_orel || ognt_counter.pending || vol_ognt_counter.pending)
+
+ when(iacq_is_allocating) {
+ initializeProbes()
+ }
+
+ initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging)
+
+ // Wait for everything to quiesce
+ quiesce() { clearWmaskBuffer() }
+}
diff --git a/uncore/src/main/scala/agents/Bufferless.scala b/uncore/src/main/scala/agents/Bufferless.scala
new file mode 100644
index 00000000..5371d74a
--- /dev/null
+++ b/uncore/src/main/scala/agents/Bufferless.scala
@@ -0,0 +1,162 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+import uncore.coherence._
+import uncore.tilelink._
+import uncore.constants._
+import cde.Parameters
+
+
+class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
+
+ // Create TSHRs for outstanding transactions
+ val irelTrackerList =
+ (0 until nReleaseTransactors).map(id =>
+ Module(new BufferlessBroadcastVoluntaryReleaseTracker(id)))
+ val iacqTrackerList =
+ (nReleaseTransactors until nTransactors).map(id =>
+ Module(new BufferlessBroadcastAcquireTracker(id)))
+ val trackerList = irelTrackerList ++ iacqTrackerList
+
+ // Propagate incoherence flags
+ trackerList.map(_.io.incoherent) foreach { _ := io.incoherent }
+
+ // Create an arbiter for the one memory port
+ val outerList = trackerList.map(_.io.outer)
+ val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
+ (p.alterPartial({ case TLId => p(OuterTLId) })))
+ outer_arb.io.in <> outerList
+ io.outer <> outer_arb.io.out
+
+ val iacq = Queue(io.inner.acquire, 1, pipe=true)
+ val irel = Queue(io.inner.release, 1, pipe=true)
+
+ // Handle acquire transaction initiation
+ val irel_vs_iacq_conflict =
+ iacq.valid &&
+ irel.valid &&
+ irel.bits.conflicts(iacq.bits)
+
+ doInputRoutingWithAllocation(
+ in = iacq,
+ outs = trackerList.map(_.io.inner.acquire),
+ allocs = trackerList.map(_.io.alloc.iacq),
+ allocOverride = Some(!irel_vs_iacq_conflict))
+ io.outer.acquire.bits.data := iacq.bits.data
+ when (io.oacq().hasData()) {
+ io.outer.acquire.bits.addr_beat := iacq.bits.addr_beat
+ }
+
+ // Handle releases, which might be voluntary and might have data
+ doInputRoutingWithAllocation(
+ in = irel,
+ outs = trackerList.map(_.io.inner.release),
+ allocs = trackerList.map(_.io.alloc.irel))
+ io.outer.release.bits.data := irel.bits.data
+ when (io.orel().hasData()) {
+ io.outer.release.bits.addr_beat := irel.bits.addr_beat
+ }
+
+ // Wire probe requests and grant reply to clients, finish acks from clients
+ doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
+
+ doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
+ io.inner.grant.bits.data := io.outer.grant.bits.data
+ io.inner.grant.bits.addr_beat := io.outer.grant.bits.addr_beat
+
+ doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+ disconnectOuterProbeAndFinish()
+}
+
+class BufferlessBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
+ extends BroadcastVoluntaryReleaseTracker(trackerId)(p) {
+
+ // Tell the parent if any incoming messages conflict with the ongoing transaction
+ routeInParent(irelCanAlloc = Bool(true))
+
+ // Start transaction by accepting inner release
+ innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
+
+  // A release beat can be accepted if we are idle, if it's a mergeable transaction, or if it's a tail beat
+  // and if the outer release path is clear
+ io.inner.release.ready := Mux(io.irel().hasData(),
+ (state =/= s_idle) && (irel_can_merge || irel_same_xact) && io.outer.release.ready,
+ (state === s_idle) || irel_can_merge || irel_same_xact)
+
+ // Dispatch outer release
+ outerRelease(coh = outer_coh.onHit(M_XWR), buffering = Bool(false))
+
+ quiesce() {}
+}
+
+class BufferlessBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
+ extends BroadcastAcquireTracker(trackerId)(p) {
+
+ // Setup IOs used for routing in the parent
+ routeInParent(iacqCanAlloc = Bool(true))
+
+  // First, take care of accepting new acquires or secondary misses
+ // Handling of primary and secondary misses' data and write mask merging
+ innerAcquire(
+ can_alloc = Bool(false),
+ next = s_inner_probe)
+
+ // We are never going to merge anything in the bufferless hub
+ // Therefore, we only need to concern ourselves with the allocated
+ // transaction and (in case of PutBlock) subsequent tail beats
+ val iacq_can_forward = iacq_same_xact && !vol_ognt_counter.pending
+ io.inner.acquire.ready := Mux(io.iacq().hasData(),
+ state === s_outer_acquire && iacq_can_forward && io.outer.acquire.ready,
+ state === s_idle && io.alloc.iacq.should)
+
+ // Track which clients yet need to be probed and make Probe message
+ innerProbe(
+ inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
+ s_outer_acquire)
+
+ // Handle incoming releases from clients, which may reduce sharer counts
+ // and/or write back dirty data, and may be unexpected voluntary releases
+ def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+ io.irel().isVoluntary() &&
+ !vol_ignt_counter.pending &&
+ !(io.irel().hasData() && ognt_counter.pending) &&
+ (state =/= s_idle)
+
+ innerRelease(block_vol_ignt = vol_ognt_counter.pending)
+
+ val irel_could_accept = irel_can_merge || irel_same_xact
+ io.inner.release.ready := irel_could_accept &&
+ (!io.irel().hasData() || io.outer.release.ready)
+
+ // If there was a writeback, forward it outwards
+ outerRelease(
+ coh = outer_coh.onHit(M_XWR),
+ buffering = Bool(false),
+ block_orel = !irel_could_accept)
+
+ // Send outer request for miss
+ outerAcquire(
+ caching = !xact_iacq.isBuiltInType(),
+ block_outer_acquire = vol_ognt_counter.pending,
+ buffering = Bool(false),
+ coh = outer_coh,
+ next = s_busy)
+
+ // Handle the response from outer memory
+ when (ognt_counter.pending && io.ognt().hasData()) {
+ io.outer.grant.ready := io.inner.grant.ready // bypass data
+ }
+
+ // Acknowledge or respond with data
+ innerGrant(
+ external_pending = pending_orel || vol_ognt_counter.pending,
+ buffering = Bool(false))
+
+ when(iacq_is_allocating) { initializeProbes() }
+
+ // Wait for everything to quiesce
+ quiesce() {}
+}
diff --git a/uncore/src/main/scala/agents/Cache.scala b/uncore/src/main/scala/agents/Cache.scala
new file mode 100644
index 00000000..80f8f8dd
--- /dev/null
+++ b/uncore/src/main/scala/agents/Cache.scala
@@ -0,0 +1,1146 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+import scala.reflect.ClassTag
+import junctions._
+import uncore.util.AMOALU
+import uncore.coherence._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.Util._
+import cde.{Parameters, Field}
+
+// Configuration keys (cde Fields) used to parameterize cache geometry,
+// replacement policy, ECC, miss tracking, and metadata array organization.
+case object CacheName extends Field[String]
+case object NSets extends Field[Int]
+case object NWays extends Field[Int]
+case object RowBits extends Field[Int]
+case object Replacer extends Field[() => ReplacementPolicy]
+case object L2Replacer extends Field[() => SeqReplacementPolicy]
+case object NPrimaryMisses extends Field[Int]
+case object NSecondaryMisses extends Field[Int]
+case object CacheBlockBytes extends Field[Int]
+case object ECCCode extends Field[Option[Code]]
+case object CacheIdBits extends Field[Int]
+case object CacheId extends Field[Int]
+// When true, metadata is stored in one SeqMem per way instead of a single array
+case object SplitMetadata extends Field[Boolean]
+
+// Derives the geometry values shared by all cache modules (set/way counts,
+// address-slice widths, row geometry, ECC code) from the Parameters instance.
+trait HasCacheParameters {
+ implicit val p: Parameters
+ val nSets = p(NSets)
+ val blockOffBits = p(CacheBlockOffsetBits)
+ val cacheIdBits = p(CacheIdBits)
+ val idxBits = log2Up(nSets)
+ // Untagged low-order address bits: block offset, bank/cache id, set index
+ val untagBits = blockOffBits + cacheIdBits + idxBits
+ val tagBits = p(PAddrBits) - untagBits
+ val nWays = p(NWays)
+ val wayBits = log2Up(nWays)
+ val isDM = nWays == 1
+ val rowBits = p(RowBits)
+ val rowBytes = rowBits/8
+ val rowOffBits = log2Up(rowBytes)
+ // Fall back to the identity (no-op) code when no ECC is configured
+ val code = p(ECCCode).getOrElse(new IdentityCode)
+ val hasSplitMetadata = p(SplitMetadata)
+}
+
+// Base Module/Bundle classes that mix in the cache geometry parameters
+abstract class CacheModule(implicit val p: Parameters) extends Module
+ with HasCacheParameters
+abstract class CacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
+ with HasCacheParameters
+
+// Interface for combinational (same-cycle) way-replacement policies:
+// `way` names the victim; `miss`/`hit` report access outcomes to the policy.
+abstract class ReplacementPolicy {
+ def way: UInt
+ def miss: Unit
+ def hit: Unit
+}
+
+// Random replacement: advances an LFSR on every reported miss and uses its
+// low-order bits to select the victim way.
+class RandomReplacement(ways: Int) extends ReplacementPolicy {
+ private val replace = Wire(Bool())
+ replace := Bool(false)
+ val lfsr = LFSR16(replace)
+
+ def way = if(ways == 1) UInt(0) else lfsr(log2Up(ways)-1,0)
+ def miss = replace := Bool(true)
+ def hit = {}
+}
+
+// Interface for replacement policies with per-set state held in a
+// sequential memory: `access` reads the set's state, `update` writes it back.
+abstract class SeqReplacementPolicy {
+ def access(set: UInt): Unit
+ def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit
+ def way: UInt
+}
+
+// Sequential-interface wrapper around RandomReplacement; keeps no per-set
+// state, so `access` is a no-op and only misses advance the LFSR.
+class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
+ val logic = new RandomReplacement(n_ways)
+ def access(set: UInt) = { }
+ def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
+ when (valid && !hit) { logic.miss }
+ }
+ def way = logic.way
+}
+
+// Tree-based pseudo-LRU over n ways (n must be a power of two). The state
+// bits encode a binary tree of "which half was least recently used" flags;
+// `access` flips the bits along the accessed way's path, and
+// `get_replace_way` walks the tree following the stored bits to the victim.
+class PseudoLRU(n: Int)
+{
+ require(isPow2(n))
+ val state_reg = Reg(Bits(width = n))
+ def access(way: UInt) {
+ state_reg := get_next_state(state_reg,way)
+ }
+ def get_next_state(state: UInt, way: UInt) = {
+ var next_state = state
+ // idx walks the tree from the root (bit 1) down to the leaf for `way`,
+ // setting each visited node to point away from the accessed half
+ var idx = UInt(1,1)
+ for (i <- log2Up(n)-1 to 0 by -1) {
+ val bit = way(i)
+ next_state = next_state.bitSet(idx, !bit)
+ idx = Cat(idx, bit)
+ }
+ next_state
+ }
+ def replace = get_replace_way(state_reg)
+ def get_replace_way(state: Bits) = {
+ // Follow the stored direction bits from the root; the accumulated path
+ // bits form the victim way number
+ var idx = UInt(1,1)
+ for (i <- 0 until log2Up(n))
+ idx = Cat(idx, state(idx))
+ idx(log2Up(n)-1,0)
+ }
+}
+
+// Pseudo-LRU with per-set state kept in a SeqMem: `access` launches the
+// state read for a set, `update` computes and writes back the next state
+// one cycle later (using the replacement way on a miss, the hit way on a hit).
+class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
+ val state = SeqMem(n_sets, Bits(width = n_ways-1))
+ val logic = new PseudoLRU(n_ways)
+ val current_state = Wire(Bits())
+ val plru_way = logic.get_replace_way(current_state)
+ val next_state = Wire(Bits())
+
+ def access(set: UInt) = {
+ // Pad with a zero LSB: tree bit 0 is unused by the PseudoLRU encoding
+ current_state := Cat(state.read(set), Bits(0, width = 1))
+ }
+
+ def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
+ val update_way = Mux(hit, way, plru_way)
+ next_state := logic.get_next_state(current_state, update_way)
+ when (valid) { state.write(set, next_state(n_ways-1,1)) }
+ }
+
+ def way = plru_way
+}
+
+// A single metadata entry: the address tag plus coherence state (the
+// concrete coherence type is supplied by subclasses).
+abstract class Metadata(implicit p: Parameters) extends CacheBundle()(p) {
+ val tag = Bits(width = tagBits)
+ val coh: CoherenceMetadata
+}
+
+// Metadata array read request: set index plus a one-hot way-enable mask
+class MetaReadReq(implicit p: Parameters) extends CacheBundle()(p) {
+ val idx = Bits(width = idxBits)
+ val way_en = Bits(width = nWays)
+}
+
+// Metadata array write request: read request fields plus the entry to write
+class MetaWriteReq[T <: Metadata](gen: T)(implicit p: Parameters) extends MetaReadReq()(p) {
+ val data = gen.cloneType
+ override def cloneType = new MetaWriteReq(gen)(p).asInstanceOf[this.type]
+}
+
+// Backing storage for cache metadata (tag + coherence state), one entry per
+// set and way. Coming out of reset it sequentially initializes every set
+// with the value produced by `onReset` before accepting reads or writes.
+// Reads are synchronous (SeqMem): resp reflects the previously read set.
+class MetadataArray[T <: Metadata](onReset: () => T)(implicit p: Parameters) extends CacheModule()(p) {
+ val rstVal = onReset()
+ val io = new Bundle {
+ val read = Decoupled(new MetaReadReq).flip
+ val write = Decoupled(new MetaWriteReq(rstVal)).flip
+ val resp = Vec(nWays, rstVal.cloneType).asOutput
+ }
+ // Reset sweep: walk every set once, writing the reset metadata value
+ val rst_cnt = Reg(init=UInt(0, log2Up(nSets+1)))
+ val rst = rst_cnt < UInt(nSets)
+ val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
+ val wdata = Mux(rst, rstVal, io.write.bits.data).toBits
+ // During the reset sweep (or when direct-mapped) enable all ways
+ val wmask = Mux(rst || Bool(nWays == 1), SInt(-1), io.write.bits.way_en.toSInt).toBools
+ val rmask = Mux(rst || Bool(nWays == 1), SInt(-1), io.read.bits.way_en.toSInt).toBools
+ when (rst) { rst_cnt := rst_cnt+UInt(1) }
+
+ val metabits = rstVal.getWidth
+
+ if (hasSplitMetadata) {
+ // One SeqMem per way, each with its own write enable
+ val tag_arrs = List.fill(nWays){ SeqMem(nSets, UInt(width = metabits)) }
+ val tags_vec = Wire(Vec(nWays, UInt(width = metabits)))
+ (0 until nWays).foreach { (i) =>
+ when (rst || (io.write.valid && wmask(i))) {
+ tag_arrs(i).write(waddr, wdata)
+ }
+ tags_vec(i) := tag_arrs(i).read(io.read.bits.idx, io.read.valid && rmask(i))
+ }
+ io.resp := io.resp.fromBits(tags_vec.toBits)
+ } else {
+ // Single SeqMem holding all ways of a set in one row, masked per way
+ val tag_arr = SeqMem(nSets, Vec(nWays, UInt(width = metabits)))
+ when (rst || io.write.valid) {
+ tag_arr.write(waddr, Vec.fill(nWays)(wdata), wmask)
+ }
+ val tags = tag_arr.read(io.read.bits.idx, io.read.valid).toBits
+ io.resp := io.resp.fromBits(tags)
+ }
+
+ // Writes take priority over reads on the shared port
+ io.read.ready := !rst && !io.write.valid // so really this could be a 6T RAM
+ io.write.ready := !rst
+}
+
+case object L2DirectoryRepresentation extends Field[DirectoryRepresentation]
+
+// Geometry and address-slicing parameters for the outer (L2) cache, layered
+// on top of the generic cache and coherence-agent parameters. Block
+// addresses are sliced as | tag | idx | cacheId |, with the helpers below
+// comparing the idx and tag fields of TileLink block addresses.
+trait HasOuterCacheParameters extends HasCacheParameters with HasCoherenceAgentParameters {
+ val cacheId = p(CacheId)
+ val idxLSB = cacheIdBits
+ val idxMSB = idxLSB + idxBits - 1
+ val tagLSB = idxLSB + idxBits
+ val tagMSB = tagLSB + tagBits - 1
+
+ def inSameSet(block_a: HasCacheBlockAddress, block_b: HasCacheBlockAddress): Bool =
+ inSameSet(block_a, block_b.addr_block)
+
+ def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool =
+ inSet(block, addr(idxMSB, idxLSB))
+
+ def inSet(block: HasCacheBlockAddress, idx: UInt): Bool =
+ block.addr_block(idxMSB,idxLSB) === idx
+
+ def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool =
+ hasTag(block, addr(tagMSB, tagLSB))
+
+ def hasTag(block: HasCacheBlockAddress, tag: UInt): Bool =
+ block.addr_block(tagMSB, tagLSB) === tag
+
+ def isSameBlock(block: HasCacheBlockAddress, tag: UInt, idx: UInt) =
+ hasTag(block, tag) && inSet(block, idx)
+
+ //val blockAddrBits = p(TLBlockAddrBits)
+ // Internal row geometry relative to outer TileLink beats
+ val refillCyclesPerBeat = outerDataBits/rowBits
+ val refillCycles = refillCyclesPerBeat*outerDataBeats
+ val internalDataBeats = p(CacheBlockBytes)*8/rowBits
+ require(refillCyclesPerBeat == 1)
+ val amoAluOperandBits = p(AmoAluOperandBits)
+ require(amoAluOperandBits <= innerDataBits)
+ require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states
+ val nSecondaryMisses = p(NSecondaryMisses)
+ val isLastLevelCache = true
+ // With ECC configured, partial writes must be widened to full beats
+ val alwaysWriteFullBeat = !p(ECCCode).isEmpty
+}
+
+// Base class for L2 cache modules, providing helpers to arbitrate many
+// tracker requests onto one internal port and to route tagged responses
+// back to the requesting tracker.
+abstract class L2HellaCacheModule(implicit val p: Parameters) extends Module
+ with HasOuterCacheParameters {
+ // Round-robin arbitration of several producers onto `out`; a transfer is
+ // suppressed (valid and ready both deasserted) while `block_transfer`
+ // holds for the currently selected request.
+ def doInternalOutputArbitration[T <: Data : ClassTag](
+ out: DecoupledIO[T],
+ ins: Seq[DecoupledIO[T]],
+ block_transfer: T => Bool = (t: T) => Bool(false)) {
+ val arb = Module(new RRArbiter(out.bits, ins.size))
+ out.valid := arb.io.out.valid && !block_transfer(arb.io.out.bits)
+ out.bits := arb.io.out.bits
+ arb.io.out.ready := out.ready && !block_transfer(arb.io.out.bits)
+ arb.io.in <> ins
+ }
+
+ // Fan a response out to all consumers, validating only the one whose
+ // index matches the response's id tag
+ def doInternalInputRouting[T <: Bundle with HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) {
+ outs.map(_.bits := in.bits)
+ outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && in.bits.id === UInt(i) }
+ }
+}
+
+abstract class L2HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
+ with HasOuterCacheParameters
+
+// Tags a request/response with the originating transaction tracker's id
+trait HasL2Id extends HasCoherenceAgentParameters {
+ val id = UInt(width = log2Up(nTransactors + 1))
+}
+
+// Result of a metadata lookup: hit/miss, the entry, and the selected way
+trait HasL2InternalRequestState extends HasOuterCacheParameters {
+ val tag_match = Bool()
+ val meta = new L2Metadata
+ val way_en = Bits(width = nWays)
+}
+
+// Beat index within a cache block for internal data transfers
+trait HasL2BeatAddr extends HasOuterCacheParameters {
+ val addr_beat = UInt(width = log2Up(refillCycles))
+}
+
+// One row of data plus its beat address
+trait HasL2Data extends HasOuterCacheParameters
+ with HasL2BeatAddr {
+ val data = UInt(width = rowBits)
+ def hasData(dummy: Int = 0) = Bool(true)
+ def hasMultibeatData(dummy: Int = 0) = Bool(refillCycles > 1)
+}
+
+// L2 metadata entry: tag plus hierarchical (inner-manager + outer-client)
+// coherence state.
+class L2Metadata(implicit p: Parameters) extends Metadata()(p) with HasOuterCacheParameters {
+ val coh = new HierarchicalMetadata
+}
+
+// Constructors for building L2Metadata wires from tag and coherence state
+object L2Metadata {
+ def apply(tag: Bits, coh: HierarchicalMetadata)
+ (implicit p: Parameters): L2Metadata = {
+ val meta = Wire(new L2Metadata)
+ meta.tag := tag
+ meta.coh := coh
+ meta
+ }
+
+ def apply(
+ tag: Bits,
+ inner: ManagerMetadata,
+ outer: ClientMetadata)(implicit p: Parameters): L2Metadata = {
+ val coh = Wire(new HierarchicalMetadata)
+ coh.inner := inner
+ coh.outer := outer
+ apply(tag, coh)
+ }
+}
+
+// Metadata read request also carries the tag so the array can report a
+// hit/miss decision along with the entry
+class L2MetaReadReq(implicit p: Parameters) extends MetaReadReq()(p) with HasL2Id {
+ val tag = Bits(width = tagBits)
+}
+
+class L2MetaWriteReq(implicit p: Parameters) extends MetaWriteReq[L2Metadata](new L2Metadata)(p)
+ with HasL2Id {
+ override def cloneType = new L2MetaWriteReq().asInstanceOf[this.type]
+}
+
+class L2MetaResp(implicit p: Parameters) extends L2HellaCacheBundle()(p)
+ with HasL2Id
+ with HasL2InternalRequestState
+
+// Read/write port bundles for the metadata array, from the tracker's view
+trait HasL2MetaReadIO extends HasOuterCacheParameters {
+ val read = Decoupled(new L2MetaReadReq)
+ val resp = Valid(new L2MetaResp).flip
+}
+
+trait HasL2MetaWriteIO extends HasOuterCacheParameters {
+ val write = Decoupled(new L2MetaWriteReq)
+}
+
+class L2MetaRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
+ with HasL2MetaReadIO
+ with HasL2MetaWriteIO
+
+class L2MetaReadOnlyIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
+ with HasL2MetaReadIO
+
+trait HasL2MetaRWIO extends HasOuterCacheParameters {
+ val meta = new L2MetaRWIO
+}
+
+// L2 metadata array with a two-stage lookup pipeline:
+//   s0: read request accepted, all ways enabled
+//   s1: tag comparison across ways; replacement policy accessed/updated
+//   s2: hit/miss, hit coherence state, and victim metadata registered
+// On a miss the response carries the replacement way's metadata instead.
+class L2MetadataArray(implicit p: Parameters) extends L2HellaCacheModule()(p) {
+ val io = new L2MetaRWIO().flip
+
+ def onReset = L2Metadata(UInt(0), HierarchicalMetadata.onReset)
+ val meta = Module(new MetadataArray(onReset _))
+ meta.io.read <> io.read
+ meta.io.write <> io.write
+ // Always read all ways; way selection happens after tag compare
+ val way_en_1h = (Vec.fill(nWays){Bool(true)}).toBits
+ val s1_way_en_1h = RegEnable(way_en_1h, io.read.valid)
+ meta.io.read.bits.way_en := way_en_1h
+
+ val s1_tag = RegEnable(io.read.bits.tag, io.read.valid)
+ val s1_id = RegEnable(io.read.bits.id, io.read.valid)
+ def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
+ val s1_clk_en = Reg(next = io.read.fire())
+ val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === s1_tag)
+ // A way matches only if its tag compares equal AND its outer state is valid
+ val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.outer.isValid() && s1_way_en_1h(w).toBool).toBits
+ val s1_idx = RegEnable(io.read.bits.idx, io.read.valid) // deal with stalls?
+ val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
+ val s2_tag_match = s2_tag_match_way.orR
+ val s2_hit_coh = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
+
+ // Update the replacement policy with this access; use the hit way when
+ // there was a hit, otherwise the policy's chosen victim
+ val replacer = p(L2Replacer)()
+ val s1_hit_way = Wire(Bits())
+ s1_hit_way := Bits(0)
+ (0 until nWays).foreach(i => when (s1_tag_match_way(i)) { s1_hit_way := Bits(i) })
+ replacer.access(io.read.bits.idx)
+ replacer.update(s1_clk_en, s1_tag_match_way.orR, s1_idx, s1_hit_way)
+
+ val s1_replaced_way_en = UIntToOH(replacer.way)
+ val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
+ val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) =>
+ RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
+
+ // Response: on a hit, return the hit way's coherence with the same tag;
+ // on a miss, return the victim way's metadata for possible writeback
+ io.resp.valid := Reg(next = s1_clk_en)
+ io.resp.bits.id := RegEnable(s1_id, s1_clk_en)
+ io.resp.bits.tag_match := s2_tag_match
+ io.resp.bits.meta := Mux(s2_tag_match,
+ L2Metadata(s2_repl_meta.tag, s2_hit_coh),
+ s2_repl_meta)
+ io.resp.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
+}
+
+// Data array read request: set index, one-hot way enable, beat address,
+// tagged with the requesting tracker's id
+class L2DataReadReq(implicit p: Parameters) extends L2HellaCacheBundle()(p)
+ with HasL2BeatAddr
+ with HasL2Id {
+ val addr_idx = UInt(width = idxBits)
+ val way_en = Bits(width = nWays)
+}
+
+// Convenience constructor producing a fully wired L2DataReadReq
+object L2DataReadReq {
+ def apply(
+ id: UInt,
+ way_en: UInt,
+ addr_idx: UInt,
+ addr_beat: UInt)(implicit p: Parameters) = {
+ val req = Wire(new L2DataReadReq)
+ req.id := id
+ req.way_en := way_en
+ req.addr_idx := addr_idx
+ req.addr_beat := addr_beat
+ req
+ }
+}
+
+// Data array write request: read request fields plus data and a byte mask
+class L2DataWriteReq(implicit p: Parameters) extends L2DataReadReq()(p)
+ with HasL2Data {
+ val wmask = Bits(width = rowBits/8)
+}
+
+// Convenience constructor producing a fully wired L2DataWriteReq
+object L2DataWriteReq {
+ def apply(
+ id: UInt,
+ way_en: UInt,
+ addr_idx: UInt,
+ addr_beat: UInt,
+ wmask: UInt,
+ data: UInt)(implicit p: Parameters) = {
+ val req = Wire(new L2DataWriteReq)
+ req.id := id
+ req.way_en := way_en
+ req.addr_idx := addr_idx
+ req.addr_beat := addr_beat
+ req.wmask := wmask
+ req.data := data
+ req
+ }
+}
+
+class L2DataResp(implicit p: Parameters) extends L2HellaCacheBundle()(p)
+ with HasL2Id
+ with HasL2Data
+
+// Read/write port bundles for the data array, from the tracker's view
+trait HasL2DataReadIO extends HasOuterCacheParameters {
+ val read = Decoupled(new L2DataReadReq)
+ val resp = Valid(new L2DataResp).flip
+}
+
+trait HasL2DataWriteIO extends HasOuterCacheParameters {
+ val write = Decoupled(new L2DataWriteReq)
+}
+
+class L2DataRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
+ with HasL2DataReadIO
+ with HasL2DataWriteIO
+
+trait HasL2DataRWIO extends HasOuterCacheParameters {
+ val data = new L2DataRWIO
+}
+
+// The L2 data array: one SeqMem addressed by {way, set, beat}, with
+// byte-granularity write masking. Read responses are delayed by `delay`
+// cycles beyond the SeqMem's one-cycle latency.
+class L2DataArray(delay: Int)(implicit p: Parameters) extends L2HellaCacheModule()(p) {
+ val io = new L2DataRWIO().flip
+
+ val array = SeqMem(nWays*nSets*refillCycles, Vec(rowBits/8, Bits(width=8)))
+ // Writes take priority over reads on the shared port
+ val ren = !io.write.valid && io.read.valid
+ val raddr = Cat(OHToUInt(io.read.bits.way_en), io.read.bits.addr_idx, io.read.bits.addr_beat)
+ val waddr = Cat(OHToUInt(io.write.bits.way_en), io.write.bits.addr_idx, io.write.bits.addr_beat)
+ val wdata = Vec.tabulate(rowBits/8)(i => io.write.bits.data(8*(i+1)-1,8*i))
+ val wmask = io.write.bits.wmask.toBools
+ when (io.write.valid) { array.write(waddr, wdata, wmask) }
+
+ // Pipe the request metadata alongside the delayed array read data
+ val r_req = Pipe(io.read.fire(), io.read.bits)
+ io.resp := Pipe(r_req.valid, r_req.bits, delay)
+ io.resp.bits.data := Pipe(r_req.valid, array.read(raddr, ren).toBits, delay).bits
+ io.read.ready := !io.write.valid
+ io.write.ready := Bool(true)
+}
+
+// One bank of the L2 cache: instantiates the metadata array, data array,
+// and transaction-status-and-handling-register (TSHR) file, and wires them
+// together behind the hierarchical coherence agent interface.
+class L2HellaCacheBank(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p)
+ with HasOuterCacheParameters {
+ require(isPow2(nSets))
+ require(isPow2(nWays))
+
+ val meta = Module(new L2MetadataArray) // TODO: add delay knob
+ val data = Module(new L2DataArray(1))
+ val tshrfile = Module(new TSHRFile)
+ io.inner <> tshrfile.io.inner
+ io.outer <> tshrfile.io.outer
+ tshrfile.io.incoherent <> io.incoherent
+ meta.io <> tshrfile.io.meta
+ data.io <> tshrfile.io.data
+
+ disconnectOuterProbeAndFinish()
+}
+
+// TSHR file IO: hierarchical TileLink plus metadata and data array ports
+class TSHRFileIO(implicit p: Parameters) extends HierarchicalTLIO()(p)
+ with HasL2MetaRWIO
+ with HasL2DataRWIO
+
+// The TSHR file: a set of transaction trackers (voluntary-release trackers
+// and acquire trackers) plus a writeback unit, with routing/arbitration of
+// TileLink channels and internal array ports among them.
+class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p)
+ with HasCoherenceAgentWiringHelpers {
+ val io = new TSHRFileIO
+
+ // Create TSHRs for outstanding transactions
+ val irelTrackerList =
+ (0 until nReleaseTransactors).map(id =>
+ Module(new CacheVoluntaryReleaseTracker(id)))
+ val iacqTrackerList =
+ (nReleaseTransactors until nTransactors).map(id =>
+ Module(new CacheAcquireTracker(id)))
+ val trackerList = irelTrackerList ++ iacqTrackerList
+
+ // Don't allow a writeback request to go through if we are taking
+ // a voluntary release for the same block.
+ // The writeback can go forward once the voluntary release is handled
+ def writebackConflictsWithVolRelease(wb: L2WritebackReq): Bool =
+ irelTrackerList
+ .map(tracker =>
+ !tracker.io.alloc.idle &&
+ isSameBlock(tracker.io.alloc, wb.tag, wb.idx))
+ .reduce(_ || _) ||
+ (io.inner.release.valid &&
+ isSameBlock(io.inner.release.bits, wb.tag, wb.idx))
+
+ // WritebackUnit evicts data from L2, including invalidating L1s
+ val wb = Module(new L2WritebackUnit(nTransactors))
+ val trackerAndWbIOs = trackerList.map(_.io) :+ wb.io
+ doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req),
+ block_transfer = writebackConflictsWithVolRelease _)
+ doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp))
+
+ // Propagate incoherence flags
+ (trackerList.map(_.io.incoherent) :+ wb.io.incoherent) foreach { _ := io.incoherent }
+
+ // Handle acquire transaction initiation; don't allocate a new tracker
+ // for an acquire while a release to the same set is in flight
+ val irel_vs_iacq_conflict =
+ io.inner.acquire.valid &&
+ io.inner.release.valid &&
+ inSameSet(io.inner.acquire.bits, io.inner.release.bits)
+ doInputRoutingWithAllocation(
+ in = io.inner.acquire,
+ outs = trackerList.map(_.io.inner.acquire),
+ allocs = trackerList.map(_.io.alloc.iacq),
+ allocOverride = Some(!irel_vs_iacq_conflict))
+
+ assert(PopCount(trackerList.map(_.io.alloc.iacq.should)) <= UInt(1),
+ "At most a single tracker should now be allocated for any given Acquire")
+
+ // Wire releases from clients
+ doInputRoutingWithAllocation(
+ in = io.inner.release,
+ outs = trackerAndWbIOs.map(_.inner.release),
+ allocs = trackerAndWbIOs.map(_.alloc.irel))
+
+ assert(PopCount(trackerAndWbIOs.map(_.alloc.irel.should)) <= UInt(1),
+ "At most a single tracker should now be allocated for any given Release")
+
+ // Wire probe requests and grant reply to clients, finish acks from clients
+ doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ wb.io.inner.probe)
+ doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant) :+ wb.io.inner.grant)
+ doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+ // Create an arbiter for the one memory port
+ val outerList = trackerList.map(_.io.outer) :+ wb.io.outer
+ val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
+ (p.alterPartial({ case TLId => p(OuterTLId)})))
+ outer_arb.io.in <> outerList
+ io.outer <> outer_arb.io.out
+
+ // Wire local memory arrays
+ doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read) :+ wb.io.meta.read)
+ doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write))
+ doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read)
+ doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write))
+ doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp) :+ wb.io.meta.resp)
+ doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp)
+}
+
+
+// Tracker IO: hierarchical tracker interface plus ports to the local data
+// and metadata arrays and the writeback unit
+class L2XactTrackerIO(implicit p: Parameters) extends HierarchicalXactTrackerIO()(p)
+ with HasL2DataRWIO
+ with HasL2MetaRWIO
+ with HasL2WritebackIO
+
+// Helpers for counting data beats and maintaining per-beat pending bitmasks
+// for internal (data array) transfers.
+trait HasRowBeatCounters extends HasOuterCacheParameters with HasPendingBitHelpers {
+ def mergeData(dataBits: Int)(beat: UInt, incoming: UInt): Unit
+
+ // Returns (current beat, done): counts refillCycles beats for full-block
+ // transfers, otherwise passes through the supplied single-beat address
+ def connectDataBeatCounter[S <: L2HellaCacheBundle](inc: Bool, data: S, beat: UInt, full_block: Bool) = {
+ if(data.refillCycles > 1) {
+ val (multi_cnt, multi_done) = Counter(full_block && inc, data.refillCycles)
+ (Mux(!full_block, beat, multi_cnt), Mux(!full_block, inc, multi_done))
+ } else { (UInt(0), inc) }
+ }
+
+ def connectInternalDataBeatCounter[T <: L2HellaCacheBundle with HasL2BeatAddr](
+ in: DecoupledIO[T],
+ beat: UInt = UInt(0),
+ full_block: Bool = Bool(true)): (UInt, Bool) = {
+ connectDataBeatCounter(in.fire(), in.bits, beat, full_block)
+ }
+
+ def connectInternalDataBeatCounter[T <: L2HellaCacheBundle with HasL2Data](
+ in: ValidIO[T],
+ full_block: Bool): Bool = {
+ connectDataBeatCounter(in.valid, in.bits, UInt(0), full_block)._2
+ }
+
+ // Set the pending bit for the beat currently transferring
+ def addPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr](in: DecoupledIO[T]) =
+ Fill(in.bits.refillCycles, in.fire()) & UIntToOH(in.bits.addr_beat)
+
+ def addPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr](in: ValidIO[T]) =
+ Fill(in.bits.refillCycles, in.valid) & UIntToOH(in.bits.addr_beat)
+
+ // Clear the pending bit for the beat currently transferring (AND with result)
+ def dropPendingBit[T <: L2HellaCacheBundle with HasL2BeatAddr] (in: DecoupledIO[T]) =
+ ~Fill(in.bits.refillCycles, in.fire()) | ~UIntToOH(in.bits.addr_beat)
+
+ def dropPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr] (in: ValidIO[T]) =
+ ~Fill(in.bits.refillCycles, in.valid) | ~UIntToOH(in.bits.addr_beat)
+
+ // TODO: Deal with the possibility that rowBits != tlDataBits
+ def mergeDataInternal[T <: L2HellaCacheBundle with HasL2Data with HasL2BeatAddr](in: ValidIO[T]) {
+ when(in.valid) { mergeData(rowBits)(in.bits.addr_beat, in.bits.data) }
+ }
+}
+
+// Mixin for trackers that read beats from the local data array. Maintains
+// per-beat pending-read and pending-response bitmasks and merges returned
+// data into the tracker's data buffer.
+trait ReadsFromOuterCacheDataArray extends HasCoherenceMetadataBuffer
+ with HasRowBeatCounters
+ with HasDataBuffer {
+ def io: HasL2DataRWIO
+
+ val pending_reads = Reg(init=Bits(0, width = innerDataBeats))
+ val pending_resps = Reg(init=Bits(0, width = innerDataBeats))
+ val curr_read_beat = PriorityEncoder(pending_reads)
+
+ // Issue reads for all pending beats while in s_busy; callers supply masks
+ // of beats to add/drop and conditions that gate issuing or updating
+ def readDataArray(drop_pending_bit: UInt,
+ add_pending_bit: UInt = UInt(0),
+ block_pending_read: Bool = Bool(false),
+ can_update_pending: Bool = Bool(true)) {
+ val port = io.data
+ when (can_update_pending) {
+ pending_reads := (pending_reads &
+ dropPendingBit(port.read) & drop_pending_bit) |
+ add_pending_bit
+ }
+ port.read.valid := state === s_busy && pending_reads.orR && !block_pending_read
+ port.read.bits := L2DataReadReq(
+ id = UInt(trackerId),
+ way_en = xact_way_en,
+ addr_idx = xact_addr_idx,
+ addr_beat = curr_read_beat)
+
+ // A response becomes pending when its read is issued
+ pending_resps := (pending_resps & dropPendingBitInternal(port.resp)) |
+ addPendingBitInternal(port.read)
+
+ scoreboard += (pending_reads.orR, pending_resps.orR)
+
+ mergeDataInternal(port.resp)
+ }
+}
+
+// Mixin for trackers that write beats to the local data array from the
+// tracker's data buffer, tracked by a per-beat pending-write bitmask.
+trait WritesToOuterCacheDataArray extends HasCoherenceMetadataBuffer
+ with HasRowBeatCounters
+ with HasDataBuffer {
+ def io: HasL2DataRWIO
+
+ val pending_writes = Reg(init=Bits(0, width = innerDataBeats))
+ val curr_write_beat = PriorityEncoder(pending_writes)
+
+ // Issue full-mask writes for all pending beats while in s_busy
+ def writeDataArray(add_pending_bit: UInt = UInt(0),
+ block_pending_write: Bool = Bool(false),
+ can_update_pending: Bool = Bool(true)) {
+ val port = io.data
+ when (can_update_pending) {
+ pending_writes := (pending_writes & dropPendingBit(port.write)) |
+ add_pending_bit
+ }
+ port.write.valid := state === s_busy && pending_writes.orR && !block_pending_write
+ port.write.bits := L2DataWriteReq(
+ id = UInt(trackerId),
+ way_en = xact_way_en,
+ addr_idx = xact_addr_idx,
+ addr_beat = curr_write_beat,
+ wmask = ~UInt(0, port.write.bits.wmask.getWidth),
+ data = data_buffer(curr_write_beat))
+
+ scoreboard += pending_writes.orR
+ }
+}
+
+// Provides the per-tracker AMOALU used to merge Puts and atomic ops with
+// data being refilled, written back, or already resident in the cache.
+trait HasAMOALU extends HasAcquireMetadataBuffer
+ with HasByteWriteMaskBuffer
+ with HasRowBeatCounters {
+ val io: L2XactTrackerIO
+
+ // Provide a single ALU per tracker to merge Puts and AMOs with data being
+ // refilled, written back, or extant in the cache
+ val amoalu = Module(new AMOALU(rhsIsAligned = true))
+ // Old data captured for the AMO's Grant response
+ val amo_result = Reg(init = UInt(0, innerDataBits))
+
+ def initializeAMOALUIOs() {
+ amoalu.io.addr := Cat(xact_addr_block, xact_addr_beat, xact_addr_byte)
+ amoalu.io.cmd := xact_op_code
+ amoalu.io.typ := xact_op_size
+ amoalu.io.lhs := io.data.resp.bits.data // default, overwritten by calls to mergeData
+ amoalu.io.rhs := data_buffer.head // default, overwritten by calls to mergeData
+ }
+
+ // Utility function for applying any buffered stored data to the cache line
+ // before storing it back into the data array
+ override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
+ val old_data = incoming // Refilled, written back, or de-cached data
+ val new_data = data_buffer(beat) // Newly Put data is already in the buffer
+ val amo_shift_bits = xact_amo_shift_bytes << 3
+ // Align both operands down to the AMO operand position before the ALU
+ amoalu.io.lhs := old_data >> amo_shift_bits
+ amoalu.io.rhs := new_data >> amo_shift_bits
+ val wmask = FillInterleaved(8, wmask_buffer(beat))
+ // Keep unmasked bytes of the old data; masked bytes take the AMO result
+ // (shifted back into place) for atomics, or the new Put data otherwise
+ data_buffer(beat) := ~wmask & old_data |
+ wmask & Mux(xact_iacq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data)
+ when(xact_iacq.isAtomic() && xact_addr_beat === beat) { amo_result := old_data }
+ }
+}
+
+// Buffers the coherence metadata for the tracker's current transaction and
+// provides the metadata read/write state-machine steps.
+trait HasCoherenceMetadataBuffer extends HasOuterCacheParameters
+ with HasBlockAddressBuffer
+ with HasXactTrackerStates {
+ def trackerId: Int
+
+ val xact_way_en = Reg{ Bits(width = nWays) }
+ val xact_old_meta = Reg{ new L2Metadata }
+ val pending_coh = Reg{ xact_old_meta.coh }
+ val pending_meta_write = Reg{ Bool() } // pending_meta_write has own state (s_meta_write)
+
+ val inner_coh = pending_coh.inner
+ val outer_coh = pending_coh.outer
+
+ val xact_addr_idx = xact_addr_block(idxMSB,idxLSB)
+ val xact_addr_tag = xact_addr_block >> UInt(tagLSB)
+
+ // Utility function for updating the metadata that will be kept in this cache
+ def updatePendingCohWhen(flag: Bool, next: HierarchicalMetadata) {
+ // Only schedule a metadata write if the state actually changes
+ when(flag && pending_coh =/= next) {
+ pending_meta_write := Bool(true)
+ pending_coh := next
+ }
+ }
+
+ // Issue a metadata read in s_meta_read, capture the result in s_meta_resp,
+ // then advance to next_state. If the way is not already known, it is
+ // taken from the response (hit way or replacement victim).
+ def metaRead(port: HasL2MetaReadIO, next_state: UInt, way_en_known: Bool = Bool(false)) {
+ port.read.valid := state === s_meta_read
+ port.read.bits.id := UInt(trackerId)
+ port.read.bits.idx := xact_addr_idx
+ port.read.bits.tag := xact_addr_tag
+ port.read.bits.way_en := Mux(way_en_known, xact_way_en, ~UInt(0, nWays))
+
+ when(state === s_meta_read && port.read.ready) { state := s_meta_resp }
+
+ when(state === s_meta_resp && port.resp.valid) {
+ xact_old_meta := port.resp.bits.meta
+ when (!way_en_known) { xact_way_en := port.resp.bits.way_en }
+ state := next_state
+ }
+ }
+
+ // Write updated metadata in s_meta_write, then advance to next_state
+ def metaWrite(port: HasL2MetaWriteIO, to_write: L2Metadata, next_state: UInt) {
+ port.write.valid := state === s_meta_write
+ port.write.bits.id := UInt(trackerId)
+ port.write.bits.idx := xact_addr_idx
+ port.write.bits.way_en := xact_way_en
+ port.write.bits.data := to_write
+
+ when(state === s_meta_write && port.write.ready) { state := next_state }
+ }
+}
+
+// Mixin for trackers that can hand a victim block to the writeback unit:
+// issues the request in s_wb_req and waits for the response in s_wb_resp.
+trait TriggersWritebacks extends HasCoherenceMetadataBuffer {
+ def triggerWriteback(wb: L2WritebackIO, next_state: UInt) {
+ wb.req.valid := state === s_wb_req
+ wb.req.bits.id := UInt(trackerId)
+ wb.req.bits.idx := xact_addr_idx
+ // Evicting the old occupant, so use the old metadata's tag
+ wb.req.bits.tag := xact_old_meta.tag
+ wb.req.bits.way_en := xact_way_en
+
+ when(state === s_wb_req && wb.req.ready) { state := s_wb_resp }
+ when(state === s_wb_resp && wb.resp.valid) { state := s_outer_acquire }
+ }
+}
+
+// Tracker for voluntary Releases (e.g. L1 writebacks): accepts the release
+// beats, reads the block's metadata, writes the released data into this
+// cache's data array, and updates the metadata to record the new sharer
+// state (and dirtiness, if data was written back).
+class CacheVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
+ extends VoluntaryReleaseTracker(trackerId)(p)
+ with HasDataBuffer
+ with WritesToOuterCacheDataArray {
+ val io = new L2XactTrackerIO
+ pinAllReadyValidLow(io)
+
+ // Avoid metadata races with writebacks
+ routeInParent(
+ iacqMatches = inSameSet(_, xact_addr_block),
+ irelCanAlloc = Bool(true))
+
+ // Initialize and accept pending Release beats
+ innerRelease(
+ block_vol_ignt = pending_writes.orR,
+ next = s_meta_read)
+
+ io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
+
+ // Begin a transaction by getting the current block metadata
+ metaRead(io.meta, s_busy)
+
+ // Write the voluntarily written back data to this cache
+ writeDataArray(add_pending_bit = addPendingBitWhenBeatHasData(io.inner.release),
+ can_update_pending = state =/= s_idle || io.alloc.irel.should)
+
+ // End a transaction by updating the block metadata
+ metaWrite(
+ io.meta,
+ L2Metadata(
+ tag = xact_addr_tag,
+ inner = xact_old_meta.coh.inner.onRelease(xact_vol_irel),
+ outer = Mux(xact_vol_irel.hasData(),
+ xact_old_meta.coh.outer.onHit(M_XWR),
+ xact_old_meta.coh.outer)),
+ s_idle)
+
+ // Capture each arriving release beat into the data buffer
+ when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
+
+ when(irel_is_allocating) {
+ pending_writes := addPendingBitWhenBeatHasData(io.inner.release)
+ }
+
+ quiesce(s_meta_write) {}
+
+ // Checks for illegal behavior
+ assert(!(state === s_meta_resp && io.meta.resp.valid && !io.meta.resp.bits.tag_match),
+ "VoluntaryReleaseTracker accepted Release for a block not resident in this cache!")
+}
+
+class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters)
+ extends AcquireTracker(trackerId)(p)
+ with HasByteWriteMaskBuffer
+ with HasAMOALU
+ with TriggersWritebacks
+ with ReadsFromOuterCacheDataArray
+ with WritesToOuterCacheDataArray {
+ val io = new L2XactTrackerIO
+ pinAllReadyValidLow(io)
+ initializeAMOALUIOs()
+
+
+ val pending_coh_on_ognt = HierarchicalMetadata(
+ ManagerMetadata.onReset,
+ pending_coh.outer.onGrant(io.outer.grant.bits, xact_op_code))
+
+ val pending_coh_on_ignt = HierarchicalMetadata(
+ pending_coh.inner.onGrant(io.ignt()),
+ Mux(ognt_counter.down.done,
+ pending_coh_on_ognt.outer,
+ pending_coh.outer))
+
+ val pending_coh_on_irel = HierarchicalMetadata(
+ pending_coh.inner.onRelease(io.irel()), // Drop sharer
+ Mux(io.irel().hasData(), // Dirty writeback
+ pending_coh.outer.onHit(M_XWR),
+ pending_coh.outer))
+
+ val pending_coh_on_hit = HierarchicalMetadata(
+ io.meta.resp.bits.meta.coh.inner,
+ io.meta.resp.bits.meta.coh.outer.onHit(xact_op_code))
+
+ val pending_coh_on_miss = HierarchicalMetadata.onReset
+
+ val before_wb_req = state.isOneOf(s_meta_read, s_meta_resp)
+
+ routeInParent(
+ iacqMatches = inSameSet(_, xact_addr_block),
+ irelMatches = (irel: HasCacheBlockAddress) =>
+ Mux(before_wb_req, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)),
+ iacqCanAlloc = Bool(true))
+
+ // TileLink allows for Gets-under-Get
+ // and Puts-under-Put, and either may also merge with a preceding prefetch
+ // that requested the correct permissions (via op_code)
+ def acquiresAreMergeable(sec: AcquireMetadata): Bool = {
+ val allowedTypes = List((Acquire.getType, Acquire.getType),
+ (Acquire.putType, Acquire.putType),
+ (Acquire.putBlockType, Acquire.putBlockType),
+ (Acquire.getPrefetchType, Acquire.getPrefetchType),
+ (Acquire.putPrefetchType, Acquire.putPrefetchType),
+ (Acquire.getPrefetchType, Acquire.getType),
+ (Acquire.putPrefetchType, Acquire.putType),
+ (Acquire.putPrefetchType, Acquire.putBlockType))
+ allowedTypes.map { case(a, b) => xact_iacq.isBuiltInType(a) && sec.isBuiltInType(b) }.reduce(_||_) &&
+ xact_op_code === sec.op_code() &&
+ sec.conflicts(xact_addr_block) &&
+ xact_allocate
+ }
+
+ // First, take care of accpeting new acquires or secondary misses
+ // Handling of primary and secondary misses' data and write mask merging
+ def iacq_can_merge = acquiresAreMergeable(io.iacq()) &&
+ state =/= s_idle &&
+ state =/= s_meta_resp &&
+ state =/= s_meta_write &&
+ !all_pending_done &&
+ !io.inner.release.fire() &&
+ !io.outer.grant.fire() &&
+ !io.data.resp.valid &&
+ ignt_q.io.enq.ready && ignt_q.io.deq.valid
+
+ innerAcquire(
+ can_alloc = Bool(true),
+ next = s_meta_read)
+
+ io.inner.acquire.ready := state === s_idle || iacq_can_merge ||
+ iacq_same_xact_multibeat
+
+ // Begin a transaction by getting the current block metadata
+ // Defined here because of Chisel default wire demands, used in s_meta_resp
+ val coh = io.meta.resp.bits.meta.coh
+ val tag_match = io.meta.resp.bits.tag_match
+ val is_hit = (if(!isLastLevelCache) tag_match && coh.outer.isHit(xact_op_code)
+ else tag_match && coh.outer.isValid())
+ val needs_writeback = !tag_match &&
+ xact_allocate &&
+ (coh.outer.requiresVoluntaryWriteback() ||
+ coh.inner.requiresProbesOnVoluntaryWriteback())
+ val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact_iacq)
+ val should_update_meta = !tag_match && xact_allocate ||
+ is_hit && pending_coh_on_hit =/= coh
+ def full_representation = coh.inner.full()
+
+ metaRead(
+ io.meta,
+ Mux(needs_writeback, s_wb_req,
+ Mux(needs_inner_probes, s_inner_probe,
+ Mux(!is_hit, s_outer_acquire, s_busy))))
+
+ updatePendingCohWhen(
+ io.meta.resp.valid,
+ Mux(is_hit, pending_coh_on_hit,
+ Mux(tag_match, coh, pending_coh_on_miss)))
+
+ // Issue a request to the writeback unit
+ triggerWriteback(io.wb, s_outer_acquire)
+
+ // Track which clients yet need to be probed and make Probe message
+ // If we're probing, we know the tag matches, so if this is the
+ // last level cache, we can use the data without upgrading permissions
+ val skip_outer_acquire =
+ (if(!isLastLevelCache) xact_old_meta.coh.outer.isHit(xact_op_code)
+ else xact_old_meta.coh.outer.isValid())
+
+ innerProbe(
+ inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
+ Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
+
+ // Handle incoming releases from clients, which may reduce sharer counts
+ // and/or write back dirty data, and may be unexpected voluntary releases
+
+ innerRelease() // Don't block on pending_writes because they won't happen until s_busy
+
+ def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+ io.irel().isVoluntary() &&
+ !state.isOneOf(s_idle, s_meta_read, s_meta_resp, s_meta_write) &&
+ !all_pending_done &&
+ !io.outer.grant.fire() &&
+ !io.inner.grant.fire() &&
+ !vol_ignt_counter.pending
+
+ io.inner.release.ready := irel_can_merge || irel_same_xact
+
+ updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+
+ mergeDataInner(io.inner.release)
+
+ // Send outer request
+ outerAcquire(
+ caching = xact_allocate,
+ coh = xact_old_meta.coh.outer, // TODO outer_coh?
+ data = data_buffer(ognt_counter.up.idx),
+ wmask = wmask_buffer(ognt_counter.up.idx),
+ next = s_busy)
+
+ // Handle the response from outer memory
+ updatePendingCohWhen(ognt_counter.down.done, pending_coh_on_ognt)
+ mergeDataOuter(io.outer.grant)
+
+ // Send read request and get resp
+ // Going back to the original inner transaction:
+ // We read from the the cache at this level if data wasn't written back or refilled.
+ // We may still merge further Gets, requiring further beats to be read.
+ // If ECC requires a full writemask, we'll read out data on partial writes as well.
+ readDataArray(
+ drop_pending_bit = (dropPendingBitWhenBeatHasData(io.inner.release) &
+ dropPendingBitWhenBeatHasData(io.outer.grant)),
+ add_pending_bit = addPendingBitWhenBeatNeedsRead(io.inner.acquire, Bool(alwaysWriteFullBeat)),
+ block_pending_read = ognt_counter.pending,
+ can_update_pending = state =/= s_idle || io.alloc.irel.should)
+
+ // No override for first accepted acquire
+ val alloc_override = xact_allocate && (state =/= s_idle)
+
+ // Do write
+ // We write data to the cache at this level if it was Put here with allocate flag,
+ // written back dirty, or refilled from outer memory.
+ writeDataArray(
+ add_pending_bit = (addPendingBitWhenBeatHasDataAndAllocs(io.inner.acquire, alloc_override) |
+ addPendingBitWhenBeatHasData(io.inner.release) |
+ addPendingBitWhenBeatHasData(io.outer.grant, xact_allocate)),
+ block_pending_write = (ognt_counter.pending ||
+ pending_put_data.orR ||
+ pending_reads(curr_write_beat) ||
+ pending_resps(curr_write_beat)),
+ can_update_pending = state =/= s_idle || io.alloc.iacq.should || io.alloc.irel.should)
+
+ // Acknowledge or respond with data
+ innerGrant(
+ data = Mux(xact_iacq.isAtomic(), amo_result, data_buffer(ignt_data_idx)),
+ external_pending = pending_writes.orR || ognt_counter.pending,
+ add_pending_bits = addPendingBitInternal(io.data.resp))
+
+ updatePendingCohWhen(io.inner.grant.fire() && io.ignt().last(), pending_coh_on_ignt)
+
+ // End a transaction by updating the block metadata
+ metaWrite(io.meta, L2Metadata(xact_addr_tag, pending_coh), s_idle)
+
+ // Initialization of some scoreboard logic based on the original
+ // Acquire message and on the results of the metadata read:
+ when(state === s_meta_resp && io.meta.resp.valid) {
+ // If some kind of Put is marked no-allocate but is already in the cache,
+ // we need to write its data to the data array
+ when(is_hit && !xact_allocate && xact_iacq.hasData()) {
+ pending_writes := addPendingBitsFromAcquire(xact_iacq)
+ xact_allocate := Bool(true)
+ }
+ when (needs_inner_probes) { initializeProbes() }
+ pending_meta_write := should_update_meta //TODO what edge case was this covering?
+ }
+
+ // Initialize more transaction metadata.
+ when(iacq_is_allocating) {
+ amo_result := UInt(0)
+ pending_meta_write := Bool(false)
+ pending_reads := Mux( // Pick out the specific beats of data that need to be read
+ io.iacq().isBuiltInType(Acquire.getBlockType) || !io.iacq().isBuiltInType(),
+ ~UInt(0, width = innerDataBeats),
+ addPendingBitWhenBeatNeedsRead(io.inner.acquire, Bool(alwaysWriteFullBeat)))
+ pending_writes := addPendingBitWhenBeatHasDataAndAllocs(io.inner.acquire)
+ pending_resps := UInt(0)
+ }
+
+ initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging)
+
+ // Wait for everything to quiesce
+ quiesce(Mux(pending_meta_write, s_meta_write, s_idle)) { clearWmaskBuffer() }
+}
+
+/** Request sent to the writeback unit, identifying the victim block by its
+  * tag and index and the way to be evicted. Carries an id (via HasL2Id) so
+  * the response can be matched back to the requesting tracker. */
+class L2WritebackReq(implicit p: Parameters)
+ extends L2HellaCacheBundle()(p) with HasL2Id {
+ val tag = Bits(width = tagBits)
+ val idx = Bits(width = idxBits)
+ val way_en = Bits(width = nWays) // way select, one bit per way
+}
+
+/** Completion response from the writeback unit; carries only the id used to
+  * match it to the original L2WritebackReq. */
+class L2WritebackResp(implicit p: Parameters) extends L2HellaCacheBundle()(p) with HasL2Id
+
+/** Paired channels for requesting a writeback and receiving its completion.
+  * req uses ready/valid flow control; resp is a flipped Valid (an input). */
+class L2WritebackIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) {
+ val req = Decoupled(new L2WritebackReq)
+ val resp = Valid(new L2WritebackResp).flip
+}
+
+/** Mix-in that adds a writeback request port to a tracker's IO bundle. */
+trait HasL2WritebackIO extends HasOuterCacheParameters {
+ val wb = new L2WritebackIO()
+}
+
+/** IO for the writeback unit: hierarchical TileLink tracker ports plus a
+  * flipped writeback port (this unit services the requests) and a read-only
+  * metadata port. */
+class L2WritebackUnitIO(implicit p: Parameters)
+ extends HierarchicalXactTrackerIO()(p) with HasL2DataRWIO {
+ val wb = new L2WritebackIO().flip()
+ val meta = new L2MetaReadOnlyIO
+}
+
+/** Evicts a cache block on behalf of an acquire tracker: re-reads the block
+  * metadata, probes inner clients if the inner coherence state requires it,
+  * merges any released data (reading the data array for beats no release
+  * supplied), releases the block to outer memory when required, and finally
+  * signals completion back over io.wb. */
+class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
+ with AcceptsVoluntaryReleases
+ with EmitsVoluntaryReleases
+ with EmitsInnerProbes
+ with ReadsFromOuterCacheDataArray
+ with RoutesInParent {
+ val io = new L2WritebackUnitIO
+ pinAllReadyValidLow(io)
+
+ // Id of the tracker that requested this writeback, echoed in io.wb.resp
+ val xact_id = Reg{ io.wb.req.bits.id }
+
+ val pending_coh_on_irel = HierarchicalMetadata(
+ inner_coh.onRelease(io.irel()), // Drop sharer
+ Mux(io.irel().hasData(), // Dirty writeback
+ outer_coh.onHit(M_XWR),
+ outer_coh))
+
+ routeInParent()
+
+ // Start the writeback sub-transaction
+ io.wb.req.ready := state === s_idle
+
+ val coh = io.meta.resp.bits.meta.coh
+ val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
+ val needs_outer_release = coh.outer.requiresVoluntaryWriteback()
+ def full_representation = coh.inner.full()
+
+ // Even though we already read the metadata in the acquire tracker that
+ // sent the writeback request, we have to read it again in the writeback
+ // unit, since it may have been updated in the meantime.
+ metaRead(io.meta,
+ next_state = Mux(needs_inner_probes, s_inner_probe, s_busy),
+ way_en_known = Bool(true))
+
+ // Track which clients yet need to be probed and make Probe message
+ innerProbe(
+ inner_coh.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block),
+ s_busy)
+
+ // Handle incoming releases from clients, which may reduce sharer counts
+ // and/or write back dirty data
+ innerRelease()
+
+ // A voluntary release for this block may be merged into the transaction,
+ // but not before metadata has been read, not while an earlier voluntary
+ // grant is still outstanding, and not once the transaction is finishing
+ def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+ io.irel().isVoluntary() &&
+ !state.isOneOf(s_idle, s_meta_read, s_meta_resp) &&
+ !(state === s_busy && all_pending_done) &&
+ !vol_ignt_counter.pending &&
+ !blockInnerRelease()
+
+ io.inner.release.ready := irel_can_merge || irel_same_xact
+
+ updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+
+ mergeDataInner(io.inner.release)
+
+ // If a release didn't write back data, have to read it from data array
+ readDataArray(
+ drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release))
+
+ // Once the data is buffered we can write it back to outer memory
+ outerRelease(
+ coh = outer_coh,
+ data = data_buffer(vol_ognt_counter.up.idx),
+ add_pending_data_bits = addPendingBitInternal(io.data.resp),
+ add_pending_send_bit = io.meta.resp.valid && needs_outer_release)
+
+
+ // Respond to the initiating transaction handler signalling completion of the writeback
+ io.wb.resp.valid := state === s_busy && all_pending_done
+ io.wb.resp.bits.id := xact_id
+
+ quiesce() {}
+
+ // State machine updates and transaction handler metadata initialization
+ when(state === s_idle && io.wb.req.valid) {
+ xact_id := io.wb.req.bits.id
+ xact_way_en := io.wb.req.bits.way_en
+ xact_addr_block := (if (cacheIdBits == 0) Cat(io.wb.req.bits.tag, io.wb.req.bits.idx)
+ else Cat(io.wb.req.bits.tag, io.wb.req.bits.idx, UInt(cacheId, cacheIdBits)))
+ state := s_meta_read
+ }
+
+ when (state === s_meta_resp && io.meta.resp.valid) {
+ // Data beats only need to be read out if they will go to outer memory
+ pending_reads := Fill(innerDataBeats, needs_outer_release)
+ pending_coh := coh
+ when(needs_inner_probes) { initializeProbes() }
+ }
+
+ assert(!io.meta.resp.valid || io.meta.resp.bits.tag_match,
+ "L2 requested Writeback for block not present in cache")
+}
diff --git a/uncore/src/main/scala/agents/Ecc.scala b/uncore/src/main/scala/agents/Ecc.scala
new file mode 100644
index 00000000..6e5fdba6
--- /dev/null
+++ b/uncore/src/main/scala/agents/Ecc.scala
@@ -0,0 +1,146 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+
+/** Result of decoding a (possibly corrupted) codeword. */
+abstract class Decoding
+{
+ def uncorrected: UInt // data bits as received, before any correction
+ def corrected: UInt // data bits after any correction the code can perform
+ def correctable: Bool // error detected that `corrected` repairs
+ def uncorrectable: Bool // error detected that cannot be repaired
+ def error = correctable || uncorrectable
+}
+
+/** An error detecting/correcting code: maps w0 data bits to width(w0)
+  * codeword bits and decodes a codeword back into a Decoding result. */
+abstract class Code
+{
+ def width(w0: Int): Int // codeword width for w0 data bits
+ def encode(x: UInt): UInt
+ def decode(x: UInt): Decoding
+}
+
+/** The trivial code: no redundancy, so no errors are ever detected. */
+class IdentityCode extends Code
+{
+ def width(w0: Int) = w0
+ def encode(x: UInt) = x
+ def decode(y: UInt) = new Decoding {
+ def uncorrected = y
+ def corrected = y
+ def correctable = Bool(false)
+ def uncorrectable = Bool(false)
+ }
+}
+
+/** Even-parity code: one parity bit (the xor of all data bits) is appended
+  * as the MSB. Any odd number of bit flips is detected but, since the
+  * flipped position is unknown, nothing can be corrected. */
+class ParityCode extends Code
+{
+ def width(w0: Int) = w0 + 1
+ def encode(x: UInt) = Cat(x.xorR, x)
+ def decode(y: UInt) = new Decoding {
+ private val payload = y(y.getWidth-2, 0) // strip the parity MSB
+ def uncorrected = payload
+ def corrected = payload // no correction possible with parity alone
+ def correctable = Bool(false)
+ def uncorrectable = y.xorR // xor over data+parity is nonzero => error
+ }
+}
+
+/** Single-error-correcting Hamming code. Using 1-indexed codeword
+  * positions, parity bits sit at the power-of-two positions and data bits
+  * fill the rest (see mapping()); each parity bit covers every position
+  * whose index shares its bit. */
+class SECCode extends Code
+{
+ def width(k: Int) = {
+ val m = log2Floor(k) + 1
+ // one extra bit when 2^m positions are too few to cover k+m+1 indices
+ k + m + (if((1 << m) < m+k+1) 1 else 0)
+ }
+ def encode(x: UInt) = {
+ val k = x.getWidth
+ require(k > 0)
+ val n = width(k)
+
+ // Position i is a parity bit iff i is a power of two; it is the xor of
+ // all other positions j with (j & i) != 0. Other positions carry data.
+ val y = for (i <- 1 to n) yield {
+ if (isPow2(i)) {
+ val r = for (j <- 1 to n; if j != i && (j & i) != 0)
+ yield x(mapping(j))
+ r reduce (_^_)
+ } else
+ x(mapping(i))
+ }
+ Vec(y).toBits
+ }
+ def decode(y: UInt) = new Decoding {
+ val n = y.getWidth
+ require(n > 0 && !isPow2(n))
+
+ // Recompute each parity group over the received word; the resulting
+ // syndrome is the 1-indexed position of a single flipped bit (0 = none)
+ val p2 = for (i <- 0 until log2Up(n)) yield 1 << i
+ val syndrome = p2 map { i =>
+ val r = for (j <- 1 to n; if (j & i) != 0)
+ yield y(j-1)
+ r reduce (_^_)
+ }
+ val s = Vec(syndrome).toBits
+
+ // swizzle drops the parity positions, leaving only the data bits
+ private def swizzle(z: UInt) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))).toBits
+ def uncorrected = swizzle(y)
+ // shift to 1-indexed, flip the bit named by the syndrome, shift back
+ def corrected = swizzle(((y.toUInt << 1) ^ UIntToOH(s)) >> 1)
+ def correctable = s.orR
+ def uncorrectable = Bool(false) // SEC alone cannot flag double errors
+ }
+ // data-bit index for codeword position i (skips the parity positions)
+ private def mapping(i: Int) = i-1-log2Up(i)
+}
+
+/** SECDED: a Hamming SEC code wrapped in one overall parity bit, giving
+  * single-error correction plus double-error detection. */
+class SECDEDCode extends Code
+{
+ private val sec = new SECCode
+ private val par = new ParityCode
+
+ def width(k: Int) = sec.width(k)+1
+ def encode(x: UInt) = par.encode(sec.encode(x))
+ def decode(x: UInt) = new Decoding {
+ val secdec = sec.decode(x(x.getWidth-2,0)) // inner SEC word, parity MSB removed
+ val pardec = par.decode(x)
+
+ def uncorrected = secdec.uncorrected
+ def corrected = secdec.corrected
+ // Overall parity fails => odd number of flips, treated as a single
+ // error the SEC layer corrects. Parity clean but SEC syndrome nonzero
+ // => an even (double) flip, which is uncorrectable.
+ def correctable = pardec.uncorrectable
+ def uncorrectable = !pardec.uncorrectable && secdec.correctable
+ }
+}
+
+object ErrGen
+{
+ /** Generate a one-hot error mask with approximate probability 2^-f of
+    * being nonzero: draw log2Up(width)+f LFSR bits, one-hot decode them,
+    * and keep only the low `width` bits (draws past width-1 yield zero). */
+ def apply(width: Int, f: Int): UInt = {
+ require(width > 0 && f >= 0 && log2Up(width) + f <= 16)
+ val draw = LFSR16()(log2Up(width) + f - 1, 0)
+ UIntToOH(draw)(width - 1, 0)
+ }
+ /** Flip at most one randomly chosen bit of x with probability ~2^-f. */
+ def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f)
+}
+
+/** Hardware testbench for SECDEDCode: sweeps all 2^k data values, injects
+  * zero, one, or two random single-bit errors (cycling via a counter), and
+  * exposes the decode results for external checking. */
+class SECDEDTest extends Module
+{
+ val code = new SECDEDCode
+ val k = 4
+ val n = code.width(k)
+
+ val io = new Bundle {
+ val original = Bits(OUTPUT, k)
+ val encoded = Bits(OUTPUT, n)
+ val injected = Bits(OUTPUT, n)
+ val uncorrected = Bits(OUTPUT, k)
+ val corrected = Bits(OUTPUT, k)
+ val correctable = Bool(OUTPUT)
+ val uncorrectable = Bool(OUTPUT)
+ }
+
+ // c counts through all k-bit values; numErrors cycles 0,1,2 on wrap
+ val c = Counter(Bool(true), 1 << k)
+ val numErrors = Counter(c._2, 3)._1
+ val e = code.encode(c._1)
+ // inject up to two independent random single-bit errors
+ val i = e ^ Mux(numErrors < UInt(1), UInt(0), ErrGen(n, 1)) ^ Mux(numErrors < UInt(2), UInt(0), ErrGen(n, 1))
+ val d = code.decode(i)
+
+ io.original := c._1
+ io.encoded := e
+ io.injected := i
+ io.uncorrected := d.uncorrected
+ io.corrected := d.corrected
+ io.correctable := d.correctable
+ io.uncorrectable := d.uncorrectable
+}
diff --git a/uncore/src/main/scala/agents/Mmio.scala b/uncore/src/main/scala/agents/Mmio.scala
new file mode 100644
index 00000000..a3b2ab03
--- /dev/null
+++ b/uncore/src/main/scala/agents/Mmio.scala
@@ -0,0 +1,73 @@
+package uncore.agents
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+
+/** Per-transaction bookkeeping record: the inner client's id and transaction
+  * id, saved so a returning grant can be routed back to its requester. */
+class MMIOTileLinkManagerData(implicit p: Parameters)
+ extends TLBundle()(p)
+ with HasClientId
+ with HasClientTransactionId
+
+/** Passes MMIO acquires/grants through while translating transaction ids:
+  * each inner (client_id, client_xact_id) pair is stored in a buffer indexed
+  * by a freshly allocated outer client_xact_id, and restored when the
+  * matching grant returns. Probes and releases are never used. */
+class MMIOTileLinkManager(implicit p: Parameters)
+ extends CoherenceAgentModule()(p) {
+ val io = new ManagerTLIO
+
+ // MMIO requests should never need probe or release
+ io.inner.probe.valid := Bool(false)
+ io.inner.release.ready := Bool(false)
+
+ val multibeat_fire = io.outer.acquire.fire() && io.oacq().hasMultibeatData()
+ val multibeat_start = multibeat_fire && io.oacq().addr_beat === UInt(0)
+ val multibeat_end = multibeat_fire && io.oacq().addr_beat === UInt(outerDataBeats - 1)
+
+ // Acquire and Grant are basically passthru,
+ // except client_id and client_xact_id need to be converted.
+ // Associate the inner client_id and client_xact_id
+ // with the outer client_xact_id.
+ val xact_pending = Reg(init = UInt(0, maxManagerXacts))
+ val xact_id_sel = PriorityEncoder(~xact_pending) // lowest free id
+ // Latch the chosen id at the first beat so all beats of a multibeat
+ // acquire carry the same outer transaction id
+ val xact_id_reg = RegEnable(xact_id_sel, multibeat_start)
+ val xact_multibeat = Reg(init = Bool(false))
+ val outer_xact_id = Mux(xact_multibeat, xact_id_reg, xact_id_sel)
+ val xact_free = !xact_pending.andR
+ val xact_buffer = Reg(Vec(maxManagerXacts, new MMIOTileLinkManagerData))
+
+ // Stall new inner acquires while every outer transaction id is in use
+ io.inner.acquire.ready := io.outer.acquire.ready && xact_free
+ io.outer.acquire.valid := io.inner.acquire.valid && xact_free
+ io.outer.acquire.bits := io.inner.acquire.bits
+ io.outer.acquire.bits.client_xact_id := outer_xact_id
+
+ def isLastBeat[T <: TileLinkChannel with HasTileLinkBeatId](in: T): Bool =
+ !in.hasMultibeatData() || in.addr_beat === UInt(outerDataBeats - 1)
+
+ def addPendingBitOnAcq[T <: AcquireMetadata](in: DecoupledIO[T]): UInt =
+ Mux(in.fire() && isLastBeat(in.bits), UIntToOH(in.bits.client_xact_id), UInt(0))
+
+ // An id is freed by the grant unless the grant requires a Finish ack,
+ // in which case it is freed by the Finish instead
+ def clearPendingBitOnGnt[T <: GrantMetadata](in: DecoupledIO[T]): UInt =
+ ~Mux(in.fire() && isLastBeat(in.bits) && !in.bits.requiresAck(),
+ UIntToOH(in.bits.manager_xact_id), UInt(0))
+
+ def clearPendingBitOnFin(in: DecoupledIO[Finish]): UInt =
+ ~Mux(in.fire(), UIntToOH(in.bits.manager_xact_id), UInt(0))
+
+ xact_pending := (xact_pending | addPendingBitOnAcq(io.outer.acquire)) &
+ clearPendingBitOnFin(io.inner.finish) &
+ clearPendingBitOnGnt(io.inner.grant)
+
+ // Record the inner ids once the acquire has fully left for outer memory
+ when (io.outer.acquire.fire() && isLastBeat(io.outer.acquire.bits)) {
+ xact_buffer(outer_xact_id) := io.iacq()
+ }
+
+ when (multibeat_start) { xact_multibeat := Bool(true) }
+ when (multibeat_end) { xact_multibeat := Bool(false) }
+
+ // Restore the saved inner ids when forwarding the grant inward
+ val gnt_xact = xact_buffer(io.ognt().client_xact_id)
+ io.outer.grant.ready := io.inner.grant.ready
+ io.inner.grant.valid := io.outer.grant.valid
+ io.inner.grant.bits := io.outer.grant.bits
+ io.inner.grant.bits.client_id := gnt_xact.client_id
+ io.inner.grant.bits.client_xact_id := gnt_xact.client_xact_id
+ io.inner.grant.bits.manager_xact_id := io.ognt().client_xact_id
+ io.inner.finish.ready := Bool(true)
+}
diff --git a/uncore/src/main/scala/agents/StatelessBridge.scala b/uncore/src/main/scala/agents/StatelessBridge.scala
new file mode 100644
index 00000000..0ed818cf
--- /dev/null
+++ b/uncore/src/main/scala/agents/StatelessBridge.scala
@@ -0,0 +1,69 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+import uncore.coherence._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.devices._
+import cde.{Parameters, Field, Config}
+
+/** The ManagerToClientStateless Bridge does not maintain any state for the messages
+ * which pass through it. It simply passes the messages back and forth without any
+ * tracking or translation.
+ *
+ * This can reduce area and timing in very constrained situations:
+ * - The Manager and Client implement the same coherence protocol
+ * - There are no probe or finish messages.
+ * - The outer transaction ID is large enough to handle all possible inner
+ * transaction IDs, such that no remapping state must be maintained.
+ *
+ * This bridge DOES NOT keep the uncached channel coherent with the cached
+ * channel. Uncached requests to blocks cached by the L1 will not probe the L1.
+ * As a result, uncached reads to cached blocks will get stale data until
+ * the L1 performs a voluntary writeback, and uncached writes to cached blocks
+ * will get lost, as the voluntary writeback from the L1 will overwrite the
+ * changes. If your tile relies on probing the L1 data cache in order to
+ * share data between the instruction cache and data cache (e.g. you are using
+ * a non-blocking L1 D$) or if the tile has uncached channels capable of
+ * writes (e.g. Hwacha and other RoCC accelerators), DO NOT USE THIS BRIDGE.
+ */
+
+class ManagerToClientStatelessBridge(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
+ val icid = io.inner.tlClientIdBits
+ val ixid = io.inner.tlClientXactIdBits
+ val oxid = io.outer.tlClientXactIdBits
+
+ val innerCoh = io.inner.tlCoh.getClass
+ val outerCoh = io.outer.tlCoh.getClass
+
+ // Stateless Bridge is only usable in certain constrained situations.
+ // Sanity check its usage here.
+
+ require(io.inner.tlNCachingClients <= 1)
+ // Outer xact id must be wide enough to embed (client_id, client_xact_id)
+ require(icid + ixid <= oxid)
+ require(innerCoh eq outerCoh,
+ s"Coherence policies do not match: inner is ${innerCoh.getSimpleName}, outer is ${outerCoh.getSimpleName}")
+
+ // Acquires pass straight through; the inner client id is concatenated
+ // into the outer xact id, so no per-transaction mapping state is needed
+ io.outer.acquire.valid := io.inner.acquire.valid
+ io.inner.acquire.ready := io.outer.acquire.ready
+ io.outer.acquire.bits := io.inner.acquire.bits
+ io.outer.acquire.bits.client_xact_id := Cat(io.inner.acquire.bits.client_id, io.inner.acquire.bits.client_xact_id)
+
+ // Releases are forwarded the same way
+ io.outer.release.valid := io.inner.release.valid
+ io.inner.release.ready := io.outer.release.ready
+ io.outer.release.bits := io.inner.release.bits
+ io.outer.release.bits.client_xact_id := Cat(io.inner.release.bits.client_id, io.inner.release.bits.client_xact_id)
+
+ // Grants recover the inner ids from the two slices of the outer xact id
+ io.inner.grant.valid := io.outer.grant.valid
+ io.outer.grant.ready := io.inner.grant.ready
+ io.inner.grant.bits := io.outer.grant.bits
+ io.inner.grant.bits.client_xact_id := io.outer.grant.bits.client_xact_id(ixid-1, 0)
+ io.inner.grant.bits.client_id := io.outer.grant.bits.client_xact_id(icid+ixid-1, ixid)
+
+ // No probes are ever issued inward and finishes are always accepted
+ io.inner.probe.valid := Bool(false)
+ io.inner.finish.ready := Bool(true)
+
+ disconnectOuterProbeAndFinish()
+}
diff --git a/uncore/src/main/scala/agents/StoreDataQueue.scala b/uncore/src/main/scala/agents/StoreDataQueue.scala
new file mode 100644
index 00000000..e2079772
--- /dev/null
+++ b/uncore/src/main/scala/agents/StoreDataQueue.scala
@@ -0,0 +1,119 @@
+// See LICENSE for license details.
+
+package uncore.agents
+import Chisel._
+import uncore.tilelink._
+import cde.{Parameters, Field}
+
+/** Config field: store data queue depth in cache blocks (scaled by beats). */
+case object L2StoreDataQueueDepth extends Field[Int]
+
+/** Derived sizing parameters for the store data queue machinery. */
+trait HasStoreDataQueueParameters extends HasCoherenceAgentParameters {
+ // Total SDQ entries: configured block depth times beats per block
+ val sdqDepth = p(L2StoreDataQueueDepth)*innerDataBeats
+ // Wide enough to index either the SDQ or the writeback/release queues
+ val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(sdqDepth))
+ val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases
+}
+
+/** A pointer into one of the data queues: which queue (loc) and which
+  * entry within it (idx). Packed into the data field of internal messages. */
+class DataQueueLocation(implicit p: Parameters) extends CoherenceAgentBundle()(p)
+ with HasStoreDataQueueParameters {
+ val idx = UInt(width = dqIdxBits)
+ val loc = UInt(width = log2Ceil(nDataQueueLocations))
+}
+
+object DataQueueLocation {
+ /** Build a DataQueueLocation wire from an entry index and a queue tag. */
+ def apply(idx: UInt, loc: UInt)(implicit p: Parameters) = {
+ val pointer = Wire(new DataQueueLocation)
+ pointer.idx := idx
+ pointer.loc := loc
+ pointer
+ }
+}
+
+/** Shared queues that hold Put and voluntary-writeback data outside the
+  * trackers. Instead of buffering payloads per tracker, messages carry a
+  * DataQueueLocation in their data field; the real data is looked up when
+  * the outer acquire is finally sent. (The commented-out snippets show the
+  * intended wiring at the instantiation site.) */
+trait HasStoreDataQueue extends HasStoreDataQueueParameters {
+ val io: HierarchicalTLIO
+ val trackerIOsList: Seq[HierarchicalXactTrackerIO]
+
+ val internalDataBits = new DataQueueLocation().getWidth
+ val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)
+
+ // Parameter view in which the TileLink data beat is narrowed to just a
+ // DataQueueLocation pointer for the internal arbiters
+ val usingStoreDataQueue = p.alterPartial({
+ case TLKey(`innerTLId`) => innerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits))
+ case TLKey(`outerTLId`) => outerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits))
+ })
+
+ // Queue to store impending Put data
+ lazy val sdq = Reg(Vec(sdqDepth, io.iacq().data))
+ lazy val sdq_val = Reg(init=Bits(0, sdqDepth)) // occupancy bitmap
+ lazy val sdq_alloc_id = PriorityEncoder(~sdq_val) // lowest free slot
+ lazy val sdq_rdy = !sdq_val.andR
+ // Enqueue whenever any tracker accepts an inner acquire carrying data
+ lazy val sdq_enq = trackerIOsList.map( t =>
+ (t.alloc.iacq.should || t.alloc.iacq.matches) &&
+ t.inner.acquire.fire() &&
+ t.iacq().hasData()
+ ).reduce(_||_)
+
+ lazy val sdqLoc = List.fill(nTransactors) {
+ DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits
+ }
+
+ /*
+ doInputRoutingWithAllocation(
+ in = io.inner.acquire,
+ outs = trackerList.map(_.io.inner.acquire),
+ allocs = trackerList.map(_.io.alloc._iacq),
+ dataOverride = Some(sdqLoc),
+ allocOverride = Some(sdq_rdy && !irel_vs_iacq_conflict))
+ */
+
+ // Queue to store impending Voluntary Release data
+ lazy val voluntary = io.irel().isVoluntary()
+ lazy val vwbdq_enq = io.inner.release.fire() && voluntary && io.irel().hasData()
+ lazy val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, innerDataBeats) //TODO Zero width
+ lazy val vwbdq = Reg(Vec(innerDataBeats, io.irel().data)) //TODO Assumes nReleaseTransactors == 1
+
+
+ lazy val vwbqLoc = (0 until nTransactors).map(i =>
+ (DataQueueLocation(rel_data_cnt,
+ (if(i < nReleaseTransactors) inVolWBQueue
+ else inClientReleaseQueue)).toBits))
+ /*
+ doInputRoutingWithAllocation(
+ io.inner.release,
+ trackerList.map(_.io.inner.release),
+ trackerList.map(_.io.matches.irel),
+ trackerList.map(_.io.alloc.irel),
+ Some(vwbqLoc))
+ */
+
+ val outer_arb: ClientTileLinkIOArbiter
+ // Reinterpret the arbiter's data field as a queue pointer
+ lazy val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
+ /*
+ val outer_arb = Module(new ClientTileLinkIOArbiter(trackerList.size)
+ (usingStoreDataQueue.alterPartial({ case TLId => p(OuterTLId) })))
+ outer_arb.io.in <> trackerList
+ */
+ // Get the pending data out of the store data queue
+ lazy val is_in_sdq = outer_data_ptr.loc === inStoreQueue
+ // Free an SDQ slot once its data has been sent to outer memory
+ lazy val free_sdq = io.outer.acquire.fire() &&
+ io.outer.acquire.bits.hasData() &&
+ outer_data_ptr.loc === inStoreQueue
+ /*
+ io.outer <> outer_arb.io.out
+ io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
+ inStoreQueue -> sdq(outer_data_ptr.idx),
+ inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
+ */
+
+ // Enqueue SDQ data
+ def sdqEnqueue() {
+ when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data }
+ when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data }
+ }
+
+ // Update SDQ valid bits
+ def sdqUpdate() {
+ when (io.outer.acquire.valid || sdq_enq) {
+ sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) |
+ PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
+ }
+ }
+}
diff --git a/uncore/src/main/scala/agents/Trackers.scala b/uncore/src/main/scala/agents/Trackers.scala
new file mode 100644
index 00000000..80d7a409
--- /dev/null
+++ b/uncore/src/main/scala/agents/Trackers.scala
@@ -0,0 +1,651 @@
+// See LICENSE for license details.
+
+package uncore.agents
+
+import Chisel._
+import uncore.coherence._
+import uncore.tilelink._
+import uncore.util._
+import uncore.Util._
+import junctions._
+import cde.{Field, Parameters}
+import scala.math.max
+
+/** Config field: when true, trackers printf data beats for debugging. */
+case object EnableL2Logging extends Field[Boolean]
+
+/** Handshake for assigning an incoming message to a tracker: the tracker
+  * reports whether the message matches its transaction and whether it can
+  * take it; the arbiter replies with `should` to commit the allocation. */
+class TrackerAllocation extends Bundle {
+ val matches = Bool(OUTPUT)
+ val can = Bool(OUTPUT)
+ val should = Bool(INPUT)
+}
+
+/** Allocation handshakes for each message class a tracker may accept
+  * (inner acquire, inner release, outer probe), plus an idle indicator and
+  * the block address the tracker is currently working on. */
+class TrackerAllocationIO(implicit val p: Parameters)
+ extends ParameterizedBundle()(p)
+ with HasCacheBlockAddress {
+ val iacq = new TrackerAllocation
+ val irel = new TrackerAllocation
+ val oprb = new TrackerAllocation
+ val idle = Bool(OUTPUT)
+ override val addr_block = UInt(OUTPUT, tlBlockAddrBits)
+}
+
+/** Mix-in adding the allocation handshake port to a tracker's IO bundle. */
+trait HasTrackerAllocationIO extends Bundle {
+ implicit val p: Parameters
+ val alloc = new TrackerAllocationIO
+}
+
+/** Tracker IO for a manager-only agent: manager TileLink plus allocation. */
+class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p)
+ with HasTrackerAllocationIO
+
+/** Tracker IO for a hierarchical agent: inner/outer TileLink plus allocation. */
+class HierarchicalXactTrackerIO(implicit p: Parameters) extends HierarchicalTLIO()(p)
+ with HasTrackerAllocationIO
+
+/** Base class for transaction trackers: defines the shared state-machine
+  * encoding and common helpers used by the concrete tracker modules. */
+abstract class XactTracker(implicit p: Parameters) extends CoherenceAgentModule()(p)
+ with HasXactTrackerStates
+ with HasPendingBitHelpers {
+ override val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9)
+ val state = Reg(init=s_idle)
+
+ // Leave s_busy for `next` once every scoreboarded pending condition has
+ // cleared, running `restore` (e.g. buffer cleanup) at the same time
+ def quiesce(next: UInt = s_idle)(restore: => Unit) {
+ all_pending_done := !scoreboard.foldLeft(Bool(false))(_||_)
+ when(state === s_busy && all_pending_done) {
+ state := next
+ restore
+ }
+ }
+
+ // Recursively drive every output ready/valid in `b` low by default;
+ // trackers then override only the handshakes they actually use
+ def pinAllReadyValidLow[T <: Data](b: Bundle) {
+ b.elements.foreach {
+ _._2 match {
+ case d: DecoupledIO[_] =>
+ if(d.ready.dir == OUTPUT) d.ready := Bool(false)
+ else if(d.valid.dir == OUTPUT) d.valid := Bool(false)
+ case v: ValidIO[_] => if(v.valid.dir == OUTPUT) v.valid := Bool(false)
+ case b: Bundle => pinAllReadyValidLow(b)
+ case _ =>
+ }
+ }
+ }
+}
+
+/** Abstract view of the tracker state machine: concrete trackers supply the
+  * state register and the encodings of each named state. */
+trait HasXactTrackerStates {
+ def state: UInt
+ def s_idle: UInt = UInt(0) // only s_idle has a fixed default encoding
+ def s_meta_read: UInt
+ def s_meta_resp: UInt
+ def s_wb_req: UInt
+ def s_wb_resp: UInt
+ def s_inner_probe: UInt
+ def s_outer_acquire: UInt
+ def s_busy: UInt
+ def s_meta_write: UInt
+}
+
+/** Helpers for maintaining per-beat and per-client pending bitmaps.
+  * `add...` helpers return a mask to OR into a pending vector; `drop...`
+  * helpers return a mask to AND with it. `scoreboard` collects the Bools
+  * that must all clear before a transaction may complete. */
+trait HasPendingBitHelpers extends HasDataBeatCounters {
+ val scoreboard = scala.collection.mutable.ListBuffer.empty[Bool]
+ val all_pending_done = Wire(Bool())
+
+ // Per-beat bitmaps, indexed by addr_beat
+ def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt =
+ Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat)
+
+ def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt =
+ ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat)
+
+ // Per-client bitmaps, indexed by client_id
+ def addPendingBitWhenId[T <: HasClientId](inc: Bool, in: T): UInt =
+ Fill(in.tlNCachingClients, inc) & UIntToOH(in.client_id)
+
+ def dropPendingBitWhenId[T <: HasClientId](dec: Bool, in: T): UInt =
+ ~Fill(in.tlNCachingClients, dec) | ~UIntToOH(in.client_id)
+
+ def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T], inc: Bool = Bool(true)): UInt =
+ addPendingBitWhenBeat(in.fire() && in.bits.hasData() && inc, in.bits)
+
+ def addPendingBitWhenBeatHasDataAndAllocs(
+ in: DecoupledIO[AcquireFromSrc],
+ alloc_override: Bool = Bool(false)): UInt =
+ addPendingBitWhenBeatHasData(in, in.bits.allocate() || alloc_override)
+
+ // Gets, atomics, and partial-mask puts all require a data array read
+ def addPendingBitWhenBeatNeedsRead(in: DecoupledIO[AcquireFromSrc], inc: Bool = Bool(true)): UInt = {
+ val a = in.bits
+ val needs_read = (a.isGet() || a.isAtomic() || a.hasPartialWritemask()) || inc
+ addPendingBitWhenBeat(in.fire() && needs_read, a)
+ }
+
+ def addPendingBitWhenBeatHasPartialWritemask(in: DecoupledIO[AcquireFromSrc]): UInt =
+ addPendingBitWhenBeat(in.fire() && in.bits.hasPartialWritemask(), in.bits)
+
+ // All beats for a multibeat acquire, or just the one beat otherwise
+ def addPendingBitsFromAcquire(a: SecondaryMissInfo): UInt =
+ Mux(a.hasMultibeatData(), Fill(a.tlDataBeats, UInt(1, 1)), UIntToOH(a.addr_beat))
+
+ def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt =
+ dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits)
+
+ def dropPendingBitAtDest[T <: HasId](in: DecoupledIO[T]): UInt =
+ dropPendingBitWhenId(in.fire(), in.bits)
+
+ def dropPendingBitAtDestWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt =
+ dropPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits)
+
+ def addPendingBitAtSrc[T <: HasId](in: DecoupledIO[T]): UInt =
+ addPendingBitWhenId(in.fire(), in.bits)
+
+ def addPendingBitAtSrcWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt =
+ addPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits)
+
+ // Set every bit except bit zero when `en` (used for the remaining beats
+ // of a multibeat message whose first beat just fired)
+ def addOtherBits(en: Bool, nBits: Int): UInt =
+ Mux(en, Cat(Fill(nBits - 1, UInt(1, 1)), UInt(0, 1)), UInt(0, nBits))
+
+ def addPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt =
+ addOtherBits(in.fire() &&
+ in.bits.hasMultibeatData() &&
+ in.bits.addr_beat === UInt(0),
+ in.bits.tlDataBeats)
+
+ def dropPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt =
+ ~addPendingBitsOnFirstBeat(in)
+}
+
+/** Per-transaction buffer holding one cache block of data, one register per
+  * inner beat, with helpers to fill it from inner or outer messages. */
+trait HasDataBuffer extends HasCoherenceAgentParameters {
+ val data_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerDataBits)))
+
+ type TLDataBundle = TLBundle with HasTileLinkData with HasTileLinkBeatId
+
+ // Capture the data beats of an allocating inner acquire into the buffer
+ def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) {
+ when(in.fire() && in.bits.hasData() && alloc) {
+ data_buffer(in.bits.addr_beat) := in.bits.data
+ }
+ }
+
+ // TODO: provide func for accessing when innerDataBeats =/= outerDataBeats or internalDataBeats
+ // Base behavior: overwrite the beat (refined in HasByteWriteMaskBuffer)
+ def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
+ data_buffer(beat) := incoming
+ }
+
+ def mergeDataInner[T <: TLDataBundle](in: DecoupledIO[T]) {
+ when(in.fire() && in.bits.hasData()) {
+ mergeData(innerDataBits)(in.bits.addr_beat, in.bits.data)
+ }
+ }
+
+ def mergeDataOuter[T <: TLDataBundle](in: DecoupledIO[T]) {
+ when(in.fire() && in.bits.hasData()) {
+ mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data)
+ }
+ }
+}
+
+/** Data buffer extended with a per-beat byte write mask, so partial Puts can
+  * be merged with data that arrives later (refill, writeback, release). */
+trait HasByteWriteMaskBuffer extends HasDataBuffer {
+ val wmask_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerWriteMaskBits)))
+
+ // Merge new Put data into the buffer under its byte mask and accumulate
+ // the mask for later merges
+ override def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) {
+ when(in.fire() && in.bits.hasData() && alloc) {
+ val beat = in.bits.addr_beat
+ val full = FillInterleaved(8, in.bits.wmask())
+ data_buffer(beat) := (~full & data_buffer(beat)) | (full & in.bits.data)
+ wmask_buffer(beat) := in.bits.wmask() | wmask_buffer(beat) // assumes wmask_buffer is zeroed
+ }
+ }
+
+ // Masked bytes keep the buffered (newly Put) data; unmasked bytes take
+ // the incoming (refilled/written-back/de-cached) data
+ override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
+ val old_data = incoming // Refilled, written back, or de-cached data
+ val new_data = data_buffer(beat) // Newly Put data is already in the buffer
+ val wmask = FillInterleaved(8, wmask_buffer(beat))
+ data_buffer(beat) := ~wmask & old_data | wmask & new_data
+ }
+
+ // Zero the masks between transactions (initDataInner assumes this)
+ def clearWmaskBuffer() {
+ wmask_buffer.foreach { w => w := UInt(0) }
+ }
+}
+
+/** Register holding the block address of the transaction being tracked. */
+trait HasBlockAddressBuffer extends HasCoherenceAgentParameters {
+ val xact_addr_block = Reg(init = UInt(0, width = blockAddrBits))
+}
+
+
+/** Registers capturing the fields of the Acquire that started the
+  * transaction, plus wires for the beat/summary views derived elsewhere. */
+trait HasAcquireMetadataBuffer extends HasBlockAddressBuffer {
+ val xact_allocate = Reg{ Bool() }
+ val xact_amo_shift_bytes = Reg{ UInt() }
+ val xact_op_code = Reg{ UInt() }
+ val xact_addr_byte = Reg{ UInt() }
+ val xact_op_size = Reg{ UInt() }
+ val xact_addr_beat = Wire(UInt())
+ val xact_iacq = Wire(new SecondaryMissInfo)
+}
+
+/** Registers capturing an in-flight voluntary inner Release, plus a helper
+  * that reconstructs the Release message (e.g. to build its Grant). */
+trait HasVoluntaryReleaseMetadataBuffer extends HasBlockAddressBuffer
+ with HasPendingBitHelpers
+ with HasXactTrackerStates {
+ def io: HierarchicalXactTrackerIO
+
+ val xact_vol_ir_r_type = Reg{ UInt() }
+ val xact_vol_ir_src = Reg{ UInt() }
+ val xact_vol_ir_client_xact_id = Reg{ UInt() }
+
+ // Rebuild the voluntary Release from the saved fields, in the inner
+ // TileLink parameterization
+ def xact_vol_irel = Release(
+ src = xact_vol_ir_src,
+ voluntary = Bool(true),
+ r_type = xact_vol_ir_r_type,
+ client_xact_id = xact_vol_ir_client_xact_id,
+ addr_block = xact_addr_block)
+ (p.alterPartial({ case TLId => p(InnerTLId) }))
+}
+
+/** Logic for accepting voluntary inner Releases: collects their data beats,
+  * records the fields needed to acknowledge them, and emits the matching
+  * voluntary Grant once all beats have arrived. */
+trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer {
+ def inner_coh: ManagerMetadata
+
+ val pending_irel_data = Reg(init=Bits(0, width = innerDataBeats))
+ val vol_ignt_counter = Wire(new TwoWayBeatCounterStatus)
+
+ def irel_can_merge: Bool // tracker-specific merge condition
+ def irel_same_xact: Bool // release belongs to the current transaction
+ def irel_is_allocating: Bool = state === s_idle && io.alloc.irel.should && io.inner.release.valid
+ def irel_is_merging: Bool = (irel_can_merge || irel_same_xact) && io.inner.release.valid
+
+ def innerRelease(block_vol_ignt: Bool = Bool(false), next: UInt = s_busy) {
+ // Count voluntary releases in against voluntary grants out
+ connectTwoWayBeatCounters(
+ status = vol_ignt_counter,
+ up = io.inner.release,
+ down = io.inner.grant,
+ trackUp = (r: Release) => {
+ Mux(state === s_idle, io.alloc.irel.should, io.alloc.irel.matches) && r.isVoluntary() && r.requiresAck()
+ },
+ trackDown = (g: Grant) => (state =/= s_idle) && g.isVoluntary())
+
+
+ when(irel_is_allocating) {
+ xact_addr_block := io.irel().addr_block
+ // Set all of them to pending in the beginning as a precaution
+ // If it turns out we don't need some or all of the beats, they will
+ // be overridden below
+ pending_irel_data := ~UInt(0, innerDataBeats)
+ state := next
+ }
+
+ val irel_fire = (irel_is_allocating || irel_is_merging) && io.inner.release.ready
+ when (irel_fire) {
+ when (io.irel().first()) {
+ // Save the fields needed to construct the acknowledging Grant
+ when (io.irel().isVoluntary()) {
+ xact_vol_ir_r_type := io.irel().r_type
+ xact_vol_ir_src := io.irel().client_id
+ xact_vol_ir_client_xact_id := io.irel().client_xact_id
+ }
+ // If this release has data, set all the pending bits except the first.
+ // Otherwise, clear all the pending bits
+ pending_irel_data := Mux(io.irel().hasMultibeatData(),
+ dropPendingBitWhenBeatHasData(io.inner.release),
+ UInt(0))
+ } .otherwise {
+ pending_irel_data := (pending_irel_data & dropPendingBitWhenBeatHasData(io.inner.release))
+ }
+ if (p(EnableL2Logging)) {
+ when (io.irel().hasData()) {
+ printf("[release] addr_block=%x addr_beat=%d data=%x\n",
+ io.irel().addr_block, io.irel().addr_beat, io.irel().data)
+ }
+ }
+ }
+
+ // Grant the ack only after every release beat has arrived and while no
+ // tracker-specific condition blocks it
+ io.inner.grant.valid := state.isOneOf(s_wb_req, s_wb_resp, s_inner_probe, s_busy) &&
+ vol_ignt_counter.pending &&
+ !(pending_irel_data.orR || block_vol_ignt)
+
+ io.inner.grant.bits := inner_coh.makeGrant(xact_vol_irel)
+
+ scoreboard += (pending_irel_data.orR, vol_ignt_counter.pending)
+ }
+
+}
+
+/** Mixed into trackers that emit voluntary Releases (writebacks) on the
+  * outer TileLink interface, either from a local buffer or forwarded
+  * combinationally from the inner interface.
+  */
+trait EmitsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer {
+  val pending_orel_send = Reg(init=Bool(false)) // a (possibly dataless) Release still to be sent
+  val pending_orel_data = Reg(init=Bits(0, width = innerDataBeats)) // data beats still to send
+  val vol_ognt_counter = Wire(new TwoWayBeatCounterStatus) // Releases out vs. Grant acks in
+  // Any outer-release activity still outstanding for this transaction?
+  val pending_orel = pending_orel_send || pending_orel_data.orR || vol_ognt_counter.pending
+  val sending_orel = Reg(init = Bool(false)) // a multibeat outer Release is in flight
+
+  // Block acceptance of inner releases if we have already started sending
+  // outer releases, but have not yet sent out the beat corresponding to the
+  // inner release. This function must be included in io.inner.release.ready
+  // if it is possible to start accepting a new inner release as the previous
+  // outer release is still being sent. DO NOT include this in the
+  // io.inner.release.ready if the releases are not buffered
+  // (i.e. io.inner.release and io.outer.release combinationally linked).
+  def blockInnerRelease(rel: ReleaseMetadata = io.irel()): Bool = {
+    val waiting_to_send = sending_orel && pending_orel_data(rel.addr_beat)
+    val sending_now = io.outer.release.fire() && rel.addr_beat === io.orel().addr_beat
+    rel.hasData() && (waiting_to_send || sending_now)
+  }
+
+  /** Wire up emission of voluntary writebacks on the outer interface.
+    *
+    * @param coh client metadata used to construct the writeback message
+    * @param buffering whether release data is buffered locally before sending
+    * @param data data beat to send
+    * @param add_pending_data_bits extra beats to mark as needing to be sent
+    * @param add_pending_send_bit when asserted, mark that a Release must be sent
+    * @param block_orel suppress the outer Release while asserted
+    */
+  def outerRelease(
+      coh: ClientMetadata,
+      buffering: Bool = Bool(true),
+      data: UInt = io.irel().data,
+      add_pending_data_bits: UInt = UInt(0),
+      add_pending_send_bit: Bool = Bool(false),
+      block_orel: Bool = Bool(false)) {
+
+    when (state =/= s_idle || io.alloc.irel.should) {
+      pending_orel_data := (pending_orel_data |
+                             addPendingBitWhenBeatHasData(io.inner.release) |
+                             add_pending_data_bits) &
+                             dropPendingBitWhenBeatHasData(io.outer.release)
+    }
+    when (add_pending_send_bit) { pending_orel_send := Bool(true) }
+    when (io.outer.release.fire()) {
+      // Track whether a multibeat outer Release is partially sent
+      when (io.outer.release.bits.first()) { sending_orel := Bool(true) }
+      when (io.outer.release.bits.last()) { sending_orel := Bool(false) }
+      pending_orel_send := Bool(false)
+    }
+
+    connectTwoWayBeatCounters(
+      status = vol_ognt_counter,
+      up = io.outer.release,
+      down = io.outer.grant,
+      trackUp = (r: Release) => r.isVoluntary() && r.requiresAck(),
+      trackDown = (g: Grant) => g.isVoluntary())
+
+    io.outer.release.valid := !block_orel && Mux(buffering,
+      (state === s_busy) && Mux(io.orel().hasData(),
+        pending_orel_data(vol_ognt_counter.up.idx),
+        pending_orel_send),
+      // only writebacks need to be forwarded to the outer interface
+      state =/= s_idle && io.alloc.irel.matches &&
+        io.irel().hasData() && io.inner.release.valid)
+
+    io.outer.release.bits := coh.makeVoluntaryWriteback(
+      client_xact_id = UInt(0), // TODO was tracker id, but not needed?
+      addr_block = xact_addr_block,
+      addr_beat = vol_ognt_counter.up.idx,
+      data = data)
+
+    when (vol_ognt_counter.pending) { io.outer.grant.ready := Bool(true) }
+
+    scoreboard += (pending_orel, vol_ognt_counter.pending)
+  }
+}
+
+/** Mixed into trackers that must probe inner caching clients and collect
+  * their Release responses before proceeding.
+  */
+trait EmitsInnerProbes extends HasBlockAddressBuffer
+    with HasXactTrackerStates
+    with HasPendingBitHelpers {
+  def io: HierarchicalXactTrackerIO
+
+  // Probes are only needed when some inner clients actually cache data
+  val needs_probes = (innerNCachingClients > 0)
+  // One pending bit per caching client still to be probed
+  val pending_iprbs = Reg(UInt(width = max(innerNCachingClients, 1)))
+  val curr_probe_dst = PriorityEncoder(pending_iprbs)
+
+  def full_representation: UInt
+  /** Mark every coherent sharer as needing a probe; incoherent clients are skipped. */
+  def initializeProbes() {
+    if (needs_probes)
+      pending_iprbs := full_representation & ~io.incoherent.toBits
+    else
+      pending_iprbs := UInt(0)
+  }
+  // An incoming Release matches this transaction if it conflicts on address,
+  // is a probe response (not voluntary), and we are currently probing
+  def irel_same_xact = io.irel().conflicts(xact_addr_block) &&
+                         !io.irel().isVoluntary() &&
+                         state === s_inner_probe
+
+  /** Send Probes to all pending destinations and count their Release responses.
+    *
+    * @param prb the Probe message to send
+    * @param next state to transition to once all responses have arrived
+    */
+  def innerProbe(prb: Probe, next: UInt) {
+    if (needs_probes) {
+      val irel_counter = Wire(new TwoWayBeatCounterStatus)
+
+      pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe)
+      io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR
+      io.inner.probe.bits := prb
+
+      connectTwoWayBeatCounters(
+        status = irel_counter,
+        up = io.inner.probe,
+        down = io.inner.release,
+        max = innerNCachingClients,
+        trackDown = (r: Release) => (state =/= s_idle) && !r.isVoluntary())
+
+      // Advance once every probe has been sent and every response collected
+      when(state === s_inner_probe && !(pending_iprbs.orR || irel_counter.pending)) {
+        state := next
+      }
+    } else {
+      when (state === s_inner_probe) { state := next }
+    }
+
+    //N.B. no pending bits added to scoreboard because all handled in s_inner_probe
+  }
+}
+
+/** Mixed into trackers to report address-match and allocation-availability
+  * information up to the parent agent's routing/arbitration logic.
+  */
+trait RoutesInParent extends HasBlockAddressBuffer
+    with HasXactTrackerStates {
+  def io: HierarchicalXactTrackerIO
+  type AddrComparison = HasCacheBlockAddress => Bool
+  /** Default comparison: does the message conflict with our block address? */
+  def exactAddrMatch(a: HasCacheBlockAddress): Bool = a.conflicts(xact_addr_block)
+  /** Drive the io.alloc signals: 'matches' while busy, 'can' while idle.
+    *
+    * @param iacqMatches comparison used for inner Acquires
+    * @param irelMatches comparison used for inner Releases
+    * @param oprbMatches comparison used for outer Probes
+    * @param iacqCanAlloc may this tracker be allocated to a new inner Acquire?
+    * @param irelCanAlloc may this tracker be allocated to a new inner Release?
+    * @param oprbCanAlloc may this tracker be allocated to a new outer Probe?
+    */
+  def routeInParent(iacqMatches: AddrComparison = exactAddrMatch,
+      irelMatches: AddrComparison = exactAddrMatch,
+      oprbMatches: AddrComparison = exactAddrMatch,
+      iacqCanAlloc: Bool = Bool(false),
+      irelCanAlloc: Bool = Bool(false),
+      oprbCanAlloc: Bool = Bool(false)) {
+    io.alloc.iacq.matches := (state =/= s_idle) && iacqMatches(io.iacq())
+    io.alloc.irel.matches := (state =/= s_idle) && irelMatches(io.irel())
+    io.alloc.oprb.matches := (state =/= s_idle) && oprbMatches(io.oprb())
+    io.alloc.iacq.can := state === s_idle && iacqCanAlloc
+    io.alloc.irel.can := state === s_idle && irelCanAlloc
+    io.alloc.oprb.can := state === s_idle && oprbCanAlloc
+    io.alloc.addr_block := xact_addr_block
+    io.alloc.idle := state === s_idle
+  }
+}
+
+/** Mixed into trackers that accept Acquires from inner clients, merge
+  * secondary misses into the same transaction, and respond with Grants.
+  */
+trait AcceptsInnerAcquires extends HasAcquireMetadataBuffer
+    with AcceptsVoluntaryReleases
+    with HasXactTrackerStates
+    with HasPendingBitHelpers {
+  def io: HierarchicalXactTrackerIO
+  def nSecondaryMisses: Int
+  def alwaysWriteFullBeat: Boolean
+  def inner_coh: ManagerMetadata
+  def trackerId: Int
+
+  // Secondary miss queue holds transaction metadata used to make grants
+  lazy val ignt_q = Module(new Queue(
+    new SecondaryMissInfo()(p.alterPartial({ case TLId => p(InnerTLId) })),
+    1 + nSecondaryMisses))
+
+  val pending_ignt = Wire(Bool())      // any Grants still owed to inner clients?
+  val ignt_data_idx = Wire(UInt())     // beat index of the outgoing Grant
+  val ignt_data_done = Wire(Bool())    // asserted on the last Grant beat
+  val ifin_counter = Wire(new TwoWayBeatCounterStatus) // Grants out vs. Finishes in
+  val pending_put_data = Reg(init=Bits(0, width = innerDataBeats))  // PutBlock beats still expected
+  val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats)) // beats ready for response
+
+  // An Acquire belongs to the current transaction if its ids match the saved header
+  def iacq_same_xact: Bool =
+    (xact_iacq.client_xact_id === io.iacq().client_xact_id) &&
+      (xact_iacq.client_id === io.iacq().client_id) &&
+      pending_ignt
+  def iacq_same_xact_multibeat = iacq_same_xact && io.iacq().hasMultibeatData()
+  def iacq_can_merge: Bool
+  def iacq_is_allocating: Bool = state === s_idle && io.alloc.iacq.should && io.inner.acquire.valid
+  def iacq_is_merging: Bool = (iacq_can_merge || iacq_same_xact) && io.inner.acquire.valid
+
+  /** Wire up acceptance of inner Acquires, including secondary-miss merging.
+    *
+    * @param can_alloc may the block be allocated in this cache?
+    * @param next state to transition to when an Acquire allocates this tracker
+    */
+  def innerAcquire(can_alloc: Bool, next: UInt) {
+    val iacq_matches_head = iacq_same_xact && xact_iacq.addr_beat === io.iacq().addr_beat
+
+    // Enqueue some metadata information that we'll use to make coherence updates with later
+    ignt_q.io.enq.valid := iacq_is_allocating ||
+                             (!iacq_matches_head && pending_ignt &&
+                               io.inner.acquire.fire() && io.iacq().first())
+    ignt_q.io.enq.bits := io.iacq()
+
+    // Use the outputs of the queue to make further messages
+    xact_iacq := Mux(ignt_q.io.deq.valid, ignt_q.io.deq.bits, ignt_q.io.enq.bits)
+    xact_addr_beat := xact_iacq.addr_beat
+    pending_ignt := ignt_q.io.count > UInt(0)
+
+    // Track whether any beats are missing from a PutBlock
+    when (state =/= s_idle || io.alloc.iacq.should) {
+      pending_put_data := (pending_put_data &
+                            dropPendingBitWhenBeatHasData(io.inner.acquire)) |
+                            addPendingBitsOnFirstBeat(io.inner.acquire)
+    }
+
+    // Initialize transaction metadata for accepted Acquire
+    when(iacq_is_allocating) {
+      xact_addr_block := io.iacq().addr_block
+      xact_allocate := io.iacq().allocate() && can_alloc
+      xact_amo_shift_bytes := io.iacq().amo_shift_bytes()
+      xact_op_code := io.iacq().op_code()
+      xact_addr_byte := io.iacq().addr_byte()
+      xact_op_size := io.iacq().op_size()
+      // Make sure to collect all data from a PutBlock
+      pending_put_data := Mux(
+        io.iacq().isBuiltInType(Acquire.putBlockType),
+        dropPendingBitWhenBeatHasData(io.inner.acquire),
+        UInt(0))
+      pending_ignt_data := UInt(0)
+      state := next
+    }
+
+    scoreboard += (pending_put_data.orR)
+  }
+
+  /** Wire up responding to inner Acquires with Grants and collecting Finishes.
+    *
+    * @param data data beat to send in the Grant
+    * @param external_pending suppress the Grant while asserted
+    * @param buffering whether Grant data is buffered locally before sending
+    * @param add_pending_bits extra beats to mark as ready for response
+    */
+  def innerGrant(
+      data: UInt = io.ognt().data,
+      external_pending: Bool = Bool(false),
+      buffering: Bool = Bool(true),
+      add_pending_bits: UInt = UInt(0)) {
+    // Track the number of outstanding inner.finishes
+    connectTwoWayBeatCounters(
+      status = ifin_counter,
+      up = io.inner.grant,
+      down = io.inner.finish,
+      max = nSecondaryMisses,
+      trackUp = (g: Grant) => g.requiresAck())
+
+    // Track which beats are ready for response
+    when(!iacq_is_allocating) {
+      pending_ignt_data := (pending_ignt_data & dropPendingBitWhenBeatHasData(io.inner.grant)) |
+                             addPendingBitWhenBeatHasData(io.inner.release) |
+                             addPendingBitWhenBeatHasData(io.outer.grant) |
+                             add_pending_bits
+    }
+
+    if (p(EnableL2Logging)) {
+      when (io.inner.grant.fire() && io.ignt().hasData()) {
+        printf("[get] addr_block=%x addr_beat=%d data=%x\n",
+          xact_addr_block, io.ignt().addr_beat, io.ignt().data)
+      }
+    }
+
+    // Have we finished receiving the complete inner acquire transaction?
+    val iacq_finished = !(state === s_idle ||
+                          state === s_meta_read ||
+                          pending_put_data.orR)
+
+    val ignt_from_iacq = inner_coh.makeGrant(
+      sec = ignt_q.io.deq.bits,
+      manager_xact_id = UInt(trackerId),
+      data = data)
+
+    // Make the Grant message using the data stored in the secondary miss queue
+    val (cnt, done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat)
+    ignt_data_idx := cnt
+    ignt_data_done := done
+    ignt_q.io.deq.ready := Bool(false)
+    // Regular Grants are only sent while no voluntary-release ack is outstanding
+    when(!vol_ignt_counter.pending) {
+      ignt_q.io.deq.ready := ignt_data_done
+      io.inner.grant.bits := ignt_from_iacq
+      io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter
+      when (state === s_busy && pending_ignt) {
+        io.inner.grant.valid := !external_pending &&
+                                  Mux(io.ignt().hasData(),
+                                    Mux(buffering,
+                                      pending_ignt_data(ignt_data_idx),
+                                      io.outer.grant.valid),
+                                    iacq_finished)
+      }
+    }
+
+    // We must wait for as many Finishes as we sent Grants
+    io.inner.finish.ready := state === s_busy
+
+    scoreboard += (pending_ignt, ifin_counter.pending)
+  }
+
+}
+
+/** Mixed into trackers that service misses or permission upgrades by
+  * acquiring the block from the outer memory system.
+  */
+trait EmitsOuterAcquires extends AcceptsInnerAcquires {
+  val ognt_counter = Wire(new TwoWayBeatCounterStatus) // Acquires out vs. Grants in
+
+  // Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory:
+  //
+  // If we're allocating in this cache, we can use the current metadata
+  // to make an appropriate custom Acquire, otherwise we copy over the
+  // built-in Acquire from the inner TL to the outer TL
+  def outerAcquire(
+      caching: Bool,
+      coh: ClientMetadata,
+      block_outer_acquire: Bool = Bool(false),
+      buffering: Bool = Bool(true),
+      data: UInt = io.iacq().data,
+      wmask: UInt = io.iacq().wmask(),
+      next: UInt = s_busy) {
+
+    // Tracks outstanding Acquires, waiting for their matching Grant.
+    connectTwoWayBeatCounters(
+      status = ognt_counter,
+      up = io.outer.acquire,
+      down = io.outer.grant,
+      beat = xact_addr_beat,
+      trackDown = (g: Grant) => !g.isVoluntary())
+
+    io.outer.acquire.valid :=
+      state === s_outer_acquire && !block_outer_acquire &&
+        (xact_allocate ||
+          Mux(buffering,
+            !pending_put_data(ognt_counter.up.idx),
+            // If not buffering, we should only send an outer acquire if
+            // the ignt_q is not empty (pending_ignt) and the enqueued
+            // transaction does not have data or we are receiving the
+            // inner acquire and it is the same transaction as the one enqueued.
+            pending_ignt && (!xact_iacq.hasData() ||
+              (io.inner.acquire.valid && iacq_same_xact))))
+
+    io.outer.acquire.bits :=
+      Mux(caching,
+        coh.makeAcquire(
+          op_code = xact_op_code,
+          client_xact_id = UInt(0),
+          addr_block = xact_addr_block),
+        BuiltInAcquireBuilder(
+          a_type = xact_iacq.a_type,
+          client_xact_id = UInt(0),
+          addr_block = xact_addr_block,
+          addr_beat = ognt_counter.up.idx,
+          data = data,
+          addr_byte = xact_addr_byte,
+          operand_size = xact_op_size,
+          opcode = xact_op_code,
+          wmask = wmask,
+          alloc = Bool(false))
+          (p.alterPartial({ case TLId => p(OuterTLId)})))
+
+    // Advance once the final Acquire beat has been sent
+    when(state === s_outer_acquire && ognt_counter.up.done) { state := next }
+
+    when (ognt_counter.pending) { io.outer.grant.ready := Bool(true) }
+
+    scoreboard += ognt_counter.pending
+  }
+}
+
+/** Base class for trackers that handle only voluntary writebacks from inner clients. */
+abstract class VoluntaryReleaseTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
+    with AcceptsVoluntaryReleases
+    with RoutesInParent {
+  // Voluntary release trackers never merge unrelated releases
+  def irel_can_merge = Bool(false)
+  // A Release belongs to this transaction if it hits our block address, is
+  // voluntary, and we are still expecting data beats for it
+  def irel_same_xact = io.irel().conflicts(xact_addr_block) &&
+                         io.irel().isVoluntary() &&
+                         pending_irel_data.orR
+}
+
+/** Base class for trackers that service inner Acquires: probing inner
+  * clients and acquiring from outer memory as needed.
+  */
+abstract class AcquireTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
+    with AcceptsInnerAcquires
+    with EmitsOuterAcquires
+    with EmitsInnerProbes
+    with RoutesInParent {
+}
diff --git a/uncore/src/main/scala/coherence/Directory.scala b/uncore/src/main/scala/coherence/Directory.scala
new file mode 100644
index 00000000..86e4fde5
--- /dev/null
+++ b/uncore/src/main/scala/coherence/Directory.scala
@@ -0,0 +1,43 @@
+// See LICENSE for license details.
+
+package uncore.coherence
+import Chisel._
+
+// This class encapsulates transformations on different directory information
+// storage formats
+abstract class DirectoryRepresentation(val width: Int) {
+  def pop(prev: UInt, id: UInt): UInt  // remove client id from the sharer set
+  def push(prev: UInt, id: UInt): UInt // add client id to the sharer set
+  def flush: UInt                      // the empty sharer set
+  def none(s: UInt): Bool              // is the sharer set empty?
+  def one(s: UInt): Bool               // does the set hold exactly one sharer?
+  def count(s: UInt): UInt             // number of sharers in the set
+  def next(s: UInt): UInt              // select one sharer from the set
+  def full(s: UInt): UInt              // expand to an N-hot sharer bitvector
+}
+
+/** Mixed into agents that are parameterized by a directory representation. */
+abstract trait HasDirectoryRepresentation {
+  val dir: DirectoryRepresentation
+}
+
+/** Degenerate directory that stores no sharer information and therefore must
+  * conservatively report every client as a potential sharer.
+  */
+class NullRepresentation(nClients: Int) extends DirectoryRepresentation(1) {
+  def pop(prev: UInt, id: UInt) = UInt(0)
+  def push(prev: UInt, id: UInt) = UInt(0)
+  def flush = UInt(0)
+  def none(s: UInt) = Bool(false)     // never claim the set is empty
+  def one(s: UInt) = Bool(false)
+  def count(s: UInt) = UInt(nClients) // assume everyone is sharing
+  def next(s: UInt) = UInt(0)
+  def full(s: UInt) = SInt(-1, width = nClients).toUInt // all-ones: probe everybody
+}
+
+/** Full-map directory: one bit per client, set while that client shares the block. */
+class FullRepresentation(nClients: Int) extends DirectoryRepresentation(nClients) {
+  def pop(prev: UInt, id: UInt) = prev & ~UIntToOH(id) // clear the client's bit
+  def push(prev: UInt, id: UInt) = prev | UIntToOH(id) // set the client's bit
+  def flush = UInt(0, width = width)
+  def none(s: UInt) = s === UInt(0)
+  def one(s: UInt) = PopCount(s) === UInt(1)
+  def count(s: UInt) = PopCount(s)
+  def next(s: UInt) = PriorityEncoder(s) // lowest-numbered sharer
+  def full(s: UInt) = s                  // already an N-hot bitvector
+}
diff --git a/uncore/src/main/scala/coherence/Metadata.scala b/uncore/src/main/scala/coherence/Metadata.scala
new file mode 100644
index 00000000..c0d7a6bf
--- /dev/null
+++ b/uncore/src/main/scala/coherence/Metadata.scala
@@ -0,0 +1,344 @@
+// See LICENSE for license details.
+
+package uncore.coherence
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import cde.{Parameters, Field}
+
+/** Identifies the TLId of the inner network in a hierarchical cache controller;
+  * mapped onto TLId via Parameters.alterPartial when building inner messages. */
+case object InnerTLId extends Field[String]
+/** Identifies the TLId of the outer network in a hierarchical cache controller;
+  * mapped onto TLId via Parameters.alterPartial when building outer messages. */
+case object OuterTLId extends Field[String]
+
+/** Base class to represent coherence information in clients and managers */
+abstract class CoherenceMetadata(implicit p: Parameters) extends TLBundle()(p) {
+  // The coherence policy in effect on this TileLink network
+  val co = tlCoh
+}
+
+/** Stores the client-side coherence information,
+  * such as permissions on the data and whether the data is dirty.
+  * Its API can be used to make TileLink messages in response to
+  * memory operations or [[uncore.Probe]] messages.
+  */
+class ClientMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
+  /** Actual state information stored in this bundle */
+  val state = UInt(width = co.clientStateWidth)
+
+  /** Metadata equality */
+  def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
+  def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)
+
+  /** Is the block's data present in this cache */
+  def isValid(dummy: Int = 0): Bool = co.isValid(this)
+  /** Does this cache have permissions on this block sufficient to perform op */
+  def isHit(op_code: UInt): Bool = co.isHit(op_code, this)
+  /** Does this cache lack permissions on this block sufficient to perform op */
+  def isMiss(op_code: UInt): Bool = !co.isHit(op_code, this)
+  /** Does a secondary miss on the block require another Acquire message */
+  def requiresAcquireOnSecondaryMiss(first_op: UInt, second_op: UInt): Bool =
+    co.requiresAcquireOnSecondaryMiss(first_op, second_op, this)
+  /** Does op require a Release to be made to outer memory */
+  def requiresReleaseOnCacheControl(op_code: UInt): Bool =
+    co.requiresReleaseOnCacheControl(op_code: UInt, this)
+  /** Does an eviction require a Release to be made to outer memory */
+  def requiresVoluntaryWriteback(dummy: Int = 0): Bool =
+    co.requiresReleaseOnCacheControl(M_FLUSH, this)
+
+  /** Constructs an Acquire message based on this metadata and a memory operation
+    *
+    * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+    * @param client_xact_id client's transaction id
+    * @param addr_block address of the cache block
+    */
+  def makeAcquire(
+      op_code: UInt,
+      client_xact_id: UInt,
+      addr_block: UInt): Acquire = {
+    Acquire(
+      is_builtin_type = Bool(false),
+      a_type = co.getAcquireType(op_code, this),
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      union = Cat(op_code, Bool(true)))(p)
+  }
+
+  /** Constructs a Release message based on this metadata on cache control op
+    *
+    * @param op_code the cache control operation triggering this Release
+    * @param client_xact_id client's transaction id
+    * @param addr_block address of the cache block
+    * @param addr_beat sub-block address (which beat)
+    * @param data data being written back
+    */
+  def makeVoluntaryRelease(
+      op_code: UInt,
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release =
+    Release(
+      voluntary = Bool(true),
+      r_type = co.getReleaseType(op_code, this),
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      data = data)(p)
+
+  /** Constructs a Release message based on this metadata on an eviction
+    *
+    * @param client_xact_id client's transaction id
+    * @param addr_block address of the cache block
+    * @param addr_beat sub-block address (which beat)
+    * @param data data being written back
+    */
+  def makeVoluntaryWriteback(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release =
+    makeVoluntaryRelease(
+      op_code = M_FLUSH,
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      data = data)
+
+  /** Constructs a Release message based on this metadata and a [[uncore.Probe]]
+    *
+    * @param prb the incoming [[uncore.Probe]]
+    * @param addr_beat sub-block address (which beat)
+    * @param data data being released
+    */
+  def makeRelease(
+      prb: Probe,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release =
+    Release(
+      voluntary = Bool(false),
+      r_type = co.getReleaseType(prb, this),
+      client_xact_id = UInt(0),
+      addr_block = prb.addr_block,
+      addr_beat = addr_beat,
+      data = data)(p)
+
+  /** New metadata after receiving a [[uncore.Grant]]
+    *
+    * @param incoming the incoming [[uncore.Grant]]
+    * @param pending the mem op that triggered this transaction
+    */
+  def onGrant(incoming: Grant, pending: UInt): ClientMetadata =
+    co.clientMetadataOnGrant(incoming, pending, this)
+
+  /** New metadata after receiving a [[uncore.Probe]]
+    *
+    * @param incoming the incoming [[uncore.Probe]]
+    */
+  def onProbe(incoming: Probe): ClientMetadata =
+    co.clientMetadataOnProbe(incoming, this)
+
+  /** New metadata after a op_code hits this block
+    *
+    * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+    */
+  def onHit(op_code: UInt): ClientMetadata =
+    co.clientMetadataOnHit(op_code, this)
+
+  /** New metadata after op_code releases permissions on this block
+    *
+    * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+    */
+  def onCacheControl(op_code: UInt): ClientMetadata =
+    co.clientMetadataOnCacheControl(op_code, this)
+}
+
+/** Factories for ClientMetadata, including on reset */
+object ClientMetadata {
+  /** Construct client metadata holding the given coherence state. */
+  def apply(state: UInt)(implicit p: Parameters) = {
+    val meta = Wire(new ClientMetadata)
+    meta.state := state
+    meta
+  }
+  /** Reset value: state zero. */
+  def onReset(implicit p: Parameters) = ClientMetadata(UInt(0))(p) // TODO: assumes clientInvalid === 0
+}
+
+/** Stores manager-side information about the status
+  * of a cache block, including whether it has any known sharers.
+  *
+  * Its API can be used to create [[uncore.Probe]] and [[uncore.Grant]] messages.
+  */
+class ManagerMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
+  // Currently no coherence policies assume manager-side state information
+  // val state = UInt(width = co.masterStateWidth) TODO: Fix 0-width wires in Chisel
+
+  /** The directory information for this block */
+  val sharers = UInt(width = co.dir.width)
+
+  /** Metadata equality */
+  def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
+    this.sharers === rhs.sharers
+  def =/=(rhs: ManagerMetadata): Bool = !this.===(rhs)
+
+  /** Converts the directory info into an N-hot sharer bitvector (i.e. full representation) */
+  def full(dummy: Int = 0): UInt = co.dir.full(this.sharers)
+
+  /** Does this [[uncore.Acquire]] require [[uncore.Probe Probes]] to be sent */
+  def requiresProbes(acq: HasAcquireType): Bool = co.requiresProbes(acq, this)
+  /** Does this memory op require [[uncore.Probe Probes]] to be sent */
+  def requiresProbes(op_code: UInt): Bool = co.requiresProbes(op_code, this)
+  /** Does an eviction require [[uncore.Probe Probes]] to be sent */
+  def requiresProbesOnVoluntaryWriteback(dummy: Int = 0): Bool =
+    co.requiresProbes(M_FLUSH, this)
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
+    *
+    * @param dst Destination client id for this Probe
+    * @param acq Acquire message triggering this Probe
+    * @param addr_block address of the cache block being probed
+    */
+  def makeProbe(dst: UInt, acq: HasAcquireType, addr_block: UInt): ProbeToDst =
+    Probe(dst, co.getProbeType(acq, this), addr_block)(p)
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
+    *
+    * @param dst Destination client id for this Probe
+    * @param acq Acquire message triggering this Probe (supplies the block address)
+    */
+  def makeProbe(dst: UInt, acq: AcquireMetadata): ProbeToDst =
+    Probe(dst, co.getProbeType(acq, this), acq.addr_block)(p)
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given mem op
+    *
+    * @param dst Destination client id for this Probe
+    * @param op_code memory operation triggering this Probe
+    * @param addr_block address of the cache block being probed
+    */
+  def makeProbe(dst: UInt, op_code: UInt, addr_block: UInt): ProbeToDst =
+    Probe(dst, co.getProbeType(op_code, this), addr_block)(p)
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for an eviction
+    *
+    * @param dst Destination client id for this Probe
+    * @param addr_block address of the cache block being probed prior to eviction
+    */
+  def makeProbeForVoluntaryWriteback(dst: UInt, addr_block: UInt): ProbeToDst =
+    makeProbe(dst, M_FLUSH, addr_block)
+
+  /** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]]
+    *
+    * @param rel Release message being acknowledged by this Grant
+    */
+  def makeGrant(rel: ReleaseMetadata with HasClientId): GrantToDst =
+    Grant(
+      dst = rel.client_id,
+      is_builtin_type = Bool(true),
+      g_type = Grant.voluntaryAckType,
+      client_xact_id = rel.client_xact_id,
+      manager_xact_id = UInt(0))(p)
+
+  /** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]]
+    *
+    * May contain single or multiple beats of data, or just be a permissions upgrade.
+    *
+    * @param acq Acquire message being responded to by this Grant
+    * @param manager_xact_id manager's transaction id
+    * @param addr_beat beat id of the data
+    * @param data data being refilled to the original requestor
+    */
+  def makeGrant(
+      acq: AcquireMetadata with HasClientId,
+      manager_xact_id: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): GrantToDst =
+    Grant(
+      dst = acq.client_id,
+      is_builtin_type = acq.isBuiltInType(),
+      g_type = co.getGrantType(acq, this),
+      client_xact_id = acq.client_xact_id,
+      manager_xact_id = manager_xact_id,
+      addr_beat = addr_beat,
+      data = data)(p)
+
+  /** Construct an [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] with some overrides
+    *
+    * Used to respond to secondary misses merged into this transaction.
+    * May contain single or multiple beats of data.
+    *
+    * @param sec Secondary miss info
+    * @param manager_xact_id manager's transaction id
+    * @param data data being refilled to the original requestor
+    */
+  def makeGrant(
+      sec: SecondaryMissInfo,
+      manager_xact_id: UInt,
+      data: UInt): GrantToDst = {
+    Grant(
+      dst = sec.client_id,
+      is_builtin_type = sec.isBuiltInType(),
+      g_type = co.getGrantType(sec, this),
+      client_xact_id = sec.client_xact_id,
+      manager_xact_id = manager_xact_id,
+      addr_beat = sec.addr_beat,
+      data = data)(p)
+  }
+
+  /** New metadata after receiving a [[uncore.ReleaseFromSrc]]
+    *
+    * @param incoming the incoming [[uncore.ReleaseFromSrc]]
+    */
+  def onRelease(incoming: ReleaseMetadata with HasClientId): ManagerMetadata =
+    co.managerMetadataOnRelease(incoming, incoming.client_id, this)
+
+  /** New metadata after sending a [[uncore.GrantToDst]]
+    *
+    * @param outgoing the outgoing [[uncore.GrantToDst]]
+    */
+  def onGrant(outgoing: GrantMetadata with HasClientId): ManagerMetadata =
+    co.managerMetadataOnGrant(outgoing, outgoing.client_id, this)
+}
+
+/** Factories for ManagerMetadata, including on reset */
+object ManagerMetadata {
+  /** Construct manager metadata with the given sharer set. */
+  def apply(sharers: UInt, state: UInt = UInt(width = 0))(implicit p: Parameters) = {
+    val meta = Wire(new ManagerMetadata)
+    //meta.state := state TODO: Fix 0-width wires in Chisel
+    meta.sharers := sharers
+    meta
+  }
+  /** Construct manager metadata with an empty (flushed) sharer set. */
+  def apply(implicit p: Parameters) = {
+    val meta = Wire(new ManagerMetadata)
+    //meta.state := UInt(width = 0) TODO: Fix 0-width wires in Chisel
+    meta.sharers := meta.co.dir.flush
+    meta
+  }
+  def onReset(implicit p: Parameters) = ManagerMetadata(p)
+}
+
+/** HierarchicalMetadata is used in a cache in a multi-level memory hierarchy
+  * that is a manager with respect to some inner caches and a client with
+  * respect to some outer cache.
+  *
+  * This class makes use of two different sets of TileLink parameters, which are
+  * applied by contextually mapping [[uncore.TLId]] to one of
+  * [[uncore.InnerTLId]] or [[uncore.OuterTLId]].
+  */
+class HierarchicalMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
+  // Manager-side view of the inner network
+  val inner: ManagerMetadata = new ManagerMetadata()(p.alterPartial({case TLId => p(InnerTLId)}))
+  // Client-side view of the outer network
+  val outer: ClientMetadata = new ClientMetadata()(p.alterPartial({case TLId => p(OuterTLId)}))
+  /** Equality requires both inner and outer metadata to match */
+  def ===(rhs: HierarchicalMetadata): Bool =
+    this.inner === rhs.inner && this.outer === rhs.outer
+  def =/=(rhs: HierarchicalMetadata): Bool = !this.===(rhs)
+}
+
+/** Factories for HierarchicalMetadata, including on reset */
+object HierarchicalMetadata {
+  /** Construct hierarchical metadata from its inner and outer halves. */
+  def apply(inner: ManagerMetadata, outer: ClientMetadata)
+           (implicit p: Parameters): HierarchicalMetadata = {
+    val m = Wire(new HierarchicalMetadata)
+    m.inner := inner
+    m.outer := outer
+    m
+  }
+  /** Reset value: both inner and outer metadata at their reset values. */
+  def onReset(implicit p: Parameters): HierarchicalMetadata =
+    apply(ManagerMetadata.onReset, ClientMetadata.onReset)
+}
diff --git a/uncore/src/main/scala/coherence/Policies.scala b/uncore/src/main/scala/coherence/Policies.scala
new file mode 100644
index 00000000..744b8e7d
--- /dev/null
+++ b/uncore/src/main/scala/coherence/Policies.scala
@@ -0,0 +1,696 @@
+// See LICENSE for license details.
+
+package uncore.coherence
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.Util._
+
+/** The entire CoherencePolicy API consists of the following three traits:
+ * HasCustomTileLinkMessageTypes, used to define custom messages
+ * HasClientSideCoherencePolicy, for client coherence agents
+ * HasManagerSideCoherencePolicy, for manager coherence agents
+ */
+abstract class CoherencePolicy(val dir: DirectoryRepresentation)
+ extends HasCustomTileLinkMessageTypes
+ with HasClientSideCoherencePolicy
+ with HasManagerSideCoherencePolicy
+
+/** This API defines the custom, coherence-policy-defined message types,
+ * as opposed to the built-in ones found in tilelink.scala.
+ * Policies must enumerate the custom messages to be sent over each
+ * channel, as well as which of them have associated data.
+ */
+trait HasCustomTileLinkMessageTypes {
+ val nAcquireTypes: Int
+ def acquireTypeWidth = log2Up(nAcquireTypes)
+ val nProbeTypes: Int
+ def probeTypeWidth = log2Up(nProbeTypes)
+ val nReleaseTypes: Int
+ def releaseTypeWidth = log2Up(nReleaseTypes)
+ val nGrantTypes: Int
+ def grantTypeWidth = log2Up(nGrantTypes)
+
+ val acquireTypesWithData = Nil // Only built-in Acquire types have data for now
+ def releaseTypesWithData: Seq[UInt]
+ def grantTypesWithData: Seq[UInt]
+}
+
+/** This API contains all functions required for client coherence agents.
+ * Policies must enumerate the number of client states and define their
+ * permissions with respect to memory operations. Policies must fill in functions
+ * to control which messages are sent and how metadata is updated in response
+ * to coherence events. These functions are generally called from within the
+ * ClientMetadata class in metadata.scala
+ */
+trait HasClientSideCoherencePolicy {
+ // Client coherence states and their permissions
+ val nClientStates: Int
+ def clientStateWidth = log2Ceil(nClientStates)
+ def clientStatesWithReadPermission: Seq[UInt]
+ def clientStatesWithWritePermission: Seq[UInt]
+ def clientStatesWithDirtyData: Seq[UInt]
+
+ // Transaction initiation logic
+ def isValid(meta: ClientMetadata): Bool
+ def isHit(cmd: UInt, meta: ClientMetadata): Bool = {
+ Mux(isWriteIntent(cmd),
+ meta.state isOneOf clientStatesWithWritePermission,
+ meta.state isOneOf clientStatesWithReadPermission)
+ }
+ //TODO: Assumes all states with write permissions also have read permissions
+ def requiresAcquireOnSecondaryMiss(
+ first_cmd: UInt,
+ second_cmd: UInt,
+ meta: ClientMetadata): Bool = {
+ isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
+ }
+ //TODO: Assumes all cache ctrl ops writeback dirty data, and
+ // doesn't issue transaction when e.g. downgrading Exclusive to Shared:
+ def requiresReleaseOnCacheControl(cmd: UInt, meta: ClientMetadata): Bool =
+ meta.state isOneOf clientStatesWithDirtyData
+
+ // Determine which custom message type to use
+ def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt
+ def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt
+ def getReleaseType(p: HasProbeType, meta: ClientMetadata): UInt
+
+ // Mutate ClientMetadata based on messages or cmds
+ def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata): ClientMetadata
+ def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata): ClientMetadata
+ def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata): ClientMetadata
+ def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata): ClientMetadata
+}
+
+/** This API contains all functions required for manager coherence agents.
+ * Policies must enumerate the number of manager states. Policies must fill
+ * in functions to control which Probe and Grant messages are sent and how
+ * metadata should be updated in response to coherence events. These functions
+ * are generally called from within the ManagerMetadata class in metadata.scala
+ */
+trait HasManagerSideCoherencePolicy extends HasDirectoryRepresentation {
+ val nManagerStates: Int
+ def masterStateWidth = log2Ceil(nManagerStates)
+
+ // Transaction probing logic
+ def requiresProbes(acq: HasAcquireType, meta: ManagerMetadata): Bool
+ def requiresProbes(cmd: UInt, meta: ManagerMetadata): Bool
+
+ // Determine which custom message type to use in response
+ def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt
+ def getProbeType(acq: HasAcquireType, meta: ManagerMetadata): UInt
+ def getGrantType(acq: HasAcquireType, meta: ManagerMetadata): UInt
+ def getExclusiveGrantType(): UInt
+
+ // Mutate ManagerMetadata based on messages or cmds
+ def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata): ManagerMetadata
+ def managerMetadataOnGrant(outgoing: HasGrantType, dst: UInt, meta: ManagerMetadata) =
+ ManagerMetadata(sharers=Mux(outgoing.isBuiltInType(), // Assumes all built-ins are uncached
+ meta.sharers,
+ dir.push(meta.sharers, dst)))(meta.p)
+ //state = meta.state) TODO: Fix 0-width wires in Chisel
+}
+
+/** The following concrete implementations of CoherencePolicy each provide the
+ * functionality of one particular protocol.
+ */
+
+/** A simple protocol with only two Client states.
+ * Data is always assumed to be dirty.
+ * Only a single client may ever have a copy of a block at a time.
+ */
+class MICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+ // Message types
+ val nAcquireTypes = 1
+ val nProbeTypes = 2
+ val nReleaseTypes = 4
+ val nGrantTypes = 1
+
+ val acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
+ val probeInvalidate :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
+ val releaseInvalidateData :: releaseCopyData :: releaseInvalidateAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
+ val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)
+
+ def releaseTypesWithData = Seq(releaseInvalidateData, releaseCopyData)
+ def grantTypesWithData = Seq(grantExclusive)
+
+ // Client states and functions
+ val nClientStates = 2
+ val clientInvalid :: clientValid :: Nil = Enum(UInt(), nClientStates)
+
+ def clientStatesWithReadPermission = Seq(clientValid)
+ def clientStatesWithWritePermission = Seq(clientValid)
+ def clientStatesWithDirtyData = Seq(clientValid)
+
+ def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid
+
+ def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = acquireExclusive
+
+ def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+ val dirty = meta.state isOneOf clientStatesWithDirtyData
+ MuxLookup(cmd, releaseCopyAck, Array(
+ M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+ M_PRODUCE -> Mux(dirty, releaseCopyData, releaseCopyAck),
+ M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+ }
+
+ def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
+ MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+ probeInvalidate -> getReleaseType(M_FLUSH, meta),
+ probeCopy -> getReleaseType(M_FLUSH, meta)))
+
+ def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = meta
+
+ def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(Mux(cmd === M_FLUSH, clientInvalid, meta.state))(meta.p)
+
+ def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(Mux(incoming.isBuiltInType(), clientInvalid, clientValid))(meta.p)
+
+ def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
+ ClientMetadata(Mux(incoming.p_type === probeInvalidate,
+ clientInvalid, meta.state))(meta.p)
+
+ // Manager states and functions:
+ val nManagerStates = 0 // We don't actually need any states for this protocol
+
+ def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers)
+
+ def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
+
+ def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
+ MuxLookup(cmd, probeCopy, Array(
+ M_FLUSH -> probeInvalidate))
+
+ def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(),
+ MuxLookup(a.a_type, probeCopy, Array(
+ Acquire.getBlockType -> probeCopy,
+ Acquire.putBlockType -> probeInvalidate,
+ Acquire.getType -> probeCopy,
+ Acquire.putType -> probeInvalidate,
+ Acquire.getPrefetchType -> probeCopy,
+ Acquire.putPrefetchType -> probeInvalidate,
+ Acquire.putAtomicType -> probeInvalidate)),
+ probeInvalidate)
+
+ def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive)
+ def getExclusiveGrantType(): UInt = grantExclusive
+
+ def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
+ val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
+ MuxCase(meta, Array(
+ incoming.is(releaseInvalidateData) -> popped,
+ incoming.is(releaseInvalidateAck) -> popped))
+ }
+}
+
+/** A simple protocol with only three Client states.
+ * Data is marked as dirty when written.
+ * Only a single client may ever have a copy of a block at a time.
+ */
+class MEICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+ // Message types
+ val nAcquireTypes = 2
+ val nProbeTypes = 3
+ val nReleaseTypes = 6
+ val nGrantTypes = 1
+
+ val acquireExclusiveClean :: acquireExclusiveDirty :: Nil = Enum(UInt(), nAcquireTypes)
+ val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
+ val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
+ val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)
+
+ def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
+ def grantTypesWithData = Seq(grantExclusive)
+
+ // Client states and functions
+ val nClientStates = 3
+ val clientInvalid :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)
+
+ def clientStatesWithReadPermission = Seq(clientExclusiveClean, clientExclusiveDirty)
+ def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty)
+ def clientStatesWithDirtyData = Seq(clientExclusiveDirty)
+
+ def isValid (meta: ClientMetadata) = meta.state =/= clientInvalid
+
+ def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
+ Mux(isWriteIntent(cmd), acquireExclusiveDirty, acquireExclusiveClean)
+
+ def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+ val dirty = meta.state isOneOf clientStatesWithDirtyData
+ MuxLookup(cmd, releaseCopyAck, Array(
+ M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+ M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
+ M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+ }
+
+ def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
+ MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+ probeInvalidate -> getReleaseType(M_FLUSH, meta),
+ probeDowngrade -> getReleaseType(M_FLUSH, meta),
+ probeCopy -> getReleaseType(M_FLUSH, meta)))
+
+ def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)
+
+ def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(cmd, meta.state, Array(
+ M_FLUSH -> clientInvalid,
+ M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state))))(meta.p)
+
+ def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ Mux(incoming.isBuiltInType(), clientInvalid,
+ Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean)))(meta.p)
+
+ def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(incoming.p_type, meta.state, Array(
+ probeInvalidate -> clientInvalid,
+ probeDowngrade -> clientInvalid,
+ probeCopy -> clientInvalid)))(meta.p)
+
+ // Manager states and functions:
+ val nManagerStates = 0 // We don't actually need any states for this protocol
+
+ def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers)
+ def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
+
+ def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
+ MuxLookup(cmd, probeCopy, Array(
+ M_FLUSH -> probeInvalidate,
+ M_PRODUCE -> probeDowngrade))
+
+ def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(),
+ MuxLookup(a.a_type, probeCopy, Array(
+ Acquire.getBlockType -> probeCopy,
+ Acquire.putBlockType -> probeInvalidate,
+ Acquire.getType -> probeCopy,
+ Acquire.putType -> probeInvalidate,
+ Acquire.getPrefetchType -> probeCopy,
+ Acquire.putPrefetchType -> probeInvalidate,
+ Acquire.putAtomicType -> probeInvalidate)),
+ probeInvalidate)
+
+ def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive)
+ def getExclusiveGrantType(): UInt = grantExclusive
+
+ def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
+ val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
+ MuxCase(meta, Array(
+ incoming.is(releaseInvalidateData) -> popped,
+ incoming.is(releaseInvalidateAck) -> popped))
+ }
+}
+
+/** A protocol with only three Client states.
+ * Data is always assumed to be dirty.
+ * Multiple clients may share read permissions on a block at the same time.
+ */
+class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+ // Message types
+ val nAcquireTypes = 2
+ val nProbeTypes = 3
+ val nReleaseTypes = 6
+ val nGrantTypes = 3
+
+ val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
+ val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
+ val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
+ val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes)
+
+ def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
+ def grantTypesWithData = Seq(grantShared, grantExclusive)
+
+ // Client states and functions
+ val nClientStates = 3
+ val clientInvalid :: clientShared :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)
+
+ def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveDirty)
+ def clientStatesWithWritePermission = Seq(clientExclusiveDirty)
+ def clientStatesWithDirtyData = Seq(clientExclusiveDirty)
+
+ def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid
+
+ def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
+ Mux(isWriteIntent(cmd), acquireExclusive, acquireShared)
+
+ def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+ val dirty = meta.state isOneOf clientStatesWithDirtyData
+ MuxLookup(cmd, releaseCopyAck, Array(
+ M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+ M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
+ M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+ }
+
+ def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
+ MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+ probeInvalidate -> getReleaseType(M_FLUSH, meta),
+ probeDowngrade -> getReleaseType(M_PRODUCE, meta),
+ probeCopy -> getReleaseType(M_PRODUCE, meta)))
+
+ def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)
+
+ def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(cmd, meta.state, Array(
+ M_FLUSH -> clientInvalid,
+ M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
+ clientShared, meta.state))))(meta.p)
+
+ def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ Mux(incoming.isBuiltInType(), clientInvalid,
+ MuxLookup(incoming.g_type, clientInvalid, Array(
+ grantShared -> clientShared,
+ grantExclusive -> clientExclusiveDirty,
+ grantExclusiveAck -> clientExclusiveDirty))))(meta.p)
+
+ def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(incoming.p_type, meta.state, Array(
+ probeInvalidate -> clientInvalid,
+ probeDowngrade -> clientShared,
+ probeCopy -> clientShared)))(meta.p)
+
+ // Manager states and functions:
+ val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing
+ // only a single sharer (also would need
+ // notification msg to track clean drops)
+ // Also could avoid probes on outer WBs.
+
+ def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
+ Mux(dir.none(meta.sharers), Bool(false),
+ Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
+ Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))
+
+ def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
+
+ def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
+ MuxLookup(cmd, probeCopy, Array(
+ M_FLUSH -> probeInvalidate,
+ M_PRODUCE -> probeDowngrade))
+
+ def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(),
+ MuxLookup(a.a_type, probeCopy, Array(
+ Acquire.getBlockType -> probeCopy,
+ Acquire.putBlockType -> probeInvalidate,
+ Acquire.getType -> probeCopy,
+ Acquire.putType -> probeInvalidate,
+ Acquire.getPrefetchType -> probeCopy,
+ Acquire.putPrefetchType -> probeInvalidate,
+ Acquire.putAtomicType -> probeInvalidate)),
+ MuxLookup(a.a_type, probeCopy, Array(
+ acquireShared -> probeDowngrade,
+ acquireExclusive -> probeInvalidate)))
+
+ def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
+ Mux(a.a_type === acquireShared,
+ Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
+ grantExclusive))
+ def getExclusiveGrantType(): UInt = grantExclusive
+
+ def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
+ val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
+ MuxCase(meta, Array(
+ incoming.is(releaseInvalidateData) -> popped,
+ incoming.is(releaseInvalidateAck) -> popped))
+ }
+}
+
+/** A protocol with four Client states.
+ * Data is marked as dirty when written.
+ * Multiple clients may share read permissions on a block at the same time.
+ */
+class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+ // Message types
+ val nAcquireTypes = 2
+ val nProbeTypes = 3
+ val nReleaseTypes = 6
+ val nGrantTypes = 3
+
+ val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
+ val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
+ val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
+ val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes)
+
+ def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
+ def grantTypesWithData = Seq(grantShared, grantExclusive)
+
+ // Client states and functions
+ val nClientStates = 4
+ val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)
+
+ def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty)
+ def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty)
+ def clientStatesWithDirtyData = Seq(clientExclusiveDirty)
+
+ def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid
+
+ def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
+ Mux(isWriteIntent(cmd), acquireExclusive, acquireShared)
+
+ def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+ val dirty = meta.state isOneOf clientStatesWithDirtyData
+ MuxLookup(cmd, releaseCopyAck, Array(
+ M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+ M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
+ M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+ }
+
+ def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
+ MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+ probeInvalidate -> getReleaseType(M_FLUSH, meta),
+ probeDowngrade -> getReleaseType(M_PRODUCE, meta),
+ probeCopy -> getReleaseType(M_PRODUCE, meta)))
+
+ def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)
+
+ def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(cmd, meta.state, Array(
+ M_FLUSH -> clientInvalid,
+ M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
+ clientShared, meta.state),
+ M_CLEAN -> Mux(meta.state === clientExclusiveDirty,
+ clientExclusiveClean, meta.state))))(meta.p)
+
+ def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ Mux(incoming.isBuiltInType(), clientInvalid,
+ MuxLookup(incoming.g_type, clientInvalid, Array(
+ grantShared -> clientShared,
+ grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean),
+ grantExclusiveAck -> clientExclusiveDirty))))(meta.p)
+
+ def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(incoming.p_type, meta.state, Array(
+ probeInvalidate -> clientInvalid,
+ probeDowngrade -> clientShared,
+ probeCopy -> clientShared)))(meta.p)
+
+ // Manager states and functions:
+ val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing
+ // only a single sharer (also would need
+ // notification msg to track clean drops)
+ // Also could avoid probes on outer WBs.
+
+ def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
+ Mux(dir.none(meta.sharers), Bool(false),
+ Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
+ Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))
+
+ def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
+
+ def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
+ MuxLookup(cmd, probeCopy, Array(
+ M_FLUSH -> probeInvalidate,
+ M_PRODUCE -> probeDowngrade))
+
+ def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(),
+ MuxLookup(a.a_type, probeCopy, Array(
+ Acquire.getBlockType -> probeCopy,
+ Acquire.putBlockType -> probeInvalidate,
+ Acquire.getType -> probeCopy,
+ Acquire.putType -> probeInvalidate,
+ Acquire.getPrefetchType -> probeCopy,
+ Acquire.putPrefetchType -> probeInvalidate,
+ Acquire.putAtomicType -> probeInvalidate)),
+ MuxLookup(a.a_type, probeCopy, Array(
+ acquireShared -> probeDowngrade,
+ acquireExclusive -> probeInvalidate)))
+
+ def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
+ Mux(a.a_type === acquireShared,
+ Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
+ grantExclusive))
+ def getExclusiveGrantType(): UInt = grantExclusive
+
+ def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
+ val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
+ MuxCase(meta, Array(
+ incoming.is(releaseInvalidateData) -> popped,
+ incoming.is(releaseInvalidateAck) -> popped))
+ }
+}
+
+class MigratoryCoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+ // Message types
+ val nAcquireTypes = 3
+ val nProbeTypes = 4
+ val nReleaseTypes = 10
+ val nGrantTypes = 4
+
+ val acquireShared :: acquireExclusive :: acquireInvalidateOthers :: Nil = Enum(UInt(), nAcquireTypes)
+ val probeInvalidate :: probeDowngrade :: probeCopy :: probeInvalidateOthers :: Nil = Enum(UInt(), nProbeTypes)
+ val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: releaseDowngradeDataMigratory :: releaseDowngradeAckHasCopy :: releaseInvalidateDataMigratory :: releaseInvalidateAckMigratory :: Nil = Enum(UInt(), nReleaseTypes)
+ val grantShared :: grantExclusive :: grantExclusiveAck :: grantReadMigratory :: Nil = Enum(UInt(), nGrantTypes)
+
+ def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData, releaseInvalidateDataMigratory, releaseDowngradeDataMigratory)
+ def grantTypesWithData = Seq(grantShared, grantExclusive, grantReadMigratory)
+
+ // Client states and functions
+ val nClientStates = 7
+ val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: clientSharedByTwo :: clientMigratoryClean :: clientMigratoryDirty :: Nil = Enum(UInt(), nClientStates)
+
+ def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty, clientSharedByTwo, clientMigratoryClean, clientMigratoryDirty)
+ def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty, clientMigratoryClean, clientMigratoryDirty)
+ def clientStatesWithDirtyData = Seq(clientExclusiveDirty, clientMigratoryDirty)
+
+ def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid
+
+ def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
+ Mux(isWriteIntent(cmd),
+ Mux(meta.state === clientInvalid, acquireExclusive, acquireInvalidateOthers),
+ acquireShared)
+
+ def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+ val dirty = meta.state isOneOf clientStatesWithDirtyData
+ MuxLookup(cmd, releaseCopyAck, Array(
+ M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+ M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
+ M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+ }
+
+ def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = {
+ val dirty = meta.state isOneOf clientStatesWithDirtyData
+ val with_data = MuxLookup(incoming.p_type, releaseInvalidateData, Array(
+ probeInvalidate -> Mux(meta.state isOneOf (clientExclusiveDirty, clientMigratoryDirty),
+ releaseInvalidateDataMigratory, releaseInvalidateData),
+ probeDowngrade -> Mux(meta.state === clientMigratoryDirty,
+ releaseDowngradeDataMigratory, releaseDowngradeData),
+ probeCopy -> releaseCopyData))
+ val without_data = MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+ probeInvalidate -> Mux(clientExclusiveClean === meta.state,
+ releaseInvalidateAckMigratory, releaseInvalidateAck),
+ probeInvalidateOthers -> Mux(clientSharedByTwo === meta.state,
+ releaseInvalidateAckMigratory, releaseInvalidateAck),
+ probeDowngrade -> Mux(meta.state =/= clientInvalid,
+ releaseDowngradeAckHasCopy, releaseDowngradeAck),
+ probeCopy -> Mux(meta.state =/= clientInvalid,
+ releaseDowngradeAckHasCopy, releaseDowngradeAck)))
+ Mux(dirty, with_data, without_data)
+ }
+
+ def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ Mux(isWrite(cmd), MuxLookup(meta.state, clientExclusiveDirty, Array(
+ clientExclusiveClean -> clientExclusiveDirty,
+ clientMigratoryClean -> clientMigratoryDirty)),
+ meta.state))(meta.p)
+
+ def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ MuxLookup(cmd, meta.state, Array(
+ M_FLUSH -> clientInvalid,
+ M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
+ clientShared, meta.state),
+ M_CLEAN -> MuxLookup(meta.state, meta.state, Array(
+ clientExclusiveDirty -> clientExclusiveClean,
+ clientMigratoryDirty -> clientMigratoryClean)))))(meta.p)
+
+ def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
+ ClientMetadata(
+ Mux(incoming.isBuiltInType(), clientInvalid,
+ MuxLookup(incoming.g_type, clientInvalid, Array(
+ grantShared -> clientShared,
+ grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean),
+ grantExclusiveAck -> clientExclusiveDirty,
+ grantReadMigratory -> Mux(isWrite(cmd),
+ clientMigratoryDirty, clientMigratoryClean)))))(meta.p)
+
+ def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = {
+ val downgradeState = MuxLookup(meta.state, clientShared, Array(
+ clientExclusiveClean -> clientSharedByTwo,
+ clientExclusiveDirty -> clientSharedByTwo,
+ clientSharedByTwo -> clientShared,
+ clientMigratoryClean -> clientSharedByTwo,
+ clientMigratoryDirty -> clientInvalid))
+ ClientMetadata(
+ MuxLookup(incoming.p_type, meta.state, Array(
+ probeInvalidate -> clientInvalid,
+ probeInvalidateOthers -> clientInvalid,
+ probeDowngrade -> downgradeState,
+ probeCopy -> downgradeState)))(meta.p)
+ }
+
+ // Manager states and functions:
+ val nManagerStates = 0 // TODO: we could add some states to reduce the number of message types
+
+ def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
+ Mux(dir.none(meta.sharers), Bool(false),
+ Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
+ Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))
+
+ def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
+
+ def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
+ MuxLookup(cmd, probeCopy, Array(
+ M_FLUSH -> probeInvalidate,
+ M_PRODUCE -> probeDowngrade))
+
+ def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(),
+ MuxLookup(a.a_type, probeCopy, Array(
+ Acquire.getBlockType -> probeCopy,
+ Acquire.putBlockType -> probeInvalidate,
+ Acquire.getType -> probeCopy,
+ Acquire.putType -> probeInvalidate,
+ Acquire.getPrefetchType -> probeCopy,
+ Acquire.putPrefetchType -> probeInvalidate,
+ Acquire.putAtomicType -> probeInvalidate)),
+ MuxLookup(a.a_type, probeCopy, Array(
+ acquireShared -> probeDowngrade,
+ acquireExclusive -> probeInvalidate,
+ acquireInvalidateOthers -> probeInvalidateOthers)))
+
+ def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
+ Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
+ MuxLookup(a.a_type, grantShared, Array(
+ acquireShared -> Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
+ acquireExclusive -> grantExclusive,
+ acquireInvalidateOthers -> grantExclusiveAck))) //TODO: add this to MESI for broadcast?
+ def getExclusiveGrantType(): UInt = grantExclusive
+
+ def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
+ val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
+ MuxCase(meta, Array(
+ incoming.is(releaseInvalidateData) -> popped,
+ incoming.is(releaseInvalidateAck) -> popped,
+ incoming.is(releaseInvalidateDataMigratory) -> popped,
+ incoming.is(releaseInvalidateAckMigratory) -> popped))
+ }
+}
diff --git a/uncore/src/main/scala/converters/Ahb.scala b/uncore/src/main/scala/converters/Ahb.scala
new file mode 100644
index 00000000..0fca9517
--- /dev/null
+++ b/uncore/src/main/scala/converters/Ahb.scala
@@ -0,0 +1,425 @@
+package uncore.converters
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import uncore.util._
+import uncore.constants._
+import cde.{Parameters, Field}
+import HastiConstants._
+
+/* We need to translate TileLink requests into operations we can actually execute on AHB.
+ * The general plan of attack is:
+ * get => one AHB=>TL read
+ * put => [multiple AHB write fragments=>nil], one AHB write=>TL
+ * getBlock => AHB burst reads =>TL
+ * putBlock => AHB burst writes=>TL
+ * getPrefetch => noop=>TL
+ * putPrefetch => noop=>TL
+ * putAtomic => one AHB=>TL read, one idle, one AHB atom_write=>nil, one idle
+ *
+ * This requires that we support a pipeline of optional AHB requests with optional TL responses
+ */
+class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO
+ with HasGrantType
+ with HasClientTransactionId
+ with HasTileLinkBeatId {
+ val executeAHB = Bool()
+ val respondTL = Bool()
+ val latchAtom = Bool()
+ val firstBurst = Bool()
+ val finalBurst = Bool()
+ val cmd = Bits(width = M_SZ) // atomic op
+}
+
+// AHB stage1: translate TileLink Acquires into AHBRequests
+class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
+ with HasHastiParameters
+ with HasTileLinkParameters
+ with HasAddrMapParameters {
+ val io = new Bundle {
+ val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe
+ val request = new DecoupledIO(new AHBRequestIO)
+ }
+
+ // Match the AHB burst with a TileLink {Put,Get}Block
+ val burstSize = tlDataBeats match {
+ case 1 => HBURST_SINGLE
+ // case 2 not supported by AHB
+ case 4 => HBURST_WRAP4
+ case 8 => HBURST_WRAP8
+ case 16 => HBURST_WRAP16
+ case _ => throw new java.lang.AssertionError("TileLink beats unsupported by AHB")
+ }
+
+ // Bursts start at 0 and wrap-around back to 0
+ val finalBurst = UInt(tlDataBeats-1, width = log2Up(tlDataBeats)).asUInt
+ val firstBurst = UInt(0, width = log2Up(tlDataBeats))
+ val next_wmask = Wire(UInt(width = tlDataBytes)) // calculated below
+
+ // State variables for processing more complicated TileLink Acquires
+ val s_atom_r :: s_atom_idle1 :: s_atom_w :: s_atom_idle2 :: Nil = Enum(UInt(), 4)
+ val atom_state = Reg(init = s_atom_r) // never changes if !supportAtomics
+ val done_wmask = Reg(init = UInt(0, width = tlDataBytes))
+ val burst = Reg(init = firstBurst)
+
+ // Grab some view of the TileLink acquire
+ val acq_wmask = io.acquire.bits.wmask()
+ val isReadBurst = io.acquire.bits.is(Acquire.getBlockType)
+ val isWriteBurst = io.acquire.bits.is(Acquire.putBlockType)
+ val isBurst = isWriteBurst || isReadBurst
+ val isAtomic = io.acquire.bits.is(Acquire.putAtomicType) && Bool(supportAtomics)
+ val isPut = io.acquire.bits.is(Acquire.putType)
+
+ // Final states?
+ val last_wmask = next_wmask === acq_wmask
+ val last_atom = atom_state === s_atom_idle2
+ val last_burst = burst === finalBurst
+
+ // Block the incoming request until we've fully consumed it
+ // NOTE: the outgoing grant.valid may happen while acquire.ready is still false;
+ // for this reason it is essential to have a Queue or a Pipe in front of acquire
+ io.acquire.ready := io.request.ready && MuxLookup(io.acquire.bits.a_type, Bool(true), Array(
+ Acquire.getType -> Bool(true),
+ Acquire.getBlockType -> last_burst, // hold it until the last burst beat
+ Acquire.putType -> last_wmask, // only accept the put if we can fully consume its wmask
+ Acquire.putBlockType -> Bool(true),
+ Acquire.putAtomicType -> last_atom, // atomic operation stages complete
+ Acquire.getPrefetchType -> Bool(true),
+ Acquire.putPrefetchType -> Bool(true)))
+
+ // Advance the fragment state
+ when (io.request.ready && io.acquire.valid && isPut) {
+ when (last_wmask) { // if this was the last fragment, restart FSM
+ done_wmask := UInt(0)
+ } .otherwise {
+ done_wmask := next_wmask
+ }
+ }
+
+ // Advance the burst state
+ // We assume here that TileLink gives us all putBlock beats with nothing between them
+ when (io.request.ready && io.acquire.valid && isBurst) {
+ when (last_burst) {
+ burst := UInt(0)
+ } .otherwise {
+ burst := burst + UInt(1)
+ }
+ }
+
+ // Advance the atomic state machine
+ when (io.request.ready && io.acquire.valid && isAtomic) {
+ switch (atom_state) {
+ is (s_atom_r) { atom_state := s_atom_idle1 }
+ is (s_atom_idle1) { atom_state := s_atom_w } // idle1 => AMOALU runs on a different clock than AHB slave read
+ is (s_atom_w) { atom_state := s_atom_idle2 }
+ is (s_atom_idle2) { atom_state := s_atom_r } // idle2 state is required by AHB after hmastlock is lowered
+ }
+ }
+
+ // Returns (range=0, range=-1, aligned_wmask, size)
+ def mask_helper(in_0 : Bool, range : UInt): (Bool, Bool, UInt, UInt) = {
+ val len = range.getWidth
+ if (len == 1) {
+ (range === UInt(0), range === UInt(1), in_0.asUInt() & range, UInt(0))
+ } else {
+ val mid = len / 2
+ val lo = range(mid-1, 0)
+ val hi = range(len-1, mid)
+ val (lo_0, lo_1, lo_m, lo_s) = mask_helper(in_0, lo)
+ val (hi_0, hi_1, hi_m, hi_s) = mask_helper(in_0 && lo_0, hi)
+ val out_0 = lo_0 && hi_0
+ val out_1 = lo_1 && hi_1
+ val out_m = Cat(hi_m, lo_m) | Fill(len, (in_0 && out_1).asUInt())
+ val out_s = Mux(out_1, UInt(log2Up(len)), Mux(lo_0, hi_s, lo_s))
+ (out_0, out_1, out_m, out_s)
+ }
+ }
+
+ val pending_wmask = acq_wmask & ~done_wmask
+ val put_addr = PriorityEncoder(pending_wmask)
+ val (wmask_0, _, exec_wmask, put_size) = mask_helper(Bool(true), pending_wmask)
+ next_wmask := done_wmask | exec_wmask
+
+ // Calculate the address, with consideration to put fragments and bursts
+ val addr_block = io.acquire.bits.addr_block
+ val addr_beatin= io.acquire.bits.addr_beat
+ val addr_burst = Mux(isReadBurst, addr_beatin + burst, addr_beatin)
+ val addr_byte = Mux(isPut, put_addr, io.acquire.bits.addr_byte())
+ val addr_beat = Mux(isWriteBurst, UInt(0), addr_burst)
+ val ahbAddr = Cat(addr_block, addr_burst, addr_byte)
+ val ahbSize = Mux(isPut, put_size, Mux(isBurst, UInt(log2Ceil(tlDataBytes)), io.acquire.bits.op_size()))
+
+ val ahbBurst = MuxLookup(io.acquire.bits.a_type, HBURST_SINGLE, Array(
+ Acquire.getType -> HBURST_SINGLE,
+ Acquire.getBlockType -> burstSize,
+ Acquire.putType -> HBURST_SINGLE,
+ Acquire.putBlockType -> burstSize,
+ Acquire.putAtomicType -> HBURST_SINGLE,
+ Acquire.getPrefetchType -> HBURST_SINGLE,
+ Acquire.putPrefetchType -> HBURST_SINGLE))
+
+ val ahbWrite = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
+ Acquire.getType -> Bool(false),
+ Acquire.getBlockType -> Bool(false),
+ Acquire.putType -> Bool(true),
+ Acquire.putBlockType -> Bool(true),
+ Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
+ s_atom_r -> Bool(false),
+ s_atom_idle1 -> Bool(false), // don't care
+ s_atom_w -> Bool(true),
+ s_atom_idle2 -> Bool(true))), // don't care
+ Acquire.getPrefetchType -> Bool(false), // don't care
+ Acquire.putPrefetchType -> Bool(true))) // don't care
+
+ val ahbExecute = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
+ Acquire.getType -> Bool(true),
+ Acquire.getBlockType -> Bool(true),
+ Acquire.putType -> !wmask_0, // handle the case of a Put with no bytes!
+ Acquire.putBlockType -> Bool(true),
+ Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
+ s_atom_r -> Bool(true),
+ s_atom_idle1 -> Bool(false),
+ s_atom_w -> Bool(true),
+ s_atom_idle2 -> Bool(false))),
+ Acquire.getPrefetchType -> Bool(false),
+ Acquire.putPrefetchType -> Bool(false)))
+
+ val respondTL = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
+ Acquire.getType -> Bool(true),
+ Acquire.getBlockType -> Bool(true),
+ Acquire.putType -> last_wmask,
+ Acquire.putBlockType -> last_burst,
+ Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
+ s_atom_r -> Bool(true), // they want the old data
+ s_atom_idle1 -> Bool(false),
+ s_atom_w -> Bool(false),
+ s_atom_idle2 -> Bool(false))),
+ Acquire.getPrefetchType -> Bool(true),
+ Acquire.putPrefetchType -> Bool(true)))
+
+ io.request.valid := io.acquire.valid
+ io.request.bits.htrans := HTRANS_IDLE // unused/ignored
+ io.request.bits.haddr := ahbAddr
+ io.request.bits.hmastlock := isAtomic && atom_state =/= s_atom_idle2
+ io.request.bits.hwrite := ahbWrite
+ io.request.bits.hburst := ahbBurst
+ io.request.bits.hsize := ahbSize
+ io.request.bits.hprot := HPROT_DATA | HPROT_PRIVILEGED
+ io.request.bits.hwdata := io.acquire.bits.data
+ io.request.bits.executeAHB := ahbExecute
+ io.request.bits.respondTL := respondTL
+ io.request.bits.latchAtom := isAtomic && atom_state === s_atom_r
+ io.request.bits.firstBurst := burst === firstBurst
+ io.request.bits.finalBurst := burst === finalBurst || !isBurst
+ io.request.bits.cmd := io.acquire.bits.op_code()
+ io.request.bits.is_builtin_type := Bool(true)
+ io.request.bits.g_type := io.acquire.bits.getBuiltInGrantType()
+ io.request.bits.client_xact_id := io.acquire.bits.client_xact_id
+ io.request.bits.addr_beat := addr_beat
+
+ val debugBurst = Reg(UInt())
+ when (io.request.valid) {
+ debugBurst := addr_burst - burst
+ }
+
+ // We only support built-in TileLink requests
+ assert(!io.acquire.valid || io.acquire.bits.is_builtin_type, "AHB bridge only supports builtin TileLink types")
+ // Ensure alignment of address to size
+ assert(!io.acquire.valid || (ahbAddr & ((UInt(1) << ahbSize) - UInt(1))) === UInt(0), "TileLink operation misaligned")
+ // If this is a putBlock, make sure it moves properly
+ assert(!io.acquire.valid || !isBurst || burst === firstBurst || debugBurst === addr_burst - burst, "TileLink putBlock beats not sequential")
+ // We better not get an incomplete TileLink acquire
+ assert(!io.acquire.valid || isBurst || burst === firstBurst, "TileLink never completed a putBlock")
+ // If we disabled atomic support, we better not see a request
+ assert(!io.acquire.bits.is(Acquire.putAtomicType) || Bool(supportAtomics))
+}
+
+// AHB stage2: execute AHBRequests
+class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
+ with HasHastiParameters
+ with HasTileLinkParameters
+ with HasAddrMapParameters {
+ val io = new Bundle {
+ val request = new DecoupledIO(new AHBRequestIO).flip
+ val grant = new DecoupledIO(new Grant)
+ val ahb = new HastiMasterIO()
+ }
+
+ // All AHB outputs are registered (they might be IOs)
+ val midBurst = Reg(init = Bool(false))
+ val htrans = Reg(init = HTRANS_IDLE)
+ val haddr = Reg(UInt())
+ val hmastlock = Reg(init = Bool(false))
+ val hwrite = Reg(Bool())
+ val hburst = Reg(UInt())
+ val hsize = Reg(init = UInt(0, width = SZ_HSIZE))
+ val hprot = Reg(UInt())
+ val hwdata0 = Reg(Bits())
+ val hwdata1 = Reg(Bits())
+ val hrdata = Reg(Bits())
+
+ io.ahb.htrans := htrans
+ io.ahb.haddr := haddr
+ io.ahb.hmastlock := hmastlock
+ io.ahb.hwrite := hwrite
+ io.ahb.hburst := hburst
+ io.ahb.hsize := hsize
+ io.ahb.hprot := hprot
+ io.ahb.hwdata := hwdata1 // one cycle after the address phase
+
+ // TileLink response data needed in data phase
+ val respondTL0 = Reg(init = Bool(false))
+ val respondTL1 = Reg(init = Bool(false))
+ val latchAtom0 = Reg(init = Bool(false))
+ val latchAtom1 = Reg(init = Bool(false))
+ val executeAHB0 = Reg(init = Bool(false))
+ val executeAHB1 = Reg(init = Bool(false))
+ val bubble = Reg(init = Bool(true)) // nothing useful in address phase
+ val cmd = Reg(Bits())
+ val g_type0 = Reg(UInt())
+ val g_type1 = Reg(UInt())
+ val client_xact_id0 = Reg(Bits())
+ val client_xact_id1 = Reg(Bits())
+ val addr_beat0 = Reg(UInt())
+ val addr_beat1 = Reg(UInt())
+ val grant1 = Reg(new Grant)
+
+ // It is allowed to progress from Idle/Busy during a wait state
+ val addrReady = io.ahb.hready || bubble || (!executeAHB1 && !executeAHB0)
+ val dataReady = io.ahb.hready || !executeAHB1
+
+ // Only accept a new AHBRequest if we have enough buffer space in the pad
+ // to accommodate a persistent drop in TileLink's grant.ready
+ io.request.ready := addrReady && io.grant.ready
+
+ // htrans must be updated even if no request is valid
+ when (addrReady) {
+ when (io.request.fire() && io.request.bits.executeAHB) {
+ midBurst := !io.request.bits.finalBurst
+ when (io.request.bits.firstBurst) {
+ htrans := HTRANS_NONSEQ
+ } .otherwise {
+ htrans := HTRANS_SEQ
+ }
+ } .otherwise {
+ when (midBurst) {
+ htrans := HTRANS_BUSY
+ } .otherwise {
+ htrans := HTRANS_IDLE
+ }
+ }
+ }
+
+ // Address phase, clear respondTL when we have nothing to do
+ when (addrReady) {
+ when (io.request.fire()) {
+ respondTL0 := io.request.bits.respondTL
+ latchAtom0 := io.request.bits.latchAtom
+ executeAHB0:= io.request.bits.executeAHB
+ bubble := Bool(false)
+ } .otherwise {
+ respondTL0 := Bool(false)
+ latchAtom0 := Bool(false)
+ executeAHB0:= Bool(false)
+ bubble := Bool(true) // an atom-injected Idle is not a bubble!
+ }
+ }
+
+ // Transfer bulk address phase
+ when (io.request.fire()) {
+ haddr := io.request.bits.haddr
+ hmastlock := io.request.bits.hmastlock
+ hwrite := io.request.bits.hwrite
+ hburst := io.request.bits.hburst
+ hsize := io.request.bits.hsize
+ hprot := io.request.bits.hprot
+ hwdata0 := io.request.bits.hwdata
+ cmd := io.request.bits.cmd
+ g_type0 := io.request.bits.g_type
+ client_xact_id0 := io.request.bits.client_xact_id
+ addr_beat0 := io.request.bits.addr_beat
+ }
+
+ // Execute Atomic ops; unused and optimized away if !supportAtomics
+ val amo_p = p.alterPartial({
+ case CacheBlockOffsetBits => hastiAddrBits
+ case AmoAluOperandBits => hastiDataBits
+ })
+ val alu = Module(new AMOALU(rhsIsAligned = true)(amo_p))
+ alu.io.addr := haddr
+ alu.io.cmd := cmd
+ alu.io.typ := hsize
+ alu.io.rhs := hwdata0
+ alu.io.lhs := hrdata
+
+ // Transfer bulk data phase
+ when (dataReady) {
+ when (addrReady) {
+ respondTL1 := respondTL0
+ latchAtom1 := latchAtom0
+ executeAHB1 := executeAHB0
+ } .otherwise {
+ respondTL1 := Bool(false)
+ latchAtom1 := Bool(false)
+ executeAHB1 := Bool(false)
+ }
+ hwdata1 := Mux(Bool(supportAtomics), alu.io.out, hwdata0)
+ g_type1 := g_type0
+ client_xact_id1 := client_xact_id0
+ addr_beat1 := addr_beat0
+ }
+
+ // Latch the read result for an atomic operation
+ when (dataReady && latchAtom1) {
+ hrdata := io.ahb.hrdata
+ }
+
+ // Only issue TL grant when the slave has provided data
+ io.grant.valid := dataReady && respondTL1
+ io.grant.bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = g_type1,
+ client_xact_id = client_xact_id1,
+ manager_xact_id = UInt(0),
+ addr_beat = addr_beat1,
+ data = io.ahb.hrdata)
+
+ // We cannot support errors from AHB to TileLink
+ assert(!io.ahb.hresp, "AHB hresp error detected and cannot be reported via TileLink")
+}
+
+class AHBBridge(supportAtomics: Boolean = true)(implicit val p: Parameters) extends Module
+ with HasHastiParameters
+ with HasTileLinkParameters
+ with HasAddrMapParameters {
+ val io = new Bundle {
+ val tl = new ClientUncachedTileLinkIO().flip
+ val ahb = new HastiMasterIO()
+ }
+
+ // Hasti and TileLink widths must agree at this point in the topology
+ require (tlDataBits == hastiDataBits)
+ require (p(PAddrBits) == hastiAddrBits)
+
+ // AHB does not permit bursts to cross a 1KB boundary
+ require (tlDataBits * tlDataBeats <= 1024*8)
+ // tlDataBytes must be a power of 2
+ require (1 << log2Ceil(tlDataBytes) == tlDataBytes)
+
+ // Create the sub-blocks
+ val fsm = Module(new AHBTileLinkIn(supportAtomics))
+ val bus = Module(new AHBBusMaster(supportAtomics))
+ val pad = Module(new Queue(new Grant, 4))
+
+ fsm.io.acquire <> Queue(io.tl.acquire, 2) // Pipe is also acceptable
+ bus.io.request <> fsm.io.request
+ io.ahb <> bus.io.ahb
+ io.tl.grant <> pad.io.deq
+
+ // The pad is needed to absorb AHB progress while !grant.ready
+ // We are only 'ready' if the pad has at least 3 cycles of space
+ bus.io.grant.ready := pad.io.count <= UInt(1)
+ pad.io.enq.bits := bus.io.grant.bits
+ pad.io.enq.valid := bus.io.grant.valid
+}
diff --git a/uncore/src/main/scala/converters/Nasti.scala b/uncore/src/main/scala/converters/Nasti.scala
new file mode 100644
index 00000000..fa090083
--- /dev/null
+++ b/uncore/src/main/scala/converters/Nasti.scala
@@ -0,0 +1,399 @@
+package uncore.converters
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import uncore.constants._
+import cde.Parameters
+import scala.math.min
+
+class IdMapper(val inIdBits: Int, val outIdBits: Int,
+ val forceMapping: Boolean = false)
+ (implicit val p: Parameters) extends Module {
+
+ val io = new Bundle {
+ val req = new Bundle {
+ val valid = Bool(INPUT)
+ val ready = Bool(OUTPUT)
+ val in_id = UInt(INPUT, inIdBits)
+ val out_id = UInt(OUTPUT, outIdBits)
+ }
+ val resp = new Bundle {
+ val valid = Bool(INPUT)
+ val matches = Bool(OUTPUT)
+ val out_id = UInt(INPUT, outIdBits)
+ val in_id = UInt(OUTPUT, inIdBits)
+ }
+ }
+ val maxInXacts = 1 << inIdBits
+
+ if (inIdBits <= outIdBits && !forceMapping) {
+ io.req.ready := Bool(true)
+ io.req.out_id := io.req.in_id
+ io.resp.matches := Bool(true)
+ io.resp.in_id := io.resp.out_id
+ } else {
+ val nInXacts = 1 << inIdBits
+ // No point in allowing more out xacts than in xacts
+ val nOutXacts = min(1 << outIdBits, nInXacts)
+
+ val out_id_free = Reg(init = Vec.fill(nOutXacts){Bool(true)})
+ val in_id_free = Reg(init = Vec.fill(nInXacts){Bool(true)})
+ val next_out_id = PriorityEncoder(out_id_free)
+ val id_mapping = Reg(Vec(nOutXacts, UInt(0, inIdBits)))
+
+ val req_fire = io.req.valid && io.req.ready
+ when (req_fire) {
+ out_id_free(io.req.out_id) := Bool(false)
+ in_id_free(io.req.in_id) := Bool(false)
+ id_mapping(io.req.out_id) := io.req.in_id
+ }
+ when (io.resp.valid) {
+ out_id_free(io.resp.out_id) := Bool(true)
+ in_id_free(io.resp.in_id) := Bool(true)
+ }
+
+ io.req.ready := out_id_free.reduce(_ || _) && in_id_free(io.req.in_id)
+ io.req.out_id := next_out_id
+
+ io.resp.in_id := id_mapping(io.resp.out_id)
+ io.resp.matches := !out_id_free(io.resp.out_id)
+ }
+}
+
+class NastiIOTileLinkIOConverterInfo(implicit p: Parameters) extends TLBundle()(p) {
+ val addr_beat = UInt(width = tlBeatAddrBits)
+ val subblock = Bool()
+}
+
+class NastiIOTileLinkIOConverter(implicit p: Parameters) extends TLModule()(p)
+ with HasNastiParameters {
+ val io = new Bundle {
+ val tl = new ClientUncachedTileLinkIO().flip
+ val nasti = new NastiIO
+ }
+
+ private def opSizeToXSize(ops: UInt) = MuxLookup(ops, UInt("b111"), Seq(
+ MT_B -> UInt(0),
+ MT_BU -> UInt(0),
+ MT_H -> UInt(1),
+ MT_HU -> UInt(1),
+ MT_W -> UInt(2),
+ MT_WU -> UInt(2),
+ MT_D -> UInt(3),
+ MT_Q -> UInt(log2Up(tlDataBytes))))
+
+ val dataBits = tlDataBits*tlDataBeats
+ require(tlDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree") // TODO: remove this restriction
+ require(tlDataBeats < (1 << nastiXLenBits), "Can't have that many beats")
+
+ val has_data = io.tl.acquire.bits.hasData()
+
+ val is_subblock = io.tl.acquire.bits.isSubBlockType()
+ val is_multibeat = io.tl.acquire.bits.hasMultibeatData()
+ val (tl_cnt_out, tl_wrap_out) = Counter(
+ io.tl.acquire.fire() && is_multibeat, tlDataBeats)
+
+ val get_valid = io.tl.acquire.valid && !has_data
+ val put_valid = io.tl.acquire.valid && has_data
+
+ // Reorder queue saves extra information needed to send correct
+ // grant back to TL client
+ val roqIdBits = min(tlClientXactIdBits, nastiXIdBits)
+ val roq = Module(new ReorderQueue(
+ new NastiIOTileLinkIOConverterInfo, roqIdBits))
+
+ val get_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
+ val put_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
+
+ val get_id_ready = get_id_mapper.io.req.ready
+ val put_id_mask = is_subblock || io.tl.acquire.bits.addr_beat === UInt(0)
+ val put_id_ready = put_id_mapper.io.req.ready || !put_id_mask
+
+ // For Get/GetBlock, make sure Reorder queue can accept new entry
+ val get_helper = DecoupledHelper(
+ get_valid,
+ roq.io.enq.ready,
+ io.nasti.ar.ready,
+ get_id_ready)
+
+ val w_inflight = Reg(init = Bool(false))
+ val w_id = Reg(init = UInt(0, nastiXIdBits))
+
+ // For Put/PutBlock, make sure aw and w channel are both ready before
+ // we send the first beat
+ val aw_ready = w_inflight || io.nasti.aw.ready
+ val put_helper = DecoupledHelper(
+ put_valid,
+ aw_ready,
+ io.nasti.w.ready,
+ put_id_ready)
+
+ val (nasti_cnt_out, nasti_wrap_out) = Counter(
+ io.nasti.r.fire() && !roq.io.deq.data.subblock, tlDataBeats)
+
+ roq.io.enq.valid := get_helper.fire(roq.io.enq.ready)
+ roq.io.enq.bits.tag := io.nasti.ar.bits.id
+ roq.io.enq.bits.data.addr_beat := io.tl.acquire.bits.addr_beat
+ roq.io.enq.bits.data.subblock := is_subblock
+ roq.io.deq.valid := io.nasti.r.fire() && (nasti_wrap_out || roq.io.deq.data.subblock)
+ roq.io.deq.tag := io.nasti.r.bits.id
+
+ get_id_mapper.io.req.valid := get_helper.fire(get_id_ready)
+ get_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
+ get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
+ get_id_mapper.io.resp.out_id := io.nasti.r.bits.id
+
+ put_id_mapper.io.req.valid := put_helper.fire(put_id_ready, put_id_mask)
+ put_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
+ put_id_mapper.io.resp.valid := io.nasti.b.fire()
+ put_id_mapper.io.resp.out_id := io.nasti.b.bits.id
+
+ // Decompose outgoing TL Acquires into Nasti address and data channels
+ io.nasti.ar.valid := get_helper.fire(io.nasti.ar.ready)
+ io.nasti.ar.bits := NastiReadAddressChannel(
+ id = get_id_mapper.io.req.out_id,
+ addr = io.tl.acquire.bits.full_addr(),
+ size = Mux(is_subblock,
+ opSizeToXSize(io.tl.acquire.bits.op_size()),
+ UInt(log2Ceil(tlDataBytes))),
+ len = Mux(is_subblock, UInt(0), UInt(tlDataBeats - 1)))
+
+ def mask_helper(all_inside_0: Seq[Bool], defsize: Int): (Seq[Bool], UInt, UInt) = {
+ val len = all_inside_0.size
+ if (len == 1) {
+ (Seq(Bool(true)), UInt(0), UInt(defsize))
+ } else {
+ val sub_inside_0 = Seq.tabulate (len/2) { i => all_inside_0(2*i) && all_inside_0(2*i+1) }
+ val (sub_outside_0, sub_offset, sub_size) = mask_helper(sub_inside_0, defsize+1)
+ val all_outside_0 = Seq.tabulate (len) { i => sub_outside_0(i/2) && all_inside_0(i^1) }
+ val odd_outside_0 = Seq.tabulate (len/2) { i => all_outside_0(2*i+1) }
+ val odd_outside = odd_outside_0.reduce (_ || _)
+ val all_outside = all_outside_0.reduce (_ || _)
+ val offset = Cat(sub_offset, odd_outside.toBits)
+ val size = Mux(all_outside, UInt(defsize), sub_size)
+ (all_outside_0, offset, size)
+ }
+ }
+
+ val all_inside_0 = (~io.tl.acquire.bits.wmask()).toBools
+ val (_, put_offset, put_size) = mask_helper(all_inside_0, 0)
+
+ io.nasti.aw.valid := put_helper.fire(aw_ready, !w_inflight)
+ io.nasti.aw.bits := NastiWriteAddressChannel(
+ id = put_id_mapper.io.req.out_id,
+ addr = io.tl.acquire.bits.full_addr() |
+ Mux(is_multibeat, UInt(0), put_offset),
+ size = Mux(is_multibeat, UInt(log2Ceil(tlDataBytes)), put_size),
+ len = Mux(is_multibeat, UInt(tlDataBeats - 1), UInt(0)))
+
+ io.nasti.w.valid := put_helper.fire(io.nasti.w.ready)
+ io.nasti.w.bits := NastiWriteDataChannel(
+ id = w_id,
+ data = io.tl.acquire.bits.data,
+ strb = Some(io.tl.acquire.bits.wmask()),
+ last = Mux(w_inflight,
+ tl_cnt_out === UInt(tlDataBeats - 1), !is_multibeat))
+
+ io.tl.acquire.ready := Mux(has_data,
+ put_helper.fire(put_valid),
+ get_helper.fire(get_valid))
+
+ when (!w_inflight && io.tl.acquire.fire() && is_multibeat) {
+ w_inflight := Bool(true)
+ w_id := put_id_mapper.io.req.out_id
+ }
+
+ when (w_inflight) {
+ when (tl_wrap_out) { w_inflight := Bool(false) }
+ }
+
+ // Aggregate incoming NASTI responses into TL Grants
+ val (tl_cnt_in, tl_wrap_in) = Counter(
+ io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
+ val gnt_arb = Module(new LockingArbiter(new GrantToDst, 2,
+ tlDataBeats, Some((gnt: GrantToDst) => gnt.hasMultibeatData())))
+ io.tl.grant <> gnt_arb.io.out
+
+ gnt_arb.io.in(0).valid := io.nasti.r.valid
+ io.nasti.r.ready := gnt_arb.io.in(0).ready
+ gnt_arb.io.in(0).bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = Mux(roq.io.deq.data.subblock,
+ Grant.getDataBeatType, Grant.getDataBlockType),
+ client_xact_id = get_id_mapper.io.resp.in_id,
+ manager_xact_id = UInt(0),
+ addr_beat = Mux(roq.io.deq.data.subblock, roq.io.deq.data.addr_beat, tl_cnt_in),
+ data = io.nasti.r.bits.data)
+
+ assert(!roq.io.deq.valid || roq.io.deq.matches,
+ "TL -> NASTI converter ReorderQueue: NASTI tag error")
+ assert(!gnt_arb.io.in(0).valid || get_id_mapper.io.resp.matches,
+ "TL -> NASTI ID Mapper: NASTI tag error")
+
+ gnt_arb.io.in(1).valid := io.nasti.b.valid
+ io.nasti.b.ready := gnt_arb.io.in(1).ready
+ gnt_arb.io.in(1).bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.putAckType,
+ client_xact_id = put_id_mapper.io.resp.in_id,
+ manager_xact_id = UInt(0),
+ addr_beat = UInt(0),
+ data = Bits(0))
+ assert(!gnt_arb.io.in(1).valid || put_id_mapper.io.resp.matches, "NASTI tag error")
+
+ assert(!io.nasti.r.valid || io.nasti.r.bits.resp === UInt(0), "NASTI read error")
+ assert(!io.nasti.b.valid || io.nasti.b.bits.resp === UInt(0), "NASTI write error")
+}
+
+class TileLinkIONastiIOConverter(implicit p: Parameters) extends TLModule()(p)
+ with HasNastiParameters {
+ val io = new Bundle {
+ val nasti = (new NastiIO).flip
+ val tl = new ClientUncachedTileLinkIO
+ }
+
+ val (s_idle :: s_put :: Nil) = Enum(Bits(), 2)
+ val state = Reg(init = s_idle)
+
+ private val blockOffset = tlByteAddrBits + tlBeatAddrBits
+
+ val aw_req = Reg(new NastiWriteAddressChannel)
+ val w_tl_id = Reg(io.tl.acquire.bits.client_xact_id)
+
+ def is_singlebeat(chan: NastiAddressChannel): Bool =
+ chan.len === UInt(0)
+
+ def is_multibeat(chan: NastiAddressChannel): Bool =
+ chan.len === UInt(tlDataBeats - 1) && chan.size === UInt(log2Up(tlDataBytes))
+
+ def nasti_addr_block(chan: NastiAddressChannel): UInt =
+ chan.addr(nastiXAddrBits - 1, blockOffset)
+
+ def nasti_addr_beat(chan: NastiAddressChannel): UInt =
+ chan.addr(blockOffset - 1, tlByteAddrBits)
+
+ def nasti_addr_byte(chan: NastiAddressChannel): UInt =
+ chan.addr(tlByteAddrBits - 1, 0)
+
+ def nasti_operand_size(chan: NastiAddressChannel): UInt =
+ MuxLookup(chan.size, MT_Q, Seq(
+ UInt(0) -> MT_BU,
+ UInt(1) -> MT_HU,
+ UInt(2) -> MT_WU,
+ UInt(3) -> MT_D))
+
+ def size_mask(size: UInt): UInt =
+ (UInt(1) << (UInt(1) << size)) - UInt(1)
+
+ def nasti_wmask(aw: NastiWriteAddressChannel, w: NastiWriteDataChannel): UInt = {
+ val base = w.strb & size_mask(aw.size)
+ val addr_byte = nasti_addr_byte(aw)
+ w.strb & (size_mask(aw.size) << addr_byte)
+ }
+
+ def tl_last(gnt: GrantMetadata): Bool =
+ !gnt.hasMultibeatData() || gnt.addr_beat === UInt(tlDataBeats - 1)
+
+ def tl_b_grant(gnt: GrantMetadata): Bool =
+ gnt.g_type === Grant.putAckType
+
+ assert(!io.nasti.ar.valid ||
+ is_singlebeat(io.nasti.ar.bits) || is_multibeat(io.nasti.ar.bits),
+ "NASTI read transaction cannot convert to TileLInk")
+
+ assert(!io.nasti.aw.valid ||
+ is_singlebeat(io.nasti.aw.bits) || is_multibeat(io.nasti.aw.bits),
+ "NASTI write transaction cannot convert to TileLInk")
+
+ val put_count = Reg(init = UInt(0, tlBeatAddrBits))
+ val get_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
+ val put_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
+
+ when (io.nasti.aw.fire()) {
+ aw_req := io.nasti.aw.bits
+ w_tl_id := put_id_mapper.io.req.out_id
+ state := s_put
+ }
+
+ when (io.nasti.w.fire()) {
+ put_count := put_count + UInt(1)
+ when (io.nasti.w.bits.last) {
+ put_count := UInt(0)
+ state := s_idle
+ }
+ }
+
+ val get_acquire = Mux(is_multibeat(io.nasti.ar.bits),
+ GetBlock(
+ client_xact_id = get_id_mapper.io.req.out_id,
+ addr_block = nasti_addr_block(io.nasti.ar.bits)),
+ Get(
+ client_xact_id = get_id_mapper.io.req.out_id,
+ addr_block = nasti_addr_block(io.nasti.ar.bits),
+ addr_beat = nasti_addr_beat(io.nasti.ar.bits),
+ addr_byte = nasti_addr_byte(io.nasti.ar.bits),
+ operand_size = nasti_operand_size(io.nasti.ar.bits),
+ alloc = Bool(false)))
+
+ val put_acquire = Mux(is_multibeat(aw_req),
+ PutBlock(
+ client_xact_id = w_tl_id,
+ addr_block = nasti_addr_block(aw_req),
+ addr_beat = put_count,
+ data = io.nasti.w.bits.data,
+ wmask = Some(io.nasti.w.bits.strb)),
+ Put(
+ client_xact_id = w_tl_id,
+ addr_block = nasti_addr_block(aw_req),
+ addr_beat = nasti_addr_beat(aw_req),
+ data = io.nasti.w.bits.data,
+ wmask = Some(nasti_wmask(aw_req, io.nasti.w.bits))))
+
+ val get_helper = DecoupledHelper(
+ io.nasti.ar.valid,
+ get_id_mapper.io.req.ready,
+ io.tl.acquire.ready)
+
+ get_id_mapper.io.req.valid := get_helper.fire(
+ get_id_mapper.io.req.ready, state === s_idle)
+ get_id_mapper.io.req.in_id := io.nasti.ar.bits.id
+ get_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
+ get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
+
+ val aw_ok = (state === s_idle && !io.nasti.ar.valid)
+
+ put_id_mapper.io.req.valid := aw_ok && io.nasti.aw.valid
+ put_id_mapper.io.req.in_id := io.nasti.aw.bits.id
+ put_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
+ put_id_mapper.io.resp.valid := io.nasti.b.fire()
+
+ io.tl.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
+ io.tl.acquire.valid := get_helper.fire(io.tl.acquire.ready, state === s_idle) ||
+ (state === s_put && io.nasti.w.valid)
+
+ io.nasti.ar.ready := get_helper.fire(io.nasti.ar.valid, state === s_idle)
+ io.nasti.aw.ready := aw_ok && put_id_mapper.io.req.ready
+ io.nasti.w.ready := (state === s_put && io.tl.acquire.ready)
+
+ val nXacts = tlMaxClientXacts * tlMaxClientsPerPort
+
+ io.nasti.b.valid := io.tl.grant.valid && tl_b_grant(io.tl.grant.bits)
+ io.nasti.b.bits := NastiWriteResponseChannel(
+ id = put_id_mapper.io.resp.in_id)
+
+ assert(!io.nasti.b.valid || put_id_mapper.io.resp.matches,
+ "Put ID does not match")
+
+ io.nasti.r.valid := io.tl.grant.valid && !tl_b_grant(io.tl.grant.bits)
+ io.nasti.r.bits := NastiReadDataChannel(
+ id = get_id_mapper.io.resp.in_id,
+ data = io.tl.grant.bits.data,
+ last = tl_last(io.tl.grant.bits))
+
+ assert(!io.nasti.r.valid || get_id_mapper.io.resp.matches,
+ "Get ID does not match")
+
+ io.tl.grant.ready := Mux(tl_b_grant(io.tl.grant.bits),
+ io.nasti.b.ready, io.nasti.r.ready)
+}
diff --git a/uncore/src/main/scala/converters/Smi.scala b/uncore/src/main/scala/converters/Smi.scala
new file mode 100644
index 00000000..6ec47950
--- /dev/null
+++ b/uncore/src/main/scala/converters/Smi.scala
@@ -0,0 +1,32 @@
+// See LICENSE for details
+
+package uncore.converters
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import cde.Parameters
+
+/** Convert TileLink protocol to Smi protocol */
+class SmiIOTileLinkIOConverter(val dataWidth: Int, val addrWidth: Int)
+ (implicit p: Parameters) extends Module {
+ val io = new Bundle {
+ val tl = (new ClientUncachedTileLinkIO).flip
+ val smi = new SmiIO(dataWidth, addrWidth)
+ }
+
+ def decoupledNastiConnect(outer: NastiIO, inner: NastiIO) {
+ outer.ar <> Queue(inner.ar)
+ outer.aw <> Queue(inner.aw)
+ outer.w <> Queue(inner.w)
+ inner.r <> Queue(outer.r)
+ inner.b <> Queue(outer.b)
+ }
+
+ val tl2nasti = Module(new NastiIOTileLinkIOConverter())
+ val nasti2smi = Module(new SmiIONastiIOConverter(dataWidth, addrWidth))
+
+ tl2nasti.io.tl <> io.tl
+ decoupledNastiConnect(nasti2smi.io.nasti, tl2nasti.io.nasti)
+ io.smi <> nasti2smi.io.smi
+}
diff --git a/uncore/src/main/scala/converters/Tilelink.scala b/uncore/src/main/scala/converters/Tilelink.scala
new file mode 100644
index 00000000..6cb15f91
--- /dev/null
+++ b/uncore/src/main/scala/converters/Tilelink.scala
@@ -0,0 +1,691 @@
+package uncore.converters
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import uncore.util._
+import uncore.constants._
+import cde.Parameters
+
+/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */
+object TileLinkIOWrapper {
+ // Wrap an uncached client interface into the full coherent interface.
+ def apply(tl: ClientUncachedTileLinkIO)(implicit p: Parameters): ClientTileLinkIO = {
+ val wrapper = Module(new ClientTileLinkIOWrapper)
+ wrapper.io.in <> tl
+ wrapper.io.out
+ }
+ def apply(tl: UncachedTileLinkIO)(implicit p: Parameters): TileLinkIO = {
+ val wrapper = Module(new TileLinkIOWrapper)
+ wrapper.io.in <> tl
+ wrapper.io.out
+ }
+ // Already-coherent interfaces pass through untouched.
+ def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl
+ def apply(tl: TileLinkIO): TileLinkIO = tl
+}
+
+/** Exposes an UncachedTileLinkIO as a full TileLinkIO.
+  * Acquire, grant, and finish are wired straight through; probes are
+  * unconditionally absorbed and no release is ever issued (per the file
+  * comment, this is the "safe wrapping" contract for uncached clients). */
+class TileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = new UncachedTileLinkIO().flip
+ val out = new TileLinkIO
+ }
+ io.out.acquire <> io.in.acquire
+ io.in.grant <> io.out.grant
+ io.out.finish <> io.in.finish
+ // Pin the coherence-only channels: sink all probes, never send a release.
+ io.out.probe.ready := Bool(true)
+ io.out.release.valid := Bool(false)
+}
+
+/** Exposes a ClientUncachedTileLinkIO as a ClientTileLinkIO.
+  * Same idea as TileLinkIOWrapper but for the client-facing bundle,
+  * which has no finish channel. */
+class ClientTileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = new ClientUncachedTileLinkIO().flip
+ val out = new ClientTileLinkIO
+ }
+ io.out.acquire <> io.in.acquire
+ io.in.grant <> io.out.grant
+ // Pin the coherence-only channels: sink all probes, never send a release.
+ io.out.probe.ready := Bool(true)
+ io.out.release.valid := Bool(false)
+}
+
+/** Converts a ClientTileLinkIO into a ClientUncachedTileLinkIO.
+  * Coherence acquires are rewritten as builtin whole-block gets, voluntary
+  * releases are rewritten as builtin PutBlocks, and the grants coming back
+  * are re-synthesized into the message types the wrapped client expects.
+  * Two reorder queues remember, per client_xact_id, whether an outstanding
+  * transaction originated as an acquire or as a release. */
+class ClientTileLinkIOUnwrapper(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = new ClientTileLinkIO().flip
+ val out = new ClientUncachedTileLinkIO
+ }
+
+ // Arbitrates translated acquires (port 0) and releases (port 1) onto the
+ // single outgoing acquire channel, locking across multibeat messages.
+ val acqArb = Module(new LockingRRArbiter(new Acquire, 2, tlDataBeats,
+ Some((acq: Acquire) => acq.hasMultibeatData())))
+
+ // acqRoq records whether each in-flight acquire was builtin;
+ // relRoq records whether each in-flight release was voluntary.
+ val acqRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
+ val relRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
+
+ val iacq = io.in.acquire.bits
+ val irel = io.in.release.bits
+ val ognt = io.out.grant.bits
+
+ // Bookkeeping state is only enqueued on the first beat of a message.
+ val acq_roq_enq = iacq.first()
+ val rel_roq_enq = irel.first()
+
+ val acq_roq_ready = !acq_roq_enq || acqRoq.io.enq.ready
+ val rel_roq_ready = !rel_roq_enq || relRoq.io.enq.ready
+
+ // Each input only fires when the roq and the arbiter port are both ready.
+ val acq_helper = DecoupledHelper(
+ io.in.acquire.valid,
+ acq_roq_ready,
+ acqArb.io.in(0).ready)
+
+ val rel_helper = DecoupledHelper(
+ io.in.release.valid,
+ rel_roq_ready,
+ acqArb.io.in(1).ready)
+
+ acqRoq.io.enq.valid := acq_helper.fire(acq_roq_ready, acq_roq_enq)
+ acqRoq.io.enq.bits.data := iacq.isBuiltInType()
+ acqRoq.io.enq.bits.tag := iacq.client_xact_id
+
+ // Builtin acquires pass through unchanged; coherence acquires become
+ // builtin whole-block reads (the union field gets a full-block read op).
+ acqArb.io.in(0).valid := acq_helper.fire(acqArb.io.in(0).ready)
+ acqArb.io.in(0).bits := Acquire(
+ is_builtin_type = Bool(true),
+ a_type = Mux(iacq.isBuiltInType(),
+ iacq.a_type, Acquire.getBlockType),
+ client_xact_id = iacq.client_xact_id,
+ addr_block = iacq.addr_block,
+ addr_beat = iacq.addr_beat,
+ data = iacq.data,
+ union = Mux(iacq.isBuiltInType(),
+ iacq.union, Cat(MT_Q, M_XRD, Bool(true))))
+ io.in.acquire.ready := acq_helper.fire(io.in.acquire.valid)
+
+ relRoq.io.enq.valid := rel_helper.fire(rel_roq_ready, rel_roq_enq)
+ relRoq.io.enq.bits.data := irel.isVoluntary()
+ relRoq.io.enq.bits.tag := irel.client_xact_id
+
+ // Releases (writebacks) become builtin whole-block writes.
+ acqArb.io.in(1).valid := rel_helper.fire(acqArb.io.in(1).ready)
+ acqArb.io.in(1).bits := PutBlock(
+ client_xact_id = irel.client_xact_id,
+ addr_block = irel.addr_block,
+ addr_beat = irel.addr_beat,
+ data = irel.data)
+ io.in.release.ready := rel_helper.fire(io.in.release.valid)
+
+ io.out.acquire <> acqArb.io.out
+
+ // Retire the bookkeeping entry on the final beat of each grant.
+ val grant_deq_roq = io.out.grant.fire() && ognt.last()
+
+ acqRoq.io.deq.valid := acqRoq.io.deq.matches && grant_deq_roq
+ acqRoq.io.deq.tag := ognt.client_xact_id
+
+ relRoq.io.deq.valid := !acqRoq.io.deq.matches && grant_deq_roq
+ relRoq.io.deq.tag := ognt.client_xact_id
+
+ assert(!grant_deq_roq || acqRoq.io.deq.matches || relRoq.io.deq.matches,
+ "TileLink Unwrapper: client_xact_id mismatch")
+
+ val gnt_builtin = acqRoq.io.deq.data
+ val gnt_voluntary = relRoq.io.deq.data
+
+ // Grant returned for an acquire: builtin grants pass through; grants for
+ // rewritten coherence acquires are converted back to an exclusive grant.
+ val acq_grant = Grant(
+ is_builtin_type = gnt_builtin,
+ g_type = Mux(gnt_builtin, ognt.g_type, tlCoh.getExclusiveGrantType),
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat,
+ data = ognt.data)
+
+ assert(!io.in.release.valid || io.in.release.bits.isVoluntary(), "Unwrapper can only process voluntary releases.")
+ // Grant returned for a release: always acknowledged as a voluntary ack.
+ val rel_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.voluntaryAckType, // We should only ever be working with voluntary releases
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat,
+ data = ognt.data)
+
+ // acqRoq.io.deq.matches selects which translation applies to this grant.
+ io.in.grant.valid := io.out.grant.valid
+ io.in.grant.bits := Mux(acqRoq.io.deq.matches, acq_grant, rel_grant)
+ io.out.grant.ready := io.in.grant.ready
+
+ // The uncached outer side never probes us.
+ io.in.probe.valid := Bool(false)
+}
+
+object TileLinkWidthAdapter {
+ /** Adapt `in` to the beat width of the TileLink domain named `outerId`.
+  * Instantiates a widener or narrower as needed; if the widths already
+  * match, the input interface is returned unchanged. */
+ def apply(in: ClientUncachedTileLinkIO, outerId: String)(implicit p: Parameters) = {
+ val outerDataBits = p(TLKey(outerId)).dataBitsPerBeat
+ if (outerDataBits > in.tlDataBits) {
+ val upsizer = Module(new TileLinkIOWidener(in.p(TLId), outerId))
+ upsizer.io.in <> in
+ upsizer.io.out
+ } else if (outerDataBits < in.tlDataBits) {
+ val downsizer = Module(new TileLinkIONarrower(in.p(TLId), outerId))
+ downsizer.io.in <> in
+ downsizer.io.out
+ } else { in }
+ }
+ /** Adapt `in` to `out`'s TileLink domain and connect the result to `out`.
+  * Both sides must carry the same total bits per block. */
+ def apply(out: ClientUncachedTileLinkIO, in: ClientUncachedTileLinkIO)(implicit p: Parameters): Unit = {
+ require(out.tlDataBits * out.tlDataBeats == in.tlDataBits * in.tlDataBeats)
+ out <> apply(in, out.p(TLId))
+ }
+}
+
+/** Adapts a TileLink client to an outer domain with wider data beats.
+  * `factor` inner beats make up one outer beat. PutBlocks are collected
+  * `factor` beats at a time and re-emitted as wide beats; wide block grants
+  * are sliced back into `factor` narrow beats. Single-beat gets/puts are
+  * shifted into position within the wider outer beat. */
+class TileLinkIOWidener(innerTLId: String, outerTLId: String)
+ (implicit p: Parameters) extends TLModule()(p) {
+
+ val paddrBits = p(PAddrBits)
+ val innerParams = p(TLKey(innerTLId))
+ val outerParams = p(TLKey(outerTLId))
+ val innerDataBeats = innerParams.dataBeats
+ val innerDataBits = innerParams.dataBitsPerBeat
+ val innerWriteMaskBits = innerParams.writeMaskBits
+ val innerByteAddrBits = log2Up(innerWriteMaskBits)
+ val innerMaxXacts = innerParams.maxClientXacts * innerParams.maxClientsPerPort
+ val innerXactIdBits = log2Up(innerMaxXacts)
+ val outerDataBeats = outerParams.dataBeats
+ val outerDataBits = outerParams.dataBitsPerBeat
+ val outerWriteMaskBits = outerParams.writeMaskBits
+ val outerByteAddrBits = log2Up(outerWriteMaskBits)
+ val outerBeatAddrBits = log2Up(outerDataBeats)
+ val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
+ val outerMaxClients = outerParams.maxClientsPerPort
+ val outerClientIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
+ val outerManagerIdBits = log2Up(outerParams.maxManagerXacts)
+ val outerBlockAddrBits = paddrBits - outerBlockOffset
+
+ // Outer beats must be wider, an integer multiple, and blocks equal-sized.
+ require(outerDataBeats <= innerDataBeats)
+ require(outerDataBits >= innerDataBits)
+ require(outerDataBits % innerDataBits == 0)
+ require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
+
+ val factor = innerDataBeats / outerDataBeats
+
+ val io = new Bundle {
+ val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
+ val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
+ }
+
+ val iacq = io.in.acquire.bits
+ val oacq = io.out.acquire.bits
+ val ognt = io.out.grant.bits
+ val ignt = io.in.grant.bits
+
+ // shrink:  inner PutBlock  -> fewer, wider outer beats (gather)
+ // stretch: outer GetBlock grant -> more, narrower inner beats (scatter)
+ // smallget/smallput: single-beat ops realigned within the wide beat
+ val shrink = iacq.a_type === Acquire.putBlockType
+ val stretch = ognt.g_type === Grant.getDataBlockType
+ val smallget = iacq.a_type === Acquire.getType
+ val smallput = iacq.a_type === Acquire.putType
+ val smallgnt = ognt.g_type === Grant.getDataBeatType
+
+ // State for gathering a PutBlock: `factor` inner beats of data/wmask are
+ // buffered, then sent out as one wide beat while sending_put is high.
+ val sending_put = Reg(init = Bool(false))
+ val collecting = Reg(init = Bool(false))
+ val put_block = Reg(UInt(width = outerBlockAddrBits))
+ val put_id = Reg(UInt(width = outerClientIdBits))
+ val put_data = Reg(Vec(factor, UInt(width = innerDataBits)))
+ val put_wmask = Reg(Vec(factor, UInt(width = innerWriteMaskBits)))
+ val put_allocate = Reg(Bool())
+ val (put_beat, put_done) = Counter(io.out.acquire.fire() && oacq.hasMultibeatData(), outerDataBeats)
+ val (recv_idx, recv_done) = Counter(io.in.acquire.fire() && iacq.hasMultibeatData(), factor)
+
+ // Re-slice the full physical address into outer block/beat/byte fields.
+ val in_addr = iacq.full_addr()
+ val out_addr_block = in_addr(paddrBits - 1, outerBlockOffset)
+ val out_addr_beat = in_addr(outerBlockOffset - 1, outerByteAddrBits)
+ val out_addr_byte = in_addr(outerByteAddrBits - 1, 0)
+
+ // Which inner-beat-sized slice of the wide beat a small access targets.
+ // Remembered per transaction id so the returning grant can be un-shifted.
+ val switch_addr = in_addr(outerByteAddrBits - 1, innerByteAddrBits)
+ val smallget_switch = Reg(Vec(innerMaxXacts, switch_addr))
+
+ // Shift narrow data/wmask into its slice of the wide outer beat.
+ def align_data(addr: UInt, data: UInt): UInt =
+ data << Cat(addr, UInt(0, log2Up(innerDataBits)))
+
+ def align_wmask(addr: UInt, wmask: UInt): UInt =
+ wmask << Cat(addr, UInt(0, log2Up(innerWriteMaskBits)))
+
+ val outerConfig = p.alterPartial({ case TLId => outerTLId })
+
+ val get_acquire = Get(
+ client_xact_id = iacq.client_xact_id,
+ addr_block = out_addr_block,
+ addr_beat = out_addr_beat,
+ addr_byte = out_addr_byte,
+ operand_size = iacq.op_size(),
+ alloc = iacq.allocate())(outerConfig)
+
+ val get_block_acquire = GetBlock(
+ client_xact_id = iacq.client_xact_id,
+ addr_block = out_addr_block,
+ alloc = iacq.allocate())(outerConfig)
+
+ val put_acquire = Put(
+ client_xact_id = iacq.client_xact_id,
+ addr_block = out_addr_block,
+ addr_beat = out_addr_beat,
+ data = align_data(switch_addr, iacq.data),
+ wmask = Some(align_wmask(switch_addr, iacq.wmask())),
+ alloc = iacq.allocate())(outerConfig)
+
+ val put_block_acquire = PutBlock(
+ client_xact_id = put_id,
+ addr_block = put_block,
+ addr_beat = put_beat,
+ data = put_data.toBits,
+ wmask = Some(put_wmask.toBits))(outerConfig)
+
+ // While gathering (shrink), input beats are consumed without the outer
+ // port; the wide PutBlock is emitted afterwards under sending_put.
+ io.out.acquire.valid := sending_put || (!shrink && io.in.acquire.valid)
+ io.out.acquire.bits := MuxCase(get_block_acquire, Seq(
+ sending_put -> put_block_acquire,
+ smallget -> get_acquire,
+ smallput -> put_acquire))
+ io.in.acquire.ready := !sending_put && (shrink || io.out.acquire.ready)
+
+ when (io.in.acquire.fire() && shrink) {
+ // Latch the header fields on the first gathered beat.
+ when (!collecting) {
+ put_block := out_addr_block
+ put_id := iacq.client_xact_id
+ put_allocate := iacq.allocate()
+ collecting := Bool(true)
+ }
+ put_data(recv_idx) := iacq.data
+ put_wmask(recv_idx) := iacq.wmask()
+ }
+
+ when (io.in.acquire.fire() && smallget) {
+ smallget_switch(iacq.client_xact_id) := switch_addr
+ }
+
+ when (recv_done) { sending_put := Bool(true) }
+ when (sending_put && io.out.acquire.ready) { sending_put := Bool(false) }
+ when (put_done) { collecting := Bool(false) }
+
+ // State for scattering a wide block-grant beat into `factor` narrow beats.
+ val returning_data = Reg(init = Bool(false))
+ val (send_idx, send_done) = Counter(
+ io.in.grant.ready && returning_data, factor)
+
+ val gnt_beat = Reg(UInt(width = outerBeatAddrBits))
+ val gnt_client_id = Reg(UInt(width = outerClientIdBits))
+ val gnt_manager_id = Reg(UInt(width = outerManagerIdBits))
+ val gnt_data = Reg(UInt(width = outerDataBits))
+
+ when (io.out.grant.fire() && stretch) {
+ gnt_data := ognt.data
+ gnt_client_id := ognt.client_xact_id
+ gnt_manager_id := ognt.manager_xact_id
+ gnt_beat := ognt.addr_beat
+ returning_data := Bool(true)
+ }
+
+ when (send_done) { returning_data := Bool(false) }
+
+ // Extract the inner-beat-sized slice `sel` out of a wide word.
+ def select_data(data: UInt, sel: UInt): UInt =
+ data >> (sel << log2Up(innerDataBits))
+
+ val gnt_switch = smallget_switch(ognt.client_xact_id)
+
+ val innerConfig = p.alterPartial({ case TLId => innerTLId })
+
+ val get_block_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.getDataBlockType,
+ client_xact_id = gnt_client_id,
+ manager_xact_id = gnt_manager_id,
+ addr_beat = Cat(gnt_beat, send_idx),
+ data = select_data(gnt_data, send_idx))(innerConfig)
+
+ val get_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.getDataBeatType,
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = Cat(ognt.addr_beat, gnt_switch),
+ data = select_data(ognt.data, gnt_switch))(innerConfig)
+
+ // Non-data grants (e.g. put acks) pass through with fields copied.
+ val default_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = ognt.g_type,
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat,
+ data = ognt.data)(innerConfig)
+
+ // While scattering, the outer grant port is blocked (it was consumed
+ // when the wide beat was latched above).
+ io.in.grant.valid := returning_data || (!stretch && io.out.grant.valid)
+ io.in.grant.bits := MuxCase(default_grant, Seq(
+ returning_data -> get_block_grant,
+ smallgnt -> get_grant))
+ io.out.grant.ready := !returning_data && (stretch || io.in.grant.ready)
+}
+
+/** Adapts a TileLink client to an outer domain with narrower data beats.
+  * One inner beat is split into `factor` outer beats. PutBlocks are
+  * serialized out of a shift buffer; wide block grants are reassembled from
+  * `factor` narrow grant beats. Single-beat gets/puts are relocated to the
+  * correct narrow outer beat (a small put must fit inside one outer beat). */
+class TileLinkIONarrower(innerTLId: String, outerTLId: String)
+ (implicit p: Parameters) extends TLModule()(p) {
+
+ val innerParams = p(TLKey(innerTLId))
+ val outerParams = p(TLKey(outerTLId))
+ val innerDataBeats = innerParams.dataBeats
+ val innerDataBits = innerParams.dataBitsPerBeat
+ val innerWriteMaskBits = innerParams.writeMaskBits
+ val innerByteAddrBits = log2Up(innerWriteMaskBits)
+ val outerDataBeats = outerParams.dataBeats
+ val outerDataBits = outerParams.dataBitsPerBeat
+ val outerWriteMaskBits = outerParams.writeMaskBits
+ val outerByteAddrBits = log2Up(outerWriteMaskBits)
+ val outerBeatAddrBits = log2Up(outerDataBeats)
+ val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
+ val outerMaxClients = outerParams.maxClientsPerPort
+ val outerIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
+
+ // Outer beats must be narrower, an even divisor, and blocks equal-sized.
+ require(outerDataBeats > innerDataBeats)
+ require(outerDataBeats % innerDataBeats == 0)
+ require(outerDataBits < innerDataBits)
+ require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
+
+ val factor = outerDataBeats / innerDataBeats
+
+ val io = new Bundle {
+ val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
+ val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
+ }
+
+ val iacq = io.in.acquire.bits
+ val ognt = io.out.grant.bits
+
+ // stretch: inner PutBlock beat -> `factor` narrower outer beats
+ // shrink:  inner GetBlock -> outer GetBlock (grant reassembled later)
+ val stretch = iacq.a_type === Acquire.putBlockType
+ val shrink = iacq.a_type === Acquire.getBlockType
+ val smallput = iacq.a_type === Acquire.putType
+ val smallget = iacq.a_type === Acquire.getType
+
+ // Shift buffer holding one wide beat being serialized out.
+ val acq_data_buffer = Reg(UInt(width = innerDataBits))
+ val acq_wmask_buffer = Reg(UInt(width = innerWriteMaskBits))
+ val acq_client_id = Reg(iacq.client_xact_id)
+ val acq_addr_block = Reg(iacq.addr_block)
+ val acq_addr_beat = Reg(iacq.addr_beat)
+ val oacq_ctr = Counter(factor)
+
+ // Address fields re-sliced for the narrower outer beat geometry.
+ val outer_beat_addr = iacq.full_addr()(outerBlockOffset - 1, outerByteAddrBits)
+ val outer_byte_addr = iacq.full_addr()(outerByteAddrBits - 1, 0)
+
+ // Split the wide wmask/data into `factor` outer-beat-sized chunks.
+ val mask_chunks = Vec.tabulate(factor) { i =>
+ val lsb = i * outerWriteMaskBits
+ val msb = (i + 1) * outerWriteMaskBits - 1
+ iacq.wmask()(msb, lsb)
+ }
+
+ val data_chunks = Vec.tabulate(factor) { i =>
+ val lsb = i * outerDataBits
+ val msb = (i + 1) * outerDataBits - 1
+ iacq.data(msb, lsb)
+ }
+
+ // One-hot: which chunk(s) a small put actually touches.
+ val beat_sel = Cat(mask_chunks.map(mask => mask.orR).reverse)
+
+ val smallput_data = Mux1H(beat_sel, data_chunks)
+ val smallput_wmask = Mux1H(beat_sel, mask_chunks)
+ val smallput_beat = Cat(iacq.addr_beat, PriorityEncoder(beat_sel))
+
+ assert(!io.in.acquire.valid || !smallput || PopCount(beat_sel) <= UInt(1),
+ "Can't perform Put wider than outer width")
+
+ // A small get must also fit inside a single outer beat.
+ val read_size_ok = MuxLookup(iacq.op_size(), Bool(false), Seq(
+ MT_B -> Bool(true),
+ MT_BU -> Bool(true),
+ MT_H -> Bool(outerDataBits >= 16),
+ MT_HU -> Bool(outerDataBits >= 16),
+ MT_W -> Bool(outerDataBits >= 32),
+ MT_WU -> Bool(outerDataBits >= 32),
+ MT_D -> Bool(outerDataBits >= 64),
+ MT_Q -> Bool(false)))
+
+ assert(!io.in.acquire.valid || !smallget || read_size_ok,
+ "Can't perform Get wider than outer width")
+
+ val outerConfig = p.alterPartial({ case TLId => outerTLId })
+ val innerConfig = p.alterPartial({ case TLId => innerTLId })
+
+ val get_block_acquire = GetBlock(
+ client_xact_id = iacq.client_xact_id,
+ addr_block = iacq.addr_block,
+ alloc = iacq.allocate())(outerConfig)
+
+ // Emits the bottom chunk of the shift buffer; beat index is the wide beat
+ // number concatenated with the serialization counter.
+ val put_block_acquire = PutBlock(
+ client_xact_id = acq_client_id,
+ addr_block = acq_addr_block,
+ addr_beat = if (factor > 1)
+ Cat(acq_addr_beat, oacq_ctr.value)
+ else acq_addr_beat,
+ data = acq_data_buffer(outerDataBits - 1, 0),
+ wmask = Some(acq_wmask_buffer(outerWriteMaskBits - 1, 0)))(outerConfig)
+
+ val get_acquire = Get(
+ client_xact_id = iacq.client_xact_id,
+ addr_block = iacq.addr_block,
+ addr_beat = outer_beat_addr,
+ addr_byte = outer_byte_addr,
+ operand_size = iacq.op_size(),
+ alloc = iacq.allocate())(outerConfig)
+
+ val put_acquire = Put(
+ client_xact_id = iacq.client_xact_id,
+ addr_block = iacq.addr_block,
+ addr_beat = smallput_beat,
+ data = smallput_data,
+ wmask = Some(smallput_wmask))(outerConfig)
+
+ val sending_put = Reg(init = Bool(false))
+
+ val pass_valid = io.in.acquire.valid && !stretch
+
+ io.out.acquire.bits := MuxCase(Wire(io.out.acquire.bits, init=iacq), Seq(
+ (sending_put, put_block_acquire),
+ (shrink, get_block_acquire),
+ (smallput, put_acquire),
+ (smallget, get_acquire)))
+ io.out.acquire.valid := sending_put || pass_valid
+ io.in.acquire.ready := !sending_put && (stretch || io.out.acquire.ready)
+
+ // Latch one wide PutBlock beat into the shift buffer and start draining.
+ when (io.in.acquire.fire() && stretch) {
+ acq_data_buffer := iacq.data
+ acq_wmask_buffer := iacq.wmask()
+ acq_client_id := iacq.client_xact_id
+ acq_addr_block := iacq.addr_block
+ acq_addr_beat := iacq.addr_beat
+ sending_put := Bool(true)
+ }
+
+ // Shift the buffer down one outer beat each time the outer port accepts.
+ when (sending_put && io.out.acquire.ready) {
+ acq_data_buffer := acq_data_buffer >> outerDataBits
+ acq_wmask_buffer := acq_wmask_buffer >> outerWriteMaskBits
+ when (oacq_ctr.inc()) { sending_put := Bool(false) }
+ }
+
+ // Grant path: collect `factor` narrow block-grant beats, then emit one
+ // wide beat while sending_get is high.
+ val ognt_block = ognt.hasMultibeatData()
+ val gnt_data_buffer = Reg(Vec(factor, UInt(width = outerDataBits)))
+ val gnt_client_id = Reg(ognt.client_xact_id)
+ val gnt_manager_id = Reg(ognt.manager_xact_id)
+
+ val ignt_ctr = Counter(innerDataBeats)
+ val ognt_ctr = Counter(factor)
+ val sending_get = Reg(init = Bool(false))
+
+ val get_block_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.getDataBlockType,
+ client_xact_id = gnt_client_id,
+ manager_xact_id = gnt_manager_id,
+ addr_beat = ignt_ctr.value,
+ data = gnt_data_buffer.toBits)(innerConfig)
+
+ val smallget_grant = ognt.g_type === Grant.getDataBeatType
+
+ // Small-get data is replicated across the wide beat; the client extracts
+ // the right bytes using the (wide) beat address.
+ val get_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.getDataBeatType,
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat >> UInt(log2Up(factor)),
+ data = Fill(factor, ognt.data))(innerConfig)
+
+ io.in.grant.valid := sending_get || (io.out.grant.valid && !ognt_block)
+ io.out.grant.ready := !sending_get && (ognt_block || io.in.grant.ready)
+
+ io.in.grant.bits := MuxCase(Wire(io.in.grant.bits, init=ognt), Seq(
+ sending_get -> get_block_grant,
+ smallget_grant -> get_grant))
+
+ when (io.out.grant.valid && ognt_block && !sending_get) {
+ gnt_data_buffer(ognt_ctr.value) := ognt.data
+ when (ognt_ctr.inc()) {
+ gnt_client_id := ognt.client_xact_id
+ gnt_manager_id := ognt.manager_xact_id
+ sending_get := Bool(true)
+ }
+ }
+
+ when (io.in.grant.ready && sending_get) {
+ ignt_ctr.inc()
+ sending_get := Bool(false)
+ }
+}
+
+/** Acquire-side half of the TileLink fragmenter.
+  * Rewrites multibeat block operations as sequences of single-beat builtin
+  * acquires: each PutBlock beat becomes one Put, and one GetBlock fans out
+  * into one Get per beat. For every transaction, the number of expected
+  * response beats (minus one) is pushed to the sink via `que`. */
+class TileLinkFragmenterSource(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = Decoupled(new Acquire).flip
+ val out = Decoupled(new Acquire)
+ val que = Decoupled(UInt(width = tlBeatAddrBits))
+ }
+
+ // Pipeline stage with acquire data; needed to ensure in.bits stay fixed when !in.ready
+ val acq_valid = RegInit(Bool(false))
+ val acq_bits = Reg(new Acquire)
+ // The last beat of the generated acquire to send
+ val acq_last_beat = Reg(UInt(width = tlBeatAddrBits))
+ val acq_last = acq_bits.addr_beat === acq_last_beat
+
+ // 'in' has the first beat?
+ val in_multi_put = io.in.bits.isBuiltInType(Acquire.putBlockType)
+ val in_multi_get = io.in.bits.isBuiltInType(Acquire.getBlockType)
+ val in_first_beat = !in_multi_put || io.in.bits.addr_beat === UInt(0)
+
+ // Move stuff from acq to out whenever out is ready
+ io.out.valid := acq_valid
+ // When can acq accept a request?
+ val acq_ready = !acq_valid || (acq_last && io.out.ready)
+ // Move the first beat from in to acq only when both acq and que are ready
+ io.in.ready := (!in_first_beat || io.que.ready) && acq_ready
+ io.que.valid := (in_first_beat && io.in.valid) && acq_ready
+
+ // in.fire moves data from in to acq and (optionally) que
+ // out.fire moves data from acq to out
+
+ // Desired flow control results:
+ assert (!io.que.fire() || io.in.fire()) // 1. que.fire => in.fire
+ assert (!(io.in.fire() && in_first_beat) || io.que.fire()) // 2. in.fire && in_first_beat => que.fire
+ assert (!io.out.fire() || acq_valid) // 3. out.fire => acq_valid
+ assert (!io.in.fire() || (!acq_valid || (io.out.fire() && acq_last))) // 4. in.fire => !acq_valid || (out.fire && acq_last)
+ // Proofs:
+ // 1. que.fire => que.ready && in.valid && acq_ready => in.ready && in.valid
+ // 2. in.fire && in_first_beat => in.valid && acq_ready && [(!in_first_beat || que.ready) && in_first_beat] =>
+ // in.valid && acq_ready && que.ready && in_first_beat => que.valid && que.ready
+ // 3. out.fire => out.valid => acq_valid
+ // 4. in.fire => acq_ready => !acq_valid || (acq_last && out.ready) =>
+ // !acq_valid || (acq_valid && acq_last && out.ready) => !acq_valid || (acq_last && out.fire)
+
+ // All-ones = tlDataBeats - 1: beat count (minus one) of a block operation.
+ val multi_size = SInt(-1, width = tlBeatAddrBits).asUInt // TL2: use in.bits.size()/beatBits-1
+ // Beats this module must EMIT per accepted input beat: GetBlock fans out,
+ // PutBlock beats arrive one at a time and each emits a single Put.
+ val in_sizeMinus1 = Mux(in_multi_get || in_multi_put, multi_size, UInt(0))
+ // Beats the sink should EXPECT back: only gets return multibeat data.
+ val in_insertSizeMinus1 = Mux(in_multi_get, multi_size, UInt(0))
+
+ when (io.in.fire()) {
+ // Theorem 4 makes this safe; we overwrite garbage, or replace the final acq
+ acq_valid := Bool(true)
+ acq_bits := io.in.bits
+ acq_last_beat := io.in.bits.addr_beat + in_insertSizeMinus1
+ // Replace this with size truncation in TL2:
+ acq_bits.a_type := Mux(in_multi_put, Acquire.putType, Mux(in_multi_get, Acquire.getType, io.in.bits.a_type))
+ } .elsewhen (io.out.fire()) {
+ acq_valid := !acq_last // false => !in.valid || (!que.ready && in_first_beat)
+ acq_bits.addr_beat := acq_bits.addr_beat + UInt(1)
+ // acq_last && out.fire => acq_last && out.ready && acq_valid => acq_ready
+ // Suppose in.valid, then !in.fire => !in.ready => !(!in_first_beat || que.ready) => !que.ready && in_first_beat
+ }
+
+ // Safe by theorem 3
+ io.out.bits := acq_bits
+ // Safe by theorem 1
+ io.que.bits := in_sizeMinus1
+}
+
+/** Grant-side half of the TileLink fragmenter.
+  * Consumes the per-beat grants produced by the fragmented acquires,
+  * discarding all but the final put ack and restoring the block grant type
+  * for multibeat operations, using the beat counts queued by the source. */
+class TileLinkFragmenterSink(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = Decoupled(new Grant).flip
+ val out = Decoupled(new Grant)
+ val que = Decoupled(UInt(width = tlBeatAddrBits)).flip
+ }
+
+ // Remaining-beats countdown for the transaction currently being retired.
+ val count_valid = RegInit(Bool(false))
+ val multi_op = Reg(Bool())
+ val count_bits = Reg(UInt(width = tlBeatAddrBits))
+ val last = count_bits === UInt(0)
+
+ val in_put = io.in.bits.isBuiltInType(Grant.putAckType)
+ val in_get = io.in.bits.isBuiltInType(Grant.getDataBeatType)
+ // Forward every get beat, but only the last of a run of put acks.
+ val deliver = last || in_get
+
+ // Accept the input, discarding the non-final put grant
+ io.in.ready := count_valid && (io.out.ready || !deliver)
+ // Output the grant whenever we want delivery
+ io.out.valid := count_valid && io.in.valid && deliver
+ // Take a new number whenever we deliver the last beat
+ io.que.ready := !count_valid || (io.in.valid && io.out.ready && last)
+
+ // Desired flow control results:
+ assert (!io.out.fire() || (count_valid && io.in.fire())) // 1. out.fire => in.fire && count_valid
+ assert (!(io.in.fire() && deliver) || io.out.fire()) // 2. in.fire && deliver => out.fire
+ assert (!(io.out.fire() && last) || io.que.ready) // 3. out.fire && last => que.ready
+ assert (!io.que.fire() || (!count_valid || io.out.fire())) // 4. que.fire => !count_valid || (out.fire && last)
+ // Proofs:
+ // 1. out.fire => out.ready && (count_valid && in.valid && deliver) => (count_valid && out.ready) && in.valid => in.fire
+ // 2. in.fire && deliver => in.valid && count_valid && [(out.ready || !deliver) && deliver] =>
+ // in.valid && count_valid && deliver && out.ready => out.fire
+ // 3. out.fire && last => out.valid && out.ready && last => in.valid && out.ready && last => que.ready
+ // 4. que.fire => que.valid && (!count_valid || (in.valid && out.ready && last))
+ // => !count_valid || (count_valid && in.valid && out.ready && [last => deliver])
+ // => !count_valid || (out.valid && out.ready && last)
+
+ when (io.que.fire()) {
+ // Theorem 4 makes this safe; we overwrite garbage or last output
+ count_valid := Bool(true)
+ count_bits := io.que.bits
+ multi_op := io.que.bits =/= UInt(0)
+ } .elsewhen (io.in.fire()) {
+ count_valid := !last // false => !que.valid
+ count_bits := count_bits - UInt(1)
+ // Proof: in.fire && [last => deliver] =2=> out.fire && last =3=> que.ready
+ // !que.fire && que.ready => !que.valid
+ }
+
+ // Safe by Theorem 1
+ io.out.bits := io.in.bits
+ // Restore the block-level grant type for fragmented multibeat operations.
+ io.out.bits.g_type := Mux(multi_op, Mux(in_get, Grant.getDataBlockType, Grant.putAckType), io.in.bits.g_type)
+}
+
+/** Splits multibeat TileLink block operations into single-beat operations.
+  * Wires a TileLinkFragmenterSource (acquires) to a TileLinkFragmenterSink
+  * (grants); `depth` sizes the queue of per-transaction beat counts, i.e.
+  * the number of fragmented transactions that may be in flight. */
+class TileLinkFragmenter(depth: Int = 1)(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = new ClientUncachedTileLinkIO().flip
+ val out = new ClientUncachedTileLinkIO
+ }
+
+ // TL2:
+ // supportsAcquire = false
+ // modify all outward managers to supportsMultibeat = true
+ // assert: all managers must behaveFIFO (not inspect duplicated id field)
+
+ val source = Module(new TileLinkFragmenterSource)
+ val sink = Module(new TileLinkFragmenterSink)
+ sink.io.que <> Queue(source.io.que, depth)
+
+ source.io.in <> io.in.acquire
+ io.out.acquire <> source.io.out
+ sink.io.in <> io.out.grant
+ io.in.grant <> sink.io.out
+}
+
+object TileLinkFragmenter {
+ // Pass the source/client to fragment
+ /** Wrap `source` with a fragmenter; `depth` sizes the in-flight bookkeeping queue. */
+ def apply(source: ClientUncachedTileLinkIO, depth: Int = 1)(implicit p: Parameters): ClientUncachedTileLinkIO = {
+ val frag = Module(new TileLinkFragmenter(depth))
+ frag.io.in <> source
+ frag.io.out
+ }
+}
diff --git a/uncore/src/main/scala/devices/Bram.scala b/uncore/src/main/scala/devices/Bram.scala
new file mode 100644
index 00000000..1b8c5194
--- /dev/null
+++ b/uncore/src/main/scala/devices/Bram.scala
@@ -0,0 +1,160 @@
+package uncore.devices
+
+import Chisel._
+import cde.{Parameters, Field}
+import junctions._
+import uncore.tilelink._
+import uncore.util._
+import HastiConstants._
+
+/** Single-beat TileLink RAM slave backed by a block RAM.
+  * Supports only builtin single-beat Get and Put (full write mask);
+  * `depth` is the number of beat-wide words. */
+class BRAMSlave(depth: Int)(implicit val p: Parameters) extends Module
+ with HasTileLinkParameters {
+ val io = new ClientUncachedTileLinkIO().flip
+
+ // For TL2:
+ // supportsAcquire = false
+ // supportsMultibeat = false
+ // supportsHint = false
+ // supportsAtomic = false
+
+ // Timing-wise, we assume the input is coming out of registers
+ // since you probably needed a TileLinkFragmenter infront of us
+
+ // Thus, only one pipeline stage: the grant result
+ val g_valid = RegInit(Bool(false))
+ val g_bits = Reg(new Grant)
+
+ // Just pass the pipeline straight through
+ io.grant.valid := g_valid
+ io.grant.bits := g_bits
+ io.acquire.ready := !g_valid || io.grant.ready
+
+ val acq_get = io.acquire.bits.isBuiltInType(Acquire.getType)
+ val acq_put = io.acquire.bits.isBuiltInType(Acquire.putType)
+ // Word address: block number concatenated with beat number.
+ val acq_addr = Cat(io.acquire.bits.addr_block, io.acquire.bits.addr_beat)
+
+ val bram = Mem(depth, Bits(width = tlDataBits))
+
+ val ren = acq_get && io.acquire.fire()
+ val wen = acq_put && io.acquire.fire()
+
+ when (io.grant.fire()) {
+ g_valid := Bool(false)
+ }
+
+ // Capture the grant header for the accepted request; data is filled in
+ // below from the RAM read port.
+ when (io.acquire.fire()) {
+ g_valid := Bool(true)
+ g_bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = io.acquire.bits.getBuiltInGrantType(),
+ client_xact_id = io.acquire.bits.client_xact_id,
+ manager_xact_id = UInt(0),
+ addr_beat = io.acquire.bits.addr_beat,
+ data = UInt(0))
+ }
+
+ when (wen) {
+ bram.write(acq_addr, io.acquire.bits.data)
+ assert(io.acquire.bits.wmask().andR, "BRAMSlave: partial write masks not supported")
+ }
+ // Last connection wins: override the placeholder grant data with the
+ // registered RAM read result (valid one cycle after the Get fired).
+ io.grant.bits.data := RegEnable(bram.read(acq_addr), ren)
+}
+
+/** Hasti (AHB-style) RAM slave, always ready (zero wait states).
+  * Writes are buffered for one cycle (address/size captured in the address
+  * phase, data written in the data phase); a read issued while a write is
+  * pending to the same word bypasses the buffered write data. */
+class HastiRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) {
+ val io = new HastiSlaveIO
+
+ // Write data split into per-byte lanes for the masked SeqMem write.
+ val wdata = Vec.tabulate(hastiDataBytes)(i => io.hwdata(8*(i+1)-1,8*i))
+ val waddr = Reg(UInt(width = hastiAddrBits))
+ val wvalid = Reg(init = Bool(false))
+ val wsize = Reg(UInt(width = SZ_HSIZE))
+ val ram = SeqMem(depth, Vec(hastiDataBytes, Bits(width = 8)))
+
+ val max_size = log2Ceil(hastiDataBytes)
+ // Byte-enable pattern for the transfer size, shifted to the target bytes.
+ val wmask_lut = MuxLookup(wsize, SInt(-1, hastiDataBytes).asUInt,
+ (0 until max_size).map(sz => (UInt(sz) -> UInt((1 << (1 << sz)) - 1))))
+ val wmask = (wmask_lut << waddr(max_size - 1, 0))(hastiDataBytes - 1, 0)
+
+ val is_trans = io.hsel && (io.htrans === HTRANS_NONSEQ || io.htrans === HTRANS_SEQ)
+ val raddr = io.haddr >> UInt(max_size)
+ val ren = is_trans && !io.hwrite
+ val bypass = Reg(init = Bool(false))
+
+ // Address phase of a write: capture address/size; data arrives next cycle.
+ when (is_trans && io.hwrite) {
+ waddr := io.haddr
+ wsize := io.hsize
+ wvalid := Bool(true)
+ } .otherwise { wvalid := Bool(false) }
+
+ // A read overlapping the data phase of a write to the same word must see
+ // the new data; remember that for the muxing below.
+ when (ren) { bypass := wvalid && (waddr >> UInt(max_size)) === raddr }
+
+ when (wvalid) {
+ ram.write(waddr >> UInt(max_size), wdata, wmask.toBools)
+ }
+
+ // Merge RAM output with bypassed write data on the enabled byte lanes.
+ val rdata = ram.read(raddr, ren)
+ io.hrdata := Cat(rdata.zip(wmask.toBools).zip(wdata).map {
+ case ((rbyte, wsel), wbyte) => Mux(wsel && bypass, wbyte, rbyte)
+ }.reverse)
+
+ io.hready := Bool(true)
+ io.hresp := HRESP_OKAY
+}
+
+/**
+ * This RAM is not meant to be particularly performant.
+ * It just supports the entire range of uncached TileLink operations in the
+ * simplest way possible.
+ */
+class TileLinkTestRAM(depth: Int)(implicit val p: Parameters) extends Module
+ with HasTileLinkParameters {
+ val io = new ClientUncachedTileLinkIO().flip
+
+ val ram = Mem(depth, UInt(width = tlDataBits))
+
+ // While `responding`, the acquire port is blocked and grants stream out
+ // for the transaction latched in r_acq.
+ val responding = Reg(init = Bool(false))
+ val acq = io.acquire.bits
+ val r_acq = Reg(io.acquire.bits)
+ val acq_addr = Cat(acq.addr_block, acq.addr_beat)
+ val r_acq_addr = Cat(r_acq.addr_block, r_acq.addr_beat)
+
+ // Latch the request on its final beat and switch to the response phase.
+ when (io.acquire.fire() && io.acquire.bits.last()) {
+ r_acq := io.acquire.bits
+ responding := Bool(true)
+ }
+
+ // GetBlock grants step through every beat; everything else is one grant.
+ when (io.grant.fire()) {
+ val is_getblk = r_acq.isBuiltInType(Acquire.getBlockType)
+ val last_beat = r_acq.addr_beat === UInt(tlDataBeats - 1)
+ when (is_getblk && !last_beat) {
+ r_acq.addr_beat := r_acq.addr_beat + UInt(1)
+ } .otherwise { responding := Bool(false) }
+ }
+
+ io.acquire.ready := !responding
+ io.grant.valid := responding
+ io.grant.bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = r_acq.getBuiltInGrantType(),
+ client_xact_id = r_acq.client_xact_id,
+ manager_xact_id = UInt(0),
+ addr_beat = r_acq.addr_beat,
+ data = ram(r_acq_addr))
+
+ val old_data = ram(acq_addr)
+ val new_data = acq.data
+
+ // Atomic operations: shift the addressed operand down to the ALU width,
+ // compute, then shift the result back into place.
+ val amo_shift_bits = acq.amo_shift_bytes() << UInt(3)
+ val amoalu = Module(new AMOALU)
+ amoalu.io.addr := Cat(acq.addr_block, acq.addr_beat, acq.addr_byte())
+ amoalu.io.cmd := acq.op_code()
+ amoalu.io.typ := acq.op_size()
+ amoalu.io.lhs := old_data >> amo_shift_bits
+ amoalu.io.rhs := new_data >> amo_shift_bits
+
+ val result = Mux(acq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data)
+ // Expand the byte-granularity write mask to a bit mask.
+ val wmask = FillInterleaved(8, acq.wmask())
+
+ // Writes (puts and atomics) merge into the RAM under the write mask.
+ when (io.acquire.fire() && acq.hasData()) {
+ ram(acq_addr) := (old_data & ~wmask) | (result & wmask)
+ }
+}
diff --git a/uncore/src/main/scala/devices/Debug.scala b/uncore/src/main/scala/devices/Debug.scala
new file mode 100644
index 00000000..dc9468b1
--- /dev/null
+++ b/uncore/src/main/scala/devices/Debug.scala
@@ -0,0 +1,1003 @@
+// See LICENSE for license details.
+
+package uncore.devices
+
+import Chisel._
+import uncore.tilelink._
+import junctions._
+import cde.{Parameters, Config, Field}
+
+// *****************************************
+// Constants which are interesting even
+// outside of this module
+// *****************************************
+
+/** Debug Bus register address map (RISC-V Debug Spec).
+ * Addresses 0x00-0x0F (Debug RAM) and the halt-notification window
+ * (0x1C-0x3B) are decoded separately inside DebugModule.
+ * NOTE: UInt literal widths are inferred from the values; do not reformat
+ * these literals.
+ */
+object DbRegAddrs{
+
+ def DMCONTROL = UInt(0x10)
+
+ def DMINFO = UInt(0x11)
+ def AUTHDATA0 = UInt(0x12)
+ def AUTHDATA1 = UInt(0x13)
+ def SERDATA = UInt(0x14)
+ def SERSTATUS = UInt(0x15)
+ def SBUSADDRESS0 = UInt(0x16)
+ def SBUSADDRESS1 = UInt(0x17)
+ def SBDATA0 = UInt(0x18)
+ def SBDATA1 = UInt(0x19)
+ //1a
+ def HALTSUM = UInt(0x1B)
+ //1c - 3b are the halt notification registers.
+ def SBADDRESS2 = UInt(0x3d)
+ // 3c
+ def SBDATA2 = UInt(0x3e)
+ def SBDATA3 = UInt(0x3f)
+}
+
+/** Constant values used by both Debug Bus Response & Request
+ */
+
+object DbBusConsts{
+
+ // Debug-bus payload: 32 data bits plus the interrupt/haltnot side bits
+ // (see the RAMFields bundle in DebugModule).
+ def dbDataSize = 34
+
+ // Width of one Debug RAM word as seen from the debug bus.
+ def dbRamWordBits = 32
+
+ // Request opcodes: a read is always performed; the two write variants
+ // additionally write (the conditional one only when the read succeeds).
+ def dbOpSize = 2
+ def db_OP_NONE = UInt("b00")
+ def db_OP_READ = UInt("b01")
+ def db_OP_READ_WRITE = UInt("b10")
+ def db_OP_READ_COND_WRITE = UInt("b11")
+
+ // Response status codes.
+ def dbRespSize = 2
+ def db_RESP_SUCCESS = UInt("b00")
+ def db_RESP_FAILURE = UInt("b01")
+ def db_RESP_HW_FAILURE = UInt("b10")
+ // This is used outside this block
+ // to indicate 'busy'.
+ def db_RESP_RESERVED = UInt("b11")
+
+}
+
+object DsbBusConsts {
+
+ // System-bus view of the Debug Module: 12-bit address window and the
+ // width of the hart/thread ID carried on SETHALTNOT/CLEARDEBINT writes.
+ def sbAddrWidth = 12
+ def sbIdWidth = 10
+
+ //These are the default ROM contents, which support RV32 and RV64.
+ // See $RISCV/riscv-tools/riscv-isa-sim/debug_rom/debug_rom.h/S
+ // The code assumes 64 bytes of Debug RAM.
+ // NOTE: these byte arrays are generated machine code; never edit by hand.
+
+ def defaultRomContents : Array[Byte] = Array(
+ 0x6f, 0x00, 0xc0, 0x04, 0x6f, 0x00, 0xc0, 0x00, 0x13, 0x04, 0xf0, 0xff,
+ 0x6f, 0x00, 0x80, 0x00, 0x13, 0x04, 0x00, 0x00, 0x0f, 0x00, 0xf0, 0x0f,
+ 0xf3, 0x24, 0x00, 0xf1, 0x63, 0xc6, 0x04, 0x00, 0x83, 0x24, 0xc0, 0x43,
+ 0x6f, 0x00, 0x80, 0x00, 0x83, 0x34, 0x80, 0x43, 0x23, 0x2e, 0x80, 0x42,
+ 0x73, 0x24, 0x40, 0xf1, 0x23, 0x20, 0x80, 0x10, 0x73, 0x24, 0x00, 0x7b,
+ 0x13, 0x74, 0x84, 0x00, 0x63, 0x12, 0x04, 0x04, 0x73, 0x24, 0x20, 0x7b,
+ 0x73, 0x00, 0x20, 0x7b, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x00, 0x7b,
+ 0x13, 0x74, 0x04, 0x1c, 0x13, 0x04, 0x04, 0xf4, 0x63, 0x1e, 0x04, 0x00,
+ 0x73, 0x24, 0x00, 0xf1, 0x63, 0x46, 0x04, 0x00, 0x23, 0x2e, 0x90, 0x42,
+ 0x67, 0x00, 0x00, 0x40, 0x23, 0x3c, 0x90, 0x42, 0x67, 0x00, 0x00, 0x40,
+ 0x73, 0x24, 0x40, 0xf1, 0x23, 0x26, 0x80, 0x10, 0x73, 0x60, 0x04, 0x7b,
+ 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x02, 0xe3, 0x0c, 0x04, 0xfe,
+ 0x6f, 0xf0, 0x1f, 0xfd).map(_.toByte)
+
+ // These ROM contents support only RV32
+ // See $RISCV/riscv-tools/riscv-isa-sim/debug_rom/debug_rom.h/S
+ // The code assumes only 28 bytes of Debug RAM.
+
+ def xlen32OnlyRomContents : Array[Byte] = Array(
+ 0x6f, 0x00, 0xc0, 0x03, 0x6f, 0x00, 0xc0, 0x00, 0x13, 0x04, 0xf0, 0xff,
+ 0x6f, 0x00, 0x80, 0x00, 0x13, 0x04, 0x00, 0x00, 0x0f, 0x00, 0xf0, 0x0f,
+ 0x83, 0x24, 0x80, 0x41, 0x23, 0x2c, 0x80, 0x40, 0x73, 0x24, 0x40, 0xf1,
+ 0x23, 0x20, 0x80, 0x10, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x84, 0x00,
+ 0x63, 0x1a, 0x04, 0x02, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b,
+ 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x1c,
+ 0x13, 0x04, 0x04, 0xf4, 0x63, 0x16, 0x04, 0x00, 0x23, 0x2c, 0x90, 0x40,
+ 0x67, 0x00, 0x00, 0x40, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x26, 0x80, 0x10,
+ 0x73, 0x60, 0x04, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x02,
+ 0xe3, 0x0c, 0x04, 0xfe, 0x6f, 0xf0, 0x1f, 0xfe).map(_.toByte)
+
+ // These ROM contents support only RV64
+ // See $RISCV/riscv-tools/riscv-isa-sim/debug_rom/debug_rom.h/S
+ // The code assumes 64 bytes of Debug RAM.
+
+ def xlen64OnlyRomContents : Array[Byte] = Array(
+ 0x6f, 0x00, 0xc0, 0x03, 0x6f, 0x00, 0xc0, 0x00, 0x13, 0x04, 0xf0, 0xff,
+ 0x6f, 0x00, 0x80, 0x00, 0x13, 0x04, 0x00, 0x00, 0x0f, 0x00, 0xf0, 0x0f,
+ 0x83, 0x34, 0x80, 0x43, 0x23, 0x2e, 0x80, 0x42, 0x73, 0x24, 0x40, 0xf1,
+ 0x23, 0x20, 0x80, 0x10, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x84, 0x00,
+ 0x63, 0x1a, 0x04, 0x02, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b,
+ 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x1c,
+ 0x13, 0x04, 0x04, 0xf4, 0x63, 0x16, 0x04, 0x00, 0x23, 0x3c, 0x90, 0x42,
+ 0x67, 0x00, 0x00, 0x40, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x26, 0x80, 0x10,
+ 0x73, 0x60, 0x04, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x02,
+ 0xe3, 0x0c, 0x04, 0xfe, 0x6f, 0xf0, 0x1f, 0xfe).map(_.toByte)
+}
+
+
+
+/** System-bus-visible register addresses within the Debug Module's window. */
+object DsbRegAddrs{
+
+ // Writing a hart ID here clears that hart's debug interrupt.
+ def CLEARDEBINT = UInt(0x100)
+ // Writing a hart ID here sets that hart's halt-notification bit.
+ def SETHALTNOT = UInt(0x10C)
+ def SERINFO = UInt(0x110)
+ def SERBASE = UInt(0x114)
+ // For each serial, there are
+ // 3 registers starting here:
+ // SERSEND0
+ // SERRECEIVE0
+ // SERSTATUS0
+ // ...
+ // SERSTATUS7
+ def SERTX_OFFSET = UInt(0)
+ def SERRX_OFFSET = UInt(4)
+ def SERSTAT_OFFSET = UInt(8)
+
+}
+
+
+// *****************************************
+// Configuration & Parameters for this Module
+//
+// *****************************************
+
+/** Enumerations used both in the hardware
+ * and in the configuration specification.
+ */
+
+/** Authentication schemes a Debug Module may require. The ordinal (.id) of
+ * each value is emitted in DMINFO.authtype, so the declaration order is
+ * part of the hardware encoding -- do not reorder.
+ */
+object DebugModuleAuthType extends scala.Enumeration {
+ type DebugModuleAuthType = Value
+ val None, Password, ChallengeResponse, Reserved = Value
+}
+import DebugModuleAuthType._
+
+/** Bus-master access sizes. The ordinal (.id) is used directly as the
+ * DMCONTROL.access encoding, so the declaration order is part of the
+ * hardware encoding -- do not reorder.
+ */
+object DebugModuleAccessType extends scala.Enumeration {
+ type DebugModuleAccessType = Value
+ val Access8Bit, Access16Bit, Access32Bit, Access64Bit, Access128Bit = Value
+}
+import DebugModuleAccessType._
+
+
+/** Parameters exposed to the top-level design, set based on
+ * external requirements, etc.
+ *
+ * This object checks that the parameters conform to the
+ * full specification. The implementation which receives this
+ * object can perform more checks on what that implementation
+ * actually supports.
+ * nComponents : The number of components to support debugging.
+ * nDebugBusAddrSize : Size of the Debug Bus Address
+ * nDebugRamBytes : Size of the Debug RAM (depends on the XLEN of the machine).
+ * debugRomContents : Optional sequence of bytes which form the Debug ROM contents.
+ * hasBusMaster : Whether or not a bus master should be included
+ * hasAccess8/16/32/64/128 : The sizes of accesses supported by the Bus Master.
+ * nSerialPorts : Number of serial ports to instantiate
+ * authType : The Authorization Type
+ * nNDResetCycles : Number of cycles to assert ndreset when pulsed.
+ **/
+
+
+case class DebugModuleConfig (
+ nComponents : Int,
+ nDebugBusAddrSize : Int,
+ nDebugRamBytes : Int,
+ debugRomContents : Option[Seq[Byte]],
+ hasBusMaster : Boolean,
+ hasAccess128 : Boolean,
+ hasAccess64 : Boolean,
+ hasAccess32 : Boolean,
+ hasAccess16 : Boolean,
+ hasAccess8 : Boolean,
+ nSerialPorts : Int,
+ authType : DebugModuleAuthType,
+ nNDResetCycles : Int
+) {
+
+ // A bus master is a prerequisite for supporting any system-bus access size.
+ if (hasBusMaster == false){
+ require (hasAccess128 == false)
+ require (hasAccess64 == false)
+ require (hasAccess32 == false)
+ require (hasAccess16 == false)
+ require (hasAccess8 == false)
+ }
+
+ require (nSerialPorts <= 8)
+
+ require ((nDebugBusAddrSize >= 5) && (nDebugBusAddrSize <= 7))
+
+ // Each halt-notification register covers 32 components; the address space
+ // bounds how many such registers (and hence components) are reachable.
+ private val maxComponents = nDebugBusAddrSize match {
+ case 5 => (32*4)
+ case 6 => (32*32)
+ case 7 => (32*32)
+ }
+ require (nComponents > 0 && nComponents <= maxComponents)
+
+ // Maximum Debug RAM size (bytes) addressable per debug-bus address width.
+ private val maxRam = nDebugBusAddrSize match {
+ case 5 => (4 * 16)
+ case 6 => (4 * 16)
+ case 7 => (4 * 64)
+ }
+
+ require (nDebugRamBytes > 0 && nDebugRamBytes <= maxRam)
+
+ // HALTSUM is required once components no longer fit in one summary word.
+ val hasHaltSum = (nComponents > 64) || (nSerialPorts > 0)
+
+ val hasDebugRom = debugRomContents match{
+ case Some(_) => true
+ case None => false
+ }
+
+ // Check the ROM image itself, not the Option wrapper: Option.size is
+ // always 0 or 1, which made the original '<= 512' bound vacuous.
+ if (hasDebugRom) {
+ require (debugRomContents.get.size > 0)
+ require (debugRomContents.get.size <= 512)
+ }
+
+ require (nNDResetCycles > 0)
+
+}
+
+/** Convenience configuration: sizes the Debug RAM and picks a ROM image
+ * based on the machine's XLEN (32, 64, or 128).
+ */
+class DefaultDebugModuleConfig (val ncomponents : Int, val xlen:Int)
+ extends DebugModuleConfig(
+ nComponents = ncomponents,
+ nDebugBusAddrSize = 5,
+ // While smaller numbers are theoretically
+ // possible as noted in the Spec,
+ // the ROM image would need to be
+ // adjusted accordingly.
+ nDebugRamBytes = xlen match{
+ case 32 => 28
+ case 64 => 64
+ case 128 => 64
+ },
+ debugRomContents = xlen match {
+ case 32 => Some(DsbBusConsts.xlen32OnlyRomContents)
+ case 64 => Some(DsbBusConsts.xlen64OnlyRomContents)
+ // Previously missing: xlen = 128 threw a MatchError at elaboration
+ // even though nDebugRamBytes above accepts it. The RV64 image is the
+ // closest available ROM. TODO(review): confirm a proper RV128 image.
+ case 128 => Some(DsbBusConsts.xlen64OnlyRomContents)
+ },
+ hasBusMaster = false,
+ hasAccess128 = false,
+ hasAccess64 = false,
+ hasAccess32 = false,
+ hasAccess16 = false,
+ hasAccess8 = false,
+ nSerialPorts = 0,
+ authType = DebugModuleAuthType.None,
+ nNDResetCycles = 1)
+
+/** cde Parameters key used to look up the active DebugModuleConfig. */
+case object DMKey extends Field[DebugModuleConfig]
+
+
+// *****************************************
+// Module Interfaces
+//
+// *****************************************
+
+
+/** Structure to define the contents of a Debug Bus Request
+ */
+
+class DebugBusReq(addrBits : Int) extends Bundle {
+ // Debug-bus register/RAM address (width set by the configuration).
+ val addr = UInt(width = addrBits)
+ // One of DbBusConsts.db_OP_* (none / read / read-write / read-cond-write).
+ val op = UInt(width = DbBusConsts.dbOpSize)
+ // 34-bit payload; for Debug RAM accesses this is interpreted as
+ // {interrupt, haltnot, 32-bit data} (see RAMFields in DebugModule).
+ val data = UInt(width = DbBusConsts.dbDataSize)
+
+ override def cloneType = new DebugBusReq(addrBits).asInstanceOf[this.type]
+}
+
+
+/** Structure to define the contents of a Debug Bus Response
+ */
+class DebugBusResp( ) extends Bundle {
+ // One of DbBusConsts.db_RESP_* status codes.
+ val resp = UInt(width = DbBusConsts.dbRespSize)
+ // Read data returned with every response (all ops perform a read).
+ val data = UInt(width = DbBusConsts.dbDataSize)
+}
+
+/** Structure to define the top-level DebugBus interface
+ * of DebugModule.
+ * DebugModule is the consumer of this interface.
+ * Therefore it has the 'flipped' version of this.
+ */
+
+class DebugBusIO(implicit val p: cde.Parameters) extends ParameterizedBundle()(p) {
+ // Requests flow toward the Debug Module; responses flow back (flipped).
+ val req = new DecoupledIO(new DebugBusReq(p(DMKey).nDebugBusAddrSize))
+ val resp = new DecoupledIO(new DebugBusResp).flip()
+}
+
+// *****************************************
+// The Module
+//
+// *****************************************
+
+/** Parameterized version of the Debug Module defined in the
+ * RISC-V Debug Specification
+ *
+ * DebugModule is a slave to two masters:
+ * The Debug Bus -- implemented as a generic Decoupled IO with request
+ * and response channels
+ * The System Bus -- implemented as Uncached Tile Link.
+ *
+ * DebugModule is responsible for holding registers, RAM, and ROM
+ * to support debug interactions, as well as driving interrupts
+ * to a configurable number of components in the system.
+ * It is also responsible for some reset lines.
+ */
+
+class DebugModule ()(implicit val p:cde.Parameters)
+ extends Module
+ with HasTileLinkParameters {
+ val cfg = p(DMKey)
+
+ //--------------------------------------------------------------
+ // Import constants for shorter variable names
+ //--------------------------------------------------------------
+
+ import DbRegAddrs._
+ import DsbRegAddrs._
+ import DsbBusConsts._
+ import DbBusConsts._
+
+ //--------------------------------------------------------------
+ // Sanity Check Configuration For this implementation.
+ //--------------------------------------------------------------
+
+ require (cfg.nComponents <= 128)
+ require (cfg.nSerialPorts == 0)
+ require (cfg.hasBusMaster == false)
+ require (cfg.nDebugRamBytes <= 64)
+ require (cfg.authType == DebugModuleAuthType.None)
+
+ //--------------------------------------------------------------
+ // Private Classes (Register Fields)
+ //--------------------------------------------------------------
+
+ // Layout of a debug-bus access to the Debug RAM region: the interrupt and
+ // haltnot control bits ride along with each 32-bit data word.
+ class RAMFields() extends Bundle {
+ val interrupt = Bool()
+ val haltnot = Bool()
+ val data = Bits(width = 32)
+
+ override def cloneType = new RAMFields().asInstanceOf[this.type]
+ }
+
+ // Bit layout of the DMCONTROL register (field order = bit order, MSB first).
+ class CONTROLFields() extends Bundle {
+ val interrupt = Bool()
+ val haltnot = Bool()
+ val reserved0 = Bits(width = 31-22 + 1)
+ val buserror = Bits(width = 3)
+ val serial = Bits(width = 3)
+ val autoincrement = Bool()
+ val access = UInt(width = 3)
+ val hartid = Bits(width = 10)
+ val ndreset = Bool()
+ val fullreset = Bool()
+
+ override def cloneType = new CONTROLFields().asInstanceOf[this.type]
+
+ }
+
+ // Bit layout of the read-only DMINFO register.
+ class DMINFOFields() extends Bundle {
+ val reserved0 = Bits(width = 2)
+ val abussize = UInt(width = 7)
+ val serialcount = UInt(width = 4)
+ val access128 = Bool()
+ val access64 = Bool()
+ val access32 = Bool()
+ val access16 = Bool()
+ // NOTE(review): 'accesss8' (three s's) is a typo, kept unchanged because
+ // the field name determines the generated signal name.
+ val accesss8 = Bool()
+ val dramsize = UInt(width = 6)
+ val haltsum = Bool()
+ val reserved1 = Bits(width = 3)
+ val authenticated = Bool()
+ val authbusy = Bool()
+ val authtype = UInt(width = 2)
+ val version = UInt(width = 2)
+
+ override def cloneType = new DMINFOFields().asInstanceOf[this.type]
+
+ }
+
+ // Bit layout of the HALTSUM register.
+ class HALTSUMFields() extends Bundle {
+ val serialfull = Bool()
+ val serialvalid = Bool()
+ val acks = Bits(width = 32)
+
+ override def cloneType = new HALTSUMFields().asInstanceOf[this.type]
+
+ }
+
+ //--------------------------------------------------------------
+ // Module I/O
+ //--------------------------------------------------------------
+
+ val io = new Bundle {
+ val db = new DebugBusIO()(p).flip()
+ val debugInterrupts = Vec(cfg.nComponents, Bool()).asOutput
+ val tl = new ClientUncachedTileLinkIO().flip
+ val ndreset = Bool(OUTPUT)
+ val fullreset = Bool(OUTPUT)
+ }
+
+ //--------------------------------------------------------------
+ // Register & Wire Declarations
+ //--------------------------------------------------------------
+
+ // --- Debug Bus Registers
+ val CONTROLReset = Wire(new CONTROLFields())
+ val CONTROLWrEn = Wire(Bool())
+ val CONTROLReg = Reg(new CONTROLFields())
+ val CONTROLWrData = Wire (new CONTROLFields())
+ val CONTROLRdData = Wire (new CONTROLFields())
+ // NOTE(review): the UInt literal here only sizes the register; the counter
+ // value itself is initialized in the explicit reset clause below. Presumably
+ // intended, but confirm the width is sufficient for large nNDResetCycles.
+ val ndresetCtrReg = Reg(UInt(cfg.nNDResetCycles))
+
+ val DMINFORdData = Wire (new DMINFOFields())
+
+ val HALTSUMRdData = Wire (new HALTSUMFields())
+
+ val RAMWrData = Wire (new RAMFields())
+ val RAMRdData = Wire (new RAMFields())
+
+ // --- System Bus Registers
+
+ val SETHALTNOTWrEn = Wire(Bool())
+ val SETHALTNOTWrData = Wire(UInt(width = sbIdWidth))
+ val CLEARDEBINTWrEn = Wire(Bool())
+ val CLEARDEBINTWrData = Wire(UInt(width = sbIdWidth))
+
+ // --- Interrupt & Halt Notification Registers
+
+ val interruptRegs = Reg(init=Vec.fill(cfg.nComponents){Bool(false)})
+
+ val haltnotRegs = Reg(init=Vec.fill(cfg.nComponents){Bool(false)})
+ val numHaltnotStatus = ((cfg.nComponents - 1) / 32) + 1
+
+ val haltnotStatus = Wire(Vec(numHaltnotStatus, Bits(width = 32)))
+ val rdHaltnotStatus = Wire(Bits(width = 32))
+
+ val haltnotSummary = Cat(haltnotStatus.map(_.orR).reverse)
+
+ // --- Debug RAM
+
+ // Since the access size from Debug Bus and System Bus may not be consistent,
+ // use the maximum to build the RAM, and then select as needed for the smaller
+ // size.
+
+ val dbRamDataWidth = DbBusConsts.dbRamWordBits
+ val sbRamDataWidth = tlDataBits
+ val dbRamAddrWidth = log2Up((cfg.nDebugRamBytes * 8) / dbRamDataWidth)
+ val sbRamAddrWidth = log2Up((cfg.nDebugRamBytes * 8) / sbRamDataWidth)
+ val sbRamAddrOffset = log2Up(tlDataBits/8)
+
+ val ramDataWidth = dbRamDataWidth max sbRamDataWidth
+ val ramAddrWidth = dbRamAddrWidth min sbRamAddrWidth
+ val ramMem = Mem(1 << ramAddrWidth , UInt(width=ramDataWidth))
+ val ramAddr = Wire(UInt(width=ramAddrWidth))
+ val ramRdData = Wire(UInt(width=ramDataWidth))
+ val ramWrData = Wire(UInt(width=ramDataWidth))
+ val ramWrMask = Wire(UInt(width=ramDataWidth))
+ val ramWrEn = Wire(Bool())
+
+ val dbRamAddr = Wire(UInt(width=dbRamAddrWidth))
+ val dbRamRdData = Wire (UInt(width=dbRamDataWidth))
+ val dbRamWrData = Wire(UInt(width=dbRamDataWidth))
+ val dbRamWrEn = Wire(Bool())
+ val dbRamRdEn = Wire(Bool())
+
+ val sbRamAddr = Wire(UInt(width=sbRamAddrWidth))
+ val sbRamRdData = Wire (UInt(width=sbRamDataWidth))
+ val sbRamWrData = Wire(UInt(width=sbRamDataWidth))
+ val sbRamWrEn = Wire(Bool())
+ val sbRamRdEn = Wire(Bool())
+
+ val sbRomRdData = Wire(UInt(width=tlDataBits))
+ val sbRomAddrOffset = log2Up(tlDataBits/8)
+
+ // --- Debug Bus Accesses
+
+ val dbRdEn = Wire(Bool())
+ val dbWrEn = Wire(Bool())
+ val dbRdData = Wire(UInt(width = DbBusConsts.dbDataSize))
+
+ val s_DB_READY :: s_DB_RESP :: Nil = Enum(Bits(), 2)
+ val dbStateReg = Reg(init = s_DB_READY)
+
+ val dbResult = Wire(io.db.resp.bits)
+
+ val dbReq = Wire(io.db.req.bits)
+ val dbRespReg = Reg(io.db.resp.bits)
+
+ val rdCondWrFailure = Wire(Bool())
+ val dbWrNeeded = Wire(Bool())
+
+ // --- System Bus Access
+ val sbAddr = Wire(UInt(width=sbAddrWidth))
+ val sbRdData = Wire(UInt(width=tlDataBits))
+ val sbWrData = Wire(UInt(width=tlDataBits))
+ val sbWrMask = Wire(UInt(width=tlDataBits))
+ val sbWrEn = Wire(Bool())
+ val sbRdEn = Wire(Bool())
+
+ val stallFromDb = Wire(Bool())
+ val stallFromSb = Wire(Bool())
+ //--------------------------------------------------------------
+ // Interrupt Registers
+ //--------------------------------------------------------------
+
+ for (component <- 0 until cfg.nComponents) {
+ io.debugInterrupts(component) := interruptRegs(component)
+ }
+
+ // Interrupt Registers are written by write to CONTROL or debugRAM addresses
+ // for Debug Bus, and cleared by writes to CLEARDEBINT by System Bus.
+ // It is "unspecified" what should happen if both
+ // SET and CLEAR happen at the same time. In this
+ // implementation, the SET wins.
+
+ for (component <- 0 until cfg.nComponents) {
+ when (CONTROLWrEn) {
+ when (CONTROLWrData.hartid === UInt(component)) {
+ interruptRegs(component) := interruptRegs(component) | CONTROLWrData.interrupt
+ }
+ }.elsewhen (dbRamWrEn) {
+ when (CONTROLReg.hartid === UInt(component)){
+ interruptRegs(component) := interruptRegs(component) | RAMWrData.interrupt
+ }
+ }.elsewhen (CLEARDEBINTWrEn){
+ when (CLEARDEBINTWrData === UInt(component, width = sbIdWidth)) {
+ interruptRegs(component) := Bool(false)
+ }
+ }
+ }
+
+ //--------------------------------------------------------------
+ // Halt Notification Registers
+ //--------------------------------------------------------------
+
+ // Halt Notifications Registers are cleared by zero write to CONTROL or debugRAM addresses
+ // for Debug Bus, and set by write to SETHALTNOT by System Bus.
+ // It is "unspecified" what should happen if both
+ // SET and CLEAR happen at the same time. In this
+ // implementation, the SET wins.
+
+ for (component <- 0 until cfg.nComponents) {
+ when (SETHALTNOTWrEn){
+ when (SETHALTNOTWrData === UInt(component, width = sbIdWidth)) {
+ haltnotRegs(component) := Bool(true)
+ }
+ } .elsewhen (CONTROLWrEn) {
+ when (CONTROLWrData.hartid === UInt(component)) {
+ haltnotRegs(component) := haltnotRegs(component) & CONTROLWrData.haltnot
+ }
+ }.elsewhen (dbRamWrEn) {
+ when (CONTROLReg.hartid === UInt(component)){
+ haltnotRegs(component) := haltnotRegs(component) & RAMWrData.haltnot
+ }
+ }
+ }
+
+ // Pack the per-component haltnot bits into 32-bit status words.
+ for (ii <- 0 until numHaltnotStatus) {
+ haltnotStatus(ii) := Cat(haltnotRegs.slice(ii * 32, (ii + 1) * 32).reverse)
+ }
+
+ //--------------------------------------------------------------
+ // Other Registers
+ //--------------------------------------------------------------
+
+ CONTROLReset.interrupt := Bool(false)
+ CONTROLReset.haltnot := Bool(false)
+ CONTROLReset.reserved0 := Bits(0)
+ CONTROLReset.buserror := Bits(0)
+ CONTROLReset.serial := Bits(0)
+ CONTROLReset.autoincrement := Bool(false)
+ CONTROLReset.access := UInt(DebugModuleAccessType.Access32Bit.id)
+ CONTROLReset.hartid := Bits(0)
+ CONTROLReset.ndreset := Bool(false)
+ CONTROLReset.fullreset := Bool(false)
+
+ // Because this version of DebugModule doesn't
+ // support authentication, this entire register is
+ // Read-Only constant wires.
+ DMINFORdData.reserved0 := Bits(0)
+ DMINFORdData.abussize := UInt(0) // Not Implemented.
+ DMINFORdData.serialcount := UInt(cfg.nSerialPorts)
+ DMINFORdData.access128 := Bool(cfg.hasAccess128)
+ DMINFORdData.access64 := Bool(cfg.hasAccess64)
+ DMINFORdData.access32 := Bool(cfg.hasAccess32)
+ DMINFORdData.access16 := Bool(cfg.hasAccess16)
+ DMINFORdData.accesss8 := Bool(cfg.hasAccess8)
+ DMINFORdData.dramsize := Bits((cfg.nDebugRamBytes >> 2) - 1) // Size in 32-bit words minus 1.
+ DMINFORdData.haltsum := Bool(cfg.hasHaltSum)
+ DMINFORdData.reserved1 := Bits(0)
+ DMINFORdData.authenticated := Bool(true) // Not Implemented.
+ DMINFORdData.authbusy := Bool(false) // Not Implemented.
+ DMINFORdData.authtype := UInt(cfg.authType.id)
+ DMINFORdData.version := UInt(1) // Conforms to RISC-V Debug Spec
+
+ HALTSUMRdData.serialfull := Bool(false) // Not Implemented
+ HALTSUMRdData.serialvalid := Bool(false) // Not Implemented
+ HALTSUMRdData.acks := haltnotSummary
+
+ //--------------------------------------------------------------
+ // Debug RAM Access (Debug Bus & System Bus)
+ //--------------------------------------------------------------
+
+ dbReq := io.db.req.bits
+ // Debug Bus RAM Access
+ // From Specification: Debug RAM is 0x00 - 0x0F
+ // 0x40 - 0x6F Not Implemented
+ dbRamAddr := dbReq.addr( dbRamAddrWidth-1 , 0)
+ dbRamWrData := dbReq.data
+ sbRamAddr := sbAddr(sbRamAddrWidth + sbRamAddrOffset - 1, sbRamAddrOffset)
+ sbRamWrData := sbWrData
+
+ require (dbRamAddrWidth >= ramAddrWidth) // SB accesses less than 32 bits Not Implemented.
+ val dbRamWrMask = Wire(init=Vec.fill(1 << (dbRamAddrWidth - ramAddrWidth)){Fill(dbRamDataWidth, UInt(1, width=1))})
+
+ // When the unified RAM word is wider than a debug-bus word, select the
+ // 32-bit sub-word addressed by the low debug-bus address bits.
+ if (dbRamDataWidth < ramDataWidth){
+
+ val dbRamSel = dbRamAddr(dbRamAddrWidth - ramAddrWidth - 1 , 0)
+ val rdDataWords = Vec.tabulate(1 << (dbRamAddrWidth - ramAddrWidth)){ ii =>
+ ramRdData((ii+1)*dbRamDataWidth - 1 , ii*dbRamDataWidth)}
+
+ dbRamWrMask := Vec.fill(1 << (dbRamAddrWidth - ramAddrWidth)){UInt(0, width = dbRamDataWidth)}
+ dbRamWrMask(dbRamSel) := Fill(dbRamDataWidth, UInt(1, width=1))
+ dbRamRdData := rdDataWords(dbRamSel)
+ } else {
+ dbRamRdData := ramRdData
+ }
+
+ sbRamRdData := ramRdData
+
+ ramWrMask := Mux(sbRamWrEn, sbWrMask, dbRamWrMask.toBits())
+
+ assert (!((dbRamWrEn | dbRamRdEn) & (sbRamRdEn | sbRamWrEn)), "Stall logic should have prevented concurrent SB/DB RAM Access")
+
+ // Make copies of DB RAM data before writing.
+ val dbRamWrDataVec = Fill(1 << (dbRamAddrWidth - ramAddrWidth), dbRamWrData)
+ ramWrData := Mux(sbRamWrEn,
+ (ramWrMask & sbRamWrData ) | (~ramWrMask & ramRdData),
+ (ramWrMask & dbRamWrDataVec.toBits) | (~ramWrMask & ramRdData))
+
+ ramAddr := Mux(sbRamWrEn | sbRamRdEn, sbRamAddr,
+ dbRamAddr >> (dbRamAddrWidth - ramAddrWidth))
+
+ ramRdData := ramMem(ramAddr)
+ when (ramWrEn) { ramMem(ramAddr) := ramWrData }
+
+ ramWrEn := sbRamWrEn | dbRamWrEn
+
+ //--------------------------------------------------------------
+ // Debug Bus Access
+ //--------------------------------------------------------------
+
+ // 0x00 - 0x0F Debug RAM
+ // 0x10 - 0x1B Registers
+ // 0x1C - 0x3B Halt Notification Registers
+ // 0x3C - 0x3F Registers
+ // 0x40 - 0x6F Debug RAM
+
+
+ // -----------------------------------------
+ // DB Access Write Decoder
+
+ CONTROLWrData := new CONTROLFields().fromBits(dbReq.data)
+ RAMWrData := new RAMFields().fromBits(dbReq.data)
+
+ dbRamWrEn := Bool(false)
+ CONTROLWrEn := Bool(false)
+ when ((dbReq.addr >> 4) === Bits(0)) { // 0x00 - 0x0F Debug RAM
+ dbRamWrEn := dbWrEn
+ }.elsewhen (dbReq.addr === DMCONTROL) {
+ CONTROLWrEn := dbWrEn
+ }.otherwise {
+ //Other registers/RAM are Not Implemented.
+ }
+
+ when (reset) {
+ CONTROLReg := CONTROLReset
+ ndresetCtrReg := UInt(0)
+ }.elsewhen (CONTROLWrEn) {
+ // interrupt handled in other logic
+ // haltnot handled in other logic
+ if (cfg.hasBusMaster){
+ // buserror is set 'until 0 is written to any bit in this field'.
+ CONTROLReg.buserror := Mux((CONTROLWrData.buserror === SInt(-1).toBits), CONTROLReg.buserror, UInt(0))
+ CONTROLReg.autoincrement := CONTROLWrData.autoincrement
+ CONTROLReg.access := CONTROLWrData.access
+ }
+ if (cfg.nSerialPorts > 0){
+ CONTROLReg.serial := CONTROLWrData.serial
+ }
+ CONTROLReg.hartid := CONTROLWrData.hartid
+ CONTROLReg.fullreset := CONTROLReg.fullreset | CONTROLWrData.fullreset
+ // Writing ndreset=1 reloads the pulse counter; otherwise it decays to 0.
+ when (CONTROLWrData.ndreset){
+ ndresetCtrReg := UInt(cfg.nNDResetCycles)
+ }.otherwise {
+ ndresetCtrReg := Mux(ndresetCtrReg === UInt(0) , UInt(0), ndresetCtrReg - UInt(1))
+ }
+ }.otherwise {
+ ndresetCtrReg := Mux(ndresetCtrReg === UInt(0) , UInt(0), ndresetCtrReg - UInt(1))
+ }
+
+ // -----------------------------------------
+ // DB Access Read Mux
+
+ CONTROLRdData := CONTROLReg;
+ CONTROLRdData.interrupt := interruptRegs(CONTROLReg.hartid)
+ CONTROLRdData.haltnot := haltnotRegs(CONTROLReg.hartid)
+ CONTROLRdData.ndreset := ndresetCtrReg.orR
+
+ RAMRdData.interrupt := interruptRegs(CONTROLReg.hartid)
+ RAMRdData.haltnot := haltnotRegs(CONTROLReg.hartid)
+ RAMRdData.data := dbRamRdData
+
+ dbRdData := UInt(0)
+
+ // Higher numbers of numHaltnotStatus Not Implemented.
+ // This logic assumes only up to 128 components.
+ // NOTE(review): the compare below is against UInt(ii) (0..3), but the
+ // read mux routes addresses 0x1C-0x1F here, so no value of ii can match
+ // and rdHaltnotStatus stays 0. Presumably this should compare against
+ // UInt(ii + 0x1C) or dbReq.addr(1,0) -- TODO confirm before changing.
+ rdHaltnotStatus := Bits(0)
+ for (ii <- 0 until numHaltnotStatus) {
+ when (dbReq.addr === UInt(ii)) {
+ rdHaltnotStatus := haltnotStatus(ii)
+ }
+ }
+
+ dbRamRdEn := Bool(false)
+ when ((dbReq.addr >> 4) === Bits(0)) { // 0x00 - 0x0F Debug RAM
+ dbRdData := RAMRdData.toBits()
+ dbRamRdEn := dbRdEn
+ }.elsewhen (dbReq.addr === DMCONTROL) {
+ dbRdData := CONTROLRdData.toBits()
+ }.elsewhen (dbReq.addr === DMINFO) {
+ dbRdData := DMINFORdData.toBits()
+ }.elsewhen (dbReq.addr === HALTSUM) {
+ if (cfg.hasHaltSum){
+ dbRdData := HALTSUMRdData.toBits()
+ } else {
+ dbRdData := UInt(0)
+ }
+ }.elsewhen ((dbReq.addr >> 2) === UInt(7)) { // 0x1C - 0x1F Haltnot
+ dbRdData := rdHaltnotStatus
+ } .otherwise {
+ //These Registers are not implemented in this version of DebugModule:
+ // AUTHDATA0
+ // AUTHDATA1
+ // SERDATA
+ // SERSTATUS
+ // SBUSADDRESS0
+ // SBUSADDRESS1
+ // SBDATA0
+ // SBDATA1
+ // SBADDRESS2
+ // SBDATA2
+ // SBDATA3
+ // 0x20 - 0x3B haltnot
+ // Upper bytes of Debug RAM.
+ dbRdData := UInt(0)
+ }
+
+ // Conditional write fails if MSB is set of the read data.
+ rdCondWrFailure := dbRdData(dbDataSize - 1 ) &&
+ (dbReq.op === db_OP_READ_COND_WRITE)
+
+ dbWrNeeded := (dbReq.op === db_OP_READ_WRITE) ||
+ ((dbReq.op === db_OP_READ_COND_WRITE) && ~rdCondWrFailure)
+
+ // This is only relevant at end of s_DB_READ.
+ dbResult.resp := Mux(rdCondWrFailure,
+ db_RESP_FAILURE,
+ db_RESP_SUCCESS)
+ dbResult.data := dbRdData
+
+ // -----------------------------------------
+ // DB Access State Machine Decode (Combo)
+ // A new request may be accepted either when idle or in the same cycle the
+ // previous response is being consumed (back-to-back operation).
+ io.db.req.ready := !stallFromSb && ((dbStateReg === s_DB_READY) ||
+ (dbStateReg === s_DB_RESP && io.db.resp.fire()))
+
+ io.db.resp.valid := (dbStateReg === s_DB_RESP)
+ io.db.resp.bits := dbRespReg
+
+ dbRdEn := io.db.req.fire()
+ dbWrEn := dbWrNeeded && io.db.req.fire()
+
+ // -----------------------------------------
+ // DB Access State Machine Update (Seq)
+
+ when (dbStateReg === s_DB_READY){
+ when (io.db.req.fire()){
+ dbStateReg := s_DB_RESP
+ dbRespReg := dbResult
+ }
+ } .elsewhen (dbStateReg === s_DB_RESP){
+ when (io.db.req.fire()){
+ dbStateReg := s_DB_RESP
+ dbRespReg := dbResult
+ }.elsewhen (io.db.resp.fire()){
+ dbStateReg := s_DB_READY
+ }
+ }
+
+
+ //--------------------------------------------------------------
+ // Debug ROM
+ //--------------------------------------------------------------
+
+ sbRomRdData := UInt(0)
+ if (cfg.hasDebugRom) {
+ // Inspired by ROMSlave
+ val romContents = cfg.debugRomContents.get
+ val romByteWidth = tlDataBits / 8
+ val romRows = (romContents.size + romByteWidth - 1)/romByteWidth
+ // Fold the little-endian byte image into one TileLink beat per row.
+ val romMem = Vec.tabulate(romRows) { ii =>
+ val slice = romContents.slice(ii*romByteWidth, (ii+1)*romByteWidth)
+ UInt(slice.foldRight(BigInt(0)) { case (x,y) => ((y << 8) + (x.toInt & 0xFF))}, width = romByteWidth*8)
+ }
+
+ val sbRomRdAddr = Wire(UInt())
+
+ if (romRows == 1) {
+ sbRomRdAddr := UInt(0)
+ } else {
+ sbRomRdAddr := sbAddr(log2Up(romRows) + sbRomAddrOffset - 1, sbRomAddrOffset)
+ }
+ sbRomRdData := romMem (sbRomRdAddr)
+ }
+
+ //--------------------------------------------------------------
+ // System Bus Access
+ //--------------------------------------------------------------
+
+
+ // -----------------------------------------
+ // SB Access Write Decoder
+
+ sbRamWrEn := Bool(false)
+ SETHALTNOTWrEn := Bool(false)
+ CLEARDEBINTWrEn := Bool(false)
+
+ if (tlDataBits == 32) {
+ SETHALTNOTWrData := sbWrData
+ CLEARDEBINTWrData := sbWrData
+ when (sbAddr(11, 8) === UInt(4)){ // 0x400-0x4ff is Debug RAM
+ sbRamWrEn := sbWrEn
+ sbRamRdEn := sbRdEn
+ }.elsewhen (sbAddr === SETHALTNOT){
+ SETHALTNOTWrEn := sbWrEn
+ }.elsewhen (sbAddr === CLEARDEBINT){
+ CLEARDEBINTWrEn := sbWrEn
+ }.otherwise {
+ //Other registers/RAM are Not Implemented.
+ }
+ } else {
+
+ // Pick out the correct word based on the address.
+ val sbWrDataWords = Vec.tabulate (tlDataBits / 32) {ii => sbWrData((ii+1)*32 - 1, ii*32)}
+ val sbWrMaskWords = Vec.tabulate (tlDataBits / 32) {ii => sbWrMask ((ii+1)*32 -1, ii*32)}
+
+ val sbWrSelTop = log2Up(tlDataBits/8) - 1
+ val sbWrSelBottom = 2
+
+ SETHALTNOTWrData := sbWrDataWords(SETHALTNOT(sbWrSelTop, sbWrSelBottom))
+ CLEARDEBINTWrData := sbWrDataWords(CLEARDEBINT(sbWrSelTop, sbWrSelBottom))
+
+ when (sbAddr(11,8) === UInt(4)){ //0x400-0x4ff is Debug RAM
+ sbRamWrEn := sbWrEn
+ sbRamRdEn := sbRdEn
+ }
+
+ // A register write counts only if the byte lanes covering that word are
+ // actually enabled in the TileLink write mask.
+ SETHALTNOTWrEn := sbAddr(sbAddrWidth - 1, sbWrSelTop + 1) === SETHALTNOT(sbAddrWidth-1, sbWrSelTop + 1) &&
+ (sbWrMaskWords(SETHALTNOT(sbWrSelTop, sbWrSelBottom))).orR &&
+ sbWrEn
+
+ CLEARDEBINTWrEn := sbAddr(sbAddrWidth - 1, sbWrSelTop + 1) === CLEARDEBINT(sbAddrWidth-1, sbWrSelTop + 1) &&
+ (sbWrMaskWords(CLEARDEBINT(sbWrSelTop, sbWrSelBottom))).orR &&
+ sbWrEn
+
+ }
+
+ // -----------------------------------------
+ // SB Access Read Mux
+
+ sbRdData := UInt(0)
+ sbRamRdEn := Bool(false)
+
+ // NOTE(review): this default targets the *debug-bus* read enable; by
+ // Chisel last-connect semantics it overrides the dbRamRdEn assignments
+ // made in the DB read mux above, forcing dbRamRdEn to constant false
+ // (its only use is the concurrency assert). Looks like a copy/paste slip
+ // for sbRamRdEn, whose default is already set just above -- TODO confirm
+ // intent before removing.
+ dbRamRdEn := Bool(false)
+ when (sbAddr(11, 8) === UInt(4)) { //0x400-0x4FF Debug RAM
+ sbRdData := sbRamRdData
+ sbRamRdEn := sbRdEn
+ }.elsewhen (sbAddr(11,8) === UInt(8) || sbAddr(11,8) === UInt(9)){ //0x800-0x9FF Debug ROM
+ if (cfg.hasDebugRom) {
+ sbRdData := sbRomRdData
+ } else {
+ sbRdData := UInt(0)
+ }
+ }. otherwise {
+ // All readable registers are Not Implemented.
+ sbRdData := UInt(0)
+ }
+
+ // -----------------------------------------
+ // SB Access State Machine -- based on BRAM Slave
+
+ val sbAcqReg = Reg(io.tl.acquire.bits)
+ val sbAcqValidReg = Reg(init = Bool(false))
+
+ val (sbReg_get :: sbReg_getblk :: sbReg_put :: sbReg_putblk :: Nil) = Seq(
+ Acquire.getType, Acquire.getBlockType, Acquire.putType, Acquire.putBlockType
+ ).map(sbAcqReg.isBuiltInType _)
+
+ val sbMultibeat = sbReg_getblk & sbAcqValidReg;
+
+ val sbBeatInc1 = sbAcqReg.addr_beat + UInt(1)
+
+ val sbLast = (sbAcqReg.addr_beat === UInt(tlDataBeats - 1))
+
+ sbAddr := sbAcqReg.full_addr()
+ sbRdEn := (sbAcqValidReg && (sbReg_get || sbReg_getblk))
+ sbWrEn := (sbAcqValidReg && (sbReg_put || sbReg_putblk))
+ sbWrData := sbAcqReg.data
+ sbWrMask := sbAcqReg.full_wmask()
+
+ // -----------------------------------------
+ // SB Access State Machine Update (Seq)
+
+ when (io.tl.acquire.fire()){
+ sbAcqReg := io.tl.acquire.bits
+ sbAcqValidReg := Bool(true)
+ } .elsewhen (io.tl.grant.fire()) {
+ // Multi-beat GetBlock: step to the next beat until the last one is sent.
+ when (sbMultibeat){
+ sbAcqReg.addr_beat := sbBeatInc1
+ when (sbLast) {
+ sbAcqValidReg := Bool(false)
+ }
+ } . otherwise {
+ sbAcqValidReg := Bool(false)
+ }
+ }
+
+
+ io.tl.grant.valid := sbAcqValidReg
+ io.tl.grant.bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = sbAcqReg.getBuiltInGrantType(),
+ client_xact_id = sbAcqReg.client_xact_id,
+ manager_xact_id = UInt(0),
+ addr_beat = sbAcqReg.addr_beat,
+ data = sbRdData
+ )
+
+ stallFromDb := Bool(false) // SB always wins, and DB latches its read data so it is not necessary for SB to wait
+
+ stallFromSb := sbRamRdEn || sbRamWrEn // pessimistically assume that DB/SB are going to conflict on the RAM,
+ // and SB doesn't latch its read data to it is necessary for DB hold
+ // off while SB is accessing the RAM and waiting to send its result.
+
+ val sbStall = (sbMultibeat & !sbLast) || (io.tl.grant.valid && !io.tl.grant.ready) || stallFromDb
+
+ io.tl.acquire.ready := !sbStall
+
+ //--------------------------------------------------------------
+ // Misc. Outputs
+ //--------------------------------------------------------------
+
+ io.ndreset := ndresetCtrReg.orR
+ io.fullreset := CONTROLReg.fullreset
+
+}
+
+/** Brings a DebugBusIO driven in (from_clock, from_reset) into the caller's
+ * clock domain via async decoupled crossings.
+ * NOTE(review): 'depth' and 'sync' are accepted but not forwarded to the
+ * crossings -- presumably their defaults are relied upon; confirm against
+ * AsyncDecoupledFrom/AsyncDecoupledTo.
+ */
+object AsyncDebugBusFrom { // OutsideClockDomain
+ def apply(from_clock: Clock, from_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2)(implicit p: Parameters): DebugBusIO = {
+ val sink = Wire(new DebugBusIO)
+ sink.req <> AsyncDecoupledFrom(from_clock, from_reset, source.req)
+ source.resp <> AsyncDecoupledTo(from_clock, from_reset, sink.resp)
+ sink
+ }
+}
+
+/** Mirror of AsyncDebugBusFrom: presents 'source' (local domain) as a
+ * DebugBusIO usable in (to_clock, to_reset).
+ * NOTE(review): 'depth' and 'sync' are accepted but not forwarded -- see
+ * AsyncDecoupledTo/AsyncDecoupledFrom for the defaults actually used.
+ */
+object AsyncDebugBusTo { // OutsideClockDomain
+ def apply(to_clock: Clock, to_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2)(implicit p: Parameters): DebugBusIO = {
+ val sink = Wire(new DebugBusIO)
+ sink.req <> AsyncDecoupledTo(to_clock, to_reset, source.req)
+ source.resp <> AsyncDecoupledFrom(to_clock, to_reset, sink.resp)
+ sink
+ }
+}
diff --git a/uncore/src/main/scala/devices/Dma.scala b/uncore/src/main/scala/devices/Dma.scala
new file mode 100644
index 00000000..036bf95b
--- /dev/null
+++ b/uncore/src/main/scala/devices/Dma.scala
@@ -0,0 +1,534 @@
+package uncore.devices
+
+import Chisel._
+import cde.{Parameters, Field}
+import junctions._
+import junctions.NastiConstants._
+import uncore.tilelink._
+
+case object NDmaTransactors extends Field[Int]
+case object NDmaXacts extends Field[Int]
+case object NDmaClients extends Field[Int]
+
+// Mixin exposing the DMA configuration drawn from the cde Parameters object.
+trait HasDmaParameters {
+  implicit val p: Parameters
+  val nDmaTransactors = p(NDmaTransactors)      // parallel DmaTracker instances
+  val nDmaXacts = p(NDmaXacts)                  // max outstanding DMA transactions
+  val nDmaClients = p(NDmaClients)              // clients that may issue requests
+  val dmaXactIdBits = log2Up(nDmaXacts)
+  val dmaClientIdBits = log2Up(nDmaClients)
+  val addrBits = p(PAddrBits)                   // physical address width
+  val dmaStatusBits = 2                         // width of the response status field
+  val dmaWordSizeBits = 2                       // encodes log2 of the word size
+}
+
+// Base Module for DMA components; mixes in HasDmaParameters so subclasses
+// can use the DMA configuration fields directly.
+abstract class DmaModule(implicit val p: Parameters) extends Module with HasDmaParameters
+// Base Bundle for DMA components; ParameterizedBundle keeps the implicit
+// Parameters available when the bundle is cloned.
+abstract class DmaBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasDmaParameters
+
+// A single DMA command. Field order matters: DmaManager reassembles this
+// bundle from a raw bit blob via fromBits, so do not reorder fields.
+class DmaRequest(implicit p: Parameters) extends DmaBundle()(p) {
+  val xact_id = UInt(width = dmaXactIdBits)     // transaction tag, echoed in the response
+  val client_id = UInt(width = dmaClientIdBits) // issuing client, used for status write-back
+  val cmd = UInt(width = DmaRequest.DMA_CMD_SZ) // one of the DMA_CMD_* encodings
+  val source = UInt(width = addrBits)           // source address (or stream MMIO address)
+  val dest = UInt(width = addrBits)             // destination address
+  val length = UInt(width = addrBits)           // transfer length in bytes
+  val size = UInt(width = dmaWordSizeBits)      // log2 word size for streaming commands
+}
+
+// Completion message for a DmaRequest; echoes the request's identifiers.
+class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) {
+  val xact_id = UInt(width = dmaXactIdBits)
+  val client_id = UInt(width = dmaClientIdBits)
+  val status = UInt(width = dmaStatusBits)      // 0 indicates success
+}
+
+// Command encodings and a convenience constructor for DmaRequest.
+object DmaRequest {
+  val DMA_CMD_SZ = 3
+
+  // cmd encodings: copy, prefetch-for-read/write (bit 0 selects put),
+  // stream-in (MMIO -> memory), stream-out (memory -> MMIO)
+  val DMA_CMD_COPY = UInt("b000")
+  val DMA_CMD_PFR  = UInt("b010")
+  val DMA_CMD_PFW  = UInt("b011")
+  val DMA_CMD_SIN  = UInt("b100")
+  val DMA_CMD_SOUT = UInt("b101")
+
+  // Builds a fully-populated DmaRequest wire.
+  def apply(xact_id: UInt = UInt(0),
+            client_id: UInt,
+            cmd: UInt,
+            source: UInt,
+            dest: UInt,
+            length: UInt,
+            size: UInt = UInt(0))(implicit p: Parameters): DmaRequest = {
+    val req = Wire(new DmaRequest)
+    req.xact_id := xact_id
+    req.client_id := client_id
+    req.cmd := cmd
+    req.source := source
+    req.dest := dest
+    req.length := length
+    req.size := size
+    req
+  }
+}
+import DmaRequest._
+
+// Request/response channel pair between a DMA client and the trackers.
+class DmaIO(implicit p: Parameters) extends DmaBundle()(p) {
+  val req = Decoupled(new DmaRequest)
+  val resp = Decoupled(new DmaResponse).flip
+}
+
+// Per-tracker interface: DMA command channel plus a TileLink port for
+// memory traffic and a NASTI port for streaming MMIO traffic.
+class DmaTrackerIO(implicit p: Parameters) extends DmaBundle()(p) {
+  val dma = (new DmaIO).flip
+  val mem = new ClientUncachedTileLinkIO
+  val mmio = new NastiIO
+}
+
+/** Front-end for the DMA engine.
+  *
+  * Software writes a packed DmaRequest over the NASTI `ctrl` slave port as a
+  * burst of bus words; the manager reassembles it, issues it on `dma`, and on
+  * completion writes the status word back to the requesting client over the
+  * `mmio` master port.
+  *
+  * @param outstandingCSR legacy CSR index for the status write-back address;
+  *        the CSR MMIO scheme no longer exists, so elaborating this module
+  *        currently fails at the addrTable require below.
+  */
+class DmaManager(outstandingCSR: Int)(implicit p: Parameters)
+    extends DmaModule()(p)
+    with HasNastiParameters
+    with HasAddrMapParameters {
+
+  val io = new Bundle {
+    val ctrl = (new NastiIO).flip  // slave: receives DMA requests
+    val mmio = new NastiIO         // master: writes completion status
+    val dma = new DmaIO            // to the tracker file
+  }
+
+  // Control FSM: accept write address, collect data beats, issue the DMA
+  // request, then return the NASTI write response.
+  val s_idle :: s_wdata :: s_dma_req :: s_wresp :: Nil = Enum(Bits(), 4)
+  val state = Reg(init = s_idle)
+
+  // A DmaRequest is about 4 address-widths of payload, delivered as
+  // nCtrlWords consecutive bus words and reassembled in ctrl_regs.
+  val nCtrlWords = (addrBits * 4) / nastiXDataBits
+  val ctrl_regs = Reg(Vec(nCtrlWords, UInt(width = nastiXDataBits)))
+  val ctrl_idx = Reg(UInt(width = log2Up(nCtrlWords)))
+  val ctrl_done = Reg(Bool())            // true once all nCtrlWords are captured
+  val ctrl_blob = ctrl_regs.toBits
+  val ctrl_id = Reg(UInt(width = nastiXIdBits))  // AW id, echoed on B
+
+  // Bit offsets of the size/cmd fields within the packed request blob
+  // (documentation of the layout; the blob is decoded with fromBits below).
+  val sizeOffset = 3 * addrBits
+  val cmdOffset = sizeOffset + dmaWordSizeBits
+
+  // Reinterpret the collected words as a DmaRequest; field order in
+  // DmaRequest defines the packing.
+  val dma_req = new DmaRequest().fromBits(ctrl_blob)
+  val dma_busy = Reg(init = UInt(0, nDmaXacts))  // one bit per in-flight xact id
+  val dma_xact_id = PriorityEncoder(~dma_busy)   // lowest free transaction id
+
+  when (io.ctrl.aw.fire()) {
+    ctrl_id := io.ctrl.aw.bits.id
+    ctrl_idx := UInt(0)
+    ctrl_done := Bool(false)
+    state := s_wdata
+  }
+
+  when (io.ctrl.w.fire()) {
+    when (!ctrl_done) {
+      ctrl_regs(ctrl_idx) := io.ctrl.w.bits.data
+      ctrl_idx := ctrl_idx + UInt(1)
+    }
+    when (ctrl_idx === UInt(nCtrlWords - 1)) { ctrl_done := Bool(true) }
+    when (io.ctrl.w.bits.last) { state := s_dma_req }
+  }
+
+  // Set the busy bit on request issue, clear it on response receipt.
+  dma_busy := (dma_busy |
+    Mux(io.dma.req.fire(), UIntToOH(dma_xact_id), UInt(0))) &
+    ~Mux(io.dma.resp.fire(), UIntToOH(io.dma.resp.bits.xact_id), UInt(0))
+
+  when (io.dma.req.fire()) { state := s_wresp }
+  when (io.ctrl.b.fire()) { state := s_idle }
+
+  io.ctrl.ar.ready := Bool(false)        // control port is write-only
+  io.ctrl.aw.ready := (state === s_idle)
+  io.ctrl.w.ready := (state === s_wdata)
+
+  io.ctrl.r.valid := Bool(false)
+  io.ctrl.b.valid := (state === s_wresp)
+  io.ctrl.b.bits := NastiWriteResponseChannel(id = ctrl_id)
+
+  // Stall issuing if every transaction id is in use.
+  io.dma.req.valid := (state === s_dma_req) && !dma_busy.andR
+  io.dma.req.bits := dma_req
+  io.dma.req.bits.xact_id := dma_xact_id
+
+  // Status write-back: a completed DMA response triggers one NASTI write
+  // (address + data + response) to the originating client.
+  val resp_waddr_pending = Reg(init = Bool(false))
+  val resp_wdata_pending = Reg(init = Bool(false))
+  val resp_wresp_pending = Reg(init = Bool(false))
+  val resp_pending = resp_waddr_pending || resp_wdata_pending || resp_wresp_pending
+
+  val resp_client_id = Reg(UInt(width = dmaClientIdBits))
+  val resp_status = Reg(UInt(width = dmaStatusBits))
+
+  io.dma.resp.ready := !resp_pending
+
+  when (io.dma.resp.fire()) {
+    resp_client_id := io.dma.resp.bits.client_id
+    resp_status := io.dma.resp.bits.status
+    resp_waddr_pending := Bool(true)
+    resp_wdata_pending := Bool(true)
+    resp_wresp_pending := Bool(true)
+  }
+
+  // Per-client status addresses. The old CSR-based MMIO scheme was removed,
+  // so this intentionally fails at elaboration time if instantiated.
+  val addrTable = Vec.tabulate(nDmaClients) { i =>
+    //UInt(addrMap(s"conf:csr$i").start + outstandingCSR * csrDataBytes)
+    require(false, "CSR MMIO ports no longer exist")
+    UInt(0)
+  }
+
+  io.mmio.ar.valid := Bool(false)
+  io.mmio.aw.valid := resp_waddr_pending
+  io.mmio.aw.bits := NastiWriteAddressChannel(
+    id = UInt(0),
+    addr = addrTable(resp_client_id),
+    size = { require(false, "CSR MMIO ports no longer exist"); UInt(0) })
+  io.mmio.w.valid := resp_wdata_pending
+  io.mmio.w.bits := NastiWriteDataChannel(data = resp_status)
+  io.mmio.b.ready := resp_wresp_pending
+  io.mmio.r.ready := Bool(false)
+
+  when (io.mmio.aw.fire()) { resp_waddr_pending := Bool(false) }
+  when (io.mmio.w.fire()) { resp_wdata_pending := Bool(false) }
+  when (io.mmio.b.fire()) { resp_wresp_pending := Bool(false) }
+}
+
+// Top-level DMA engine: one DmaManager front-end plus a DmaTrackerFile of
+// workers, with the trackers' TileLink ports arbitrated onto a single `mem`
+// port and all NASTI masters arbitrated onto a single `mmio` port.
+class DmaEngine(outstandingCSR: Int)(implicit p: Parameters) extends DmaModule()(p) {
+  val io = new Bundle {
+    val ctrl = (new NastiIO).flip
+    val mem = new ClientUncachedTileLinkIO
+    val mmio = new NastiIO
+  }
+
+  val manager = Module(new DmaManager(outstandingCSR))
+  val trackers = Module(new DmaTrackerFile)
+
+  manager.io.ctrl <> io.ctrl
+  trackers.io.dma <> manager.io.dma
+
+  // Inner side: trackers' TileLink memory ports; outer side: trackers'
+  // streaming ports plus the manager's status write-back port.
+  val innerIOs = trackers.io.mem
+  val outerIOs = trackers.io.mmio :+ manager.io.mmio
+
+  val innerArb = Module(new ClientUncachedTileLinkIOArbiter(innerIOs.size))
+  innerArb.io.in <> innerIOs
+  io.mem <> innerArb.io.out
+
+  val outerArb = Module(new NastiArbiter(outerIOs.size))
+  outerArb.io.master <> outerIOs
+  io.mmio <> outerArb.io.slave
+
+  // The engine has no error-reporting path, so surface NASTI errors loudly.
+  assert(!io.mmio.b.valid || io.mmio.b.bits.resp === UInt(0),
+    "DmaEngine: NASTI write response error")
+
+  assert(!io.mmio.r.valid || io.mmio.r.bits.resp === UInt(0),
+    "DmaEngine: NASTI read response error")
+}
+
+// Pool of DmaTrackers: dispatches each incoming request to the first idle
+// tracker and round-robin arbitrates their responses back to the client.
+class DmaTrackerFile(implicit p: Parameters) extends DmaModule()(p) {
+  val io = new Bundle {
+    val dma = (new DmaIO).flip
+    val mem = Vec(nDmaTransactors, new ClientUncachedTileLinkIO)
+    val mmio = Vec(nDmaTransactors, new NastiIO)
+  }
+
+  val trackers = List.fill(nDmaTransactors) { Module(new DmaTracker) }
+  val reqReadys = Vec(trackers.map(_.io.dma.req.ready)).toBits
+
+  io.mem <> trackers.map(_.io.mem)
+  io.mmio <> trackers.map(_.io.mmio)
+
+  if (nDmaTransactors > 1) {
+    val resp_arb = Module(new RRArbiter(new DmaResponse, nDmaTransactors))
+    resp_arb.io.in <> trackers.map(_.io.dma.resp)
+    io.dma.resp <> resp_arb.io.out
+
+    // Steer the request to the lowest-numbered ready tracker.
+    val selection = PriorityEncoder(reqReadys)
+    trackers.zipWithIndex.foreach { case (tracker, i) =>
+      tracker.io.dma.req.valid := io.dma.req.valid && selection === UInt(i)
+      tracker.io.dma.req.bits := io.dma.req.bits
+    }
+    io.dma.req.ready := reqReadys.orR
+  } else {
+    // Single tracker: connect it straight through, no arbitration needed.
+    io.dma <> trackers.head.io.dma
+  }
+}
+
+/** Worker that executes a single DmaRequest at a time.
+  *
+  * Copies use a double-buffered block scheme: GetBlocks fill one half of
+  * data_buffer while PutBlocks drain the other, with a barrel shifter to
+  * handle source/destination misalignment. Prefetch commands issue
+  * Get/PutPrefetch acquires. Stream-in/out commands move data between a
+  * fixed-address NASTI MMIO port and memory one word at a time.
+  */
+class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
+    with HasTileLinkParameters with HasNastiParameters {
+  val io = new DmaTrackerIO
+
+  private val blockOffset = tlBeatAddrBits + tlByteAddrBits  // bits addressing within a block
+  private val blockBytes = tlDataBeats * tlDataBytes
+
+  // Two cache blocks of buffering (double buffer halves selected by
+  // get_half / put_half); get_inflight has one bit per buffered beat.
+  val data_buffer = Reg(Vec(2 * tlDataBeats, Bits(width = tlDataBits)))
+  val get_inflight = Reg(UInt(2 * tlDataBeats))
+  val put_inflight = Reg(Bool())
+  val put_half = Reg(UInt(width = 1))
+  val get_half = Reg(UInt(width = 1))
+  val prefetch_put = Reg(Bool())          // true: PutPrefetch, false: GetPrefetch
+  val get_done = !get_inflight.orR
+
+  val src_block = Reg(UInt(width = tlBlockAddrBits))
+  val dst_block = Reg(UInt(width = tlBlockAddrBits))
+  val offset = Reg(UInt(width = blockOffset))      // dest offset within first block
+  val alignment = Reg(UInt(width = blockOffset))   // |src_off - dst_off|
+  val shift_dir = Reg(Bool())                      // true when src_off < dst_off
+
+  val bytes_left = Reg(UInt(width = addrBits))
+  // Streaming (SIN/SOUT) state: fixed MMIO address, burst length/size,
+  // and the current byte index within the buffered block.
+  val streaming = Reg(Bool())
+  val stream_addr = Reg(UInt(width = nastiXAddrBits))
+  val stream_len = Reg(UInt(width = nastiXLenBits))
+  val stream_size = Reg(UInt(width = nastiXSizeBits))
+  val stream_idx = Reg(UInt(width = blockOffset))
+  val stream_bytesel = MuxLookup(stream_size, UInt("b11111111"), Seq(
+    UInt("b00") -> UInt("b00000001"),
+    UInt("b01") -> UInt("b00000011"),
+    UInt("b10") -> UInt("b00001111")))
+  val stream_mask = FillInterleaved(8, stream_bytesel)
+  // NOTE(review): stream_last is written but never read — confirm dead state.
+  val stream_last = Reg(Bool())
+
+  val stream_word_bytes = UInt(1) << stream_size
+  val stream_beat_idx = stream_idx(blockOffset - 1, tlByteAddrBits)
+  val stream_byte_idx = stream_idx(tlByteAddrBits - 1, 0)
+  val stream_bitshift = Cat(stream_byte_idx, UInt(0, 3))
+  // Merge one incoming MMIO word into its beat of the buffer.
+  val stream_in_beat =
+    (((io.mmio.r.bits.data & stream_mask) << stream_bitshift)) |
+    (data_buffer(stream_beat_idx) & ~(stream_mask << stream_bitshift))
+  val stream_out_word = data_buffer(stream_beat_idx) >> stream_bitshift
+  val stream_out_last = bytes_left === stream_word_bytes
+
+  val acq = io.mem.acquire.bits
+  val gnt = io.mem.grant.bits
+
+  val (s_idle :: s_get :: s_put :: s_prefetch ::
+       s_stream_read_req :: s_stream_read_resp ::
+       s_stream_write_req :: s_stream_write_data :: s_stream_write_resp ::
+       s_wait :: s_resp :: Nil) = Enum(Bits(), 11)
+  val state = Reg(init = s_idle)
+
+  val (put_beat, put_done) = Counter(
+    io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
+
+  // Byte-enable for the current put beat: only bytes inside
+  // [offset, bytes_left) of the destination block are written.
+  val put_mask = Vec.tabulate(tlDataBytes) { i =>
+    val byte_index = Cat(put_beat, UInt(i, tlByteAddrBits))
+    byte_index >= offset && byte_index < bytes_left
+  }.toBits
+
+  val prefetch_sent = io.mem.acquire.fire() && io.mem.acquire.bits.isPrefetch()
+  val prefetch_busy = Reg(init = UInt(0, tlMaxClientXacts))
+  val (prefetch_id, _) = Counter(prefetch_sent, tlMaxClientXacts)
+
+  // Alignment shifter: assemble each outgoing put beat from one or two
+  // buffered beats, shifted by the source/destination misalignment.
+  val base_index = Cat(put_half, put_beat)
+  val put_data = Wire(init = Bits(0, tlDataBits))
+  val beat_align = alignment(blockOffset - 1, tlByteAddrBits)
+  val bit_align = Cat(alignment(tlByteAddrBits - 1, 0), UInt(0, 3))
+  val rev_align = UInt(tlDataBits) - bit_align
+
+  // NOTE(review): getBit appears unused in this module — confirm dead code.
+  def getBit(value: UInt, sel: UInt): Bool =
+    (value >> sel)(0)
+
+  when (alignment === UInt(0)) {
+    put_data := data_buffer.read(base_index)
+  } .elsewhen (shift_dir) {
+    // Destination ahead of source: shift buffered data up.
+    val shift_index = base_index - beat_align
+    when (bit_align === UInt(0)) {
+      put_data := data_buffer.read(shift_index)
+    } .otherwise {
+      val upper_bits = data_buffer.read(shift_index)
+      val lower_bits = data_buffer.read(shift_index - UInt(1))
+      val upper_shifted = upper_bits << bit_align
+      val lower_shifted = lower_bits >> rev_align
+      put_data := upper_shifted | lower_shifted
+    }
+  } .otherwise {
+    // Source ahead of destination: shift buffered data down.
+    val shift_index = base_index + beat_align
+    when (bit_align === UInt(0)) {
+      put_data := data_buffer.read(shift_index)
+    } .otherwise {
+      val upper_bits = data_buffer.read(shift_index + UInt(1))
+      val lower_bits = data_buffer.read(shift_index)
+      val upper_shifted = upper_bits << rev_align
+      val lower_shifted = lower_bits >> bit_align
+      put_data := upper_shifted | lower_shifted
+    }
+  }
+
+  val put_acquire = PutBlock(
+    client_xact_id = UInt(2),
+    addr_block = dst_block,
+    addr_beat = put_beat,
+    data = put_data,
+    wmask = Some(put_mask))
+
+  // client_xact_id = get_half lets the grant handler tell which buffer
+  // half a returning beat belongs to.
+  val get_acquire = GetBlock(
+    client_xact_id = get_half,
+    addr_block = src_block,
+    alloc = Bool(false))
+
+  val prefetch_acquire = Mux(prefetch_put,
+    PutPrefetch(client_xact_id = prefetch_id, addr_block = dst_block),
+    GetPrefetch(client_xact_id = prefetch_id, addr_block = dst_block))
+
+  val resp_xact_id = Reg(UInt(width = dmaXactIdBits))
+  val resp_client_id = Reg(UInt(width = dmaClientIdBits))
+
+  // Puts wait until all outstanding gets have returned (get_done).
+  io.mem.acquire.valid := (state === s_get) ||
+                          (state === s_put && get_done) ||
+                          (state === s_prefetch && !prefetch_busy(prefetch_id))
+  io.mem.acquire.bits := MuxLookup(
+    state, prefetch_acquire, Seq(
+      s_get -> get_acquire,
+      s_put -> put_acquire))
+  io.mem.grant.ready := Bool(true)
+  io.dma.req.ready := state === s_idle
+  io.dma.resp.valid := state === s_resp
+  io.dma.resp.bits.xact_id := resp_xact_id
+  io.dma.resp.bits.client_id := resp_client_id
+  io.dma.resp.bits.status := UInt(0)
+  io.mmio.ar.valid := (state === s_stream_read_req)
+  io.mmio.ar.bits := NastiReadAddressChannel(
+    id = UInt(0),
+    addr = stream_addr,
+    size = stream_size,
+    len = stream_len,
+    burst = BURST_FIXED)
+  io.mmio.r.ready := (state === s_stream_read_resp)
+
+  io.mmio.aw.valid := (state === s_stream_write_req)
+  io.mmio.aw.bits := NastiWriteAddressChannel(
+    id = UInt(0),
+    addr = stream_addr,
+    size = stream_size,
+    len = stream_len,
+    burst = BURST_FIXED)
+  io.mmio.w.valid := (state === s_stream_write_data) && get_done
+  io.mmio.w.bits := NastiWriteDataChannel(
+    data = stream_out_word,
+    last = stream_out_last)
+  io.mmio.b.ready := (state === s_stream_write_resp)
+
+  // Request acceptance: latch all per-transfer state and dispatch on cmd.
+  when (io.dma.req.fire()) {
+    val src_off = io.dma.req.bits.source(blockOffset - 1, 0)
+    val dst_off = io.dma.req.bits.dest(blockOffset - 1, 0)
+    val direction = src_off < dst_off
+
+    resp_xact_id := io.dma.req.bits.xact_id
+    resp_client_id := io.dma.req.bits.client_id
+    src_block := io.dma.req.bits.source(addrBits - 1, blockOffset)
+    dst_block := io.dma.req.bits.dest(addrBits - 1, blockOffset)
+    alignment := Mux(direction, dst_off - src_off, src_off - dst_off)
+    shift_dir := direction
+    offset := dst_off
+    bytes_left := io.dma.req.bits.length + dst_off
+    get_inflight := UInt(0)
+    put_inflight := Bool(false)
+    get_half := UInt(0)
+    put_half := UInt(0)
+    streaming := Bool(false)
+    stream_len := (io.dma.req.bits.length >> io.dma.req.bits.size) - UInt(1)
+    stream_size := io.dma.req.bits.size
+    stream_last := Bool(false)
+
+    when (io.dma.req.bits.cmd === DMA_CMD_COPY) {
+      state := s_get
+    } .elsewhen (io.dma.req.bits.cmd(2, 1) === UInt("b01")) {
+      // Prefetch commands (PFR/PFW); bit 0 selects the put variant.
+      prefetch_put := io.dma.req.bits.cmd(0)
+      state := s_prefetch
+    } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SIN) {
+      stream_addr := io.dma.req.bits.source
+      stream_idx := dst_off
+      streaming := Bool(true)
+      alignment := UInt(0)
+      state := s_stream_read_req
+    } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SOUT) {
+      stream_addr := io.dma.req.bits.dest
+      stream_idx := src_off
+      streaming := Bool(true)
+      bytes_left := io.dma.req.bits.length
+      state := s_stream_write_req
+    }
+  }
+
+  when (io.mmio.ar.fire()) { state := s_stream_read_resp }
+
+  // Stream-in: merge each incoming word; flush the block to memory when it
+  // fills or the burst ends.
+  when (io.mmio.r.fire()) {
+    data_buffer(stream_beat_idx) := stream_in_beat
+    stream_idx := stream_idx + stream_word_bytes
+    val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
+    when (block_finished || io.mmio.r.bits.last) { state := s_put }
+  }
+
+  when (io.mmio.aw.fire()) { state := s_get }
+
+  // Stream-out: send one buffered word per beat; refill from memory when
+  // the block is drained, finish after the last word.
+  when (io.mmio.w.fire()) {
+    stream_idx := stream_idx + stream_word_bytes
+    bytes_left := bytes_left - stream_word_bytes
+    val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
+    when (stream_out_last) {
+      state := s_stream_write_resp
+    } .elsewhen (block_finished) {
+      state := s_get
+    }
+  }
+
+  when (io.mmio.b.fire()) { state := s_resp }
+
+  // GetBlock issued: mark its beats in flight and decide whether a second
+  // block read is needed before the first put (misaligned copies).
+  when (state === s_get && io.mem.acquire.ready) {
+    get_inflight := get_inflight | FillInterleaved(tlDataBeats, UIntToOH(get_half))
+    src_block := src_block + UInt(1)
+    when (streaming) {
+      state := s_stream_write_data
+    } .otherwise {
+      val bytes_in_buffer = UInt(blockBytes) - alignment
+      val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off
+                       get_half === UInt(0) && // this is the first block
+                       bytes_in_buffer < bytes_left // there is still more data left to fetch
+      get_half := get_half + UInt(1)
+      when (!extra_read) { state := s_put }
+    }
+  }
+
+  when (prefetch_sent) {
+    prefetch_busy := prefetch_busy | UIntToOH(prefetch_id)
+    when (bytes_left < UInt(blockBytes)) {
+      bytes_left := UInt(0)
+      state := s_resp
+    } .otherwise {
+      bytes_left := bytes_left - UInt(blockBytes)
+      dst_block := dst_block + UInt(1)
+    }
+  }
+
+  // Grant handling: retire prefetches, capture get data into the correct
+  // buffer half, or complete the outstanding put.
+  when (io.mem.grant.fire()) {
+    when (gnt.g_type === Grant.prefetchAckType) {
+      prefetch_busy := prefetch_busy & ~UIntToOH(gnt.client_xact_id)
+    } .elsewhen (gnt.hasData()) {
+      val write_half = gnt.client_xact_id(0)
+      val write_idx = Cat(write_half, gnt.addr_beat)
+      get_inflight := get_inflight & ~UIntToOH(write_idx)
+      data_buffer.write(write_idx, gnt.data)
+    } .otherwise {
+      put_inflight := Bool(false)
+    }
+  }
+
+  when (put_done) { // state === s_put
+    when (!streaming) {
+      put_half := put_half + UInt(1)
+    }
+    offset := UInt(0)
+    stream_idx := UInt(0)
+    when (bytes_left < UInt(blockBytes)) {
+      bytes_left := UInt(0)
+    } .otherwise {
+      bytes_left := bytes_left - UInt(blockBytes)
+    }
+    put_inflight := Bool(true)
+    dst_block := dst_block + UInt(1)
+    state := s_wait
+  }
+
+  // After a put completes, either finish, resume streaming, or copy the
+  // next block.
+  when (state === s_wait && get_done && !put_inflight) {
+    state := MuxCase(s_get, Seq(
+      (bytes_left === UInt(0)) -> s_resp,
+      streaming -> s_stream_read_resp))
+  }
+
+  when (io.dma.resp.fire()) { state := s_idle }
+}
diff --git a/uncore/src/main/scala/devices/Plic.scala b/uncore/src/main/scala/devices/Plic.scala
new file mode 100644
index 00000000..776c581b
--- /dev/null
+++ b/uncore/src/main/scala/devices/Plic.scala
@@ -0,0 +1,187 @@
+// See LICENSE for license details.
+
+package uncore.devices
+
+import Chisel._
+import Chisel.ImplicitConversions._
+
+import junctions._
+import uncore.tilelink._
+import cde.Parameters
+
+// Handshake between an interrupt gateway and the PLIC core:
+//   valid    - gateway has an interrupt to post
+//   ready    - PLIC can latch the interrupt as pending
+//   complete - the claimed interrupt has been serviced
+class GatewayPLICIO extends Bundle {
+  val valid = Bool(OUTPUT)
+  val ready = Bool(INPUT)
+  val complete = Bool(INPUT)
+}
+
+// Gateway for a level-sensitive interrupt source: after the PLIC accepts
+// the interrupt, suppress further posts until completion is signaled, so a
+// still-asserted level line is not re-posted while being serviced.
+class LevelGateway extends Module {
+  val io = new Bundle {
+    val interrupt = Bool(INPUT)
+    val plic = new GatewayPLICIO
+  }
+
+  // In-flight between acceptance (ready) and completion (complete).
+  val inFlight = Reg(init=Bool(false))
+  when (io.interrupt && io.plic.ready) { inFlight := true }
+  when (io.plic.complete) { inFlight := false }
+  io.plic.valid := io.interrupt && !inFlight
+}
+
+/** Static configuration and address-map geometry for the PLIC.
+  *
+  * @param nHartsIn    number of physical harts attached
+  * @param supervisor  whether each hart also has an S-mode context
+  * @param nDevices    number of interrupt sources
+  * @param nPriorities number of distinct priority levels (0 = fixed priority)
+  */
+case class PLICConfig(nHartsIn: Int, supervisor: Boolean, nDevices: Int, nPriorities: Int) {
+  /** Contexts per hart: M-mode only, or M-mode plus S-mode. */
+  def contextsPerHart = if (supervisor) 2 else 1
+  /** Total context count ("harts" from the PLIC's point of view). */
+  def nHarts = nHartsIn * contextsPerHart
+  /** Flat context index for hart `i` in privilege mode 'M' or 'S'. */
+  def context(i: Int, mode: Char) = mode match {
+    case 'M' => contextsPerHart * i
+    case 'S' => require(supervisor); contextsPerHart * i + 1
+  }
+  /** Byte address of the claim/complete register for a context. */
+  def claimAddr(i: Int, mode: Char) = hartBase + claimOffset + hartOffset(context(i, mode))
+  /** Byte address of the priority-threshold register for a context. */
+  def threshAddr(i: Int, mode: Char) = hartBase + hartOffset(context(i, mode))
+  /** Byte address of the enable bitmap for a context. */
+  def enableAddr(i: Int, mode: Char) = enableBase + enableOffset(context(i, mode))
+  /** Total region size; sized for maxHarts so the map is config-independent. */
+  def size = hartBase + hartOffset(maxHarts)
+
+  def maxDevices = 1023
+  def maxHarts = 15872
+  def pendingBase = 0x1000
+  def enableBase = 0x2000
+  def hartBase = 0x200000
+  require(hartBase >= enableBase + enableOffset(maxHarts))
+
+  /** One enable bit per device, packed into bytes, per context. */
+  def enableOffset(i: Int) = ((maxDevices + 7) / 8) * i
+  /** Each context owns a 4 KiB page of threshold/claim registers. */
+  def hartOffset(i: Int) = 0x1000 * i
+  def claimOffset = 4
+  def priorityBytes = 4
+
+  require(nDevices > 0 && nDevices <= maxDevices)
+  require(nHarts > 0 && nHarts <= maxHarts)
+  require(nPriorities >= 0 && nPriorities <= nDevices)
+}
+
+/** Platform-Level Interrupt Controller */
+class PLIC(val cfg: PLICConfig)(implicit val p: Parameters) extends Module
+ with HasTileLinkParameters
+ with HasAddrMapParameters {
+ val io = new Bundle {
+ val devices = Vec(cfg.nDevices, new GatewayPLICIO).flip
+ val harts = Vec(cfg.nHarts, Bool()).asOutput
+ val tl = new ClientUncachedTileLinkIO().flip
+ }
+
+ val priority =
+ if (cfg.nPriorities > 0) Reg(Vec(cfg.nDevices+1, UInt(width=log2Up(cfg.nPriorities+1))))
+ else Wire(init=Vec.fill(cfg.nDevices+1)(UInt(1)))
+ val threshold =
+ if (cfg.nPriorities > 0) Reg(Vec(cfg.nHarts, UInt(width = log2Up(cfg.nPriorities+1))))
+ else Wire(init=Vec.fill(cfg.nHarts)(UInt(0)))
+ val pending = Reg(init=Vec.fill(cfg.nDevices+1){Bool(false)})
+ val enables = Reg(Vec(cfg.nHarts, Vec(cfg.nDevices+1, Bool())))
+
+ for ((p, g) <- pending.tail zip io.devices) {
+ g.ready := !p
+ g.complete := false
+ when (g.valid) { p := true }
+ }
+
+ def findMax(x: Seq[UInt]): (UInt, UInt) = {
+ if (x.length > 1) {
+ val half = 1 << (log2Ceil(x.length) - 1)
+ val lMax = findMax(x take half)
+ val rMax = findMax(x drop half)
+ val useLeft = lMax._1 >= rMax._1
+ (Mux(useLeft, lMax._1, rMax._1), Mux(useLeft, lMax._2, UInt(half) + rMax._2))
+ } else (x.head, UInt(0))
+ }
+
+ val maxDevs = Wire(Vec(cfg.nHarts, UInt(width = log2Up(pending.size))))
+ for (hart <- 0 until cfg.nHarts) {
+ val effectivePriority =
+ for (((p, en), pri) <- (pending zip enables(hart) zip priority).tail)
+ yield Cat(p && en, pri)
+ val (maxPri, maxDev) = findMax((UInt(1) << priority(0).getWidth) +: effectivePriority)
+
+ maxDevs(hart) := Reg(next = maxDev)
+ io.harts(hart) := Reg(next = maxPri) > Cat(UInt(1), threshold(hart))
+ }
+
+ val acq = Queue(io.tl.acquire, 1)
+ val read = acq.fire() && acq.bits.isBuiltInType(Acquire.getType)
+ val write = acq.fire() && acq.bits.isBuiltInType(Acquire.putType)
+ assert(!acq.fire() || read || write, "unsupported PLIC operation")
+ val addr = acq.bits.full_addr()(log2Up(cfg.size)-1,0)
+
+ val claimant =
+ if (cfg.nHarts == 1) UInt(0)
+ else (addr - cfg.hartBase)(log2Up(cfg.hartOffset(cfg.nHarts))-1,log2Up(cfg.hartOffset(1)))
+ val hart = Wire(init = claimant)
+ val myMaxDev = maxDevs(claimant) + UInt(0) // XXX FIRRTL bug w/o the + UInt(0)
+ val myEnables = enables(hart)
+ val rdata = Wire(init = UInt(0, tlDataBits))
+ val masked_wdata = (acq.bits.data & acq.bits.full_wmask()) | (rdata & ~acq.bits.full_wmask())
+
+ when (addr >= cfg.hartBase) {
+ val word =
+ if (tlDataBytes > cfg.claimOffset) UInt(0)
+ else addr(log2Up(cfg.claimOffset),log2Up(tlDataBytes))
+ rdata := Cat(myMaxDev, UInt(0, 8*cfg.priorityBytes-threshold(0).getWidth), threshold(claimant)) >> (word * tlDataBits)
+
+ when (read && addr(log2Ceil(cfg.claimOffset))) {
+ pending(myMaxDev) := false
+ }
+ when (write) {
+ when (if (tlDataBytes > cfg.claimOffset) acq.bits.wmask()(cfg.claimOffset) else addr(log2Ceil(cfg.claimOffset))) {
+ val dev = (acq.bits.data >> ((8 * cfg.claimOffset) % tlDataBits))(log2Up(pending.size)-1,0)
+ when (myEnables(dev)) { io.devices(dev-1).complete := true }
+ }.otherwise {
+ if (cfg.nPriorities > 0) threshold(claimant) := acq.bits.data
+ }
+ }
+ }.elsewhen (addr >= cfg.enableBase) {
+ val enableHart =
+ if (cfg.nHarts > 1) (addr - cfg.enableBase)(log2Up(cfg.enableOffset(cfg.nHarts))-1,log2Up(cfg.enableOffset(1)))
+ else UInt(0)
+ hart := enableHart
+ val word =
+ if (tlDataBits >= cfg.nHarts) UInt(0)
+ else addr(log2Up((cfg.nHarts+7)/8)-1,log2Up(tlDataBytes))
+ for (i <- 0 until cfg.nHarts by tlDataBits) {
+ when (word === i/tlDataBits) {
+ rdata := Cat(myEnables.slice(i, i + tlDataBits).reverse)
+ for (j <- 0 until (tlDataBits min (myEnables.size - i))) {
+ when (write) { enables(enableHart)(i+j) := masked_wdata(j) }
+ }
+ }
+ }
+ }.elsewhen (addr >= cfg.pendingBase) {
+ val word =
+ if (tlDataBytes >= pending.size) UInt(0)
+ else addr(log2Up(pending.size)-1,log2Up(tlDataBytes))
+ rdata := pending.toBits >> (word * tlDataBits)
+ }.otherwise {
+ val regsPerBeat = tlDataBytes >> log2Up(cfg.priorityBytes)
+ val word =
+ if (regsPerBeat >= priority.size) UInt(0)
+ else addr(log2Up(priority.size*cfg.priorityBytes)-1,log2Up(tlDataBytes))
+ for (i <- 0 until priority.size by regsPerBeat) {
+ when (word === i/regsPerBeat) {
+ rdata := Cat(priority.slice(i, i + regsPerBeat).map(p => Cat(UInt(0, 8*cfg.priorityBytes-p.getWidth), p)).reverse)
+ for (j <- 0 until (regsPerBeat min (priority.size - i))) {
+ if (cfg.nPriorities > 0) when (write) { priority(i+j) := masked_wdata >> (j * 8 * cfg.priorityBytes) }
+ }
+ }
+ }
+ }
+
+ priority(0) := 0
+ pending(0) := false
+ for (e <- enables)
+ e(0) := false
+
+ io.tl.grant.valid := acq.valid
+ acq.ready := io.tl.grant.ready
+ io.tl.grant.bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = acq.bits.getBuiltInGrantType(),
+ client_xact_id = acq.bits.client_xact_id,
+ manager_xact_id = UInt(0),
+ addr_beat = UInt(0),
+ data = rdata)
+}
diff --git a/uncore/src/main/scala/devices/Prci.scala b/uncore/src/main/scala/devices/Prci.scala
new file mode 100644
index 00000000..19497aff
--- /dev/null
+++ b/uncore/src/main/scala/devices/Prci.scala
@@ -0,0 +1,127 @@
+// See LICENSE for license details.
+
+package uncore.devices
+
+import Chisel._
+import Chisel.ImplicitConversions._
+import junctions._
+import junctions.NastiConstants._
+import uncore.tilelink._
+import cde.{Parameters, Field}
+
+/** Number of tiles */
+case object NTiles extends Field[Int]
+
+// External interrupt lines routed through the PRCI to each tile:
+// machine/supervisor external interrupts and the debug interrupt.
+class PRCIInterrupts extends Bundle {
+  val meip = Bool()
+  val seip = Bool()
+  val debug = Bool()
+}
+
+// Per-tile output bundle from the PRCI: reset, hart id, and the external
+// interrupts extended with the PRCI-generated timer (mtip) and software
+// (msip) interrupts.
+class PRCITileIO(implicit p: Parameters) extends Bundle {
+  val reset = Bool(OUTPUT)
+  val id = UInt(OUTPUT, log2Up(p(NTiles)))
+  val interrupts = new PRCIInterrupts {
+    val mtip = Bool()
+    val msip = Bool()
+  }.asOutput
+
+  // cloneType must re-capture the implicit Parameters for this bundle.
+  override def cloneType: this.type = new PRCITileIO().asInstanceOf[this.type]
+}
+
+/** Byte-offset map of the PRCI (CLINT-style) register region. */
+object PRCI {
+  /** Offset of hart `hart`'s MSIP software-interrupt register. */
+  def msip(hart: Int) = msipBytes * hart
+  /** Offset of hart `hart`'s 64-bit timer-compare register. */
+  def timecmp(hart: Int) = 0x4000 + timecmpBytes * hart
+  /** Offset of the shared 64-bit real-time counter. */
+  def time = 0xbff8
+  /** Bytes per MSIP register. */
+  def msipBytes = 4
+  /** Bytes per timecmp register. */
+  def timecmpBytes = 8
+  /** Total size in bytes of the PRCI region. */
+  def size = 0xc000
+}
+
+/** Power, Reset, Clock, Interrupt */
+class PRCI(implicit val p: Parameters) extends Module
+ with HasTileLinkParameters
+ with HasAddrMapParameters {
+ val io = new Bundle {
+ val interrupts = Vec(p(NTiles), new PRCIInterrupts).asInput
+ val tl = new ClientUncachedTileLinkIO().flip
+ val tiles = Vec(p(NTiles), new PRCITileIO)
+ val rtcTick = Bool(INPUT)
+ }
+
+ val timeWidth = 64
+ val timecmp = Reg(Vec(p(NTiles), UInt(width = timeWidth)))
+ val time = Reg(init=UInt(0, timeWidth))
+ when (io.rtcTick) { time := time + UInt(1) }
+
+ val ipi = Reg(init=Vec.fill(p(NTiles))(UInt(0, 32)))
+
+ val acq = Queue(io.tl.acquire, 1)
+ val addr = acq.bits.full_addr()(log2Ceil(PRCI.size)-1,0)
+ val read = acq.bits.isBuiltInType(Acquire.getType)
+ val rdata = Wire(init=UInt(0))
+ io.tl.grant.valid := acq.valid
+ acq.ready := io.tl.grant.ready
+ io.tl.grant.bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = acq.bits.getBuiltInGrantType(),
+ client_xact_id = acq.bits.client_xact_id,
+ manager_xact_id = UInt(0),
+ addr_beat = UInt(0),
+ data = rdata)
+
+ when (addr(log2Floor(PRCI.time))) {
+ require(log2Floor(PRCI.timecmp(p(NTiles)-1)) < log2Floor(PRCI.time))
+ rdata := load(Vec(time + UInt(0)), acq.bits)
+ }.elsewhen (addr >= PRCI.timecmp(0)) {
+ rdata := store(timecmp, acq.bits)
+ }.otherwise {
+ rdata := store(ipi, acq.bits) & Fill(tlDataBits/32, UInt(1, 32))
+ }
+
+ for ((tile, i) <- io.tiles zipWithIndex) {
+ tile.interrupts := io.interrupts(i)
+ tile.interrupts.msip := ipi(i)(0)
+ tile.interrupts.mtip := time >= timecmp(i)
+ tile.id := UInt(i)
+ }
+
+ // TODO generalize these to help other TL slaves
+ def load(v: Vec[UInt], acq: Acquire): UInt = {
+ val w = v.head.getWidth
+ val a = acq.full_addr()
+ require(isPow2(w) && w >= 8)
+ if (w > tlDataBits) {
+ (v(a(log2Up(w/8*v.size)-1,log2Up(w/8))) >> a(log2Up(w/8)-1,log2Up(tlDataBytes)))(tlDataBits-1,0)
+ } else {
+ val row = for (i <- 0 until v.size by tlDataBits/w)
+ yield Cat(v.slice(i, i + tlDataBits/w).reverse)
+ if (row.size == 1) row.head
+ else Vec(row)(a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes)))
+ }
+ }
+
+ def store(v: Vec[UInt], acq: Acquire): UInt = {
+ val w = v.head.getWidth
+ require(isPow2(w) && w >= 8)
+ val a = acq.full_addr()
+ val rdata = load(v, acq)
+ val wdata = (acq.data & acq.full_wmask()) | (rdata & ~acq.full_wmask())
+ if (w <= tlDataBits) {
+ val word =
+ if (tlDataBits/w >= v.size) UInt(0)
+ else a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes))
+ for (i <- 0 until v.size) {
+ when (acq.isBuiltInType(Acquire.putType) && word === i/(tlDataBits/w)) {
+ val base = i % (tlDataBits/w)
+ v(i) := wdata >> (w * (i % (tlDataBits/w)))
+ }
+ }
+ } else {
+ val i = a(log2Up(w/8*v.size)-1,log2Up(w/8))
+ val mask = FillInterleaved(tlDataBits, UIntToOH(a(log2Up(w/8)-1,log2Up(tlDataBytes))))
+ v(i) := (wdata & mask) | (v(i) & ~mask)
+ }
+ rdata
+ }
+}
diff --git a/uncore/src/main/scala/devices/Rom.scala b/uncore/src/main/scala/devices/Rom.scala
new file mode 100644
index 00000000..0fd9dd3e
--- /dev/null
+++ b/uncore/src/main/scala/devices/Rom.scala
@@ -0,0 +1,67 @@
+package uncore.devices
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import uncore.util._
+import cde.{Parameters, Field}
+
+// TileLink slave serving a read-only memory initialized from `contents`
+// (packed little-endian into tlDataBits-wide rows). Supports single-beat
+// gets and multi-beat block gets.
+class ROMSlave(contents: Seq[Byte])(implicit val p: Parameters) extends Module
+    with HasTileLinkParameters
+    with HasAddrMapParameters {
+  val io = new ClientUncachedTileLinkIO().flip
+
+  val acq = Queue(io.acquire, 1)
+  val single_beat = acq.bits.isBuiltInType(Acquire.getType)
+  val multi_beat = acq.bits.isBuiltInType(Acquire.getBlockType)
+  assert(!acq.valid || single_beat || multi_beat, "unsupported ROMSlave operation")
+
+  // Beat counter: starts at the requested beat, advances once per grant
+  // (the acquire stays queued until the last beat, see acq.ready below).
+  val addr_beat = Reg(UInt())
+  when (io.grant.fire()) { addr_beat := addr_beat + UInt(1) }
+  when (io.acquire.fire()) { addr_beat := io.acquire.bits.addr_beat }
+
+  // Pack the byte contents into bus-width rows, little-endian.
+  val byteWidth = tlDataBits / 8
+  val rows = (contents.size + byteWidth - 1)/byteWidth
+  val rom = Vec.tabulate(rows) { i =>
+    val slice = contents.slice(i*byteWidth, (i+1)*byteWidth)
+    UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8)
+  }
+  val raddr = Cat(acq.bits.addr_block, addr_beat)
+  val rdata = rom(if (rows == 1) UInt(0) else raddr(log2Up(rom.size)-1,0))
+
+  // Hold the acquire until the final beat of a block get has been granted.
+  val last = !multi_beat || addr_beat === UInt(tlDataBeats-1)
+  io.grant.valid := acq.valid
+  acq.ready := io.grant.ready && last
+  io.grant.bits := Grant(
+    is_builtin_type = Bool(true),
+    g_type = acq.bits.getBuiltInGrantType(),
+    client_xact_id = acq.bits.client_xact_id,
+    manager_xact_id = UInt(0),
+    addr_beat = addr_beat,
+    data = rdata)
+}
+
+// NASTI slave serving a read-only memory from `contents`. Single-beat
+// reads only; writes and bursts are rejected with simulation asserts.
+class NastiROM(contents: Seq[Byte])(implicit p: Parameters) extends Module {
+  val io = new NastiIO().flip
+  val ar = Queue(io.ar, 1)
+
+  // This assumes ROMs are in read-only parts of the address map.
+  // Reuse b_queue code from NastiErrorSlave if this assumption is bad.
+  when (ar.valid) { assert(ar.bits.len === UInt(0), "Can't burst-read from NastiROM") }
+  assert(!(io.aw.valid || io.w.valid), "Can't write to NastiROM")
+  io.aw.ready := Bool(false)
+  io.w.ready := Bool(false)
+  io.b.valid := Bool(false)
+
+  // Pack the byte contents into bus-width rows, little-endian.
+  val byteWidth = io.r.bits.nastiXDataBits / 8
+  val rows = (contents.size + byteWidth - 1)/byteWidth
+  val rom = Vec.tabulate(rows) { i =>
+    val slice = contents.slice(i*byteWidth, (i+1)*byteWidth)
+    UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8)
+  }
+  val rdata_word = rom(if (rows == 1) UInt(0) else ar.bits.addr(log2Up(contents.size)-1,log2Up(byteWidth)))
+  // LoadGen extracts/aligns the requested sub-word from the row.
+  val rdata = new LoadGen(Cat(UInt(1), ar.bits.size), ar.bits.addr, rdata_word, Bool(false), byteWidth).data
+
+  // Bulk-connect handshake from the AR queue, then override the payload
+  // (last connect wins in Chisel).
+  io.r <> ar
+  io.r.bits := NastiReadDataChannel(ar.bits.id, rdata)
+}
diff --git a/uncore/src/main/scala/tilelink/Arbiters.scala b/uncore/src/main/scala/tilelink/Arbiters.scala
new file mode 100644
index 00000000..ab1f05ae
--- /dev/null
+++ b/uncore/src/main/scala/tilelink/Arbiters.scala
@@ -0,0 +1,196 @@
+package uncore.tilelink
+import Chisel._
+import junctions._
+import cde.{Parameters, Field}
+
+/** Utility functions for constructing TileLinkIO arbiters */
+trait TileLinkArbiterLike extends HasTileLinkParameters {
+ // Some shorthand type variables
+ type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId
+ type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId
+ type ClientSourcedWithIdAndData = ClientToManagerChannel with HasClientTransactionId with HasTileLinkData
+
+ val arbN: Int // The number of ports on the client side
+
+ // These abstract funcs are filled in depending on whether the arbiter mucks with the
+ // outgoing client ids to track sourcing and then needs to revert them on the way back
+ def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits // xact id the manager sees for a request from port `id`
+ def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits // original client xact id recovered from a manager response
+ def arbIdx(in: ManagerSourcedWithId): UInt // which client port a manager response should be routed to
+
+ // The following functions are all wiring helpers for each of the different types of TileLink channels
+
+ def hookupClientSource[M <: ClientSourcedWithIdAndData](
+ clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
+ mngr: DecoupledIO[LogicalNetworkIO[M]]) { // N-to-1 mux of header-wrapped client channels, rewriting xact ids
+ def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData()
+ val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _))) // hold the grant for tlDataBeats on multibeat messages so beats aren't interleaved
+ clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
+ arb.valid := req.valid
+ arb.bits := req.bits
+ arb.bits.payload.client_xact_id := clientSourcedClientXactId(req.bits.payload, id) // tag with the source port
+ req.ready := arb.ready
+ }}
+ mngr <> arb.io.out
+ }
+
+ def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData](
+ clts: Seq[DecoupledIO[M]],
+ mngr: DecoupledIO[M]) { // same as hookupClientSource but for channels without network headers
+ def hasData(m: M) = m.hasMultibeatData()
+ val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _)))
+ clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
+ arb.valid := req.valid
+ arb.bits := req.bits
+ arb.bits.client_xact_id := clientSourcedClientXactId(req.bits, id) // tag with the source port
+ req.ready := arb.ready
+ }}
+ mngr <> arb.io.out
+ }
+
+ def hookupManagerSourceWithHeader[M <: ManagerToClientChannel](
+ clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
+ mngr: DecoupledIO[LogicalNetworkIO[M]]) { // 1-to-N demux steered by the network header's dst field
+ mngr.ready := Bool(false)
+ for (i <- 0 until arbN) {
+ clts(i).valid := Bool(false)
+ when (mngr.bits.header.dst === UInt(i)) {
+ clts(i).valid := mngr.valid
+ mngr.ready := clts(i).ready // ready follows the one selected destination
+ }
+ clts(i).bits := mngr.bits
+ }
+ }
+
+ def hookupManagerSourceWithId[M <: ManagerSourcedWithId](
+ clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
+ mngr: DecoupledIO[LogicalNetworkIO[M]]) { // 1-to-N demux steered by the port id embedded in client_xact_id
+ mngr.ready := Bool(false)
+ for (i <- 0 until arbN) {
+ clts(i).valid := Bool(false)
+ when (arbIdx(mngr.bits.payload) === UInt(i)) {
+ clts(i).valid := mngr.valid
+ mngr.ready := clts(i).ready
+ }
+ clts(i).bits := mngr.bits
+ clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload) // restore the client's original xact id
+ }
+ }
+
+ def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId](
+ clts: Seq[DecoupledIO[M]],
+ mngr: DecoupledIO[M]) { // headerless variant of hookupManagerSourceWithId
+ mngr.ready := Bool(false)
+ for (i <- 0 until arbN) {
+ clts(i).valid := Bool(false)
+ when (arbIdx(mngr.bits) === UInt(i)) {
+ clts(i).valid := mngr.valid
+ mngr.ready := clts(i).ready
+ }
+ clts(i).bits := mngr.bits
+ clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits) // restore the client's original xact id
+ }
+ }
+
+ def hookupManagerSourceBroadcast[M <: Data](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { // fan a message out to every client port (used for Probes)
+ clts.map{ _.valid := mngr.valid }
+ clts.map{ _.bits := mngr.bits }
+ mngr.ready := clts.map(_.ready).reduce(_&&_) // only consumes when ALL clients can accept simultaneously
+ }
+
+ def hookupFinish[M <: LogicalNetworkIO[Finish]]( clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { // Finish is single-beat, so a plain round-robin arbiter suffices
+ val arb = Module(new RRArbiter(mngr.bits, arbN))
+ arb.io.in <> clts
+ mngr <> arb.io.out
+ }
+}
+
+/** Abstract base case for any Arbiters that have UncachedTileLinkIOs */
+abstract class UncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module
+ with TileLinkArbiterLike {
+ val io = new Bundle {
+ val in = Vec(arbN, new UncachedTileLinkIO).flip
+ val out = new UncachedTileLinkIO
+ }
+ hookupClientSource(io.in.map(_.acquire), io.out.acquire) // acquires: N-to-1 with per-port xact-id tagging
+ hookupFinish(io.in.map(_.finish), io.out.finish) // finishes: round-robin arbitration
+ hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) // grants: routed back via the id embedded in client_xact_id
+}
+
+/** Abstract base case for any Arbiters that have cached TileLinkIOs */
+abstract class TileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module
+ with TileLinkArbiterLike {
+ val io = new Bundle {
+ val in = Vec(arbN, new TileLinkIO).flip
+ val out = new TileLinkIO
+ }
+ hookupClientSource(io.in.map(_.acquire), io.out.acquire) // acquires: N-to-1 with per-port xact-id tagging
+ hookupClientSource(io.in.map(_.release), io.out.release) // releases: same treatment as acquires
+ hookupFinish(io.in.map(_.finish), io.out.finish) // finishes: round-robin arbitration
+ hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) // probes: broadcast to every client port
+ hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) // grants: routed back via the id embedded in client_xact_id
+}
+
+/** Appends the port index of the arbiter to the client_xact_id */
+trait AppendsArbiterId extends TileLinkArbiterLike {
+ def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) =
+ Cat(in.client_xact_id, UInt(id, log2Up(arbN))) // port index occupies the low log2Up(arbN) bits
+ def managerSourcedClientXactId(in: ManagerSourcedWithId) = {
+ /* This shouldn't be necessary, but Chisel3 doesn't emit correct Verilog
+ * when right shifting by too many bits. See
+ * https://github.com/ucb-bar/firrtl/issues/69 */
+ if (in.client_xact_id.getWidth > log2Up(arbN))
+ in.client_xact_id >> log2Up(arbN) // strip the appended port index to recover the original id
+ else
+ UInt(0) // id was nothing but the port index, so the original id is 0
+ }
+ def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id(log2Up(arbN)-1,0).toUInt // low bits select the destination port
+}
+
+/** Uses the client_xact_id as is (assumes it has been set to port index) */
+trait PassesId extends TileLinkArbiterLike {
+ def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id // pass through unchanged
+ def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id // pass through unchanged
+ def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id // id itself is assumed to be the port index
+}
+
+/** Overwrites some default client_xact_id with the port idx */
+trait UsesNewId extends TileLinkArbiterLike {
+ def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN)) // discard the client's id, use the port index
+ def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0) // nothing to restore: original id was discarded
+ def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id // id is exactly the port index
+}
+
+// Now we can mix-in the various id-generation traits to make concrete arbiter classes
+class UncachedTileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with AppendsArbiterId
+class UncachedTileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with PassesId
+class UncachedTileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with UsesNewId
+class TileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with AppendsArbiterId
+class TileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with PassesId
+class TileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with UsesNewId
+
+/** Concrete uncached client-side arbiter that appends the arbiter's port id to client_xact_id */
+class ClientUncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
+ val io = new Bundle {
+ val in = Vec(arbN, new ClientUncachedTileLinkIO).flip
+ val out = new ClientUncachedTileLinkIO
+ }
+ if (arbN > 1) {
+ hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) // acquires: N-to-1 with id tagging
+ hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant) // grants: routed back by embedded port id
+ } else { io.out <> io.in.head } // single port: no arbitration needed, wire straight through
+}
+
+/** Concrete client-side arbiter that appends the arbiter's port id to client_xact_id */
+class ClientTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
+ val io = new Bundle {
+ val in = Vec(arbN, new ClientTileLinkIO).flip
+ val out = new ClientTileLinkIO
+ }
+ if (arbN > 1) {
+ hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) // acquires: N-to-1 with id tagging
+ hookupClientSourceHeaderless(io.in.map(_.release), io.out.release) // releases: same treatment as acquires
+ hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) // probes: broadcast to every client port
+ hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant) // grants: routed back by embedded port id
+ } else { io.out <> io.in.head } // single port: no arbitration needed, wire straight through
+}
diff --git a/uncore/src/main/scala/tilelink/Definitions.scala b/uncore/src/main/scala/tilelink/Definitions.scala
new file mode 100644
index 00000000..86e59983
--- /dev/null
+++ b/uncore/src/main/scala/tilelink/Definitions.scala
@@ -0,0 +1,964 @@
+// See LICENSE for license details.
+
+package uncore.tilelink
+import Chisel._
+import junctions._
+import uncore.coherence.CoherencePolicy
+import uncore.Util._
+import scala.math.max
+import uncore.constants._
+import cde.{Parameters, Field}
+
+case object CacheBlockOffsetBits extends Field[Int] // number of address bits within a cache block
+case object AmoAluOperandBits extends Field[Int] // width of the atomic-op ALU's operands
+
+case object TLId extends Field[String] // name of the TileLink network a module is attached to
+case class TLKey(id: String) extends Field[TileLinkParameters] // parameter lookup key for TileLink network `id`
+
+/** Parameters exposed to the top-level design, set based on
+ * external requirements or design space exploration
+ *
+ * Coherency policy used to define custom message types
+ * Number of manager agents
+ * Number of client agents that cache data and use custom [[uncore.Acquire]] types
+ * Number of client agents that do not cache data and use built-in [[uncore.Acquire]] types
+ * Maximum number of unique outstanding transactions per client
+ * Maximum number of clients multiplexed onto a single port
+ * Maximum number of unique outstanding transactions per manager
+ * Width of cache block addresses
+ * Total amount of data per cache block
+ * Number of data beats per cache block
+ **/
+
+case class TileLinkParameters(
+ coherencePolicy: CoherencePolicy, // defines the custom (cached) message types
+ nManagers: Int, // number of manager agents
+ nCachingClients: Int, // clients that cache data (custom Acquire types)
+ nCachelessClients: Int, // clients that use only built-in Acquire types
+ maxClientXacts: Int, // max outstanding transactions per client
+ maxClientsPerPort: Int, // max clients multiplexed onto one port
+ maxManagerXacts: Int, // max outstanding transactions per manager
+ dataBits: Int, // total data bits per cache block
+ dataBeats: Int = 4, // beats per cache block
+ overrideDataBitsPerBeat: Option[Int] = None // force a beat width different from dataBits/dataBeats
+ ) {
+ val nClients = nCachingClients + nCachelessClients
+ val writeMaskBits: Int = ((dataBits / dataBeats) - 1) / 8 + 1 // one mask bit per byte of a beat, rounded up
+ val dataBitsPerBeat: Int = overrideDataBitsPerBeat.getOrElse(dataBits / dataBeats)
+}
+
+
+/** Utility trait for building Modules and Bundles that use TileLink parameters */
+trait HasTileLinkParameters {
+ implicit val p: Parameters
+ val tlExternal = p(TLKey(p(TLId))) // parameters of the TileLink network this module sits on
+ val tlCoh = tlExternal.coherencePolicy
+ val tlNManagers = tlExternal.nManagers
+ val tlNCachingClients = tlExternal.nCachingClients
+ val tlNCachelessClients = tlExternal.nCachelessClients
+ val tlNClients = tlExternal.nClients
+ val tlClientIdBits = log2Up(tlNClients)
+ val tlManagerIdBits = log2Up(tlNManagers)
+ val tlMaxClientXacts = tlExternal.maxClientXacts
+ val tlMaxClientsPerPort = tlExternal.maxClientsPerPort
+ val tlMaxManagerXacts = tlExternal.maxManagerXacts
+ val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientsPerPort) // id must distinguish every xact of every client behind a port
+ val tlManagerXactIdBits = log2Up(tlMaxManagerXacts)
+ val tlBlockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits) // physical address bits above the block offset
+ val tlDataBeats = tlExternal.dataBeats
+ val tlDataBits = tlExternal.dataBitsPerBeat
+ val tlDataBytes = tlDataBits/8
+ val tlWriteMaskBits = tlExternal.writeMaskBits
+ val tlBeatAddrBits = log2Up(tlDataBeats)
+ val tlByteAddrBits = log2Up(tlWriteMaskBits)
+ val tlMemoryOpcodeBits = M_SZ
+ val tlMemoryOperandSizeBits = MT_SZ
+ val tlAcquireTypeBits = max(log2Up(Acquire.nBuiltInTypes), // wide enough for both built-in and custom types
+ tlCoh.acquireTypeWidth)
+ val tlAcquireUnionBits = max(tlWriteMaskBits, // union must hold either a write mask or addr_byte+size+opcode, plus the alloc bit
+ (tlByteAddrBits +
+ tlMemoryOperandSizeBits +
+ tlMemoryOpcodeBits)) + 1
+ val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes),
+ tlCoh.grantTypeWidth) + 1
+/** Whether the underlying physical network preserves point-to-point ordering of messages */
+ val tlNetworkPreservesPointToPointOrdering = false
+ val tlNetworkDoesNotInterleaveBeats = true
+ val amoAluOperandBits = p(AmoAluOperandBits)
+ val amoAluOperandBytes = amoAluOperandBits/8
+}
+
+abstract class TLModule(implicit val p: Parameters) extends Module
+ with HasTileLinkParameters // base class for Modules parameterized by the TileLink network
+abstract class TLBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
+ with HasTileLinkParameters // base class for Bundles parameterized by the TileLink network
+
+/** Base trait for all TileLink channels */
+abstract class TileLinkChannel(implicit p: Parameters) extends TLBundle()(p) {
+ def hasData(dummy: Int = 0): Bool // does this message carry a data beat?
+ def hasMultibeatData(dummy: Int = 0): Bool // does this message carry a whole block of beats?
+}
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+abstract class ClientToManagerChannel(implicit p: Parameters) extends TileLinkChannel()(p)
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+abstract class ManagerToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p)
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+abstract class ClientToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p) // Unused for now
+
+/** Common signals that are used in multiple channels.
+ * These traits are useful for type parameterizing bundle wiring functions.
+ */
+
+/** Address of a cache block. */
+trait HasCacheBlockAddress extends HasTileLinkParameters {
+ val addr_block = UInt(width = tlBlockAddrBits)
+
+ def conflicts(that: HasCacheBlockAddress) = this.addr_block === that.addr_block // same-block check against another message
+ def conflicts(addr: UInt) = this.addr_block === addr // same-block check against a raw block address
+}
+
+/** Sub-block address or beat id of multi-beat data */
+trait HasTileLinkBeatId extends HasTileLinkParameters {
+ val addr_beat = UInt(width = tlBeatAddrBits)
+}
+
+/** Client-side transaction id. Usually Miss Status Handling Register File index */
+trait HasClientTransactionId extends HasTileLinkParameters {
+ val client_xact_id = Bits(width = tlClientXactIdBits)
+}
+
+/** Manager-side transaction id. Usually Transaction Status Handling Register File index. */
+trait HasManagerTransactionId extends HasTileLinkParameters {
+ val manager_xact_id = Bits(width = tlManagerXactIdBits)
+}
+
+/** A single beat of cache block data */
+trait HasTileLinkData extends HasTileLinkBeatId {
+ val data = UInt(width = tlDataBits)
+
+ def hasData(dummy: Int = 0): Bool
+ def hasMultibeatData(dummy: Int = 0): Bool
+ def first(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(0) // is this the first beat of the message?
+ def last(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(tlDataBeats-1) // is this the final beat of the message?
+}
+
+/** An entire cache block of data */
+trait HasTileLinkBlock extends HasTileLinkParameters {
+ val data_buffer = Vec(tlDataBeats, UInt(width = tlDataBits)) // one entry per beat of the block
+ val wmask_buffer = Vec(tlDataBeats, UInt(width = tlWriteMaskBits)) // per-beat byte write masks
+}
+
+/** The id of a client source or destination. Used in managers. */
+trait HasClientId extends HasTileLinkParameters {
+ val client_id = UInt(width = tlClientIdBits)
+}
+
+trait HasManagerId extends HasTileLinkParameters {
+ val manager_id = UInt(width = tlManagerIdBits)
+}
+
+trait HasAcquireUnion extends HasTileLinkParameters {
+ val union = Bits(width = tlAcquireUnionBits) // packed subfields; layout depends on the Acquire type (see Acquire.makeUnion)
+
+ // Utility funcs for accessing subblock union:
+ def isBuiltInType(t: UInt): Bool
+ val opCodeOff = 1 // bit 0 is the alloc hint, opcode starts above it
+ val opSizeOff = tlMemoryOpcodeBits + opCodeOff
+ val addrByteOff = tlMemoryOperandSizeBits + opSizeOff
+ val addrByteMSB = tlByteAddrBits + addrByteOff
+ /** Hint whether to allocate the block in any intervening caches */
+ def allocate(dummy: Int = 0) = union(0)
+ /** Op code for [[uncore.PutAtomic]] operations */
+ def op_code(dummy: Int = 0) = Mux(
+ isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType),
+ M_XWR, union(opSizeOff-1, opCodeOff)) // Puts are always writes; otherwise decode the stored opcode
+ /** Operand size for [[uncore.PutAtomic]] */
+ def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
+ /** Byte address for [[uncore.PutAtomic]] operand */
+ def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
+ def amo_offset(dummy: Int = 0) =
+ if (tlByteAddrBits > log2Up(amoAluOperandBytes)) addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBytes)) // which AMO operand within the beat
+ else UInt(0)
+ /** Bit offset of [[uncore.PutAtomic]] operand */
+ def amo_shift_bytes(dummy: Int = 0) = UInt(amoAluOperandBytes)*amo_offset()
+ /** Write mask for [[uncore.Put]], [[uncore.PutBlock]], [[uncore.PutAtomic]] */
+ def wmask(dummy: Int = 0): UInt = {
+ val is_amo = isBuiltInType(Acquire.putAtomicType)
+ val amo_mask = if (tlByteAddrBits > log2Up(amoAluOperandBytes))
+ FillInterleaved(amoAluOperandBytes, UIntToOH(amo_offset())) // mask covers only the addressed AMO operand
+ else Acquire.fullWriteMask
+ val is_put = isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType)
+ val put_mask = union(tlWriteMaskBits, 1) // Puts store the mask directly above the alloc bit
+ Mux(is_amo, amo_mask, Mux(is_put, put_mask, UInt(0)))
+ }
+ /** Full, beat-sized writemask */
+ def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask())
+
+ /** Does this message write only part of a beat (non-full write mask)? */
+ def hasPartialWritemask(dummy: Int = 0): Bool = wmask() =/= Acquire.fullWriteMask
+
+}
+
+trait HasAcquireType extends HasTileLinkParameters {
+ val is_builtin_type = Bool() // selects built-in vs coherence-policy type namespace for a_type
+ val a_type = UInt(width = tlAcquireTypeBits)
+
+ /** Message type equality */
+ def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===?
+
+ /** Is this message a built-in or custom type */
+ def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+ /** Is this message a particular built-in type */
+ def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t
+
+ /** Does this message refer to subblock operands using info in the Acquire.union subbundle */
+ def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesOnSubBlocks)
+
+ /** Is this message a built-in prefetch message */
+ def isPrefetch(dummy: Int = 0): Bool = isBuiltInType() &&
+ (is(Acquire.getPrefetchType) || is(Acquire.putPrefetchType))
+
+ /** Is this message a built-in atomic message */
+ def isAtomic(dummy: Int = 0): Bool = isBuiltInType() && is(Acquire.putAtomicType)
+
+ /** Is this message a built-in read message */
+ def isGet(dummy: Int = 0): Bool = isBuiltInType() && (is(Acquire.getType) || is(Acquire.getBlockType))
+
+ /** Does this message contain data? Assumes that no custom message types have data. */
+ def hasData(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesWithData)
+
+ /** Does this message contain multiple beats of data? Assumes that no custom message types have data. */
+ def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() &&
+ a_type.isOneOf(Acquire.typesWithMultibeatData)
+
+ /** Mapping between each built-in Acquire type and a built-in Grant type. */
+ def getBuiltInGrantType(dummy: Int = 0): UInt = Acquire.getBuiltInGrantType(this.a_type)
+}
+
+trait HasProbeType extends HasTileLinkParameters {
+ val p_type = UInt(width = tlCoh.probeTypeWidth) // probe type defined by the coherence policy
+
+ def is(t: UInt) = p_type === t
+ def hasData(dummy: Int = 0) = Bool(false) // Probes never carry data
+ def hasMultibeatData(dummy: Int = 0) = Bool(false)
+}
+
+trait MightBeVoluntary {
+ def isVoluntary(dummy: Int = 0): Bool // was this message initiated by the client rather than in response to a Probe?
+}
+
+trait HasReleaseType extends HasTileLinkParameters with MightBeVoluntary {
+ val voluntary = Bool() // client-initiated (writeback) vs. Probe response
+ val r_type = UInt(width = tlCoh.releaseTypeWidth)
+
+ def is(t: UInt) = r_type === t
+ def hasData(dummy: Int = 0) = r_type.isOneOf(tlCoh.releaseTypesWithData)
+ def hasMultibeatData(dummy: Int = 0) = Bool(tlDataBeats > 1) &&
+ r_type.isOneOf(tlCoh.releaseTypesWithData)
+ def isVoluntary(dummy: Int = 0) = voluntary
+ def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering) // unordered networks need an explicit ack
+}
+
+trait HasGrantType extends HasTileLinkParameters with MightBeVoluntary {
+ val is_builtin_type = Bool() // selects built-in vs coherence-policy type namespace for g_type
+ val g_type = UInt(width = tlGrantTypeBits)
+
+ // Helper funcs
+ def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+ def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t
+ def is(t: UInt):Bool = g_type === t
+ def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(),
+ g_type.isOneOf(Grant.typesWithData),
+ g_type.isOneOf(tlCoh.grantTypesWithData)) // custom data-bearing types come from the coherence policy
+ def hasMultibeatData(dummy: Int = 0): Bool =
+ Bool(tlDataBeats > 1) && Mux(isBuiltInType(),
+ g_type.isOneOf(Grant.typesWithMultibeatData),
+ g_type.isOneOf(tlCoh.grantTypesWithData))
+ def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType)
+ def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary()
+}
+
+/** TileLink channel bundle definitions */
+
+/** The Acquire channel is used to initiate coherence protocol transactions in
+ * order to gain access to a cache block's data with certain permissions
+ * enabled. Messages sent over this channel may be custom types defined by
+ * a [[uncore.CoherencePolicy]] for cached data accesses or may be built-in types
+ * used for uncached data accesses. Acquires may contain data for Put or
+ * PutAtomic built-in types. After sending an Acquire, clients must
+ * wait for a manager to send them a [[uncore.Grant]] message in response.
+ */
+class AcquireMetadata(implicit p: Parameters) extends ClientToManagerChannel
+ with HasCacheBlockAddress
+ with HasClientTransactionId
+ with HasTileLinkBeatId
+ with HasAcquireType
+ with HasAcquireUnion {
+ /** Complete physical address for block, beat or operand */
+ def full_addr(dummy: Int = 0) =
+ Cat(this.addr_block, this.addr_beat,
+ Mux(isBuiltInType() && this.a_type.isOneOf(Acquire.typesWithAddrByte),
+ this.addr_byte(), UInt(0, tlByteAddrBits))) // byte offset only meaningful for Get/PutAtomic; zero otherwise
+}
+
+/** [[uncore.AcquireMetadata]] with an extra field containing the data beat */
+class Acquire(implicit p: Parameters) extends AcquireMetadata
+ with HasTileLinkData
+
+/** [[uncore.AcquireMetadata]] with an extra field containing the entire cache block */
+class BufferedAcquire(implicit p: Parameters) extends AcquireMetadata
+ with HasTileLinkBlock
+
+/** [[uncore.Acquire]] with an extra field stating its source id */
+class AcquireFromSrc(implicit p: Parameters) extends Acquire
+ with HasClientId
+
+/** [[uncore.BufferedAcquire]] with an extra field stating its source id */
+class BufferedAcquireFromSrc(implicit p: Parameters) extends BufferedAcquire
+ with HasClientId
+
+/** Used to track metadata for transactions where multiple secondary misses have been merged
+ * and handled by a single transaction tracker.
+ */
+class SecondaryMissInfo(implicit p: Parameters) extends TLBundle
+ with HasClientTransactionId
+ with HasTileLinkBeatId
+ with HasClientId
+ with HasAcquireType
+
+/** Contains definitions of the built-in Acquire types and a factory
+ * for [[uncore.Acquire]]
+ *
+ * In general you should avoid using this factory directly and use
+ * [[uncore.ClientMetadata.makeAcquire]] for custom cached Acquires and
+ * [[uncore.Get]], [[uncore.Put]], etc. for built-in uncached Acquires.
+ *
+ * @param is_builtin_type built-in or custom type message?
+ * @param a_type built-in type enum or custom type enum
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param data data being put outwards
+ * @param union additional fields used for uncached types
+ */
+object Acquire {
+ val nBuiltInTypes = 7
+ //TODO: Use Enum
+ def getType = UInt("b000") // Get a single beat of data
+ def getBlockType = UInt("b001") // Get a whole block of data
+ def putType = UInt("b010") // Put a single beat of data
+ def putBlockType = UInt("b011") // Put a whole block of data
+ def putAtomicType = UInt("b100") // Perform an atomic memory op
+ def getPrefetchType = UInt("b101") // Prefetch a whole block of data
+ def putPrefetchType = UInt("b110") // Prefetch a whole block of data, with intent to write
+ def typesWithData = Vec(putType, putBlockType, putAtomicType)
+ def typesWithMultibeatData = Vec(putBlockType)
+ def typesOnSubBlocks = Vec(putType, getType, putAtomicType)
+ def typesWithAddrByte = Vec(getType, putAtomicType)
+
+ /** Mapping between each built-in Acquire type and a built-in Grant type. */
+ def getBuiltInGrantType(a_type: UInt): UInt = {
+ MuxLookup(a_type, Grant.putAckType, Array(
+ Acquire.getType -> Grant.getDataBeatType,
+ Acquire.getBlockType -> Grant.getDataBlockType,
+ Acquire.putType -> Grant.putAckType,
+ Acquire.putBlockType -> Grant.putAckType,
+ Acquire.putAtomicType -> Grant.getDataBeatType,
+ Acquire.getPrefetchType -> Grant.prefetchAckType,
+ Acquire.putPrefetchType -> Grant.prefetchAckType))
+ }
+
+ // Packs the type-dependent subfields into the `union` bits (layout decoded by HasAcquireUnion).
+ def makeUnion(
+ a_type: UInt,
+ addr_byte: UInt,
+ operand_size: UInt,
+ opcode: UInt,
+ wmask: UInt,
+ alloc: Bool)
+ (implicit p: Parameters): UInt = {
+
+ val tlExternal = p(TLKey(p(TLId)))
+ val tlWriteMaskBits = tlExternal.writeMaskBits
+ val tlByteAddrBits = log2Up(tlWriteMaskBits)
+
+ // These had better be the right size when we cat them together!
+ val my_addr_byte = (UInt(0, tlByteAddrBits) | addr_byte)(tlByteAddrBits-1, 0) // widen-then-slice forces the exact field width
+ val my_operand_size = (UInt(0, MT_SZ) | operand_size)(MT_SZ-1, 0)
+ val my_opcode = (UInt(0, M_SZ) | opcode)(M_SZ-1, 0)
+ val my_wmask = (UInt(0, tlWriteMaskBits) | wmask)(tlWriteMaskBits-1, 0)
+
+ MuxLookup(a_type, UInt(0), Array(
+ Acquire.getType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc),
+ Acquire.getBlockType -> Cat(my_operand_size, my_opcode, alloc),
+ Acquire.putType -> Cat(my_wmask, alloc),
+ Acquire.putBlockType -> Cat(my_wmask, alloc),
+ Acquire.putAtomicType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc),
+ Acquire.getPrefetchType -> Cat(M_XRD, alloc),
+ Acquire.putPrefetchType -> Cat(M_XWR, alloc)))
+ }
+
+ def fullWriteMask(implicit p: Parameters) = SInt(-1, width = p(TLKey(p(TLId))).writeMaskBits).toUInt // all-ones mask via sign extension
+
+ // Most generic constructor
+ def apply(
+ is_builtin_type: Bool,
+ a_type: Bits,
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt = UInt(0),
+ data: UInt = UInt(0),
+ union: UInt = UInt(0))
+ (implicit p: Parameters): Acquire = {
+ val acq = Wire(new Acquire)
+ acq.is_builtin_type := is_builtin_type
+ acq.a_type := a_type
+ acq.client_xact_id := client_xact_id
+ acq.addr_block := addr_block
+ acq.addr_beat := addr_beat
+ acq.data := data
+ acq.union := union
+ acq
+ }
+
+ // Copy constructor
+ def apply(a: Acquire): Acquire = {
+ val acq = Wire(new Acquire()(a.p))
+ acq := a
+ acq
+ }
+}
+
+object BuiltInAcquireBuilder {
+ // Common factory for all built-in Acquire types; packs the subfields into `union` via Acquire.makeUnion.
+ def apply(
+ a_type: UInt,
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt = UInt(0),
+ data: UInt = UInt(0),
+ addr_byte: UInt = UInt(0),
+ operand_size: UInt = MT_Q,
+ opcode: UInt = UInt(0),
+ wmask: UInt = UInt(0),
+ alloc: Bool = Bool(true))
+ (implicit p: Parameters): Acquire = {
+ Acquire(
+ is_builtin_type = Bool(true),
+ a_type = a_type,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ data = data,
+ union = Acquire.makeUnion(a_type, addr_byte, operand_size, opcode, wmask, alloc))
+ }
+}
+
+/** Get a single beat of data from the outer memory hierarchy
+ *
+ * The client can hint whether the block containing this beat should be
+ * allocated in the intervening levels of the hierarchy.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param addr_byte sub-block address (which byte)
+ * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]]
+ * @param alloc hint whether the block should be allocated in intervening caches
+ */
+object Get {
+ // Whole-beat variant: no byte address or operand size needed.
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt,
+ alloc: Bool = Bool(true))
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.getType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ opcode = M_XRD,
+ alloc = alloc)
+ }
+ // Sub-beat variant: reads an operand of `operand_size` at `addr_byte` within the beat.
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt,
+ addr_byte: UInt,
+ operand_size: UInt,
+ alloc: Bool)
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.getType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ addr_byte = addr_byte,
+ operand_size = operand_size,
+ opcode = M_XRD,
+ alloc = alloc)
+ }
+}
+
+/** Get a whole cache block of data from the outer memory hierarchy
+ *
+ * The client can hint whether the block should be allocated in the
+ * intervening levels of the hierarchy.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param alloc hint whether the block should be allocated in intervening caches
+ */
+object GetBlock {
+ def apply(
+ client_xact_id: UInt = UInt(0),
+ addr_block: UInt,
+ alloc: Bool = Bool(true))
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.getBlockType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ opcode = M_XRD,
+ alloc = alloc)
+ }
+}
+
+/** Prefetch a cache block into the next-outermost level of the memory hierarchy
+ * with read permissions.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ */
+object GetPrefetch {
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt)
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.getPrefetchType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block)
+ }
+}
+
+/** Put a single beat of data into the outer memory hierarchy
+ *
+ * The block will be allocated in the next-outermost level of the hierarchy.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param data data being refilled to the original requestor
+ * @param wmask per-byte write mask for this beat
+ * @param alloc hint whether the block should be allocated in intervening caches
+ */
+object Put {
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt,
+ data: UInt,
+ wmask: Option[UInt]= None,
+ alloc: Bool = Bool(true))
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.putType,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ client_xact_id = client_xact_id,
+ data = data,
+ wmask = wmask.getOrElse(Acquire.fullWriteMask), // default: write every byte of the beat
+ alloc = alloc)
+ }
+}
+
+/** Put a whole cache block of data into the outer memory hierarchy
+ *
+ * If the write mask is not full, the block will be allocated in the
+ * next-outermost level of the hierarchy. If the write mask is full, the
+ * client can hint whether the block should be allocated or not.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat of several)
+ * @param data data being refilled to the original requestor
+ * @param wmask per-byte write mask for this beat
+ * @param alloc hint whether the block should be allocated in intervening caches
+ */
+object PutBlock {
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt,
+ data: UInt,
+ wmask: Option[UInt] = None,
+ alloc: Bool = Bool(true))
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.putBlockType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ data = data,
+ wmask = wmask.getOrElse(Acquire.fullWriteMask), // default: write every byte of the beat
+ alloc = alloc)
+ }
+}
+
+/** Prefetch a cache block into the next-outermost level of the memory hierarchy
+ * with write permissions.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ */
+object PutPrefetch {
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt)
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.putPrefetchType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block)
+ }
+}
+
+/** Perform an atomic memory operation in the next-outermost level of the memory hierarchy
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (within which beat)
+ * @param addr_byte sub-block address (which byte)
+ * @param atomic_opcode {swap, add, xor, and, min, max, minu, maxu} from [[uncore.MemoryOpConstants]]
+ * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]]
+ * @param data source operand data
+ */
+object PutAtomic {
+ def apply(
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt,
+ addr_byte: UInt,
+ atomic_opcode: UInt,
+ operand_size: UInt,
+ data: UInt)
+ (implicit p: Parameters): Acquire = {
+ BuiltInAcquireBuilder(
+ a_type = Acquire.putAtomicType,
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ data = data,
+ addr_byte = addr_byte,
+ operand_size = operand_size,
+ opcode = atomic_opcode)
+ }
+}
+
+/** The Probe channel is used to force clients to release data or cede permissions
+ * on a cache block. Clients respond to Probes with [[uncore.Release]] messages.
+ * The available types of Probes are customized by a particular
+ * [[uncore.CoherencePolicy]].
+ */
+class Probe(implicit p: Parameters) extends ManagerToClientChannel
+ with HasCacheBlockAddress
+ with HasProbeType
+
+/** [[uncore.Probe]] with an extra field stating its destination id */
+class ProbeToDst(implicit p: Parameters) extends Probe()(p) with HasClientId
+
+/** Contains factories for [[uncore.Probe]] and [[uncore.ProbeToDst]]
+ *
+ * In general you should avoid using these factories directly and use
+ * [[uncore.ManagerMetadata.makeProbe(UInt,Acquire)* makeProbe]] instead.
+ *
+ * The second factory additionally records the destination client id.
+ *
+ * @param dst id of client to which probe should be sent
+ * @param p_type custom probe type
+ * @param addr_block address of the cache block
+ */
+object Probe {
+ def apply(p_type: UInt, addr_block: UInt)(implicit p: Parameters): Probe = {
+ val prb = Wire(new Probe)
+ prb.p_type := p_type
+ prb.addr_block := addr_block
+ prb
+ }
+ def apply(dst: UInt, p_type: UInt, addr_block: UInt)(implicit p: Parameters): ProbeToDst = {
+ val prb = Wire(new ProbeToDst)
+ prb.client_id := dst
+ prb.p_type := p_type
+ prb.addr_block := addr_block
+ prb
+ }
+}
+
+/** The Release channel is used to release data or permission back to the manager
+ * in response to [[uncore.Probe]] messages. It can also be used to voluntarily
+ * write back data, for example in the event that dirty data must be evicted on
+ * a cache miss. The available types of Release messages are always customized by
+ * a particular [[uncore.CoherencePolicy]]. Releases may contain data or may be
+ * simple acknowledgements. Voluntary Releases are acknowledged with [[uncore.Grant Grants]].
+ */
+class ReleaseMetadata(implicit p: Parameters) extends ClientToManagerChannel
+ with HasTileLinkBeatId
+ with HasCacheBlockAddress
+ with HasClientTransactionId
+ with HasReleaseType {
+ // Reconstruct the full physical address of this beat; the byte offset
+ // within the beat is always zero for Release messages.
+ def full_addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, UInt(0, width = tlByteAddrBits))
+}
+
+/** [[uncore.ReleaseMetadata]] with an extra field containing the data beat */
+class Release(implicit p: Parameters) extends ReleaseMetadata
+ with HasTileLinkData
+
+/** [[uncore.ReleaseMetadata]] with an extra field containing the entire cache block */
+class BufferedRelease(implicit p: Parameters) extends ReleaseMetadata
+ with HasTileLinkBlock
+
+/** [[uncore.Release]] with an extra field stating its source id */
+class ReleaseFromSrc(implicit p: Parameters) extends Release
+ with HasClientId
+
+/** [[uncore.BufferedRelease]] with an extra field stating its source id */
+class BufferedReleaseFromSrc(implicit p: Parameters) extends BufferedRelease
+ with HasClientId
+
+/** Contains a [[uncore.Release]] factory
+ *
+ * In general you should avoid using this factory directly and use
+ * [[uncore.ClientMetadata.makeRelease]] instead.
+ *
+ * @param src id of the client sending the release (second factory only)
+ * @param voluntary is this a voluntary writeback
+ * @param r_type type enum defined by coherence protocol
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat beat id of the data
+ * @param data data being written back
+ */
+object Release {
+ def apply(
+ voluntary: Bool,
+ r_type: UInt,
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt,
+ data: UInt)
+ (implicit p: Parameters): Release = {
+ val rel = Wire(new Release)
+ rel.r_type := r_type
+ rel.client_xact_id := client_xact_id
+ rel.addr_block := addr_block
+ rel.addr_beat := addr_beat
+ rel.data := data
+ rel.voluntary := voluntary
+ rel
+ }
+
+ def apply(
+ src: UInt,
+ voluntary: Bool,
+ r_type: UInt,
+ client_xact_id: UInt,
+ addr_block: UInt,
+ // addr_beat and data default to zero for dataless Releases
+ addr_beat: UInt = UInt(0),
+ data: UInt = UInt(0))
+ (implicit p: Parameters): ReleaseFromSrc = {
+ val rel = Wire(new ReleaseFromSrc)
+ rel.client_id := src
+ rel.voluntary := voluntary
+ rel.r_type := r_type
+ rel.client_xact_id := client_xact_id
+ rel.addr_block := addr_block
+ rel.addr_beat := addr_beat
+ rel.data := data
+ rel
+ }
+}
+
+/** The Grant channel is used to refill data or grant permissions requested of the
+ * manager agent via an [[uncore.Acquire]] message. It is also used to acknowledge
+ * the receipt of voluntary writeback from clients in the form of [[uncore.Release]]
+ * messages. There are built-in Grant messages used for Gets and Puts, and
+ * coherence policies may also define custom Grant types. Grants may contain data
+ * or may be simple acknowledgements. Grants are responded to with [[uncore.Finish]].
+ */
+class GrantMetadata(implicit p: Parameters) extends ManagerToClientChannel
+ with HasTileLinkBeatId
+ with HasClientTransactionId
+ with HasManagerTransactionId
+ with HasGrantType {
+ /** Build the [[uncore.Finish]] acknowledgement for this Grant,
+ * echoing back the manager's transaction id. */
+ def makeFinish(dummy: Int = 0): Finish = {
+ val f = Wire(new Finish)
+ f.manager_xact_id := this.manager_xact_id
+ f
+ }
+}
+
+/** [[uncore.GrantMetadata]] with an extra field containing a single beat of data */
+class Grant(implicit p: Parameters) extends GrantMetadata
+ with HasTileLinkData
+
+/** [[uncore.Grant]] with an extra field stating its destination */
+class GrantToDst(implicit p: Parameters) extends Grant
+ with HasClientId
+
+/** [[uncore.Grant]] with an extra field stating its source manager id */
+class GrantFromSrc(implicit p: Parameters) extends Grant
+ with HasManagerId {
+ // The Finish must be routed back to the originating manager, so the
+ // override also copies manager_id into the FinishToDst.
+ override def makeFinish(dummy: Int = 0): FinishToDst = {
+ val f = Wire(new FinishToDst)
+ f.manager_xact_id := this.manager_xact_id
+ f.manager_id := this.manager_id
+ f
+ }
+}
+
+/** [[uncore.GrantMetadata]] with an extra field containing an entire cache block */
+class BufferedGrant(implicit p: Parameters) extends GrantMetadata
+ with HasTileLinkBlock
+
+/** [[uncore.BufferedGrant]] with an extra field stating its destination */
+class BufferedGrantToDst(implicit p: Parameters) extends BufferedGrant
+ with HasClientId
+
+/** Contains definitions of the built-in grant types and factories
+ * for [[uncore.Grant]] and [[uncore.GrantToDst]]
+ *
+ * In general you should avoid using these factories directly and use
+ * [[uncore.ManagerMetadata.makeGrant(uncore.AcquireFromSrc* makeGrant]] instead.
+ *
+ * @param dst id of client to which grant should be sent
+ * @param is_builtin_type built-in or custom type message?
+ * @param g_type built-in type enum or custom type enum
+ * @param client_xact_id client's transaction id
+ * @param manager_xact_id manager's transaction id
+ * @param addr_beat beat id of the data
+ * @param data data being refilled to the original requestor
+ */
+object Grant {
+ val nBuiltInTypes = 5
+ def voluntaryAckType = UInt("b000") // For acking Releases
+ def prefetchAckType = UInt("b001") // For acking any kind of Prefetch
+ def putAckType = UInt("b011") // For acking any kind of non-prefetch Put
+ def getDataBeatType = UInt("b100") // Supplying a single beat of Get
+ def getDataBlockType = UInt("b101") // Supplying all beats of a GetBlock
+ // Grant types that carry a data payload, and the subset that is multibeat
+ def typesWithData = Vec(getDataBlockType, getDataBeatType)
+ def typesWithMultibeatData= Vec(getDataBlockType)
+
+ def apply(
+ is_builtin_type: Bool,
+ g_type: UInt,
+ client_xact_id: UInt,
+ manager_xact_id: UInt,
+ addr_beat: UInt,
+ data: UInt)
+ (implicit p: Parameters): Grant = {
+ val gnt = Wire(new Grant)
+ gnt.is_builtin_type := is_builtin_type
+ gnt.g_type := g_type
+ gnt.client_xact_id := client_xact_id
+ gnt.manager_xact_id := manager_xact_id
+ gnt.addr_beat := addr_beat
+ gnt.data := data
+ gnt
+ }
+
+ def apply(
+ dst: UInt,
+ is_builtin_type: Bool,
+ g_type: UInt,
+ client_xact_id: UInt,
+ manager_xact_id: UInt,
+ addr_beat: UInt = UInt(0),
+ data: UInt = UInt(0))
+ (implicit p: Parameters): GrantToDst = {
+ val gnt = Wire(new GrantToDst)
+ gnt.client_id := dst
+ gnt.is_builtin_type := is_builtin_type
+ gnt.g_type := g_type
+ gnt.client_xact_id := client_xact_id
+ gnt.manager_xact_id := manager_xact_id
+ gnt.addr_beat := addr_beat
+ gnt.data := data
+ gnt
+ }
+}
+
+/** The Finish channel is used to provide a global ordering of transactions
+ * in networks that do not guarantee point-to-point ordering of messages.
+ * A Finish message is sent as acknowledgement of receipt of a [[uncore.Grant]].
+ * When a Finish message is received, a manager knows it is safe to begin
+ * processing other transactions that touch the same cache block.
+ */
+class Finish(implicit p: Parameters) extends ClientToManagerChannel()(p)
+ with HasManagerTransactionId {
+ // Finish messages never carry a data payload.
+ def hasData(dummy: Int = 0) = Bool(false)
+ def hasMultibeatData(dummy: Int = 0) = Bool(false)
+}
+
+/** [[uncore.Finish]] with an extra field stating its destination */
+class FinishToDst(implicit p: Parameters) extends Finish
+ with HasManagerId
+
+/** Complete IO definition for incoherent TileLink, including networking headers */
+class UncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
+ val acquire = new DecoupledIO(new LogicalNetworkIO(new Acquire))
+ val grant = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip // inbound
+ val finish = new DecoupledIO(new LogicalNetworkIO(new Finish))
+}
+
+/** Complete IO definition for coherent TileLink, including networking headers */
+class TileLinkIO(implicit p: Parameters) extends UncachedTileLinkIO()(p) {
+ val probe = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip // inbound
+ val release = new DecoupledIO(new LogicalNetworkIO(new Release))
+}
+
+/** This version of UncachedTileLinkIO does not contain network headers.
+ * It is intended for use within client agents.
+ *
+ * Headers are provided in the top-level that instantiates the clients and network,
+ * probably using a [[uncore.ClientTileLinkNetworkPort]] module.
+ * By eliding the header subbundles within the clients we can enable
+ * hierarchical P-and-R while minimizing unconnected port errors in GDS.
+ *
+ * Secondly, this version of the interface elides [[uncore.Finish]] messages, with the
+ * assumption that a [[uncore.FinishUnit]] has been coupled to the TileLinkIO port
+ * to deal with acking received [[uncore.Grant Grants]].
+ */
+class ClientUncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
+ val acquire = new DecoupledIO(new Acquire)
+ val grant = new DecoupledIO(new Grant).flip
+}
+
+/** This version of TileLinkIO does not contain network headers.
+ * It is intended for use within client agents.
+ */
+class ClientTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
+ val acquire = new DecoupledIO(new Acquire)
+ val probe = new DecoupledIO(new Probe).flip
+ val release = new DecoupledIO(new Release)
+ // Grants arrive tagged with their source manager so the matching Finish
+ // can be routed back to the right destination.
+ val grant = new DecoupledIO(new GrantFromSrc).flip
+ val finish = new DecoupledIO(new FinishToDst)
+}
+
+/** This version of TileLinkIO does not contain network headers, but
+ * every channel does include an extra client_id subbundle.
+ * It is intended for use within Management agents.
+ *
+ * Managers need to track where [[uncore.Acquire]] and [[uncore.Release]] messages
+ * originated so that they can send a [[uncore.Grant]] to the right place.
+ * Similarly they must be able to issue Probes to particular clients.
+ * However, we'd still prefer to have [[uncore.ManagerTileLinkNetworkPort]] fill in
+ * the header.src to enable hierarchical p-and-r of the managers. Additionally,
+ * coherent clients might be mapped to random network port ids, and we'll leave it to the
+ * [[uncore.ManagerTileLinkNetworkPort]] to apply the correct mapping. Managers do need to
+ * see Finish messages so they know when to allow new transactions on a cache
+ * block to proceed.
+ */
+class ManagerTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
+ val acquire = new DecoupledIO(new AcquireFromSrc).flip
+ val grant = new DecoupledIO(new GrantToDst)
+ val finish = new DecoupledIO(new Finish).flip
+ val probe = new DecoupledIO(new ProbeToDst)
+ val release = new DecoupledIO(new ReleaseFromSrc).flip
+}
diff --git a/uncore/src/main/scala/tilelink/Interconnect.scala b/uncore/src/main/scala/tilelink/Interconnect.scala
new file mode 100644
index 00000000..353dbb80
--- /dev/null
+++ b/uncore/src/main/scala/tilelink/Interconnect.scala
@@ -0,0 +1,386 @@
+package uncore.tilelink
+
+import Chisel._
+import junctions._
+import scala.collection.mutable.ArraySeq
+import uncore.util._
+import cde.{Parameters, Field}
+
+
+/** PortedTileLinkNetworks combine a TileLink protocol with a particular physical
+ * network implementation.
+ *
+ * Specifically, they provide mappings between ClientTileLinkIO/
+ * ManagerTileLinkIO channels and LogicalNetwork ports (i.e. generic
+ * TileLinkIO with networking headers). Channels coming into the network have
+ * appropriate networking headers appended and outgoing channels have their
+ * headers stripped.
+ *
+ * @constructor base class constructor for Ported TileLink NoC
+ * @param addrToManagerId a mapping from a physical address to the network
+ * id of a coherence manager
+ * @param sharerToClientId a mapping from the id of a particular coherent
+ * client (as determined by e.g. the directory) and the network id
+ * of that client
+ * @param clientDepths the depths of the queue that should be used to buffer
+ * each channel on the client side of the network
+ * @param managerDepths the depths of the queue that should be used to buffer
+ * each channel on the manager side of the network
+ */
+abstract class PortedTileLinkNetwork(
+ addrToManagerId: UInt => UInt,
+ sharerToClientId: UInt => UInt,
+ clientDepths: TileLinkDepths,
+ managerDepths: TileLinkDepths)
+ (implicit p: Parameters) extends TLModule()(p) {
+ val nClients = tlNClients
+ val nManagers = tlNManagers
+ val io = new Bundle {
+ val clients_cached = Vec(tlNCachingClients, new ClientTileLinkIO).flip
+ val clients_uncached = Vec(tlNCachelessClients, new ClientUncachedTileLinkIO).flip
+ val managers = Vec(nManagers, new ManagerTileLinkIO).flip
+ }
+
+ // Wrap every client port in its network port (which attaches headers) plus
+ // a channel queue; each element of this Seq is the network-facing TileLinkIO
+ // (qs.io.manager) that concrete subclasses hook into the physical network.
+ val clients = (io.clients_cached ++ io.clients_uncached).zipWithIndex.map {
+ case (io, idx) => {
+ val qs = Module(new TileLinkEnqueuer(clientDepths))
+ io match {
+ case c: ClientTileLinkIO => {
+ val port = Module(new ClientTileLinkNetworkPort(idx, addrToManagerId))
+ port.io.client <> c
+ qs.io.client <> port.io.network
+ qs.io.manager
+ }
+ case u: ClientUncachedTileLinkIO => {
+ val port = Module(new ClientUncachedTileLinkNetworkPort(idx, addrToManagerId))
+ port.io.client <> u
+ qs.io.client <> port.io.network
+ qs.io.manager
+ }
+ }
+ }
+ }
+
+ // Likewise wrap each manager port and buffer its channels; qs.io.client is
+ // the network-facing side exposed to subclasses.
+ val managers = io.managers.zipWithIndex.map {
+ case (m, i) => {
+ val port = Module(new ManagerTileLinkNetworkPort(i, sharerToClientId))
+ val qs = Module(new TileLinkEnqueuer(managerDepths))
+ port.io.manager <> m
+ port.io.network <> qs.io.manager
+ qs.io.client
+ }
+ }
+}
+
+/** A simple arbiter for each channel that also deals with header-based routing.
+ * Assumes a single manager agent. */
+class PortedTileLinkArbiter(
+ sharerToClientId: UInt => UInt = (u: UInt) => u,
+ clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
+ managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
+ (implicit p: Parameters)
+ extends PortedTileLinkNetwork(u => UInt(0), sharerToClientId, clientDepths, managerDepths)(p)
+ with TileLinkArbiterLike
+ with PassesId {
+ val arbN = nClients
+ require(nManagers == 1)
+ if(arbN > 1) {
+ // Client-sourced channels are arbitrated into the single manager;
+ // manager-sourced channels are routed back by header.
+ hookupClientSource(clients.map(_.acquire), managers.head.acquire)
+ hookupClientSource(clients.map(_.release), managers.head.release)
+ hookupFinish(clients.map(_.finish), managers.head.finish)
+ hookupManagerSourceWithHeader(clients.map(_.probe), managers.head.probe)
+ hookupManagerSourceWithHeader(clients.map(_.grant), managers.head.grant)
+ } else {
+ // Single client: no arbitration required, connect straight through.
+ managers.head <> clients.head
+ }
+}
+
+/** Provides a separate physical crossbar for each channel. Assumes multiple manager
+ * agents. Managers are assigned to higher physical network port ids than
+ * clients, and translations between logical network id and physical crossbar
+ * port id are done automatically.
+ */
+class PortedTileLinkCrossbar(
+ addrToManagerId: UInt => UInt = u => UInt(0),
+ sharerToClientId: UInt => UInt = u => u,
+ clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
+ managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
+ (implicit p: Parameters)
+ extends PortedTileLinkNetwork(addrToManagerId, sharerToClientId, clientDepths, managerDepths)(p) {
+ // Total physical ports; managers occupy ids [0, nManagers) and clients
+ // follow at [nManagers, n) — see the +/- UInt(nManagers) in the shims below.
+ val n = p(LNEndpoints)
+ val phyHdrWidth = log2Up(n)
+ val count = tlDataBeats
+ // Actually instantiate the particular networks required for TileLink
+ val acqNet = Module(new BasicBus(CrossbarConfig(n, new Acquire, count, Some((a: PhysicalNetworkIO[Acquire]) => a.payload.hasMultibeatData()))))
+ val relNet = Module(new BasicBus(CrossbarConfig(n, new Release, count, Some((r: PhysicalNetworkIO[Release]) => r.payload.hasMultibeatData()))))
+ val prbNet = Module(new BasicBus(CrossbarConfig(n, new Probe)))
+ val gntNet = Module(new BasicBus(CrossbarConfig(n, new Grant, count, Some((g: PhysicalNetworkIO[Grant]) => g.payload.hasMultibeatData()))))
+ val ackNet = Module(new BasicBus(CrossbarConfig(n, new Finish)))
+
+ // Aliases for the various network IO bundle types
+ type PNIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]]
+ type LNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]]
+ type FromCrossbar[T <: Data] = PNIO[T] => LNIO[T]
+ type ToCrossbar[T <: Data] = LNIO[T] => PNIO[T]
+
+ // Shims for converting between logical network IOs and physical network IOs,
+ // translating between logical ids and physical port ids on the way.
+ def crossbarToManagerShim[T <: Data](in: PNIO[T]): LNIO[T] = {
+ val out = DefaultFromPhysicalShim(in)
+ out.bits.header.src := in.bits.header.src - UInt(nManagers)
+ out
+ }
+ def crossbarToClientShim[T <: Data](in: PNIO[T]): LNIO[T] = {
+ val out = DefaultFromPhysicalShim(in)
+ out.bits.header.dst := in.bits.header.dst - UInt(nManagers)
+ out
+ }
+ def managerToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
+ val out = DefaultToPhysicalShim(n, in)
+ out.bits.header.dst := in.bits.header.dst + UInt(nManagers, phyHdrWidth)
+ out
+ }
+ def clientToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
+ val out = DefaultToPhysicalShim(n, in)
+ out.bits.header.src := in.bits.header.src + UInt(nManagers, phyHdrWidth)
+ out
+ }
+
+ // Make an individual connection between virtual and physical ports using
+ // a particular shim. Also pin the unused Decoupled control signal low.
+ def doDecoupledInputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: ToCrossbar[T]) = {
+ val s = shim(log_io)
+ phys_in.valid := s.valid
+ phys_in.bits := s.bits
+ s.ready := phys_in.ready
+ phys_out.ready := Bool(false)
+ }
+
+ def doDecoupledOutputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: FromCrossbar[T]) = {
+ val s = shim(phys_out)
+ log_io.valid := s.valid
+ log_io.bits := s.bits
+ s.ready := log_io.ready
+ phys_in.valid := Bool(false)
+ }
+
+ //Hookup all instances of a particular subbundle of TileLink.
+ // Dispatch on the channel's direction: client->manager channels are inputs
+ // from clients and outputs to managers, and vice versa for manager->client.
+ def doDecoupledHookups[T <: Data](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => LNIO[T]) = {
+ physIO.in.head.bits.payload match {
+ case c: ClientToManagerChannel => {
+ managers.zipWithIndex.map { case (i, id) =>
+ doDecoupledOutputHookup(physIO.in(id), physIO.out(id), getLogIO(i), crossbarToManagerShim[T])
+ }
+ clients.zipWithIndex.map { case (i, id) =>
+ doDecoupledInputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), clientToCrossbarShim[T])
+ }
+ }
+ case m: ManagerToClientChannel => {
+ managers.zipWithIndex.map { case (i, id) =>
+ doDecoupledInputHookup(physIO.in(id), physIO.out(id), getLogIO(i), managerToCrossbarShim[T])
+ }
+ clients.zipWithIndex.map { case (i, id) =>
+ doDecoupledOutputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), crossbarToClientShim[T])
+ }
+ }
+ }
+ }
+
+ doDecoupledHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
+ doDecoupledHookups(relNet.io, (tl: TileLinkIO) => tl.release)
+ doDecoupledHookups(prbNet.io, (tl: TileLinkIO) => tl.probe)
+ doDecoupledHookups(gntNet.io, (tl: TileLinkIO) => tl.grant)
+ doDecoupledHookups(ackNet.io, (tl: TileLinkIO) => tl.finish)
+}
+
+/** Routes a single client's Acquires to one of nOuter outer ports by address.
+ *
+ * routeSel maps a full physical address to a bitvector in which bit i selects
+ * outer port i; the assert below requires at least one bit to be set for any
+ * valid Acquire. Grants from all outer ports are merged back with a locking
+ * round-robin arbiter that holds its choice across multibeat grants.
+ */
+class ClientUncachedTileLinkIORouter(
+ nOuter: Int, routeSel: UInt => UInt)(implicit p: Parameters)
+ extends TLModule {
+
+ val io = new Bundle {
+ val in = (new ClientUncachedTileLinkIO).flip
+ val out = Vec(nOuter, new ClientUncachedTileLinkIO)
+ }
+
+ val acq_route = routeSel(io.in.acquire.bits.full_addr())
+
+ // Default; overridden below for the selected output port.
+ io.in.acquire.ready := Bool(false)
+
+ io.out.zipWithIndex.foreach { case (out, i) =>
+ out.acquire.valid := io.in.acquire.valid && acq_route(i)
+ out.acquire.bits := io.in.acquire.bits
+ when (acq_route(i)) { io.in.acquire.ready := out.acquire.ready }
+ }
+
+ val gnt_arb = Module(new LockingRRArbiter(
+ new Grant, nOuter, tlDataBeats, Some((gnt: Grant) => gnt.hasMultibeatData())))
+ gnt_arb.io.in <> io.out.map(_.grant)
+ io.in.grant <> gnt_arb.io.out
+
+ assert(!io.in.acquire.valid || acq_route.orR, "No valid route")
+}
+
+/** Common IO for TileLink interconnects: nInner client ports in, nOuter out.
+ *
+ * NOTE(review): this Bundle takes constructor parameters but does not override
+ * cloneType (unlike TileLinkMemorySelectorIO) — confirm cloning works as
+ * intended in this Chisel version.
+ */
+class TileLinkInterconnectIO(val nInner: Int, val nOuter: Int)
+ (implicit p: Parameters) extends Bundle {
+ val in = Vec(nInner, new ClientUncachedTileLinkIO).flip
+ val out = Vec(nOuter, new ClientUncachedTileLinkIO)
+}
+
+/** Full crossbar between nInner clients and nOuter targets.
+ *
+ * Each client gets its own address router; each outer port gets an arbiter
+ * over all routers' outputs for that port. With a single client the arbiters
+ * are skipped and the lone router drives the outputs directly.
+ */
+class ClientUncachedTileLinkIOCrossbar(
+ nInner: Int, nOuter: Int, routeSel: UInt => UInt)
+ (implicit p: Parameters) extends TLModule {
+
+ val io = new TileLinkInterconnectIO(nInner, nOuter)
+
+ if (nInner == 1) {
+ val router = Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel))
+ router.io.in <> io.in.head
+ io.out <> router.io.out
+ } else {
+ val routers = List.fill(nInner) {
+ Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel)) }
+ val arbiters = List.fill(nOuter) {
+ Module(new ClientUncachedTileLinkIOArbiter(nInner)) }
+
+ for (i <- 0 until nInner) {
+ routers(i).io.in <> io.in(i)
+ }
+
+ for (i <- 0 until nOuter) {
+ arbiters(i).io.in <> routers.map(r => r.io.out(i))
+ io.out(i) <> arbiters(i).io.out
+ }
+ }
+}
+
+/** Base class for TileLink interconnects.
+ *
+ * io is lazy so concrete subclasses can define nInner/nOuter before the
+ * bundle is constructed.
+ */
+abstract class TileLinkInterconnect(implicit p: Parameters) extends TLModule()(p) {
+ val nInner: Int
+ val nOuter: Int
+
+ lazy val io = new TileLinkInterconnectIO(nInner, nOuter)
+}
+
+/** Builds an interconnect tree mirroring a (possibly nested) AddrMap.
+ *
+ * Each top-level map entry gets one crossbar output; entries that are
+ * themselves AddrMaps recurse into a child interconnect, whose outputs are
+ * flattened into io.out. Empty submaps are tied off and contribute no ports.
+ */
+class TileLinkRecursiveInterconnect(val nInner: Int, addrMap: AddrMap)
+ (implicit p: Parameters) extends TileLinkInterconnect()(p) {
+ def port(name: String) = io.out(addrMap.port(name))
+ val nOuter = addrMap.numSlaves
+ // One select bit per top-level entry; reversed so entry 0 maps to bit 0 of Cat.
+ val routeSel = (addr: UInt) =>
+ Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse)
+
+ val xbar = Module(new ClientUncachedTileLinkIOCrossbar(nInner, addrMap.length, routeSel))
+ xbar.io.in <> io.in
+
+ io.out <> addrMap.entries.zip(xbar.io.out).flatMap {
+ case (entry, xbarOut) => {
+ entry.region match {
+ case submap: AddrMap if submap.isEmpty =>
+ // Dead region: sink acquires and never produce grants.
+ xbarOut.acquire.ready := Bool(false)
+ xbarOut.grant.valid := Bool(false)
+ None
+ case submap: AddrMap =>
+ val ic = Module(new TileLinkRecursiveInterconnect(1, submap))
+ ic.io.in.head <> xbarOut
+ ic.io.out
+ case _ =>
+ Some(xbarOut)
+ }
+ }
+ }
+}
+
+/** Fans nBanksPerChannel * nChannels banks into nChannels memory channels. */
+class TileLinkMemoryInterconnect(
+ nBanksPerChannel: Int, nChannels: Int)
+ (implicit p: Parameters) extends TileLinkInterconnect()(p) {
+
+ val nBanks = nBanksPerChannel * nChannels
+ val nInner = nBanks
+ val nOuter = nChannels
+
+ // Drop the channel-select bits from the block address on the way out.
+ // NOTE(review): the shift by log2Ceil(nChannels) assumes nChannels is a
+ // power of 2 — confirm callers guarantee this.
+ def connectChannel(outer: ClientUncachedTileLinkIO, inner: ClientUncachedTileLinkIO) {
+ outer <> inner
+ outer.acquire.bits.addr_block := inner.acquire.bits.addr_block >> UInt(log2Ceil(nChannels))
+ }
+
+ for (i <- 0 until nChannels) {
+ /* Bank assignments to channels are strided so that consecutive banks
+ * map to different channels. That way, consecutive cache lines also
+ * map to different channels */
+ val banks = (i until nBanks by nChannels).map(j => io.in(j))
+
+ val channelArb = Module(new ClientUncachedTileLinkIOArbiter(nBanksPerChannel))
+ channelArb.io.in <> banks
+ connectChannel(io.out(i), channelArb.io.out)
+ }
+}
+
+/** Allows users to switch between various memory configurations. Note that
+ * this is a dangerous operation: not only does switching the select input to
+ * this module violate TileLink, it also causes the memory of the machine to
+ * become garbled. It's expected that select only changes at boot time, as
+ * part of the memory controller configuration. */
+class TileLinkMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int)
+ (implicit p: Parameters)
+ extends TileLinkInterconnectIO(nBanks, maxMemChannels) {
+ // Binary index into the list of supported configurations.
+ val select = UInt(INPUT, width = log2Up(nConfigs))
+ override def cloneType =
+ new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs).asInstanceOf[this.type]
+}
+
+/** Instantiates one TileLinkMemoryInterconnect per supported channel count
+ * and muxes between them based on io.select (see TileLinkMemorySelectorIO's
+ * caveats about changing select at runtime).
+ */
+class TileLinkMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int])
+ (implicit p: Parameters)
+ extends TileLinkInterconnect()(p) {
+ val nInner = nBanks
+ val nOuter = maxMemChannels
+ val nConfigs = configs.size
+
+ override lazy val io = new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs)
+
+ // Connect dn to up only while active is asserted; otherwise the default
+ // (disconnected) wiring below remains in effect.
+ def muxOnSelect[T <: Data](up: DecoupledIO[T], dn: DecoupledIO[T], active: Bool): Unit = {
+ when (active) { dn.bits := up.bits }
+ when (active) { up.ready := dn.ready }
+ when (active) { dn.valid := up.valid }
+ }
+
+ def muxOnSelect(up: ClientUncachedTileLinkIO, dn: ClientUncachedTileLinkIO, active: Bool): Unit = {
+ muxOnSelect(up.acquire, dn.acquire, active)
+ muxOnSelect(dn.grant, up.grant, active)
+ }
+
+ def muxOnSelect(up: Vec[ClientUncachedTileLinkIO], dn: Vec[ClientUncachedTileLinkIO], active: Bool) : Unit = {
+ for (i <- 0 until up.size)
+ muxOnSelect(up(i), dn(i), active)
+ }
+
+ /* Disconnects a vector of TileLink ports, which involves setting them to
+ * invalid. Due to Chisel reasons, we need to also set the bits to 0 (since
+ * there can't be any unconnected inputs). */
+ def disconnectOuter(outer: Vec[ClientUncachedTileLinkIO]) = {
+ outer.foreach{ m =>
+ m.acquire.valid := Bool(false)
+ m.acquire.bits := m.acquire.bits.fromBits(UInt(0))
+ m.grant.ready := Bool(false)
+ }
+ }
+
+ def disconnectInner(inner: Vec[ClientUncachedTileLinkIO]) = {
+ inner.foreach { m =>
+ m.grant.valid := Bool(false)
+ m.grant.bits := m.grant.bits.fromBits(UInt(0))
+ m.acquire.ready := Bool(false)
+ }
+ }
+
+ /* Provides default wires on all our outputs. */
+ disconnectOuter(io.out)
+ disconnectInner(io.in)
+
+ /* Constructs interconnects for each of the layouts suggested by the
+ * configuration and switches between them based on the select input. */
+ configs.zipWithIndex.foreach{ case (nChannels, select) =>
+ val nBanksPerChannel = nBanks / nChannels
+ val ic = Module(new TileLinkMemoryInterconnect(nBanksPerChannel, nChannels))
+ disconnectInner(ic.io.out)
+ disconnectOuter(ic.io.in)
+ muxOnSelect(io.in, ic.io.in, io.select === UInt(select))
+ muxOnSelect(ic.io.out, io.out, io.select === UInt(select))
+ }
+}
diff --git a/uncore/src/main/scala/tilelink/Network.scala b/uncore/src/main/scala/tilelink/Network.scala
new file mode 100644
index 00000000..1c094013
--- /dev/null
+++ b/uncore/src/main/scala/tilelink/Network.scala
@@ -0,0 +1,308 @@
+// See LICENSE for license details.
+
+package uncore.tilelink
+
+import Chisel._
+import uncore.util._
+import cde.{Parameters, Field}
+
+// Configuration key: number of endpoints on the logical network.
+case object LNEndpoints extends Field[Int]
+// Configuration key: width in bits of logical-network src/dst header fields.
+case object LNHeaderBits extends Field[Int]
+
+/** Routing header for a physical network with `n` ports: source and
+  * destination port indices, each log2Up(n) bits wide. */
+class PhysicalHeader(n: Int) extends Bundle {
+ val src = UInt(width = log2Up(n))
+ val dst = UInt(width = log2Up(n))
+}
+
+/** A payload of type `dType` tagged with a [[PhysicalHeader]] for routing
+  * across an n-port physical network. */
+class PhysicalNetworkIO[T <: Data](n: Int, dType: T) extends Bundle {
+ val header = new PhysicalHeader(n)
+ val payload = dType.cloneType
+ override def cloneType = new PhysicalNetworkIO(n,dType).asInstanceOf[this.type]
+}
+
+/** n-in, n-out bundle of Decoupled physical-network channels for a crossbar. */
+class BasicCrossbarIO[T <: Data](n: Int, dType: T) extends Bundle {
+ val in = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType))).flip
+ val out = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType)))
+}
+
+// Marker base class for physical network implementations.
+abstract class PhysicalNetwork extends Module
+
+/** Crossbar parameters: port count `n`, payload type, beats held per
+  * arbiter grant (`count`), and an optional predicate that keeps the
+  * arbiter locked to one source while it holds. */
+case class CrossbarConfig[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Option[PhysicalNetworkIO[T] => Bool] = None)
+
+// Shared IO shape for the bus and crossbar implementations below.
+abstract class AbstractCrossbar[T <: Data](conf: CrossbarConfig[T]) extends PhysicalNetwork {
+ val io = new BasicCrossbarIO(conf.n, conf.dType)
+}
+
+/** Shared-bus implementation: a single locking round-robin arbiter grants
+  * one input at a time and the winning message is steered to the output
+  * selected by its header's dst field. */
+class BasicBus[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) {
+ val arb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock))
+ arb.io.in <> io.in
+
+ // Backpressure comes from whichever output the current winner targets.
+ arb.io.out.ready := io.out(arb.io.out.bits.header.dst).ready
+ for ((out, i) <- io.out zipWithIndex) {
+ out.valid := arb.io.out.valid && arb.io.out.bits.header.dst === UInt(i)
+ out.bits := arb.io.out.bits
+ }
+}
+
+/** Full crossbar: one locking round-robin arbiter per output port, so
+  * distinct input/output pairs can transfer concurrently. */
+class BasicCrossbar[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) {
+ // Default every input to stalled; the matching arbiter's when() below
+ // overrides this (last connect wins) once that input wins arbitration.
+ io.in.foreach { _.ready := Bool(false) }
+
+ io.out.zipWithIndex.map{ case (out, i) => {
+ val rrarb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock))
+ (rrarb.io.in, io.in).zipped.map{ case (arb, in) => {
+ // Only present this input to arbiter i if the message targets output i.
+ val destined = in.bits.header.dst === UInt(i)
+ arb.valid := in.valid && destined
+ arb.bits := in.bits
+ when (arb.ready && destined) { in.ready := Bool(true) }
+ }}
+ out <> rrarb.io.out
+ }}
+}
+
+// Marker base class for logical (protocol-level) network implementations.
+abstract class LogicalNetwork extends Module
+
+/** Routing header for the logical network; field widths come from the
+  * LNHeaderBits configuration key. */
+class LogicalHeader(implicit p: Parameters) extends junctions.ParameterizedBundle()(p) {
+ val src = UInt(width = p(LNHeaderBits))
+ val dst = UInt(width = p(LNHeaderBits))
+}
+
+/** A payload of type `dType` tagged with a [[LogicalHeader]] for routing
+  * on the logical network. */
+class LogicalNetworkIO[T <: Data](dType: T)(implicit p: Parameters) extends Bundle {
+ val header = new LogicalHeader
+ val payload = dType.cloneType
+ override def cloneType = new LogicalNetworkIO(dType)(p).asInstanceOf[this.type]
+}
+
+object DecoupledLogicalNetworkIOWrapper {
+  /** Wraps a plain Decoupled channel into a LogicalNetworkIO channel,
+   * stamping the given source and destination ids into the header.
+   * Handshake signals pass through combinationally. */
+  def apply[T <: Data](
+      in: DecoupledIO[T],
+      src: UInt = UInt(0),
+      dst: UInt = UInt(0))
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val wrapped = Wire(Decoupled(new LogicalNetworkIO(in.bits)))
+    wrapped.bits.header.src := src
+    wrapped.bits.header.dst := dst
+    wrapped.bits.payload := in.bits
+    wrapped.valid := in.valid
+    in.ready := wrapped.ready
+    wrapped
+  }
+}
+
+object DecoupledLogicalNetworkIOUnwrapper {
+  /** Strips the logical-network header from a channel, exposing only the
+   * payload; handshake signals pass through combinationally. */
+  def apply[T <: Data](in: DecoupledIO[LogicalNetworkIO[T]])
+      (implicit p: Parameters): DecoupledIO[T] = {
+    val unwrapped = Wire(Decoupled(in.bits.payload))
+    unwrapped.bits := in.bits.payload
+    unwrapped.valid := in.valid
+    in.ready := unwrapped.ready
+    unwrapped
+  }
+}
+
+/** Converts a physical-network channel to a logical-network channel by
+  * copying header and payload straight across (port ids map 1:1). */
+object DefaultFromPhysicalShim {
+ def apply[T <: Data](in: DecoupledIO[PhysicalNetworkIO[T]])
+ (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+ val out = Wire(Decoupled(new LogicalNetworkIO(in.bits.payload)))
+ out.bits.header := in.bits.header
+ out.bits.payload := in.bits.payload
+ out.valid := in.valid
+ in.ready := out.ready
+ out
+ }
+}
+
+/** Converts a logical-network channel to an n-port physical-network channel
+  * by copying header and payload straight across (port ids map 1:1). */
+object DefaultToPhysicalShim {
+ def apply[T <: Data](n: Int, in: DecoupledIO[LogicalNetworkIO[T]])
+ (implicit p: Parameters): DecoupledIO[PhysicalNetworkIO[T]] = {
+ val out = Wire(Decoupled(new PhysicalNetworkIO(n, in.bits.payload)))
+ out.bits.header := in.bits.header
+ out.bits.payload := in.bits.payload
+ out.valid := in.valid
+ in.ready := out.ready
+ out
+ }
+}
+
+/** A helper module that automatically issues [[uncore.Finish]] messages in response
+ * to [[uncore.Grant]] messages that it receives from a manager and forwards to a client.
+ *
+ * @param srcId network port id stamped into outgoing Finish headers
+ * @param outstanding capacity of the queue of Finishes awaiting transmission
+ */
+class FinishUnit(srcId: Int = 0, outstanding: Int = 2)(implicit p: Parameters) extends TLModule()(p)
+ with HasDataBeatCounters {
+ val io = new Bundle {
+ val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip
+ val refill = Decoupled(new Grant)
+ val finish = Decoupled(new LogicalNetworkIO(new Finish))
+ val ready = Bool(OUTPUT)
+ }
+
+ val g = io.grant.bits.payload
+
+ if(tlNetworkPreservesPointToPointOrdering) {
+ // Ordered networks need no Finish acks: just pass Grants through.
+ io.finish.valid := Bool(false)
+ io.refill.valid := io.grant.valid
+ io.refill.bits := g
+ io.grant.ready := io.refill.ready
+ io.ready := Bool(true)
+ } else {
+ // We only want to send Finishes after we have collected all beats of
+ // a multibeat Grant. But Grants from multiple managers or transactions may
+ // get interleaved, so we could need a counter for each.
+ val done = if(tlNetworkDoesNotInterleaveBeats) {
+ connectIncomingDataBeatCounterWithHeader(io.grant)
+ } else {
+ val entries = 1 << tlClientXactIdBits
+ def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id
+ // getId indexes a Vec of `entries` counters, so it must be strictly
+ // less than entries (ids run 0 .. entries-1).
+ assert(getId(io.grant.bits) < UInt(entries), "Not enough grant beat counters, only " + entries + " entries.")
+ connectIncomingDataBeatCountersWithHeader(io.grant, entries, getId).reduce(_||_)
+ }
+ // Enqueue one Finish per completed Grant that requires an ack,
+ // remembering which manager it must be sent back to.
+ val q = Module(new FinishQueue(outstanding))
+ q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done)
+ q.io.enq.bits := g.makeFinish()
+ q.io.enq.bits.manager_id := io.grant.bits.header.src
+
+ io.finish.bits.header.src := UInt(srcId)
+ io.finish.bits.header.dst := q.io.deq.bits.manager_id
+ io.finish.bits.payload := q.io.deq.bits
+ io.finish.valid := q.io.deq.valid
+ q.io.deq.ready := io.finish.ready
+
+ // Only accept a Grant when there is queue space for its eventual Finish.
+ io.refill.valid := (q.io.enq.ready || !g.requiresAck()) && io.grant.valid
+ io.refill.bits := g
+ io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready
+ io.ready := q.io.enq.ready
+ }
+}
+
+/** Queue of pending Finish messages (with destination ids) awaiting transmission. */
+class FinishQueue(entries: Int)(implicit p: Parameters) extends Queue(new FinishToDst()(p), entries)
+
+/** A port to convert [[uncore.ClientTileLinkIO]].flip into [[uncore.TileLinkIO]]
+ *
+ * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
+ * calculating header.dst and filling in header.src.
+ * Strips headers from [[uncore.Probe Probes]].
+ * Passes [[uncore.GrantFromSrc]] and accepts [[uncore.FinishFromDst]] in response,
+ * setting up the headers for each.
+ *
+ * @param clientId network port id of this agent
+ * @param addrConvert how a physical address maps to a destination manager port id
+ */
+class ClientTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
+ (implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val client = new ClientTileLinkIO().flip
+ val network = new TileLinkIO
+ }
+
+ // Outbound channels get headers; inbound channels have theirs stripped.
+ val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
+ val rel_with_header = ClientTileLinkHeaderCreator(io.client.release, clientId, addrConvert)
+ val fin_with_header = ClientTileLinkHeaderCreator(io.client.finish, clientId)
+ val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe)
+ val gnt_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.grant)
+
+ io.network.acquire <> acq_with_header
+ io.network.release <> rel_with_header
+ io.network.finish <> fin_with_header
+ io.client.probe <> prb_without_header
+ // NOTE(review): manager_id is driven before the bulk connect below; this
+ // relies on the unwrapped payload not carrying a manager_id field to
+ // re-drive — confirm against Grant vs. GrantFromSrc field sets.
+ io.client.grant.bits.manager_id := io.network.grant.bits.header.src
+ io.client.grant <> gnt_without_header
+}
+
+/** A port to convert [[uncore.ClientUncachedTileLinkIO]].flip into [[uncore.TileLinkIO]]
+ *
+ * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
+ * calculating header.dst and filling in header.src.
+ * Responds to [[uncore.Grant]] by automatically issuing [[uncore.Finish]] to the granting managers.
+ *
+ * @param clientId network port id of this agent
+ * @param addrConvert how a physical address maps to a destination manager port id
+ */
+class ClientUncachedTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
+ (implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val client = new ClientUncachedTileLinkIO().flip
+ val network = new TileLinkIO
+ }
+
+ // Grants route through the FinishUnit, which issues the Finish acks.
+ val finisher = Module(new FinishUnit(clientId))
+ finisher.io.grant <> io.network.grant
+ io.network.finish <> finisher.io.finish
+
+ val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
+ val gnt_without_header = finisher.io.refill
+
+ // Gate acquires on the finisher having room to ack the eventual Grant.
+ io.network.acquire.bits := acq_with_header.bits
+ io.network.acquire.valid := acq_with_header.valid && finisher.io.ready
+ acq_with_header.ready := io.network.acquire.ready && finisher.io.ready
+ io.client.grant <> gnt_without_header
+ // Uncached clients never receive probes or send releases; tie both off.
+ io.network.probe.ready := Bool(false)
+ io.network.release.valid := Bool(false)
+}
+
+object ClientTileLinkHeaderCreator {
+  /** Wraps a channel that already names its manager explicitly, using that
+   * manager id as the network destination. */
+  def apply[T <: ClientToManagerChannel with HasManagerId](
+      in: DecoupledIO[T],
+      clientId: Int)
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val withHeader = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
+    withHeader.bits.header.src := UInt(clientId)
+    withHeader.bits.header.dst := in.bits.manager_id
+    withHeader.bits.payload := in.bits
+    withHeader.valid := in.valid
+    in.ready := withHeader.ready
+    withHeader
+  }
+  /** Wraps an address-carrying channel, computing the destination manager
+   * port id from the cache-block address via `addrConvert`. */
+  def apply[T <: ClientToManagerChannel with HasCacheBlockAddress](
+      in: DecoupledIO[T],
+      clientId: Int,
+      addrConvert: UInt => UInt)
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val withHeader = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
+    withHeader.bits.header.src := UInt(clientId)
+    withHeader.bits.header.dst := addrConvert(in.bits.addr_block)
+    withHeader.bits.payload := in.bits
+    withHeader.valid := in.valid
+    in.ready := withHeader.ready
+    withHeader
+  }
+}
+
+/** A port to convert [[uncore.ManagerTileLinkIO]].flip into [[uncore.TileLinkIO]].flip
+ *
+ * Creates network headers for [[uncore.Probe]] and [[uncore.Grant]] messages,
+ * calculating header.dst and filling in header.src.
+ * Strips headers from [[uncore.Acquire]], [[uncore.Release]] and [[uncore.Finish]],
+ * but supplies client_id instead.
+ *
+ * @param managerId the network port id of this agent
+ * @param idConvert how a sharer id maps to a destination client port id
+ */
+class ManagerTileLinkNetworkPort(managerId: Int, idConvert: UInt => UInt)
+ (implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val manager = new ManagerTileLinkIO().flip
+ val network = new TileLinkIO().flip
+ }
+ // Grants already carry the destination client id directly; probes need
+ // the sharer-id-to-port conversion.
+ io.network.grant <> ManagerTileLinkHeaderCreator(io.manager.grant, managerId, (u: UInt) => u)
+ io.network.probe <> ManagerTileLinkHeaderCreator(io.manager.probe, managerId, idConvert)
+ // Inbound channels: strip headers, then overwrite client_id with the
+ // network source (driven after the bulk connect; last connect wins).
+ io.manager.acquire <> DecoupledLogicalNetworkIOUnwrapper(io.network.acquire)
+ io.manager.acquire.bits.client_id := io.network.acquire.bits.header.src
+ io.manager.release <> DecoupledLogicalNetworkIOUnwrapper(io.network.release)
+ io.manager.release.bits.client_id := io.network.release.bits.header.src
+ io.manager.finish <> DecoupledLogicalNetworkIOUnwrapper(io.network.finish)
+}
+
+object ManagerTileLinkHeaderCreator {
+  /** Wraps a manager-to-client channel in a network header, converting the
+   * payload's client id into a destination port id via `idConvert`. */
+  def apply[T <: ManagerToClientChannel with HasClientId](
+      in: DecoupledIO[T],
+      managerId: Int,
+      idConvert: UInt => UInt)
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val withHeader = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
+    withHeader.bits.header.src := UInt(managerId)
+    withHeader.bits.header.dst := idConvert(in.bits.client_id)
+    withHeader.bits.payload := in.bits
+    withHeader.valid := in.valid
+    in.ready := withHeader.ready
+    withHeader
+  }
+}
diff --git a/uncore/src/main/scala/util/AmoAlu.scala b/uncore/src/main/scala/util/AmoAlu.scala
new file mode 100644
index 00000000..d6ff9ce8
--- /dev/null
+++ b/uncore/src/main/scala/util/AmoAlu.scala
@@ -0,0 +1,109 @@
+// See LICENSE for license details.
+
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+import uncore.constants._
+
+/** Generates byte masks and replicated write data for a store of size
+  * 2^`size` bytes at `addr` into a datapath of `maxSize` bytes.
+  * `typ` encodes log2 of the access size in its low bits. */
+class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) {
+ val size = typ(log2Up(log2Up(maxSize)+1)-1,0)
+ // Misaligned iff any address bit below the access size is set.
+ def misaligned =
+ (addr & ((UInt(1) << size) - UInt(1))(log2Up(maxSize)-1,0)).orR
+
+ // Byte-enable mask: built one address bit at a time, doubling in width
+ // each iteration, selecting which half holds the enabled bytes.
+ def mask = {
+ var res = UInt(1)
+ for (i <- 0 until log2Up(maxSize)) {
+ val upper = Mux(addr(i), res, UInt(0)) | Mux(size >= UInt(i+1), UInt((BigInt(1) << (1 << i))-1), UInt(0))
+ val lower = Mux(addr(i), UInt(0), res)
+ res = Cat(upper, lower)
+ }
+ res
+ }
+
+ // Replicates the low (8 << i) bits of dat across the full width for the
+ // matching access size, recursing over all candidate sizes.
+ protected def genData(i: Int): UInt =
+ if (i >= log2Up(maxSize)) dat
+ else Mux(size === UInt(i), Fill(1 << (log2Up(maxSize)-i), dat((8 << i)-1,0)), genData(i+1))
+
+ def data = genData(0)
+ def wordData = genData(2)
+}
+
+/** StoreGen variant for data already replicated/aligned by the caller:
+  * passes `dat` through unchanged instead of re-replicating it. */
+class StoreGenAligned(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) extends StoreGen(typ, addr, dat, maxSize) {
+ override def genData(i: Int) = dat
+}
+
+/** Extracts and sign/zero-extends a loaded value of the size encoded in
+  * `typ` from the `maxSize`-byte word `dat` at offset `addr`.
+  * `zero` forces a zero result (used e.g. for LR/SC failure paths —
+  * NOTE(review): confirm against callers). */
+class LoadGen(typ: UInt, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) {
+ private val t = new StoreGen(typ, addr, dat, maxSize)
+ // Sign-extend when the sign bit of typ's encoding is clear.
+ private val signed = typ.toSInt >= SInt(0)
+
+ // Iteratively selects the addressed half at each granularity and extends
+ // the top bits with the sign (or zero) as the access size dictates.
+ private def genData(logMinSize: Int): UInt = {
+ var res = dat
+ for (i <- log2Up(maxSize)-1 to logMinSize by -1) {
+ val pos = 8 << i
+ val shifted = Mux(addr(i), res(2*pos-1,pos), res(pos-1,0))
+ val doZero = Bool(i == 0) && zero
+ val zeroed = Mux(doZero, UInt(0), shifted)
+ res = Cat(Mux(t.size === UInt(i) || doZero, Fill(8*maxSize-pos, signed && zeroed(pos-1)), res(8*maxSize-1,pos)), zeroed)
+ }
+ res
+ }
+
+ def wordData = genData(2)
+ def data = genData(0)
+}
+
+/** Atomic memory operation ALU: combines the existing memory value (`lhs`)
+  * with the operand (`rhs`) according to `cmd` (add/and/or/xor/min/max and
+  * their unsigned variants), masking the result to the accessed bytes.
+  *
+  * @param rhsIsAligned caller guarantees rhs is already replicated/aligned,
+  *                     skipping the StoreGen replication step
+  */
+class AMOALU(rhsIsAligned: Boolean = false)(implicit p: Parameters) extends Module {
+ val operandBits = p(AmoAluOperandBits)
+ val blockOffBits = p(CacheBlockOffsetBits)
+ require(operandBits == 32 || operandBits == 64)
+ val io = new Bundle {
+ val addr = Bits(INPUT, blockOffBits)
+ val cmd = Bits(INPUT, M_SZ)
+ val typ = Bits(INPUT, MT_SZ)
+ val lhs = Bits(INPUT, operandBits)
+ val rhs = Bits(INPUT, operandBits)
+ val out = Bits(OUTPUT, operandBits)
+ }
+
+ val storegen =
+ if(rhsIsAligned) new StoreGenAligned(io.typ, io.addr, io.rhs, operandBits/8)
+ else new StoreGen(io.typ, io.addr, io.rhs, operandBits/8)
+ val rhs = storegen.wordData
+
+ val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
+ val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
+ val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
+ val word = io.typ === MT_W || io.typ === MT_WU || // Logic minimization:
+ io.typ === MT_B || io.typ === MT_BU
+
+ // 64-bit adds mask out the carry into bit 32 for word-sized ops so the
+ // two halves add independently.
+ val adder_out =
+ if (operandBits == 32) io.lhs + rhs
+ else {
+ val mask = ~UInt(0,64) ^ (io.addr(2) << 31)
+ (io.lhs & mask).toUInt + (rhs & mask)
+ }
+
+ // Comparison: split 64-bit compares into hi/lo words so word-sized ops
+ // compare only the addressed half; signedness picks the tie-break bit.
+ val less =
+ if (operandBits == 32) Mux(io.lhs(31) === rhs(31), io.lhs < rhs, Mux(sgned, io.lhs(31), io.rhs(31)))
+ else {
+ val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
+ val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
+ val lt_lo = io.lhs(31,0) < rhs(31,0)
+ val lt_hi = io.lhs(63,32) < rhs(63,32)
+ val eq_hi = io.lhs(63,32) === rhs(63,32)
+ val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
+ Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
+ }
+
+ val out = Mux(io.cmd === M_XA_ADD, adder_out,
+ Mux(io.cmd === M_XA_AND, io.lhs & rhs,
+ Mux(io.cmd === M_XA_OR, io.lhs | rhs,
+ Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
+ Mux(Mux(less, min, max), io.lhs,
+ storegen.data)))))
+
+ // Merge: only the bytes covered by the store mask take the ALU result.
+ val wmask = FillInterleaved(8, storegen.mask)
+ io.out := wmask & out | ~wmask & io.lhs
+}
diff --git a/uncore/src/main/scala/util/Counters.scala b/uncore/src/main/scala/util/Counters.scala
new file mode 100644
index 00000000..3bc2d85b
--- /dev/null
+++ b/uncore/src/main/scala/util/Counters.scala
@@ -0,0 +1,134 @@
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+
+// Produces 0-width value when counting to 1
+class ZCounter(val n: Int) {
+  val value = Reg(init=UInt(0, log2Ceil(n)))
+  /** Advances the counter; returns true on the cycle it wraps. */
+  def inc(): Bool = {
+    if (n == 1) Bool(true)
+    else {
+      val wrap = value === UInt(n-1)
+      // Power-of-two periods wrap naturally through the adder; other
+      // periods must be forced back to zero on the wrap cycle.
+      if (isPow2(n)) {
+        value := value + UInt(1)
+      } else {
+        value := Mux(wrap, UInt(0), value + UInt(1))
+      }
+      wrap
+    }
+  }
+}
+
+object ZCounter {
+  /** Free-running counter with no enable. */
+  def apply(n: Int) = new ZCounter(n)
+  /** Counter gated by `cond`.
+   * @param cond increment enable
+   * @param n counter period
+   * @return (current value, wrapped-this-cycle)
+   */
+  def apply(cond: Bool, n: Int): (UInt, Bool) = {
+    val c = new ZCounter(n)
+    // The when-body always executes at elaboration time (it only gates the
+    // generated hardware), so `wrap` is always rebound before use; the
+    // literal initializer replaces the previous null placeholder.
+    var wrap: Bool = Bool(false)
+    when (cond) { wrap = c.inc() }
+    (c.value, cond && wrap)
+  }
+}
+
+/** Up/down counter for tracking outstanding requests, ranged [0, max].
+  * Simultaneous up and down leave the count unchanged. No overflow
+  * protection: the caller must keep at most `max` ups outstanding. */
+object TwoWayCounter {
+ def apply(up: Bool, down: Bool, max: Int): UInt = {
+ val cnt = Reg(init = UInt(0, log2Up(max+1)))
+ when (up && !down) { cnt := cnt + UInt(1) }
+ when (down && !up) { cnt := cnt - UInt(1) }
+ cnt
+ }
+}
+
+/** Beat index and completion flag for one counted channel. */
+class BeatCounterStatus extends Bundle {
+ val idx = UInt()
+ val done = Bool()
+}
+
+/** Status for a paired up/down beat counter: whether any up message is
+  * still awaiting its down response, plus per-direction beat status. */
+class TwoWayBeatCounterStatus extends Bundle {
+ val pending = Bool()
+ val up = new BeatCounterStatus()
+ val down = new BeatCounterStatus()
+}
+
+/** Utility trait containing wiring functions to keep track of how many data beats have
+ * been sent or received over a particular [[uncore.TileLinkChannel]] or pair of channels.
+ *
+ * Won't count message types that don't have data.
+ * Used in [[uncore.XactTracker]] and [[uncore.FinishUnit]].
+ */
+trait HasDataBeatCounters {
+ type HasBeat = TileLinkChannel with HasTileLinkBeatId
+ type HasId = TileLinkChannel with HasClientId
+
+ /** Returns the current count on this channel and when a message is done
+ * @param inc increment the counter (usually .valid or .fire())
+ * @param data the actual channel data
+ * @param beat count to return for single-beat messages
+ */
+ def connectDataBeatCounter[S <: TileLinkChannel](inc: Bool, data: S, beat: UInt) = {
+ val multi = data.hasMultibeatData()
+ val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats)
+ // Single-beat messages complete on the same cycle they fire.
+ val cnt = Mux(multi, multi_cnt, beat)
+ val done = Mux(multi, multi_done, inc)
+ (cnt, done)
+ }
+
+ /** Counter for beats on outgoing [[chisel.DecoupledIO]] */
+ def connectOutgoingDataBeatCounter[T <: TileLinkChannel](
+ out: DecoupledIO[T],
+ beat: UInt = UInt(0)): (UInt, Bool) =
+ connectDataBeatCounter(out.fire(), out.bits, beat)
+
+ /** Returns done but not cnt. Use the addr_beat subbundle instead of cnt for beats on
+ * incoming channels in case of network reordering.
+ */
+ def connectIncomingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T]): Bool =
+ connectDataBeatCounter(in.fire(), in.bits, UInt(0))._2
+
+ /** Counter for beats on incoming DecoupledIO[LogicalNetworkIO[]]s returns done */
+ def connectIncomingDataBeatCounterWithHeader[T <: TileLinkChannel](in: DecoupledIO[LogicalNetworkIO[T]]): Bool =
+ connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2
+
+ /** If the network might interleave beats from different messages, we need a Vec of counters,
+ * one for every outstanding message id that might be interleaved.
+ *
+ * @param getId mapping from Message to counter id
+ */
+ def connectIncomingDataBeatCountersWithHeader[T <: TileLinkChannel with HasClientTransactionId](
+ in: DecoupledIO[LogicalNetworkIO[T]],
+ entries: Int,
+ getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = {
+ Vec((0 until entries).map { i =>
+ connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2
+ })
+ }
+
+ /** Provides counters on two channels, as well as a meta-counter that tracks how many
+ * messages have been sent over the up channel but not yet responded to over the down channel
+ *
+ * @param status bundle of status of the counters
+ * @param up outgoing channel
+ * @param down incoming channel
+ * @param max max number of outstanding ups with no down
+ * @param beat overrides cnts on single-beat messages
+ * @param track whether up's message should be tracked
+ * @return a tuple containing whether there are outstanding messages, up's count,
+ * up's done, down's count, down's done
+ */
+ def connectTwoWayBeatCounters[T <: TileLinkChannel, S <: TileLinkChannel](
+ status: TwoWayBeatCounterStatus,
+ up: DecoupledIO[T],
+ down: DecoupledIO[S],
+ max: Int = 1,
+ beat: UInt = UInt(0),
+ trackUp: T => Bool = (t: T) => Bool(true),
+ trackDown: S => Bool = (s: S) => Bool(true)) {
+ val (up_idx, up_done) = connectDataBeatCounter(up.fire() && trackUp(up.bits), up.bits, beat)
+ val (dn_idx, dn_done) = connectDataBeatCounter(down.fire() && trackDown(down.bits), down.bits, beat)
+ val cnt = TwoWayCounter(up_done, dn_done, max)
+ status.pending := cnt > UInt(0)
+ status.up.idx := up_idx
+ status.up.done := up_done
+ status.down.idx := dn_idx
+ status.down.done := dn_done
+ }
+}
+
+
diff --git a/uncore/src/main/scala/util/Enqueuer.scala b/uncore/src/main/scala/util/Enqueuer.scala
new file mode 100644
index 00000000..f6f3587a
--- /dev/null
+++ b/uncore/src/main/scala/util/Enqueuer.scala
@@ -0,0 +1,56 @@
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+
+/** Struct for describing per-channel queue depths
+  * (acquire, probe, release, grant, finish; 0 = no queue). */
+case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int)
+
+/** Optionally enqueues each [[uncore.TileLinkChannel]] individually.
+  * A channel with depth 0 is wired straight through; each channel is
+  * buffered independently of the others. */
+class TileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
+ val io = new Bundle {
+ val client = new TileLinkIO().flip
+ val manager = new TileLinkIO
+ }
+ io.manager.acquire <> (if(depths.acq > 0) Queue(io.client.acquire, depths.acq) else io.client.acquire)
+ io.client.probe <> (if(depths.prb > 0) Queue(io.manager.probe, depths.prb) else io.manager.probe)
+ io.manager.release <> (if(depths.rel > 0) Queue(io.client.release, depths.rel) else io.client.release)
+ io.client.grant <> (if(depths.gnt > 0) Queue(io.manager.grant, depths.gnt) else io.manager.grant)
+ io.manager.finish <> (if(depths.fin > 0) Queue(io.client.finish, depths.fin) else io.client.finish)
+}
+
+object TileLinkEnqueuer {
+  /** Inserts per-channel queues of the given depths behind `in` and
+   * returns the buffered manager-side port. */
+  def apply(in: TileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): TileLinkIO = {
+    val enq = Module(new TileLinkEnqueuer(depths))
+    enq.io.client <> in
+    enq.io.manager
+  }
+  /** Convenience overload: the same depth on all five channels. */
+  def apply(in: TileLinkIO, depth: Int)(implicit p: Parameters): TileLinkIO =
+    apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
+}
+
+/** Per-channel queueing for a [[uncore.ClientTileLinkIO]]; a depth of 0
+  * wires the channel straight through. */
+class ClientTileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
+ val io = new Bundle {
+ val inner = new ClientTileLinkIO().flip
+ val outer = new ClientTileLinkIO
+ }
+
+ io.outer.acquire <> (if(depths.acq > 0) Queue(io.inner.acquire, depths.acq) else io.inner.acquire)
+ io.inner.probe <> (if(depths.prb > 0) Queue(io.outer.probe, depths.prb) else io.outer.probe)
+ io.outer.release <> (if(depths.rel > 0) Queue(io.inner.release, depths.rel) else io.inner.release)
+ io.inner.grant <> (if(depths.gnt > 0) Queue(io.outer.grant, depths.gnt) else io.outer.grant)
+ io.outer.finish <> (if(depths.fin > 0) Queue(io.inner.finish, depths.fin) else io.inner.finish)
+}
+
+object ClientTileLinkEnqueuer {
+  /** Inserts per-channel queues of the given depths behind `in` and
+   * returns the buffered outer-side port. */
+  def apply(in: ClientTileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): ClientTileLinkIO = {
+    val enq = Module(new ClientTileLinkEnqueuer(depths))
+    enq.io.inner <> in
+    enq.io.outer
+  }
+  /** Convenience overload: the same depth on all five channels. */
+  def apply(in: ClientTileLinkIO, depth: Int)(implicit p: Parameters): ClientTileLinkIO =
+    apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
+}
diff --git a/uncore/src/main/scala/util/Serializer.scala b/uncore/src/main/scala/util/Serializer.scala
new file mode 100644
index 00000000..8cc0caa2
--- /dev/null
+++ b/uncore/src/main/scala/util/Serializer.scala
@@ -0,0 +1,69 @@
+// See LICENSE for license details.
+
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+
+/** Serializes a wide data-carrying message into `n` narrow beats.
+  * Messages without data flow through in a single cycle; messages with
+  * data are captured and emitted as n successive slices of the data field.
+  * `cnt` reports the current beat index and `done` pulses when the last
+  * beat (or a data-less message) is accepted. */
+class FlowThroughSerializer[T <: Bundle with HasTileLinkData](gen: T, n: Int) extends Module {
+ val io = new Bundle {
+ val in = Decoupled(gen).flip
+ val out = Decoupled(gen)
+ val cnt = UInt(OUTPUT, log2Up(n))
+ val done = Bool(OUTPUT)
+ }
+ val narrowWidth = io.in.bits.data.getWidth / n
+ require(io.in.bits.data.getWidth % narrowWidth == 0)
+
+ if(n == 1) {
+ // Degenerate case: nothing to serialize.
+ io.out <> io.in
+ io.cnt := UInt(0)
+ io.done := Bool(true)
+ } else {
+ val cnt = Reg(init=UInt(0, width = log2Up(n)))
+ val wrap = cnt === UInt(n-1)
+ val rbits = Reg{io.in.bits}
+ val active = Reg(init=Bool(false))
+
+ // Narrow slices of the captured data, indexed by the beat counter.
+ val shifter = Wire(Vec(n, Bits(width = narrowWidth)))
+ (0 until n).foreach {
+ i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth)
+ }
+
+ // Defaults; the when blocks below override them (last connect wins).
+ io.done := Bool(false)
+ io.cnt := cnt
+ io.in.ready := !active
+ io.out.valid := active || io.in.valid
+ io.out.bits := io.in.bits
+ when(!active && io.in.valid) {
+ when(io.in.bits.hasData()) {
+ // First beat passes through combinationally while we latch the rest.
+ cnt := Mux(io.out.ready, UInt(1), UInt(0))
+ rbits := io.in.bits
+ active := Bool(true)
+ }
+ io.done := !io.in.bits.hasData()
+ }
+ when(active) {
+ io.out.bits := rbits
+ io.out.bits.data := shifter(cnt)
+ when(io.out.ready) {
+ cnt := cnt + UInt(1)
+ when(wrap) {
+ cnt := UInt(0)
+ io.done := Bool(true)
+ active := Bool(false)
+ }
+ }
+ }
+ }
+}
+
+object FlowThroughSerializer {
+  /** Functional wrapper: returns a channel that emits `in` as `n` narrow
+   * data beats per message. */
+  def apply[T <: Bundle with HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = {
+    val ser = Module(new FlowThroughSerializer(in.bits, n))
+    ser.io.in.bits := in.bits
+    ser.io.in.valid := in.valid
+    in.ready := ser.io.in.ready
+    ser.io.out
+  }
+}