diff --git a/uncore/.gitignore b/uncore/.gitignore
new file mode 100644
index 00000000..555feb41
--- /dev/null
+++ b/uncore/.gitignore
@@ -0,0 +1,2 @@
+target/
+project/target/
diff --git a/uncore/LICENSE b/uncore/LICENSE
new file mode 100644
index 00000000..7cff15e4
--- /dev/null
+++ b/uncore/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2012-2014, The Regents of the University of California
+(Regents). All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. Neither the name of the Regents nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/uncore/README.md b/uncore/README.md
new file mode 100644
index 00000000..3a628a24
--- /dev/null
+++ b/uncore/README.md
@@ -0,0 +1,12 @@
+Uncore Library
+==============
+
+This is the repository for uncore components associated with the Rocket Chip
+project. To use these modules, include this repo as a git submodule within
+your chip repository and add it as a project in your chip's build.scala.
+These components are only dependent on the ucb-bar/chisel repo, i.e.
+
+    lazy val uncore = project.dependsOn(chisel)
+
+ScalaDoc for the uncore library is available here
+and an overview of the TileLink Protocol is available here, with associated CoherencePolicy documentation here.
diff --git a/uncore/build.sbt b/uncore/build.sbt
new file mode 100644
index 00000000..120670b5
--- /dev/null
+++ b/uncore/build.sbt
@@ -0,0 +1,19 @@
+organization := "edu.berkeley.cs"
+
+version := "2.0"
+
+name := "uncore"
+
+scalaVersion := "2.11.6"
+
+// Provide a managed dependency on X if -DXVersion="" is supplied on the command line.
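+// For example (illustrative version numbers, not pinned by this repo):
+//   sbt -DchiselVersion=2.2.30 -DcdeVersion=1.0 compile
+// would add "edu.berkeley.cs" %% "chisel" % "2.2.30" and
+// "edu.berkeley.cs" %% "cde" % "1.0" to libraryDependencies below;
+// any -D*Version property that is not supplied contributes no dependency.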
+libraryDependencies ++= (Seq("chisel","junctions","cde").map {
+  dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten
+
+site.settings
+
+site.includeScaladoc()
+
+ghpages.settings
+
+git.remoteRepo := "git@github.com:ucb-bar/uncore.git"
diff --git a/uncore/project/plugins.sbt b/uncore/project/plugins.sbt
new file mode 100644
index 00000000..4f4825c4
--- /dev/null
+++ b/uncore/project/plugins.sbt
@@ -0,0 +1,5 @@
+resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven"
+
+addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.3")
+
+addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1")
diff --git a/uncore/src/main/scala/Builder.scala b/uncore/src/main/scala/Builder.scala
new file mode 100644
index 00000000..ba2c296a
--- /dev/null
+++ b/uncore/src/main/scala/Builder.scala
@@ -0,0 +1,117 @@
+package uncore
+
+import Chisel._
+import cde.{Config, Parameters, ParameterDump, Knob, Dump}
+import junctions.PAddrBits
+import uncore.tilelink._
+import uncore.agents._
+import uncore.coherence._
+
+object UncoreBuilder extends App with FileSystemUtilities {
+  val topModuleName = args(0)
+  val configClassName = args(1)
+  val config = try {
+    Class.forName(s"uncore.$configClassName").newInstance.asInstanceOf[Config]
+  } catch {
+    case e: java.lang.ClassNotFoundException =>
+      throwException("Unable to find configClassName \"" + configClassName +
+                     "\", did you misspell it?", e)
+  }
+  val world = config.toInstance
+  val paramsFromConfig: Parameters = Parameters.root(world)
+
+  val gen = () =>
+    Class.forName(s"uncore.$topModuleName")
+      .getConstructor(classOf[cde.Parameters])
+      .newInstance(paramsFromConfig)
+      .asInstanceOf[Module]
+
+  chiselMain.run(args.drop(2), gen)
+
+  val pdFile = createOutputFile(s"$topModuleName.prm")
+  pdFile.write(ParameterDump.getDump)
+  pdFile.close()
+
+}
+
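+// Illustrative invocation (module and config names must resolve under the
+// uncore package, e.g. agents.L2HellaCacheBank from this repo):
+//   UncoreBuilder agents.L2HellaCacheBank DefaultL2Config <chiselMain args>
+// args(0) names the top module, args(1) names the Config subclass, and all
+// remaining arguments are passed through to chiselMain.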
+class DefaultL2Config extends Config (
+  topDefinitions = { (pname,site,here) =>
+    pname match {
+      case PAddrBits => 32
+      case CacheId => 0
+      case CacheName => "L2Bank"
+      case TLId => "L1toL2"
+      case InnerTLId => "L1toL2"
+      case OuterTLId => "L2toMC"
+      case "N_CACHED" => Dump("N_CACHED",here[Int]("CACHED_CLIENTS_PER_PORT"))
+      case "N_UNCACHED" => Dump("N_UNCACHED",here[Int]("MAX_CLIENTS_PER_PORT") - here[Int]("N_CACHED"))
+      case "MAX_CLIENT_XACTS" => 4
+      case "MAX_CLIENTS_PER_PORT" => Knob("NTILES")
+      case "CACHED_CLIENTS_PER_PORT" => Knob("N_CACHED_TILES")
+      case TLKey("L1toL2") =>
+        TileLinkParameters(
+          coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
+          nManagers = 1,
+          nCachingClients = here[Int]("N_CACHED"),
+          nCachelessClients = here[Int]("N_UNCACHED"),
+          maxClientXacts = here[Int]("MAX_CLIENT_XACTS"),
+          maxClientsPerPort = here[Int]("MAX_CLIENTS_PER_PORT"),
+          maxManagerXacts = site(NAcquireTransactors) + 2,
+          dataBits = site(CacheBlockBytes)*8,
+          dataBeats = 2)
+      case TLKey("L2toMC") =>
+        TileLinkParameters(
+          coherencePolicy = new MEICoherence(new NullRepresentation(1)),
+          nManagers = 1,
+          nCachingClients = 1,
+          nCachelessClients = 0,
+          maxClientXacts = 1,
+          maxClientsPerPort = site(NAcquireTransactors) + 2,
+          maxManagerXacts = 1,
+          dataBits = site(CacheBlockBytes)*8,
+          dataBeats = 2)
+      case CacheBlockBytes => 64
+      case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
+      case "L2_SETS" => Knob("L2_SETS")
+      case NSets => Dump("L2_SETS",here[Int]("L2_SETS"))
+      case NWays => Knob("L2_WAYS")
+      case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
+      case CacheIdBits => Dump("CACHE_ID_BITS",1)
+      case L2StoreDataQueueDepth => 1
+      case NAcquireTransactors => Dump("N_ACQUIRE_TRANSACTORS",2)
+      case NSecondaryMisses => 4
+      case L2DirectoryRepresentation => new FullRepresentation(here[Int]("N_CACHED"))
+      case L2Replacer => () => new SeqRandom(site(NWays))
+      case ECCCode => None
+      case AmoAluOperandBits => 64
+      case SplitMetadata => false
+      // case XLen => 128
+    }},
+  knobValues = {
+    case "L2_WAYS" => 1
+    case "L2_SETS" => 1024
+    case "NTILES" => 2
+    case "N_CACHED_TILES" => 2
+    case "L2_CAPACITY_IN_KB" => 256
+  }
+)
+
+class WithPLRU extends Config(
+  (pname, site, here) => pname match {
+    case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays))
+  })
+
+class PLRUL2Config extends Config(new WithPLRU ++ new DefaultL2Config)
+
+class With1L2Ways extends Config(knobValues = { case "L2_WAYS" => 1 })
+class With2L2Ways extends Config(knobValues = { case "L2_WAYS" => 2 })
+class With4L2Ways extends Config(knobValues = { case "L2_WAYS" => 4 })
+
+class With1Cached extends Config(knobValues = { case "N_CACHED_TILES" => 1 })
+class With2Cached extends Config(knobValues = { case "N_CACHED_TILES" => 2 })
+
+
+class W1Cached1WaysConfig extends Config(new With1L2Ways ++ new With1Cached ++ new DefaultL2Config)
+class W1Cached2WaysConfig extends Config(new With2L2Ways ++ new With1Cached ++ new DefaultL2Config)
+class W2Cached1WaysConfig extends Config(new With1L2Ways ++ new With2Cached ++ new DefaultL2Config)
+class W2Cached2WaysConfig extends Config(new With2L2Ways ++ new With2Cached ++ new DefaultL2Config)
diff --git a/uncore/src/main/scala/Consts.scala b/uncore/src/main/scala/Consts.scala
new file mode 100644
index 00000000..a4a4e93b
--- /dev/null
+++ b/uncore/src/main/scala/Consts.scala
@@ -0,0 +1,50 @@
+// See LICENSE for license details.
+
+package uncore
+package constants
+
+import Chisel._
+
+object MemoryOpConstants extends MemoryOpConstants
+trait MemoryOpConstants {
+  val MT_SZ = 3
+  val MT_X  = BitPat("b???")
+  val MT_B  = UInt("b000")
+  val MT_H  = UInt("b001")
+  val MT_W  = UInt("b010")
+  val MT_D  = UInt("b011")
+  val MT_BU = UInt("b100")
+  val MT_HU = UInt("b101")
+  val MT_WU = UInt("b110")
+  val MT_Q  = UInt("b111")
+
+  val NUM_XA_OPS = 9
+  val M_SZ      = 5
+  val M_X       = BitPat("b?????")
+  val M_XRD     = UInt("b00000") // int load
+  val M_XWR     = UInt("b00001") // int store
+  val M_PFR     = UInt("b00010") // prefetch with intent to read
+  val M_PFW     = UInt("b00011") // prefetch with intent to write
+  val M_XA_SWAP = UInt("b00100")
+  val M_FLUSH_ALL = UInt("b00101") // flush all lines
+  val M_XLR     = UInt("b00110")
+  val M_XSC     = UInt("b00111")
+  val M_XA_ADD  = UInt("b01000")
+  val M_XA_XOR  = UInt("b01001")
+  val M_XA_OR   = UInt("b01010")
+  val M_XA_AND  = UInt("b01011")
+  val M_XA_MIN  = UInt("b01100")
+  val M_XA_MAX  = UInt("b01101")
+  val M_XA_MINU = UInt("b01110")
+  val M_XA_MAXU = UInt("b01111")
+  val M_FLUSH   = UInt("b10000") // write back dirty data and cede R/W permissions
+  val M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
+  val M_CLEAN   = UInt("b10011") // write back dirty data and retain R/W permissions
+
+  def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
+  def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
+  def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd)
+  def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd)
+  def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
+}
+
diff --git a/uncore/src/main/scala/Package.scala b/uncore/src/main/scala/Package.scala
new file mode 100644
index 00000000..c9a35dbb
--- /dev/null +++ b/uncore/src/main/scala/Package.scala @@ -0,0 +1,4 @@ +// See LICENSE for license details. +package uncore + +package object constants extends uncore.constants.MemoryOpConstants diff --git a/uncore/src/main/scala/Util.scala b/uncore/src/main/scala/Util.scala new file mode 100644 index 00000000..aceee5c2 --- /dev/null +++ b/uncore/src/main/scala/Util.scala @@ -0,0 +1,11 @@ +package uncore + +import Chisel._ + +package object Util { + implicit class UIntIsOneOf(val x: UInt) extends AnyVal { + def isOneOf(s: Seq[UInt]): Bool = s.map(x === _).reduce(_||_) + + def isOneOf(u1: UInt, u2: UInt*): Bool = isOneOf(u1 +: u2.toSeq) + } +} diff --git a/uncore/src/main/scala/agents/Agents.scala b/uncore/src/main/scala/agents/Agents.scala new file mode 100644 index 00000000..b7519633 --- /dev/null +++ b/uncore/src/main/scala/agents/Agents.scala @@ -0,0 +1,161 @@ +// See LICENSE for license details. + +package uncore.agents + +import Chisel._ +import cde.{Parameters, Field} +import junctions._ +import uncore.tilelink._ +import uncore.converters._ +import uncore.coherence._ + +case object NReleaseTransactors extends Field[Int] +case object NProbeTransactors extends Field[Int] +case object NAcquireTransactors extends Field[Int] + +trait HasCoherenceAgentParameters { + implicit val p: Parameters + val nReleaseTransactors = 1 + val nAcquireTransactors = p(NAcquireTransactors) + val nTransactors = nReleaseTransactors + nAcquireTransactors + val blockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits) + val outerTLId = p(OuterTLId) + val outerTLParams = p(TLKey(outerTLId)) + val outerDataBeats = outerTLParams.dataBeats + val outerDataBits = outerTLParams.dataBitsPerBeat + val outerBeatAddrBits = log2Up(outerDataBeats) + val outerByteAddrBits = log2Up(outerDataBits/8) + val outerWriteMaskBits = outerTLParams.writeMaskBits + val innerTLId = p(InnerTLId) + val innerTLParams = p(TLKey(innerTLId)) + val innerDataBeats = innerTLParams.dataBeats + val innerDataBits = innerTLParams.dataBitsPerBeat + val innerWriteMaskBits = innerTLParams.writeMaskBits + val innerBeatAddrBits = log2Up(innerDataBeats) + val innerByteAddrBits = log2Up(innerDataBits/8) + val innerNCachingClients = innerTLParams.nCachingClients + val maxManagerXacts = innerTLParams.maxManagerXacts + require(outerDataBeats == innerDataBeats) //TODO: fix all xact_data Vecs to remove this requirement +} + +abstract class CoherenceAgentModule(implicit val p: Parameters) extends Module + with HasCoherenceAgentParameters +abstract class CoherenceAgentBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p) + with HasCoherenceAgentParameters + +trait HasCoherenceAgentWiringHelpers { + def doOutputArbitration[T <: TileLinkChannel]( + out: DecoupledIO[T], + ins: Seq[DecoupledIO[T]]) { + def lock(o: T) = o.hasMultibeatData() + val arb = Module(new LockingRRArbiter(out.bits, ins.size, out.bits.tlDataBeats, Some(lock _))) + out <> arb.io.out + arb.io.in <> ins + } + + def doInputRouting[T <: Bundle with HasManagerTransactionId]( + in: DecoupledIO[T], + outs: Seq[DecoupledIO[T]]) { + val idx = in.bits.manager_xact_id + outs.map(_.bits := in.bits) + outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) } + in.ready := Vec(outs.map(_.ready)).read(idx) + } + + /** Broadcasts valid messages on this channel to all trackers, + * but includes logic to allocate a new tracker in the case where + * no previously allocated tracker matches the new req's addr. 
+ * + * When a match is reported, if ready is high the new transaction + * is merged; when ready is low the transaction is being blocked. + * When no match is reported, any high idles are presumed to be + * from trackers that are available for allocation, and one is + * assigned via alloc based on priority; if no idles are high then + * all trackers are busy with other transactions. If idle is high + * but ready is low, the tracker will be allocated but does not + * have sufficient buffering for the data. + */ + def doInputRoutingWithAllocation[T <: TileLinkChannel with HasTileLinkData]( + in: DecoupledIO[T], + outs: Seq[DecoupledIO[T]], + allocs: Seq[TrackerAllocation], + dataOverrides: Option[Seq[UInt]] = None, + allocOverride: Option[Bool] = None, + matchOverride: Option[Bool] = None) { + val ready_bits = Vec(outs.map(_.ready)).toBits + val can_alloc_bits = Vec(allocs.map(_.can)).toBits + val should_alloc_bits = PriorityEncoderOH(can_alloc_bits) + val match_bits = Vec(allocs.map(_.matches)).toBits + val no_matches = !match_bits.orR + val alloc_ok = allocOverride.getOrElse(Bool(true)) + val match_ok = matchOverride.getOrElse(Bool(true)) + in.ready := (Mux(no_matches, can_alloc_bits, match_bits) & ready_bits).orR && alloc_ok && match_ok + outs.zip(allocs).zipWithIndex.foreach { case((out, alloc), i) => + out.valid := in.valid && match_ok && alloc_ok + out.bits := in.bits + dataOverrides foreach { d => out.bits.data := d(i) } + alloc.should := should_alloc_bits(i) && no_matches && alloc_ok + } + } +} + +trait HasInnerTLIO extends HasCoherenceAgentParameters { + val inner = new ManagerTileLinkIO()(p.alterPartial({case TLId => p(InnerTLId)})) + val incoherent = Vec(inner.tlNCachingClients, Bool()).asInput + def iacq(dummy: Int = 0) = inner.acquire.bits + def iprb(dummy: Int = 0) = inner.probe.bits + def irel(dummy: Int = 0) = inner.release.bits + def ignt(dummy: Int = 0) = inner.grant.bits + def ifin(dummy: Int = 0) = inner.finish.bits +} + +trait HasUncachedOuterTLIO extends HasCoherenceAgentParameters { + val outer = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)})) + def oacq(dummy: Int = 0) = outer.acquire.bits + def ognt(dummy: Int = 0) = outer.grant.bits +} + +trait HasCachedOuterTLIO extends HasCoherenceAgentParameters { + val outer = new ClientTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)})) + def oacq(dummy: Int = 0) = outer.acquire.bits + def oprb(dummy: Int = 0) = outer.probe.bits + def orel(dummy: Int = 0) = outer.release.bits + def ognt(dummy: Int = 0) = outer.grant.bits +} + +class ManagerTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p) + with HasInnerTLIO + with HasUncachedOuterTLIO + +abstract class CoherenceAgent(implicit p: Parameters) extends CoherenceAgentModule()(p) { + def innerTL: ManagerTileLinkIO + def outerTL: ClientTileLinkIO + def incoherent: Vec[Bool] +} + +abstract class ManagerCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p) + with HasCoherenceAgentWiringHelpers { + val io = new ManagerTLIO + def innerTL = io.inner + def outerTL = TileLinkIOWrapper(io.outer)(p.alterPartial({case TLId => p(OuterTLId)})) + def incoherent = io.incoherent +} + +class HierarchicalTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p) + with HasInnerTLIO + with HasCachedOuterTLIO + +abstract class HierarchicalCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p) + with HasCoherenceAgentWiringHelpers { + val io = new HierarchicalTLIO + def innerTL = io.inner + def outerTL = io.outer + def 
incoherent = io.incoherent + + // TODO: Remove this function (and all its calls) when we support probing the L2 + def disconnectOuterProbeAndFinish() { + io.outer.probe.ready := Bool(false) + io.outer.finish.valid := Bool(false) + assert(!io.outer.probe.valid, "L2 agent got illegal probe") + } +} diff --git a/uncore/src/main/scala/agents/Broadcast.scala b/uncore/src/main/scala/agents/Broadcast.scala new file mode 100644 index 00000000..9845342e --- /dev/null +++ b/uncore/src/main/scala/agents/Broadcast.scala @@ -0,0 +1,204 @@ +// See LICENSE for license details. + +package uncore.agents + +import Chisel._ +import uncore.coherence._ +import uncore.tilelink._ +import uncore.constants._ +import uncore.Util._ +import cde.Parameters + +class L2BroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) { + + // Create TSHRs for outstanding transactions + val irelTrackerList = + (0 until nReleaseTransactors).map(id => + Module(new BufferedBroadcastVoluntaryReleaseTracker(id))) + val iacqTrackerList = + (nReleaseTransactors until nTransactors).map(id => + Module(new BufferedBroadcastAcquireTracker(id))) + val trackerList = irelTrackerList ++ iacqTrackerList + + // Propagate incoherence flags + trackerList.map(_.io.incoherent) foreach { _ := io.incoherent } + + // Create an arbiter for the one memory port + val outerList = trackerList.map(_.io.outer) + val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size) + (p.alterPartial({ case TLId => p(OuterTLId) }))) + outer_arb.io.in <> outerList + io.outer <> outer_arb.io.out + + // Handle acquire transaction initiation + val irel_vs_iacq_conflict = + io.inner.acquire.valid && + io.inner.release.valid && + io.irel().conflicts(io.iacq()) + + doInputRoutingWithAllocation( + in = io.inner.acquire, + outs = trackerList.map(_.io.inner.acquire), + allocs = trackerList.map(_.io.alloc.iacq), + allocOverride = Some(!irel_vs_iacq_conflict)) + + // Handle releases, which might be voluntary and might have data + doInputRoutingWithAllocation( + in = io.inner.release, + outs = trackerList.map(_.io.inner.release), + allocs = trackerList.map(_.io.alloc.irel)) + + // Wire probe requests and grant reply to clients, finish acks from clients + doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe)) + + doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant)) + + doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish)) + + disconnectOuterProbeAndFinish() +} + +class BroadcastXactTracker(implicit p: Parameters) extends XactTracker()(p) { + val io = new HierarchicalXactTrackerIO + pinAllReadyValidLow(io) +} + +trait BroadcastsToAllClients extends HasCoherenceAgentParameters { + val coh = HierarchicalMetadata.onReset + val inner_coh = coh.inner + val outer_coh = coh.outer + def full_representation = ~UInt(0, width = innerNCachingClients) +} + +abstract class BroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) + extends VoluntaryReleaseTracker(trackerId)(p) + with EmitsVoluntaryReleases + with BroadcastsToAllClients { + val io = new HierarchicalXactTrackerIO + pinAllReadyValidLow(io) + + // Checks for illegal behavior + assert(!(state === s_idle && io.inner.release.fire() && io.alloc.irel.should && !io.irel().isVoluntary()), + "VoluntaryReleaseTracker accepted Release that wasn't voluntary!") +} + +abstract class BroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters) + extends AcquireTracker(trackerId)(p) + with EmitsVoluntaryReleases + with BroadcastsToAllClients { + val io = new 
HierarchicalXactTrackerIO
+  pinAllReadyValidLow(io)
+
+  val alwaysWriteFullBeat = false
+  val nSecondaryMisses = 1
+  def iacq_can_merge = Bool(false)
+
+  // Checks for illegal behavior
+  // TODO: this could be allowed, but is a useful check against allocation gone wild
+  assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc.iacq.should &&
+    io.iacq().hasMultibeatData() && !io.iacq().first()),
+    "AcquireTracker initialized with a tail data beat.")
+
+  assert(!(state =/= s_idle && pending_ignt && xact_iacq.isPrefetch()),
+    "Broadcast Hub does not support Prefetches.")
+
+  assert(!(state =/= s_idle && pending_ignt && xact_iacq.isAtomic()),
+    "Broadcast Hub does not support PutAtomics.")
+}
+
+class BufferedBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
+  extends BroadcastVoluntaryReleaseTracker(trackerId)(p)
+  with HasDataBuffer {
+
+  // Tell the parent if any incoming messages conflict with the ongoing transaction
+  routeInParent(irelCanAlloc = Bool(true))
+
+  // Start transaction by accepting inner release
+  innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
+
+  // A release beat can be accepted if we are idle, if it's a mergeable transaction, or if it's a tail beat
+  io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
+
+  when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
+
+  // Dispatch outer release
+  outerRelease(
+    coh = outer_coh.onHit(M_XWR),
+    data = data_buffer(vol_ognt_counter.up.idx),
+    add_pending_send_bit = irel_is_allocating)
+
+  quiesce() {}
+}
+
+class BufferedBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
+  extends BroadcastAcquireTracker(trackerId)(p)
+  with HasByteWriteMaskBuffer {
+
+  // Setup IOs used for routing in the parent
+  routeInParent(iacqCanAlloc = Bool(true))
+
+  // First, take care of accepting new acquires or secondary misses
+  // Handling of primary and secondary misses' data and write mask merging
+  innerAcquire(
+    can_alloc = Bool(false),
+    next = s_inner_probe)
+
+  io.inner.acquire.ready := state === s_idle || iacq_can_merge || iacq_same_xact_multibeat
+
+  // Track which clients yet need to be probed and make Probe message
+  // If a writeback occurs, we can forward its data via the buffer,
+  // and skip having to go outwards
+  val skip_outer_acquire = pending_ignt_data.andR
+
+  innerProbe(
+    inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
+    Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
+
+  // Handle incoming releases from clients, which may reduce sharer counts
+  // and/or write back dirty data, and may be unexpected voluntary releases
+  def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+                         io.irel().isVoluntary() &&
+                         !state.isOneOf(s_idle, s_meta_write) &&
+                         !all_pending_done &&
+                         !io.outer.grant.fire() &&
+                         !io.inner.grant.fire() &&
+                         !vol_ignt_counter.pending &&
+                         !blockInnerRelease()
+
+  innerRelease(block_vol_ignt = vol_ognt_counter.pending)
+
+  //TODO: accept vol irels when state === s_idle, operate like the VolRelTracker
+  io.inner.release.ready := irel_can_merge || irel_same_xact
+
+  mergeDataInner(io.inner.release)
+
+  // If there was a writeback, forward it outwards
+  outerRelease(
+    coh = outer_coh.onHit(M_XWR),
+    data = data_buffer(vol_ognt_counter.up.idx))
+
+  // Send outer request for miss
+  outerAcquire(
+    caching = !xact_iacq.isBuiltInType(),
+    coh = outer_coh,
+    data = data_buffer(ognt_counter.up.idx),
+    wmask = wmask_buffer(ognt_counter.up.idx),
+    next = s_busy)
+
+  // 
Handle the response from outer memory + mergeDataOuter(io.outer.grant) + + // Acknowledge or respond with data + innerGrant( + data = data_buffer(ignt_data_idx), + external_pending = pending_orel || ognt_counter.pending || vol_ognt_counter.pending) + + when(iacq_is_allocating) { + initializeProbes() + } + + initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging) + + // Wait for everything to quiesce + quiesce() { clearWmaskBuffer() } +} diff --git a/uncore/src/main/scala/agents/Bufferless.scala b/uncore/src/main/scala/agents/Bufferless.scala new file mode 100644 index 00000000..5371d74a --- /dev/null +++ b/uncore/src/main/scala/agents/Bufferless.scala @@ -0,0 +1,162 @@ +// See LICENSE for license details. + +package uncore.agents + +import Chisel._ +import uncore.coherence._ +import uncore.tilelink._ +import uncore.constants._ +import cde.Parameters + + +class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) { + + // Create TSHRs for outstanding transactions + val irelTrackerList = + (0 until nReleaseTransactors).map(id => + Module(new BufferlessBroadcastVoluntaryReleaseTracker(id))) + val iacqTrackerList = + (nReleaseTransactors until nTransactors).map(id => + Module(new BufferlessBroadcastAcquireTracker(id))) + val trackerList = irelTrackerList ++ iacqTrackerList + + // Propagate incoherence flags + trackerList.map(_.io.incoherent) foreach { _ := io.incoherent } + + // Create an arbiter for the one memory port + val outerList = trackerList.map(_.io.outer) + val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size) + (p.alterPartial({ case TLId => p(OuterTLId) }))) + outer_arb.io.in <> outerList + io.outer <> outer_arb.io.out + + val iacq = Queue(io.inner.acquire, 1, pipe=true) + val irel = Queue(io.inner.release, 1, pipe=true) + + // Handle acquire transaction initiation + val irel_vs_iacq_conflict = + iacq.valid && + irel.valid && + irel.bits.conflicts(iacq.bits) + + doInputRoutingWithAllocation( + in = iacq, + outs = trackerList.map(_.io.inner.acquire), + allocs = trackerList.map(_.io.alloc.iacq), + allocOverride = Some(!irel_vs_iacq_conflict)) + io.outer.acquire.bits.data := iacq.bits.data + when (io.oacq().hasData()) { + io.outer.acquire.bits.addr_beat := iacq.bits.addr_beat + } + + // Handle releases, which might be voluntary and might have data + doInputRoutingWithAllocation( + in = irel, + outs = trackerList.map(_.io.inner.release), + allocs = trackerList.map(_.io.alloc.irel)) + io.outer.release.bits.data := irel.bits.data + when (io.orel().hasData()) { + io.outer.release.bits.addr_beat := irel.bits.addr_beat + } + + // Wire probe requests and grant reply to clients, finish acks from clients + doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe)) + + doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant)) + io.inner.grant.bits.data := io.outer.grant.bits.data + io.inner.grant.bits.addr_beat := io.outer.grant.bits.addr_beat + + doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish)) + + disconnectOuterProbeAndFinish() +} + +class BufferlessBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) + extends BroadcastVoluntaryReleaseTracker(trackerId)(p) { + + // Tell the parent if any incoming messages conflict with the ongoing transaction + routeInParent(irelCanAlloc = Bool(true)) + + // Start transaction by accepting inner release + innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending) + + // A release beat can be accepted if we are idle, if 
it's a mergeable transaction, or if it's a tail beat
+  // and if the outer release path is clear
+  io.inner.release.ready := Mux(io.irel().hasData(),
+    (state =/= s_idle) && (irel_can_merge || irel_same_xact) && io.outer.release.ready,
+    (state === s_idle) || irel_can_merge || irel_same_xact)
+
+  // Dispatch outer release
+  outerRelease(coh = outer_coh.onHit(M_XWR), buffering = Bool(false))
+
+  quiesce() {}
+}
+
+class BufferlessBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
+  extends BroadcastAcquireTracker(trackerId)(p) {
+
+  // Setup IOs used for routing in the parent
+  routeInParent(iacqCanAlloc = Bool(true))
+
+  // First, take care of accepting new acquires or secondary misses
+  // Handling of primary and secondary misses' data and write mask merging
+  innerAcquire(
+    can_alloc = Bool(false),
+    next = s_inner_probe)
+
+  // We are never going to merge anything in the bufferless hub
+  // Therefore, we only need to concern ourselves with the allocated
+  // transaction and (in case of PutBlock) subsequent tail beats
+  val iacq_can_forward = iacq_same_xact && !vol_ognt_counter.pending
+  io.inner.acquire.ready := Mux(io.iacq().hasData(),
+    state === s_outer_acquire && iacq_can_forward && io.outer.acquire.ready,
+    state === s_idle && io.alloc.iacq.should)
+
+  // Track which clients yet need to be probed and make Probe message
+  innerProbe(
+    inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
+    s_outer_acquire)
+
+  // Handle incoming releases from clients, which may reduce sharer counts
+  // and/or write back dirty data, and may be unexpected voluntary releases
+  def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+                         io.irel().isVoluntary() &&
+                         !vol_ignt_counter.pending &&
+                         !(io.irel().hasData() && ognt_counter.pending) &&
+                         (state =/= s_idle)
+
+  innerRelease(block_vol_ignt = vol_ognt_counter.pending)
+
+  val irel_could_accept = irel_can_merge || irel_same_xact
+  io.inner.release.ready := irel_could_accept &&
+    (!io.irel().hasData() || io.outer.release.ready)
+
+  // If there was a writeback, forward it outwards
+  outerRelease(
+    coh = outer_coh.onHit(M_XWR),
+    buffering = Bool(false),
+    block_orel = !irel_could_accept)
+
+  // Send outer request for miss
+  outerAcquire(
+    caching = !xact_iacq.isBuiltInType(),
+    block_outer_acquire = vol_ognt_counter.pending,
+    buffering = Bool(false),
+    coh = outer_coh,
+    next = s_busy)
+
+  // Handle the response from outer memory
+  when (ognt_counter.pending && io.ognt().hasData()) {
+    io.outer.grant.ready := io.inner.grant.ready // bypass data
+  }
+
+  // Acknowledge or respond with data
+  innerGrant(
+    external_pending = pending_orel || vol_ognt_counter.pending,
+    buffering = Bool(false))
+
+  when(iacq_is_allocating) { initializeProbes() }
+
+  // Wait for everything to quiesce
+  quiesce() {}
+}
diff --git a/uncore/src/main/scala/agents/Cache.scala b/uncore/src/main/scala/agents/Cache.scala
new file mode 100644
index 00000000..80f8f8dd
--- /dev/null
+++ b/uncore/src/main/scala/agents/Cache.scala
@@ -0,0 +1,1146 @@
+// See LICENSE for license details.
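+// This file implements the cached coherence agent (L2HellaCacheBank):
+// replacement policies, the metadata and data arrays, and the TSHR file
+// of trackers (plus a writeback unit) that operate on them.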
+ +package uncore.agents + +import Chisel._ +import scala.reflect.ClassTag +import junctions._ +import uncore.util.AMOALU +import uncore.coherence._ +import uncore.tilelink._ +import uncore.constants._ +import uncore.Util._ +import cde.{Parameters, Field} + +case object CacheName extends Field[String] +case object NSets extends Field[Int] +case object NWays extends Field[Int] +case object RowBits extends Field[Int] +case object Replacer extends Field[() => ReplacementPolicy] +case object L2Replacer extends Field[() => SeqReplacementPolicy] +case object NPrimaryMisses extends Field[Int] +case object NSecondaryMisses extends Field[Int] +case object CacheBlockBytes extends Field[Int] +case object ECCCode extends Field[Option[Code]] +case object CacheIdBits extends Field[Int] +case object CacheId extends Field[Int] +case object SplitMetadata extends Field[Boolean] + +trait HasCacheParameters { + implicit val p: Parameters + val nSets = p(NSets) + val blockOffBits = p(CacheBlockOffsetBits) + val cacheIdBits = p(CacheIdBits) + val idxBits = log2Up(nSets) + val untagBits = blockOffBits + cacheIdBits + idxBits + val tagBits = p(PAddrBits) - untagBits + val nWays = p(NWays) + val wayBits = log2Up(nWays) + val isDM = nWays == 1 + val rowBits = p(RowBits) + val rowBytes = rowBits/8 + val rowOffBits = log2Up(rowBytes) + val code = p(ECCCode).getOrElse(new IdentityCode) + val hasSplitMetadata = p(SplitMetadata) +} + +abstract class CacheModule(implicit val p: Parameters) extends Module + with HasCacheParameters +abstract class CacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasCacheParameters + +abstract class ReplacementPolicy { + def way: UInt + def miss: Unit + def hit: Unit +} + +class RandomReplacement(ways: Int) extends ReplacementPolicy { + private val replace = Wire(Bool()) + replace := Bool(false) + val lfsr = LFSR16(replace) + + def way = if(ways == 1) UInt(0) else lfsr(log2Up(ways)-1,0) + def miss = replace := Bool(true) + def hit = {} +} + +abstract class SeqReplacementPolicy { + def access(set: UInt): Unit + def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit + def way: UInt +} + +class SeqRandom(n_ways: Int) extends SeqReplacementPolicy { + val logic = new RandomReplacement(n_ways) + def access(set: UInt) = { } + def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = { + when (valid && !hit) { logic.miss } + } + def way = logic.way +} + +class PseudoLRU(n: Int) +{ + require(isPow2(n)) + val state_reg = Reg(Bits(width = n)) + def access(way: UInt) { + state_reg := get_next_state(state_reg,way) + } + def get_next_state(state: UInt, way: UInt) = { + var next_state = state + var idx = UInt(1,1) + for (i <- log2Up(n)-1 to 0 by -1) { + val bit = way(i) + next_state = next_state.bitSet(idx, !bit) + idx = Cat(idx, bit) + } + next_state + } + def replace = get_replace_way(state_reg) + def get_replace_way(state: Bits) = { + var idx = UInt(1,1) + for (i <- 0 until log2Up(n)) + idx = Cat(idx, state(idx)) + idx(log2Up(n)-1,0) + } +} + +class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { + val state = SeqMem(n_sets, Bits(width = n_ways-1)) + val logic = new PseudoLRU(n_ways) + val current_state = Wire(Bits()) + val plru_way = logic.get_replace_way(current_state) + val next_state = Wire(Bits()) + + def access(set: UInt) = { + current_state := Cat(state.read(set), Bits(0, width = 1)) + } + + def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = { + val update_way = Mux(hit, way, plru_way) + next_state := 
logic.get_next_state(current_state, update_way) + when (valid) { state.write(set, next_state(n_ways-1,1)) } + } + + def way = plru_way +} + +abstract class Metadata(implicit p: Parameters) extends CacheBundle()(p) { + val tag = Bits(width = tagBits) + val coh: CoherenceMetadata +} + +class MetaReadReq(implicit p: Parameters) extends CacheBundle()(p) { + val idx = Bits(width = idxBits) + val way_en = Bits(width = nWays) +} + +class MetaWriteReq[T <: Metadata](gen: T)(implicit p: Parameters) extends MetaReadReq()(p) { + val data = gen.cloneType + override def cloneType = new MetaWriteReq(gen)(p).asInstanceOf[this.type] +} + +class MetadataArray[T <: Metadata](onReset: () => T)(implicit p: Parameters) extends CacheModule()(p) { + val rstVal = onReset() + val io = new Bundle { + val read = Decoupled(new MetaReadReq).flip + val write = Decoupled(new MetaWriteReq(rstVal)).flip + val resp = Vec(nWays, rstVal.cloneType).asOutput + } + val rst_cnt = Reg(init=UInt(0, log2Up(nSets+1))) + val rst = rst_cnt < UInt(nSets) + val waddr = Mux(rst, rst_cnt, io.write.bits.idx) + val wdata = Mux(rst, rstVal, io.write.bits.data).toBits + val wmask = Mux(rst || Bool(nWays == 1), SInt(-1), io.write.bits.way_en.toSInt).toBools + val rmask = Mux(rst || Bool(nWays == 1), SInt(-1), io.read.bits.way_en.toSInt).toBools + when (rst) { rst_cnt := rst_cnt+UInt(1) } + + val metabits = rstVal.getWidth + + if (hasSplitMetadata) { + val tag_arrs = List.fill(nWays){ SeqMem(nSets, UInt(width = metabits)) } + val tag_readout = Wire(Vec(nWays,rstVal.cloneType)) + val tags_vec = Wire(Vec(nWays, UInt(width = metabits))) + (0 until nWays).foreach { (i) => + when (rst || (io.write.valid && wmask(i))) { + tag_arrs(i).write(waddr, wdata) + } + tags_vec(i) := tag_arrs(i).read(io.read.bits.idx, io.read.valid && rmask(i)) + } + io.resp := io.resp.fromBits(tags_vec.toBits) + } else { + val tag_arr = SeqMem(nSets, Vec(nWays, UInt(width = metabits))) + when (rst || io.write.valid) { + tag_arr.write(waddr, Vec.fill(nWays)(wdata), wmask) + } + val tags = tag_arr.read(io.read.bits.idx, io.read.valid).toBits + io.resp := io.resp.fromBits(tags) + } + + io.read.ready := !rst && !io.write.valid // so really this could be a 6T RAM + io.write.ready := !rst +} + +case object L2DirectoryRepresentation extends Field[DirectoryRepresentation] + +trait HasOuterCacheParameters extends HasCacheParameters with HasCoherenceAgentParameters { + val cacheId = p(CacheId) + val idxLSB = cacheIdBits + val idxMSB = idxLSB + idxBits - 1 + val tagLSB = idxLSB + idxBits + val tagMSB = tagLSB + tagBits - 1 + + def inSameSet(block_a: HasCacheBlockAddress, block_b: HasCacheBlockAddress): Bool = + inSameSet(block_a, block_b.addr_block) + + def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool = + inSet(block, addr(idxMSB, idxLSB)) + + def inSet(block: HasCacheBlockAddress, idx: UInt): Bool = + block.addr_block(idxMSB,idxLSB) === idx + + def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool = + hasTag(block, addr(tagMSB, tagLSB)) + + def hasTag(block: HasCacheBlockAddress, tag: UInt): Bool = + block.addr_block(tagMSB, tagLSB) === tag + + def isSameBlock(block: HasCacheBlockAddress, tag: UInt, idx: UInt) = + hasTag(block, tag) && inSet(block, idx) + + //val blockAddrBits = p(TLBlockAddrBits) + val refillCyclesPerBeat = outerDataBits/rowBits + val refillCycles = refillCyclesPerBeat*outerDataBeats + val internalDataBeats = p(CacheBlockBytes)*8/rowBits + require(refillCyclesPerBeat == 1) + val amoAluOperandBits = p(AmoAluOperandBits) + require(amoAluOperandBits 
<= innerDataBits) + require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states + val nSecondaryMisses = p(NSecondaryMisses) + val isLastLevelCache = true + val alwaysWriteFullBeat = !p(ECCCode).isEmpty +} + +abstract class L2HellaCacheModule(implicit val p: Parameters) extends Module + with HasOuterCacheParameters { + def doInternalOutputArbitration[T <: Data : ClassTag]( + out: DecoupledIO[T], + ins: Seq[DecoupledIO[T]], + block_transfer: T => Bool = (t: T) => Bool(false)) { + val arb = Module(new RRArbiter(out.bits, ins.size)) + out.valid := arb.io.out.valid && !block_transfer(arb.io.out.bits) + out.bits := arb.io.out.bits + arb.io.out.ready := out.ready && !block_transfer(arb.io.out.bits) + arb.io.in <> ins + } + + def doInternalInputRouting[T <: Bundle with HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) { + outs.map(_.bits := in.bits) + outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && in.bits.id === UInt(i) } + } +} + +abstract class L2HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasOuterCacheParameters + +trait HasL2Id extends HasCoherenceAgentParameters { + val id = UInt(width = log2Up(nTransactors + 1)) +} + +trait HasL2InternalRequestState extends HasOuterCacheParameters { + val tag_match = Bool() + val meta = new L2Metadata + val way_en = Bits(width = nWays) +} + +trait HasL2BeatAddr extends HasOuterCacheParameters { + val addr_beat = UInt(width = log2Up(refillCycles)) +} + +trait HasL2Data extends HasOuterCacheParameters + with HasL2BeatAddr { + val data = UInt(width = rowBits) + def hasData(dummy: Int = 0) = Bool(true) + def hasMultibeatData(dummy: Int = 0) = Bool(refillCycles > 1) +} + +class L2Metadata(implicit p: Parameters) extends Metadata()(p) with HasOuterCacheParameters { + val coh = new HierarchicalMetadata +} + +object L2Metadata { + def apply(tag: Bits, coh: HierarchicalMetadata) + (implicit p: Parameters): L2Metadata = { + val meta = Wire(new L2Metadata) + meta.tag := tag + meta.coh := coh + meta + } + + def apply( + tag: Bits, + inner: ManagerMetadata, + outer: ClientMetadata)(implicit p: Parameters): L2Metadata = { + val coh = Wire(new HierarchicalMetadata) + coh.inner := inner + coh.outer := outer + apply(tag, coh) + } +} + +class L2MetaReadReq(implicit p: Parameters) extends MetaReadReq()(p) with HasL2Id { + val tag = Bits(width = tagBits) +} + +class L2MetaWriteReq(implicit p: Parameters) extends MetaWriteReq[L2Metadata](new L2Metadata)(p) + with HasL2Id { + override def cloneType = new L2MetaWriteReq().asInstanceOf[this.type] +} + +class L2MetaResp(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2Id + with HasL2InternalRequestState + +trait HasL2MetaReadIO extends HasOuterCacheParameters { + val read = Decoupled(new L2MetaReadReq) + val resp = Valid(new L2MetaResp).flip +} + +trait HasL2MetaWriteIO extends HasOuterCacheParameters { + val write = Decoupled(new L2MetaWriteReq) +} + +class L2MetaRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2MetaReadIO + with HasL2MetaWriteIO + +class L2MetaReadOnlyIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2MetaReadIO + +trait HasL2MetaRWIO extends HasOuterCacheParameters { + val meta = new L2MetaRWIO +} + +class L2MetadataArray(implicit p: Parameters) extends L2HellaCacheModule()(p) { + val io = new L2MetaRWIO().flip + + def onReset = L2Metadata(UInt(0), HierarchicalMetadata.onReset) + val meta = Module(new MetadataArray(onReset _)) + meta.io.read <> io.read + meta.io.write <> 
io.write + val way_en_1h = (Vec.fill(nWays){Bool(true)}).toBits + val s1_way_en_1h = RegEnable(way_en_1h, io.read.valid) + meta.io.read.bits.way_en := way_en_1h + + val s1_tag = RegEnable(io.read.bits.tag, io.read.valid) + val s1_id = RegEnable(io.read.bits.id, io.read.valid) + def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f)) + val s1_clk_en = Reg(next = io.read.fire()) + val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === s1_tag) + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.outer.isValid() && s1_way_en_1h(w).toBool).toBits + val s1_idx = RegEnable(io.read.bits.idx, io.read.valid) // deal with stalls? + val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) + val s2_tag_match = s2_tag_match_way.orR + val s2_hit_coh = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) + + val replacer = p(L2Replacer)() + val s1_hit_way = Wire(Bits()) + s1_hit_way := Bits(0) + (0 until nWays).foreach(i => when (s1_tag_match_way(i)) { s1_hit_way := Bits(i) }) + replacer.access(io.read.bits.idx) + replacer.update(s1_clk_en, s1_tag_match_way.orR, s1_idx, s1_hit_way) + + val s1_replaced_way_en = UIntToOH(replacer.way) + val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) + val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => + RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) + + io.resp.valid := Reg(next = s1_clk_en) + io.resp.bits.id := RegEnable(s1_id, s1_clk_en) + io.resp.bits.tag_match := s2_tag_match + io.resp.bits.meta := Mux(s2_tag_match, + L2Metadata(s2_repl_meta.tag, s2_hit_coh), + s2_repl_meta) + io.resp.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) +} + +class L2DataReadReq(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2BeatAddr + with HasL2Id { + val addr_idx = UInt(width = idxBits) + val way_en = Bits(width = nWays) +} + +object L2DataReadReq { + def apply( + id: UInt, + way_en: UInt, + addr_idx: UInt, + addr_beat: UInt)(implicit p: Parameters) = { + val req = Wire(new L2DataReadReq) + req.id := id + req.way_en := way_en + req.addr_idx := addr_idx + req.addr_beat := addr_beat + req + } +} + +class L2DataWriteReq(implicit p: Parameters) extends L2DataReadReq()(p) + with HasL2Data { + val wmask = Bits(width = rowBits/8) +} + +object L2DataWriteReq { + def apply( + id: UInt, + way_en: UInt, + addr_idx: UInt, + addr_beat: UInt, + wmask: UInt, + data: UInt)(implicit p: Parameters) = { + val req = Wire(new L2DataWriteReq) + req.id := id + req.way_en := way_en + req.addr_idx := addr_idx + req.addr_beat := addr_beat + req.wmask := wmask + req.data := data + req + } +} + +class L2DataResp(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2Id + with HasL2Data + +trait HasL2DataReadIO extends HasOuterCacheParameters { + val read = Decoupled(new L2DataReadReq) + val resp = Valid(new L2DataResp).flip +} + +trait HasL2DataWriteIO extends HasOuterCacheParameters { + val write = Decoupled(new L2DataWriteReq) +} + +class L2DataRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2DataReadIO + with HasL2DataWriteIO + +trait HasL2DataRWIO extends HasOuterCacheParameters { + val data = new L2DataRWIO +} + +class L2DataArray(delay: Int)(implicit p: Parameters) extends L2HellaCacheModule()(p) { + val io = new L2DataRWIO().flip + + val array = SeqMem(nWays*nSets*refillCycles, Vec(rowBits/8, Bits(width=8))) + val ren = !io.write.valid && io.read.valid + val raddr = 
Cat(OHToUInt(io.read.bits.way_en), io.read.bits.addr_idx, io.read.bits.addr_beat) + val waddr = Cat(OHToUInt(io.write.bits.way_en), io.write.bits.addr_idx, io.write.bits.addr_beat) + val wdata = Vec.tabulate(rowBits/8)(i => io.write.bits.data(8*(i+1)-1,8*i)) + val wmask = io.write.bits.wmask.toBools + when (io.write.valid) { array.write(waddr, wdata, wmask) } + + val r_req = Pipe(io.read.fire(), io.read.bits) + io.resp := Pipe(r_req.valid, r_req.bits, delay) + io.resp.bits.data := Pipe(r_req.valid, array.read(raddr, ren).toBits, delay).bits + io.read.ready := !io.write.valid + io.write.ready := Bool(true) +} + +class L2HellaCacheBank(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) + with HasOuterCacheParameters { + require(isPow2(nSets)) + require(isPow2(nWays)) + + val meta = Module(new L2MetadataArray) // TODO: add delay knob + val data = Module(new L2DataArray(1)) + val tshrfile = Module(new TSHRFile) + io.inner <> tshrfile.io.inner + io.outer <> tshrfile.io.outer + tshrfile.io.incoherent <> io.incoherent + meta.io <> tshrfile.io.meta + data.io <> tshrfile.io.data + + disconnectOuterProbeAndFinish() +} + +class TSHRFileIO(implicit p: Parameters) extends HierarchicalTLIO()(p) + with HasL2MetaRWIO + with HasL2DataRWIO + +class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) + with HasCoherenceAgentWiringHelpers { + val io = new TSHRFileIO + + // Create TSHRs for outstanding transactions + val irelTrackerList = + (0 until nReleaseTransactors).map(id => + Module(new CacheVoluntaryReleaseTracker(id))) + val iacqTrackerList = + (nReleaseTransactors until nTransactors).map(id => + Module(new CacheAcquireTracker(id))) + val trackerList = irelTrackerList ++ iacqTrackerList + + // Don't allow a writeback request to go through if we are taking + // a voluntary release for the same block. 
+ // The writeback can go forward once the voluntary release is handled + def writebackConflictsWithVolRelease(wb: L2WritebackReq): Bool = + irelTrackerList + .map(tracker => + !tracker.io.alloc.idle && + isSameBlock(tracker.io.alloc, wb.tag, wb.idx)) + .reduce(_ || _) || + (io.inner.release.valid && + isSameBlock(io.inner.release.bits, wb.tag, wb.idx)) + + // WritebackUnit evicts data from L2, including invalidating L1s + val wb = Module(new L2WritebackUnit(nTransactors)) + val trackerAndWbIOs = trackerList.map(_.io) :+ wb.io + doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req), + block_transfer = writebackConflictsWithVolRelease _) + doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp)) + + // Propagate incoherence flags + (trackerList.map(_.io.incoherent) :+ wb.io.incoherent) foreach { _ := io.incoherent } + + // Handle acquire transaction initiation + val irel_vs_iacq_conflict = + io.inner.acquire.valid && + io.inner.release.valid && + inSameSet(io.inner.acquire.bits, io.inner.release.bits) + doInputRoutingWithAllocation( + in = io.inner.acquire, + outs = trackerList.map(_.io.inner.acquire), + allocs = trackerList.map(_.io.alloc.iacq), + allocOverride = Some(!irel_vs_iacq_conflict)) + + assert(PopCount(trackerList.map(_.io.alloc.iacq.should)) <= UInt(1), + "At most a single tracker should now be allocated for any given Acquire") + + // Wire releases from clients + doInputRoutingWithAllocation( + in = io.inner.release, + outs = trackerAndWbIOs.map(_.inner.release), + allocs = trackerAndWbIOs.map(_.alloc.irel)) + + assert(PopCount(trackerAndWbIOs.map(_.alloc.irel.should)) <= UInt(1), + "At most a single tracker should now be allocated for any given Release") + + // Wire probe requests and grant reply to clients, finish acks from clients + doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ wb.io.inner.probe) + doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant) :+ wb.io.inner.grant) + doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish)) + + // Create an arbiter for the one memory port + val outerList = trackerList.map(_.io.outer) :+ wb.io.outer + val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size) + (p.alterPartial({ case TLId => p(OuterTLId)}))) + outer_arb.io.in <> outerList + io.outer <> outer_arb.io.out + + // Wire local memory arrays + doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read) :+ wb.io.meta.read) + doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write)) + doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read) + doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write)) + doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp) :+ wb.io.meta.resp) + doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp) +} + + +class L2XactTrackerIO(implicit p: Parameters) extends HierarchicalXactTrackerIO()(p) + with HasL2DataRWIO + with HasL2MetaRWIO + with HasL2WritebackIO + +trait HasRowBeatCounters extends HasOuterCacheParameters with HasPendingBitHelpers { + def mergeData(dataBits: Int)(beat: UInt, incoming: UInt): Unit + + def connectDataBeatCounter[S <: L2HellaCacheBundle](inc: Bool, data: S, beat: UInt, full_block: Bool) = { + if(data.refillCycles > 1) { + val (multi_cnt, multi_done) = Counter(full_block && inc, data.refillCycles) + (Mux(!full_block, beat, multi_cnt), Mux(!full_block, inc, multi_done)) + } else { (UInt(0), inc) } + } + 
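+  // Note: for multibeat data (refillCycles > 1), the counter above walks all
+  // beats of a full-block transfer and signals done on the last beat; for
+  // sub-block accesses it simply passes the caller's beat and inc through.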
+ def connectInternalDataBeatCounter[T <: L2HellaCacheBundle with HasL2BeatAddr]( + in: DecoupledIO[T], + beat: UInt = UInt(0), + full_block: Bool = Bool(true)): (UInt, Bool) = { + connectDataBeatCounter(in.fire(), in.bits, beat, full_block) + } + + def connectInternalDataBeatCounter[T <: L2HellaCacheBundle with HasL2Data]( + in: ValidIO[T], + full_block: Bool): Bool = { + connectDataBeatCounter(in.valid, in.bits, UInt(0), full_block)._2 + } + + def addPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr](in: DecoupledIO[T]) = + Fill(in.bits.refillCycles, in.fire()) & UIntToOH(in.bits.addr_beat) + + def addPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr](in: ValidIO[T]) = + Fill(in.bits.refillCycles, in.valid) & UIntToOH(in.bits.addr_beat) + + def dropPendingBit[T <: L2HellaCacheBundle with HasL2BeatAddr] (in: DecoupledIO[T]) = + ~Fill(in.bits.refillCycles, in.fire()) | ~UIntToOH(in.bits.addr_beat) + + def dropPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr] (in: ValidIO[T]) = + ~Fill(in.bits.refillCycles, in.valid) | ~UIntToOH(in.bits.addr_beat) + + // TODO: Deal with the possibility that rowBits != tlDataBits + def mergeDataInternal[T <: L2HellaCacheBundle with HasL2Data with HasL2BeatAddr](in: ValidIO[T]) { + when(in.valid) { mergeData(rowBits)(in.bits.addr_beat, in.bits.data) } + } +} + +trait ReadsFromOuterCacheDataArray extends HasCoherenceMetadataBuffer + with HasRowBeatCounters + with HasDataBuffer { + def io: HasL2DataRWIO + + val pending_reads = Reg(init=Bits(0, width = innerDataBeats)) + val pending_resps = Reg(init=Bits(0, width = innerDataBeats)) + val curr_read_beat = PriorityEncoder(pending_reads) + + def readDataArray(drop_pending_bit: UInt, + add_pending_bit: UInt = UInt(0), + block_pending_read: Bool = Bool(false), + can_update_pending: Bool = Bool(true)) { + val port = io.data + when (can_update_pending) { + pending_reads := (pending_reads & + dropPendingBit(port.read) & drop_pending_bit) | + add_pending_bit + } + port.read.valid := state === s_busy && pending_reads.orR && !block_pending_read + port.read.bits := L2DataReadReq( + id = UInt(trackerId), + way_en = xact_way_en, + addr_idx = xact_addr_idx, + addr_beat = curr_read_beat) + + pending_resps := (pending_resps & dropPendingBitInternal(port.resp)) | + addPendingBitInternal(port.read) + + scoreboard += (pending_reads.orR, pending_resps.orR) + + mergeDataInternal(port.resp) + } +} + +trait WritesToOuterCacheDataArray extends HasCoherenceMetadataBuffer + with HasRowBeatCounters + with HasDataBuffer { + def io: HasL2DataRWIO + + val pending_writes = Reg(init=Bits(0, width = innerDataBeats)) + val curr_write_beat = PriorityEncoder(pending_writes) + + def writeDataArray(add_pending_bit: UInt = UInt(0), + block_pending_write: Bool = Bool(false), + can_update_pending: Bool = Bool(true)) { + val port = io.data + when (can_update_pending) { + pending_writes := (pending_writes & dropPendingBit(port.write)) | + add_pending_bit + } + port.write.valid := state === s_busy && pending_writes.orR && !block_pending_write + port.write.bits := L2DataWriteReq( + id = UInt(trackerId), + way_en = xact_way_en, + addr_idx = xact_addr_idx, + addr_beat = curr_write_beat, + wmask = ~UInt(0, port.write.bits.wmask.getWidth), + data = data_buffer(curr_write_beat)) + + scoreboard += pending_writes.orR + } +} + +trait HasAMOALU extends HasAcquireMetadataBuffer + with HasByteWriteMaskBuffer + with HasRowBeatCounters { + val io: L2XactTrackerIO + + // Provide a single ALU per tracker to merge Puts and AMOs with 
data being
+  // refilled, written back, or extant in the cache
+  val amoalu = Module(new AMOALU(rhsIsAligned = true))
+  val amo_result = Reg(init = UInt(0, innerDataBits))
+
+  def initializeAMOALUIOs() {
+    amoalu.io.addr := Cat(xact_addr_block, xact_addr_beat, xact_addr_byte)
+    amoalu.io.cmd := xact_op_code
+    amoalu.io.typ := xact_op_size
+    amoalu.io.lhs := io.data.resp.bits.data // default, overwritten by calls to mergeData
+    amoalu.io.rhs := data_buffer.head // default, overwritten by calls to mergeData
+  }
+
+  // Utility function for applying any buffered stored data to the cache line
+  // before storing it back into the data array
+  override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
+    val old_data = incoming // Refilled, written back, or de-cached data
+    val new_data = data_buffer(beat) // Newly Put data is already in the buffer
+    val amo_shift_bits = xact_amo_shift_bytes << 3
+    amoalu.io.lhs := old_data >> amo_shift_bits
+    amoalu.io.rhs := new_data >> amo_shift_bits
+    val wmask = FillInterleaved(8, wmask_buffer(beat))
+    data_buffer(beat) := ~wmask & old_data |
+                          wmask & Mux(xact_iacq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data)
+    when(xact_iacq.isAtomic() && xact_addr_beat === beat) { amo_result := old_data }
+  }
+}
+
+trait HasCoherenceMetadataBuffer extends HasOuterCacheParameters
+  with HasBlockAddressBuffer
+  with HasXactTrackerStates {
+  def trackerId: Int
+
+  val xact_way_en = Reg{ Bits(width = nWays) }
+  val xact_old_meta = Reg{ new L2Metadata }
+  val pending_coh = Reg{ xact_old_meta.coh }
+  val pending_meta_write = Reg{ Bool() } // pending_meta_write has its own state (s_meta_write)
+
+  val inner_coh = pending_coh.inner
+  val outer_coh = pending_coh.outer
+
+  val xact_addr_idx = xact_addr_block(idxMSB,idxLSB)
+  val xact_addr_tag = xact_addr_block >> UInt(tagLSB)
+
+  // Utility function for updating the metadata that will be kept in this cache
+  def updatePendingCohWhen(flag: Bool, next: HierarchicalMetadata) {
+    when(flag && pending_coh =/= next) {
+      pending_meta_write := Bool(true)
+      pending_coh := next
+    }
+  }
+
+  def metaRead(port: HasL2MetaReadIO, next_state: UInt, way_en_known: Bool = Bool(false)) {
+    port.read.valid := state === s_meta_read
+    port.read.bits.id := UInt(trackerId)
+    port.read.bits.idx := xact_addr_idx
+    port.read.bits.tag := xact_addr_tag
+    port.read.bits.way_en := Mux(way_en_known, xact_way_en, ~UInt(0, nWays))
+
+    when(state === s_meta_read && port.read.ready) { state := s_meta_resp }
+
+    when(state === s_meta_resp && port.resp.valid) {
+      xact_old_meta := port.resp.bits.meta
+      when (!way_en_known) { xact_way_en := port.resp.bits.way_en }
+      state := next_state
+    }
+  }
+
+  def metaWrite(port: HasL2MetaWriteIO, to_write: L2Metadata, next_state: UInt) {
+    port.write.valid := state === s_meta_write
+    port.write.bits.id := UInt(trackerId)
+    port.write.bits.idx := xact_addr_idx
+    port.write.bits.way_en := xact_way_en
+    port.write.bits.data := to_write
+
+    when(state === s_meta_write && port.write.ready) { state := next_state }
+  }
+}
+
+trait TriggersWritebacks extends HasCoherenceMetadataBuffer {
+  def triggerWriteback(wb: L2WritebackIO, next_state: UInt) {
+    wb.req.valid := state === s_wb_req
+    wb.req.bits.id := UInt(trackerId)
+    wb.req.bits.idx := xact_addr_idx
+    wb.req.bits.tag := xact_old_meta.tag
+    wb.req.bits.way_en := xact_way_en
+
+    when(state === s_wb_req && wb.req.ready) { state := s_wb_resp }
+    when(state === s_wb_resp && wb.resp.valid) { state := s_outer_acquire }
+  }
+}
+
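+// The two trackers below specialize VoluntaryReleaseTracker and
+// AcquireTracker for this cache: a voluntary release updates the data array
+// and block metadata in place, while an acquire may trigger a writeback,
+// inner probes, and an outer acquire before the transaction completes.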
Int)(implicit p: Parameters)
+ extends VoluntaryReleaseTracker(trackerId)(p)
+ with HasDataBuffer
+ with WritesToOuterCacheDataArray {
+ val io = new L2XactTrackerIO
+ pinAllReadyValidLow(io)
+
+ // Avoid metadata races with writebacks
+ routeInParent(
+ iacqMatches = inSameSet(_, xact_addr_block),
+ irelCanAlloc = Bool(true))
+
+ // Initialize and accept pending Release beats
+ innerRelease(
+ block_vol_ignt = pending_writes.orR,
+ next = s_meta_read)
+
+ io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
+
+ // Begin a transaction by getting the current block metadata
+ metaRead(io.meta, s_busy)
+
+ // Write the voluntarily written back data to this cache
+ writeDataArray(add_pending_bit = addPendingBitWhenBeatHasData(io.inner.release),
+ can_update_pending = state =/= s_idle || io.alloc.irel.should)
+
+ // End a transaction by updating the block metadata
+ metaWrite(
+ io.meta,
+ L2Metadata(
+ tag = xact_addr_tag,
+ inner = xact_old_meta.coh.inner.onRelease(xact_vol_irel),
+ outer = Mux(xact_vol_irel.hasData(),
+ xact_old_meta.coh.outer.onHit(M_XWR),
+ xact_old_meta.coh.outer)),
+ s_idle)
+
+ when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
+
+ when(irel_is_allocating) {
+ pending_writes := addPendingBitWhenBeatHasData(io.inner.release)
+ }
+
+ quiesce(s_meta_write) {}
+
+ // Checks for illegal behavior
+ assert(!(state === s_meta_resp && io.meta.resp.valid && !io.meta.resp.bits.tag_match),
+ "VoluntaryReleaseTracker accepted Release for a block not resident in this cache!")
+}
+
+class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters)
+ extends AcquireTracker(trackerId)(p)
+ with HasByteWriteMaskBuffer
+ with HasAMOALU
+ with TriggersWritebacks
+ with ReadsFromOuterCacheDataArray
+ with WritesToOuterCacheDataArray {
+ val io = new L2XactTrackerIO
+ pinAllReadyValidLow(io)
+ initializeAMOALUIOs()
+
+
+ val pending_coh_on_ognt = HierarchicalMetadata(
+ ManagerMetadata.onReset,
+ pending_coh.outer.onGrant(io.outer.grant.bits, xact_op_code))
+
+ val pending_coh_on_ignt = HierarchicalMetadata(
+ pending_coh.inner.onGrant(io.ignt()),
+ Mux(ognt_counter.down.done,
+ pending_coh_on_ognt.outer,
+ pending_coh.outer))
+
+ val pending_coh_on_irel = HierarchicalMetadata(
+ pending_coh.inner.onRelease(io.irel()), // Drop sharer
+ Mux(io.irel().hasData(), // Dirty writeback
+ pending_coh.outer.onHit(M_XWR),
+ pending_coh.outer))
+
+ val pending_coh_on_hit = HierarchicalMetadata(
+ io.meta.resp.bits.meta.coh.inner,
+ io.meta.resp.bits.meta.coh.outer.onHit(xact_op_code))
+
+ val pending_coh_on_miss = HierarchicalMetadata.onReset
+
+ val before_wb_req = state.isOneOf(s_meta_read, s_meta_resp)
+
+ routeInParent(
+ iacqMatches = inSameSet(_, xact_addr_block),
+ irelMatches = (irel: HasCacheBlockAddress) =>
+ Mux(before_wb_req, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)),
+ iacqCanAlloc = Bool(true))
+
+ // TileLink allows for Gets-under-Get
+ // and Puts-under-Put, and either may also merge with a preceding prefetch
+ // that requested the correct permissions (via op_code)
+ def acquiresAreMergeable(sec: AcquireMetadata): Bool = {
+ val allowedTypes = List((Acquire.getType, Acquire.getType),
+ (Acquire.putType, Acquire.putType),
+ (Acquire.putBlockType, Acquire.putBlockType),
+ (Acquire.getPrefetchType, Acquire.getPrefetchType),
+ (Acquire.putPrefetchType, Acquire.putPrefetchType),
+ (Acquire.getPrefetchType, Acquire.getType),
+ (Acquire.putPrefetchType, Acquire.putType),
+ (Acquire.putPrefetchType,
Acquire.putBlockType))
+ allowedTypes.map { case(a, b) => xact_iacq.isBuiltInType(a) && sec.isBuiltInType(b) }.reduce(_||_) &&
+ xact_op_code === sec.op_code() &&
+ sec.conflicts(xact_addr_block) &&
+ xact_allocate
+ }
+
+ // First, take care of accepting new acquires or secondary misses
+ // Handling of primary and secondary misses' data and write mask merging
+ def iacq_can_merge = acquiresAreMergeable(io.iacq()) &&
+ state =/= s_idle &&
+ state =/= s_meta_resp &&
+ state =/= s_meta_write &&
+ !all_pending_done &&
+ !io.inner.release.fire() &&
+ !io.outer.grant.fire() &&
+ !io.data.resp.valid &&
+ ignt_q.io.enq.ready && ignt_q.io.deq.valid
+
+ innerAcquire(
+ can_alloc = Bool(true),
+ next = s_meta_read)
+
+ io.inner.acquire.ready := state === s_idle || iacq_can_merge ||
+ iacq_same_xact_multibeat
+
+ // Begin a transaction by getting the current block metadata
+ // Defined here because of Chisel default wire demands, used in s_meta_resp
+ val coh = io.meta.resp.bits.meta.coh
+ val tag_match = io.meta.resp.bits.tag_match
+ val is_hit = (if(!isLastLevelCache) tag_match && coh.outer.isHit(xact_op_code)
+ else tag_match && coh.outer.isValid())
+ val needs_writeback = !tag_match &&
+ xact_allocate &&
+ (coh.outer.requiresVoluntaryWriteback() ||
+ coh.inner.requiresProbesOnVoluntaryWriteback())
+ val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact_iacq)
+ val should_update_meta = !tag_match && xact_allocate ||
+ is_hit && pending_coh_on_hit =/= coh
+ def full_representation = coh.inner.full()
+
+ metaRead(
+ io.meta,
+ Mux(needs_writeback, s_wb_req,
+ Mux(needs_inner_probes, s_inner_probe,
+ Mux(!is_hit, s_outer_acquire, s_busy))))
+
+ updatePendingCohWhen(
+ io.meta.resp.valid,
+ Mux(is_hit, pending_coh_on_hit,
+ Mux(tag_match, coh, pending_coh_on_miss)))
+
+ // Issue a request to the writeback unit
+ triggerWriteback(io.wb, s_outer_acquire)
+
+ // Track which clients still need to be probed and make Probe message
+ // If we're probing, we know the tag matches, so if this is the
+ // last level cache, we can use the data without upgrading permissions
+ val skip_outer_acquire =
+ (if(!isLastLevelCache) xact_old_meta.coh.outer.isHit(xact_op_code)
+ else xact_old_meta.coh.outer.isValid())
+
+ innerProbe(
+ inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
+ Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
+
+ // Handle incoming releases from clients, which may reduce sharer counts
+ // and/or write back dirty data, and may be unexpected voluntary releases
+
+ innerRelease() // Don't block on pending_writes because they won't happen until s_busy
+
+ def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+ io.irel().isVoluntary() &&
+ !state.isOneOf(s_idle, s_meta_read, s_meta_resp, s_meta_write) &&
+ !all_pending_done &&
+ !io.outer.grant.fire() &&
+ !io.inner.grant.fire() &&
+ !vol_ignt_counter.pending
+
+ io.inner.release.ready := irel_can_merge || irel_same_xact
+
+ updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+
+ mergeDataInner(io.inner.release)
+
+ // Send outer request
+ outerAcquire(
+ caching = xact_allocate,
+ coh = xact_old_meta.coh.outer, // TODO outer_coh?
+ data = data_buffer(ognt_counter.up.idx),
+ wmask = wmask_buffer(ognt_counter.up.idx),
+ next = s_busy)
+
+ // Handle the response from outer memory
+ updatePendingCohWhen(ognt_counter.down.done, pending_coh_on_ognt)
+ mergeDataOuter(io.outer.grant)
+
+ // Send read request and get resp
+ // Going back to the original inner transaction:
+ // We read from the cache at this level if data wasn't written back or refilled.
+ // We may still merge further Gets, requiring further beats to be read.
+ // If ECC requires a full writemask, we'll read out data on partial writes as well.
+ readDataArray(
+ drop_pending_bit = (dropPendingBitWhenBeatHasData(io.inner.release) &
+ dropPendingBitWhenBeatHasData(io.outer.grant)),
+ add_pending_bit = addPendingBitWhenBeatNeedsRead(io.inner.acquire, Bool(alwaysWriteFullBeat)),
+ block_pending_read = ognt_counter.pending,
+ can_update_pending = state =/= s_idle || io.alloc.irel.should)
+
+ // No override for first accepted acquire
+ val alloc_override = xact_allocate && (state =/= s_idle)
+
+ // Do write
+ // We write data to the cache at this level if it was Put here with the allocate flag,
+ // written back dirty, or refilled from outer memory.
+ writeDataArray(
+ add_pending_bit = (addPendingBitWhenBeatHasDataAndAllocs(io.inner.acquire, alloc_override) |
+ addPendingBitWhenBeatHasData(io.inner.release) |
+ addPendingBitWhenBeatHasData(io.outer.grant, xact_allocate)),
+ block_pending_write = (ognt_counter.pending ||
+ pending_put_data.orR ||
+ pending_reads(curr_write_beat) ||
+ pending_resps(curr_write_beat)),
+ can_update_pending = state =/= s_idle || io.alloc.iacq.should || io.alloc.irel.should)
+
+ // Acknowledge or respond with data
+ innerGrant(
+ data = Mux(xact_iacq.isAtomic(), amo_result, data_buffer(ignt_data_idx)),
+ external_pending = pending_writes.orR || ognt_counter.pending,
+ add_pending_bits = addPendingBitInternal(io.data.resp))
+
+ updatePendingCohWhen(io.inner.grant.fire() && io.ignt().last(), pending_coh_on_ignt)
+
+ // End a transaction by updating the block metadata
+ metaWrite(io.meta, L2Metadata(xact_addr_tag, pending_coh), s_idle)
+
+ // Initialization of some scoreboard logic based on the original
+ // Acquire message and on the results of the metadata read:
+ when(state === s_meta_resp && io.meta.resp.valid) {
+ // If some kind of Put is marked no-allocate but is already in the cache,
+ // we need to write its data to the data array
+ when(is_hit && !xact_allocate && xact_iacq.hasData()) {
+ pending_writes := addPendingBitsFromAcquire(xact_iacq)
+ xact_allocate := Bool(true)
+ }
+ when (needs_inner_probes) { initializeProbes() }
+ pending_meta_write := should_update_meta //TODO what edge case was this covering?
+ }
+
+ // Initialize more transaction metadata.
+ when(iacq_is_allocating) {
+ amo_result := UInt(0)
+ pending_meta_write := Bool(false)
+ pending_reads := Mux( // Pick out the specific beats of data that need to be read
+ io.iacq().isBuiltInType(Acquire.getBlockType) || !io.iacq().isBuiltInType(),
+ ~UInt(0, width = innerDataBeats),
+ addPendingBitWhenBeatNeedsRead(io.inner.acquire, Bool(alwaysWriteFullBeat)))
+ pending_writes := addPendingBitWhenBeatHasDataAndAllocs(io.inner.acquire)
+ pending_resps := UInt(0)
+ }
+
+ initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging)
+
+ // Wait for everything to quiesce
+ quiesce(Mux(pending_meta_write, s_meta_write, s_idle)) { clearWmaskBuffer() }
+}
+
+class L2WritebackReq(implicit p: Parameters)
+ extends L2HellaCacheBundle()(p) with HasL2Id {
+ val tag = Bits(width = tagBits)
+ val idx = Bits(width = idxBits)
+ val way_en = Bits(width = nWays)
+}
+
+class L2WritebackResp(implicit p: Parameters) extends L2HellaCacheBundle()(p) with HasL2Id
+
+class L2WritebackIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) {
+ val req = Decoupled(new L2WritebackReq)
+ val resp = Valid(new L2WritebackResp).flip
+}
+
+trait HasL2WritebackIO extends HasOuterCacheParameters {
+ val wb = new L2WritebackIO()
+}
+
+class L2WritebackUnitIO(implicit p: Parameters)
+ extends HierarchicalXactTrackerIO()(p) with HasL2DataRWIO {
+ val wb = new L2WritebackIO().flip()
+ val meta = new L2MetaReadOnlyIO
+}
+
+class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
+ with AcceptsVoluntaryReleases
+ with EmitsVoluntaryReleases
+ with EmitsInnerProbes
+ with ReadsFromOuterCacheDataArray
+ with RoutesInParent {
+ val io = new L2WritebackUnitIO
+ pinAllReadyValidLow(io)
+
+ val xact_id = Reg{ io.wb.req.bits.id }
+
+ val pending_coh_on_irel = HierarchicalMetadata(
+ inner_coh.onRelease(io.irel()), // Drop sharer
+ Mux(io.irel().hasData(), // Dirty writeback
+ outer_coh.onHit(M_XWR),
+ outer_coh))
+
+ routeInParent()
+
+ // Start the writeback sub-transaction
+ io.wb.req.ready := state === s_idle
+
+ val coh = io.meta.resp.bits.meta.coh
+ val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
+ val needs_outer_release = coh.outer.requiresVoluntaryWriteback()
+ def full_representation = coh.inner.full()
+
+ // Even though we already read the metadata in the acquire tracker that
+ // sent the writeback request, we have to read it again in the writeback
+ // unit, since it may have been updated in the meantime.
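+ // (For example, a voluntary Release merged by another tracker in the
+ // meantime may have dropped a sharer or marked the line dirty.)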
+ metaRead(io.meta,
+ next_state = Mux(needs_inner_probes, s_inner_probe, s_busy),
+ way_en_known = Bool(true))
+
+ // Track which clients still need to be probed and make Probe message
+ innerProbe(
+ inner_coh.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block),
+ s_busy)
+
+ // Handle incoming releases from clients, which may reduce sharer counts
+ // and/or write back dirty data
+ innerRelease()
+
+ def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
+ io.irel().isVoluntary() &&
+ !state.isOneOf(s_idle, s_meta_read, s_meta_resp) &&
+ !(state === s_busy && all_pending_done) &&
+ !vol_ignt_counter.pending &&
+ !blockInnerRelease()
+
+ io.inner.release.ready := irel_can_merge || irel_same_xact
+
+ updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+
+ mergeDataInner(io.inner.release)
+
+ // If a release didn't write back data, we have to read it from the data array
+ readDataArray(
+ drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release))
+
+ // Once the data is buffered we can write it back to outer memory
+ outerRelease(
+ coh = outer_coh,
+ data = data_buffer(vol_ognt_counter.up.idx),
+ add_pending_data_bits = addPendingBitInternal(io.data.resp),
+ add_pending_send_bit = io.meta.resp.valid && needs_outer_release)
+
+
+ // Respond to the initiating transaction handler signalling completion of the writeback
+ io.wb.resp.valid := state === s_busy && all_pending_done
+ io.wb.resp.bits.id := xact_id
+
+ quiesce() {}
+
+ // State machine updates and transaction handler metadata initialization
+ when(state === s_idle && io.wb.req.valid) {
+ xact_id := io.wb.req.bits.id
+ xact_way_en := io.wb.req.bits.way_en
+ xact_addr_block := (if (cacheIdBits == 0) Cat(io.wb.req.bits.tag, io.wb.req.bits.idx)
+ else Cat(io.wb.req.bits.tag, io.wb.req.bits.idx, UInt(cacheId, cacheIdBits)))
+ state := s_meta_read
+ }
+
+ when (state === s_meta_resp && io.meta.resp.valid) {
+ pending_reads := Fill(innerDataBeats, needs_outer_release)
+ pending_coh := coh
+ when(needs_inner_probes) { initializeProbes() }
+ }
+
+ assert(!io.meta.resp.valid || io.meta.resp.bits.tag_match,
+ "L2 requested Writeback for block not present in cache")
+}
diff --git a/uncore/src/main/scala/agents/Ecc.scala b/uncore/src/main/scala/agents/Ecc.scala
new file mode 100644
index 00000000..6e5fdba6
--- /dev/null
+++ b/uncore/src/main/scala/agents/Ecc.scala
@@ -0,0 +1,146 @@
+// See LICENSE for license details.
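+
+// A minimal usage sketch for the codes defined in this file (`raw` and `mem`
+// are illustrative names, not part of the library): encode widens a word with
+// check bits, and decode recovers it while reporting error status.
+//
+//   val code = new SECDEDCode
+//   val enc = code.encode(raw)       // raw.getWidth -> code.width(raw.getWidth) bits
+//   val dec = code.decode(mem)       // mem holds a possibly corrupted codeword
+//   val data = dec.corrected         // single-bit errors are repaired
+//   val fail = dec.uncorrectable     // double-bit errors are only detected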
+ +package uncore.agents + +import Chisel._ + +abstract class Decoding +{ + def uncorrected: UInt + def corrected: UInt + def correctable: Bool + def uncorrectable: Bool + def error = correctable || uncorrectable +} + +abstract class Code +{ + def width(w0: Int): Int + def encode(x: UInt): UInt + def decode(x: UInt): Decoding +} + +class IdentityCode extends Code +{ + def width(w0: Int) = w0 + def encode(x: UInt) = x + def decode(y: UInt) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = Bool(false) + def uncorrectable = Bool(false) + } +} + +class ParityCode extends Code +{ + def width(w0: Int) = w0+1 + def encode(x: UInt) = Cat(x.xorR, x) + def decode(y: UInt) = new Decoding { + def uncorrected = y(y.getWidth-2,0) + def corrected = uncorrected + def correctable = Bool(false) + def uncorrectable = y.xorR + } +} + +class SECCode extends Code +{ + def width(k: Int) = { + val m = log2Floor(k) + 1 + k + m + (if((1 << m) < m+k+1) 1 else 0) + } + def encode(x: UInt) = { + val k = x.getWidth + require(k > 0) + val n = width(k) + + val y = for (i <- 1 to n) yield { + if (isPow2(i)) { + val r = for (j <- 1 to n; if j != i && (j & i) != 0) + yield x(mapping(j)) + r reduce (_^_) + } else + x(mapping(i)) + } + Vec(y).toBits + } + def decode(y: UInt) = new Decoding { + val n = y.getWidth + require(n > 0 && !isPow2(n)) + + val p2 = for (i <- 0 until log2Up(n)) yield 1 << i + val syndrome = p2 map { i => + val r = for (j <- 1 to n; if (j & i) != 0) + yield y(j-1) + r reduce (_^_) + } + val s = Vec(syndrome).toBits + + private def swizzle(z: UInt) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))).toBits + def uncorrected = swizzle(y) + def corrected = swizzle(((y.toUInt << 1) ^ UIntToOH(s)) >> 1) + def correctable = s.orR + def uncorrectable = Bool(false) + } + private def mapping(i: Int) = i-1-log2Up(i) +} + +class SECDEDCode extends Code +{ + private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def encode(x: UInt) = par.encode(sec.encode(x)) + def decode(x: UInt) = new Decoding { + val secdec = sec.decode(x(x.getWidth-2,0)) + val pardec = par.decode(x) + + def uncorrected = secdec.uncorrected + def corrected = secdec.corrected + def correctable = pardec.uncorrectable + def uncorrectable = !pardec.uncorrectable && secdec.correctable + } +} + +object ErrGen +{ + // generate a 1-bit error with approximate probability 2^-f + def apply(width: Int, f: Int): UInt = { + require(width > 0 && f >= 0 && log2Up(width) + f <= 16) + UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) + } + def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f) +} + +class SECDEDTest extends Module +{ + val code = new SECDEDCode + val k = 4 + val n = code.width(k) + + val io = new Bundle { + val original = Bits(OUTPUT, k) + val encoded = Bits(OUTPUT, n) + val injected = Bits(OUTPUT, n) + val uncorrected = Bits(OUTPUT, k) + val corrected = Bits(OUTPUT, k) + val correctable = Bool(OUTPUT) + val uncorrectable = Bool(OUTPUT) + } + + val c = Counter(Bool(true), 1 << k) + val numErrors = Counter(c._2, 3)._1 + val e = code.encode(c._1) + val i = e ^ Mux(numErrors < UInt(1), UInt(0), ErrGen(n, 1)) ^ Mux(numErrors < UInt(2), UInt(0), ErrGen(n, 1)) + val d = code.decode(i) + + io.original := c._1 + io.encoded := e + io.injected := i + io.uncorrected := d.uncorrected + io.corrected := d.corrected + io.correctable := d.correctable + io.uncorrectable := d.uncorrectable +} diff --git a/uncore/src/main/scala/agents/Mmio.scala 
b/uncore/src/main/scala/agents/Mmio.scala new file mode 100644 index 00000000..a3b2ab03 --- /dev/null +++ b/uncore/src/main/scala/agents/Mmio.scala @@ -0,0 +1,73 @@ +package uncore.agents + +import Chisel._ +import uncore.tilelink._ +import cde.Parameters + +class MMIOTileLinkManagerData(implicit p: Parameters) + extends TLBundle()(p) + with HasClientId + with HasClientTransactionId + +class MMIOTileLinkManager(implicit p: Parameters) + extends CoherenceAgentModule()(p) { + val io = new ManagerTLIO + + // MMIO requests should never need probe or release + io.inner.probe.valid := Bool(false) + io.inner.release.ready := Bool(false) + + val multibeat_fire = io.outer.acquire.fire() && io.oacq().hasMultibeatData() + val multibeat_start = multibeat_fire && io.oacq().addr_beat === UInt(0) + val multibeat_end = multibeat_fire && io.oacq().addr_beat === UInt(outerDataBeats - 1) + + // Acquire and Grant are basically passthru, + // except client_id and client_xact_id need to be converted. + // Associate the inner client_id and client_xact_id + // with the outer client_xact_id. + val xact_pending = Reg(init = UInt(0, maxManagerXacts)) + val xact_id_sel = PriorityEncoder(~xact_pending) + val xact_id_reg = RegEnable(xact_id_sel, multibeat_start) + val xact_multibeat = Reg(init = Bool(false)) + val outer_xact_id = Mux(xact_multibeat, xact_id_reg, xact_id_sel) + val xact_free = !xact_pending.andR + val xact_buffer = Reg(Vec(maxManagerXacts, new MMIOTileLinkManagerData)) + + io.inner.acquire.ready := io.outer.acquire.ready && xact_free + io.outer.acquire.valid := io.inner.acquire.valid && xact_free + io.outer.acquire.bits := io.inner.acquire.bits + io.outer.acquire.bits.client_xact_id := outer_xact_id + + def isLastBeat[T <: TileLinkChannel with HasTileLinkBeatId](in: T): Bool = + !in.hasMultibeatData() || in.addr_beat === UInt(outerDataBeats - 1) + + def addPendingBitOnAcq[T <: AcquireMetadata](in: DecoupledIO[T]): UInt = + Mux(in.fire() && isLastBeat(in.bits), UIntToOH(in.bits.client_xact_id), UInt(0)) + + def clearPendingBitOnGnt[T <: GrantMetadata](in: DecoupledIO[T]): UInt = + ~Mux(in.fire() && isLastBeat(in.bits) && !in.bits.requiresAck(), + UIntToOH(in.bits.manager_xact_id), UInt(0)) + + def clearPendingBitOnFin(in: DecoupledIO[Finish]): UInt = + ~Mux(in.fire(), UIntToOH(in.bits.manager_xact_id), UInt(0)) + + xact_pending := (xact_pending | addPendingBitOnAcq(io.outer.acquire)) & + clearPendingBitOnFin(io.inner.finish) & + clearPendingBitOnGnt(io.inner.grant) + + when (io.outer.acquire.fire() && isLastBeat(io.outer.acquire.bits)) { + xact_buffer(outer_xact_id) := io.iacq() + } + + when (multibeat_start) { xact_multibeat := Bool(true) } + when (multibeat_end) { xact_multibeat := Bool(false) } + + val gnt_xact = xact_buffer(io.ognt().client_xact_id) + io.outer.grant.ready := io.inner.grant.ready + io.inner.grant.valid := io.outer.grant.valid + io.inner.grant.bits := io.outer.grant.bits + io.inner.grant.bits.client_id := gnt_xact.client_id + io.inner.grant.bits.client_xact_id := gnt_xact.client_xact_id + io.inner.grant.bits.manager_xact_id := io.ognt().client_xact_id + io.inner.finish.ready := Bool(true) +} diff --git a/uncore/src/main/scala/agents/StatelessBridge.scala b/uncore/src/main/scala/agents/StatelessBridge.scala new file mode 100644 index 00000000..0ed818cf --- /dev/null +++ b/uncore/src/main/scala/agents/StatelessBridge.scala @@ -0,0 +1,69 @@ +// See LICENSE for license details. 
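+
+// A worked sizing example for the id-packing scheme below (the numbers are
+// illustrative): with two inner clients (icid = 1) and four transaction ids
+// per client (ixid = 2), Cat(client_id, client_xact_id) occupies
+// icid + ixid = 3 bits, so the outer network must provide oxid >= 3.
+// Grants undo the packing by slicing: the low ixid bits restore
+// client_xact_id and the next icid bits restore client_id.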
+ +package uncore.agents + +import Chisel._ +import uncore.coherence._ +import uncore.tilelink._ +import uncore.constants._ +import uncore.devices._ +import cde.{Parameters, Field, Config} + +/** The ManagerToClientStateless Bridge does not maintain any state for the messages + * which pass through it. It simply passes the messages back and forth without any + * tracking or translation. + * + * This can reduce area and timing in very constrained situations: + * - The Manager and Client implement the same coherence protocol + * - There are no probe or finish messages. + * - The outer transaction ID is large enough to handle all possible inner + * transaction IDs, such that no remapping state must be maintained. + * + * This bridge DOES NOT keep the uncached channel coherent with the cached + * channel. Uncached requests to blocks cached by the L1 will not probe the L1. + * As a result, uncached reads to cached blocks will get stale data until + * the L1 performs a voluntary writeback, and uncached writes to cached blocks + * will get lost, as the voluntary writeback from the L1 will overwrite the + * changes. If your tile relies on probing the L1 data cache in order to + * share data between the instruction cache and data cache (e.g. you are using + * a non-blocking L1 D$) or if the tile has uncached channels capable of + * writes (e.g. Hwacha and other RoCC accelerators), DO NOT USE THIS BRIDGE. + */ + +class ManagerToClientStatelessBridge(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) { + val icid = io.inner.tlClientIdBits + val ixid = io.inner.tlClientXactIdBits + val oxid = io.outer.tlClientXactIdBits + + val innerCoh = io.inner.tlCoh.getClass + val outerCoh = io.outer.tlCoh.getClass + + // Stateless Bridge is only usable in certain constrained situations. + // Sanity check its usage here. + + require(io.inner.tlNCachingClients <= 1) + require(icid + ixid <= oxid) + require(innerCoh eq outerCoh, + s"Coherence policies do not match: inner is ${innerCoh.getSimpleName}, outer is ${outerCoh.getSimpleName}") + + io.outer.acquire.valid := io.inner.acquire.valid + io.inner.acquire.ready := io.outer.acquire.ready + io.outer.acquire.bits := io.inner.acquire.bits + io.outer.acquire.bits.client_xact_id := Cat(io.inner.acquire.bits.client_id, io.inner.acquire.bits.client_xact_id) + + io.outer.release.valid := io.inner.release.valid + io.inner.release.ready := io.outer.release.ready + io.outer.release.bits := io.inner.release.bits + io.outer.release.bits.client_xact_id := Cat(io.inner.release.bits.client_id, io.inner.release.bits.client_xact_id) + + io.inner.grant.valid := io.outer.grant.valid + io.outer.grant.ready := io.inner.grant.ready + io.inner.grant.bits := io.outer.grant.bits + io.inner.grant.bits.client_xact_id := io.outer.grant.bits.client_xact_id(ixid-1, 0) + io.inner.grant.bits.client_id := io.outer.grant.bits.client_xact_id(icid+ixid-1, ixid) + + io.inner.probe.valid := Bool(false) + io.inner.finish.ready := Bool(true) + + disconnectOuterProbeAndFinish() +} diff --git a/uncore/src/main/scala/agents/StoreDataQueue.scala b/uncore/src/main/scala/agents/StoreDataQueue.scala new file mode 100644 index 00000000..e2079772 --- /dev/null +++ b/uncore/src/main/scala/agents/StoreDataQueue.scala @@ -0,0 +1,119 @@ +// See LICENSE for license details. 
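+
+// A minimal sketch of the pointer-in-data trick used below, assuming an
+// implicit Parameters in scope: a DataQueueLocation names a queue entry and
+// rides through the TileLink data field, whose width the trait overrides to
+// internalDataBits for the internal networks.
+//
+//   val ptr = DataQueueLocation(UInt(3), inStoreQueue) // entry 3 of the SDQ
+//   val as_data = ptr.toBits                           // travels in acquire.bits.data
+//   val back = new DataQueueLocation().fromBits(as_data)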
+ +package uncore.agents +import Chisel._ +import uncore.tilelink._ +import cde.{Parameters, Field} + +case object L2StoreDataQueueDepth extends Field[Int] + +trait HasStoreDataQueueParameters extends HasCoherenceAgentParameters { + val sdqDepth = p(L2StoreDataQueueDepth)*innerDataBeats + val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(sdqDepth)) + val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases +} + +class DataQueueLocation(implicit p: Parameters) extends CoherenceAgentBundle()(p) + with HasStoreDataQueueParameters { + val idx = UInt(width = dqIdxBits) + val loc = UInt(width = log2Ceil(nDataQueueLocations)) +} + +object DataQueueLocation { + def apply(idx: UInt, loc: UInt)(implicit p: Parameters) = { + val d = Wire(new DataQueueLocation) + d.idx := idx + d.loc := loc + d + } +} + +trait HasStoreDataQueue extends HasStoreDataQueueParameters { + val io: HierarchicalTLIO + val trackerIOsList: Seq[HierarchicalXactTrackerIO] + + val internalDataBits = new DataQueueLocation().getWidth + val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations) + + val usingStoreDataQueue = p.alterPartial({ + case TLKey(`innerTLId`) => innerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits)) + case TLKey(`outerTLId`) => outerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits)) + }) + + // Queue to store impending Put data + lazy val sdq = Reg(Vec(sdqDepth, io.iacq().data)) + lazy val sdq_val = Reg(init=Bits(0, sdqDepth)) + lazy val sdq_alloc_id = PriorityEncoder(~sdq_val) + lazy val sdq_rdy = !sdq_val.andR + lazy val sdq_enq = trackerIOsList.map( t => + (t.alloc.iacq.should || t.alloc.iacq.matches) && + t.inner.acquire.fire() && + t.iacq().hasData() + ).reduce(_||_) + + lazy val sdqLoc = List.fill(nTransactors) { + DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits + } + + /* + doInputRoutingWithAllocation( + in = io.inner.acquire, + outs = trackerList.map(_.io.inner.acquire), + allocs = trackerList.map(_.io.alloc._iacq), + dataOverride = Some(sdqLoc), + allocOverride = Some(sdq_rdy && !irel_vs_iacq_conflict)) + */ + + // Queue to store impending Voluntary Release data + lazy val voluntary = io.irel().isVoluntary() + lazy val vwbdq_enq = io.inner.release.fire() && voluntary && io.irel().hasData() + lazy val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, innerDataBeats) //TODO Zero width + lazy val vwbdq = Reg(Vec(innerDataBeats, io.irel().data)) //TODO Assumes nReleaseTransactors == 1 + + + lazy val vwbqLoc = (0 until nTransactors).map(i => + (DataQueueLocation(rel_data_cnt, + (if(i < nReleaseTransactors) inVolWBQueue + else inClientReleaseQueue)).toBits)) + /* + doInputRoutingWithAllocation( + io.inner.release, + trackerList.map(_.io.inner.release), + trackerList.map(_.io.matches.irel), + trackerList.map(_.io.alloc.irel), + Some(vwbqLoc)) + */ + + val outer_arb: ClientTileLinkIOArbiter + lazy val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data) + /* + val outer_arb = Module(new ClientTileLinkIOArbiter(trackerList.size) + (usingStoreDataQueue.alterPartial({ case TLId => p(OuterTLId) }))) + outer_arb.io.in <> trackerList + */ + // Get the pending data out of the store data queue + lazy val is_in_sdq = outer_data_ptr.loc === inStoreQueue + lazy val free_sdq = io.outer.acquire.fire() && + io.outer.acquire.bits.hasData() && + outer_data_ptr.loc === inStoreQueue + /* + io.outer <> outer_arb.io.out + io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array( 
+ inStoreQueue -> sdq(outer_data_ptr.idx), + inVolWBQueue -> vwbdq(outer_data_ptr.idx))) + */ + + // Enqueue SDQ data + def sdqEnqueue() { + when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data } + when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data } + } + + // Update SDQ valid bits + def sdqUpdate() { + when (io.outer.acquire.valid || sdq_enq) { + sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) | + PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq) + } + } +} diff --git a/uncore/src/main/scala/agents/Trackers.scala b/uncore/src/main/scala/agents/Trackers.scala new file mode 100644 index 00000000..80d7a409 --- /dev/null +++ b/uncore/src/main/scala/agents/Trackers.scala @@ -0,0 +1,651 @@ +// See LICENSE for license details. + +package uncore.agents + +import Chisel._ +import uncore.coherence._ +import uncore.tilelink._ +import uncore.util._ +import uncore.Util._ +import junctions._ +import cde.{Field, Parameters} +import scala.math.max + +case object EnableL2Logging extends Field[Boolean] + +class TrackerAllocation extends Bundle { + val matches = Bool(OUTPUT) + val can = Bool(OUTPUT) + val should = Bool(INPUT) +} + +class TrackerAllocationIO(implicit val p: Parameters) + extends ParameterizedBundle()(p) + with HasCacheBlockAddress { + val iacq = new TrackerAllocation + val irel = new TrackerAllocation + val oprb = new TrackerAllocation + val idle = Bool(OUTPUT) + override val addr_block = UInt(OUTPUT, tlBlockAddrBits) +} + +trait HasTrackerAllocationIO extends Bundle { + implicit val p: Parameters + val alloc = new TrackerAllocationIO +} + +class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p) + with HasTrackerAllocationIO + +class HierarchicalXactTrackerIO(implicit p: Parameters) extends HierarchicalTLIO()(p) + with HasTrackerAllocationIO + +abstract class XactTracker(implicit p: Parameters) extends CoherenceAgentModule()(p) + with HasXactTrackerStates + with HasPendingBitHelpers { + override val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9) + val state = Reg(init=s_idle) + + def quiesce(next: UInt = s_idle)(restore: => Unit) { + all_pending_done := !scoreboard.foldLeft(Bool(false))(_||_) + when(state === s_busy && all_pending_done) { + state := next + restore + } + } + + def pinAllReadyValidLow[T <: Data](b: Bundle) { + b.elements.foreach { + _._2 match { + case d: DecoupledIO[_] => + if(d.ready.dir == OUTPUT) d.ready := Bool(false) + else if(d.valid.dir == OUTPUT) d.valid := Bool(false) + case v: ValidIO[_] => if(v.valid.dir == OUTPUT) v.valid := Bool(false) + case b: Bundle => pinAllReadyValidLow(b) + case _ => + } + } + } +} + +trait HasXactTrackerStates { + def state: UInt + def s_idle: UInt = UInt(0) + def s_meta_read: UInt + def s_meta_resp: UInt + def s_wb_req: UInt + def s_wb_resp: UInt + def s_inner_probe: UInt + def s_outer_acquire: UInt + def s_busy: UInt + def s_meta_write: UInt +} + +trait HasPendingBitHelpers extends HasDataBeatCounters { + val scoreboard = scala.collection.mutable.ListBuffer.empty[Bool] + val all_pending_done = Wire(Bool()) + + def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt = + Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat) + + def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt = + ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat) + + def addPendingBitWhenId[T <: HasClientId](inc: Bool, in: T): UInt = + Fill(in.tlNCachingClients, inc) & 
UIntToOH(in.client_id) + + def dropPendingBitWhenId[T <: HasClientId](dec: Bool, in: T): UInt = + ~Fill(in.tlNCachingClients, dec) | ~UIntToOH(in.client_id) + + def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T], inc: Bool = Bool(true)): UInt = + addPendingBitWhenBeat(in.fire() && in.bits.hasData() && inc, in.bits) + + def addPendingBitWhenBeatHasDataAndAllocs( + in: DecoupledIO[AcquireFromSrc], + alloc_override: Bool = Bool(false)): UInt = + addPendingBitWhenBeatHasData(in, in.bits.allocate() || alloc_override) + + def addPendingBitWhenBeatNeedsRead(in: DecoupledIO[AcquireFromSrc], inc: Bool = Bool(true)): UInt = { + val a = in.bits + val needs_read = (a.isGet() || a.isAtomic() || a.hasPartialWritemask()) || inc + addPendingBitWhenBeat(in.fire() && needs_read, a) + } + + def addPendingBitWhenBeatHasPartialWritemask(in: DecoupledIO[AcquireFromSrc]): UInt = + addPendingBitWhenBeat(in.fire() && in.bits.hasPartialWritemask(), in.bits) + + def addPendingBitsFromAcquire(a: SecondaryMissInfo): UInt = + Mux(a.hasMultibeatData(), Fill(a.tlDataBeats, UInt(1, 1)), UIntToOH(a.addr_beat)) + + def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt = + dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits) + + def dropPendingBitAtDest[T <: HasId](in: DecoupledIO[T]): UInt = + dropPendingBitWhenId(in.fire(), in.bits) + + def dropPendingBitAtDestWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt = + dropPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits) + + def addPendingBitAtSrc[T <: HasId](in: DecoupledIO[T]): UInt = + addPendingBitWhenId(in.fire(), in.bits) + + def addPendingBitAtSrcWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt = + addPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits) + + def addOtherBits(en: Bool, nBits: Int): UInt = + Mux(en, Cat(Fill(nBits - 1, UInt(1, 1)), UInt(0, 1)), UInt(0, nBits)) + + def addPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt = + addOtherBits(in.fire() && + in.bits.hasMultibeatData() && + in.bits.addr_beat === UInt(0), + in.bits.tlDataBeats) + + def dropPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt = + ~addPendingBitsOnFirstBeat(in) +} + +trait HasDataBuffer extends HasCoherenceAgentParameters { + val data_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerDataBits))) + + type TLDataBundle = TLBundle with HasTileLinkData with HasTileLinkBeatId + + def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) { + when(in.fire() && in.bits.hasData() && alloc) { + data_buffer(in.bits.addr_beat) := in.bits.data + } + } + + // TODO: provide func for accessing when innerDataBeats =/= outerDataBeats or internalDataBeats + def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) { + data_buffer(beat) := incoming + } + + def mergeDataInner[T <: TLDataBundle](in: DecoupledIO[T]) { + when(in.fire() && in.bits.hasData()) { + mergeData(innerDataBits)(in.bits.addr_beat, in.bits.data) + } + } + + def mergeDataOuter[T <: TLDataBundle](in: DecoupledIO[T]) { + when(in.fire() && in.bits.hasData()) { + mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data) + } + } +} + +trait HasByteWriteMaskBuffer extends HasDataBuffer { + val wmask_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerWriteMaskBits))) + + override def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) { + when(in.fire() && in.bits.hasData() && alloc) { + val beat = in.bits.addr_beat + val full = FillInterleaved(8, in.bits.wmask()) + 
data_buffer(beat) := (~full & data_buffer(beat)) | (full & in.bits.data) + wmask_buffer(beat) := in.bits.wmask() | wmask_buffer(beat) // assumes wmask_buffer is zeroed + } + } + + override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) { + val old_data = incoming // Refilled, written back, or de-cached data + val new_data = data_buffer(beat) // Newly Put data is already in the buffer + val wmask = FillInterleaved(8, wmask_buffer(beat)) + data_buffer(beat) := ~wmask & old_data | wmask & new_data + } + + def clearWmaskBuffer() { + wmask_buffer.foreach { w => w := UInt(0) } + } +} + +trait HasBlockAddressBuffer extends HasCoherenceAgentParameters { + val xact_addr_block = Reg(init = UInt(0, width = blockAddrBits)) +} + + +trait HasAcquireMetadataBuffer extends HasBlockAddressBuffer { + val xact_allocate = Reg{ Bool() } + val xact_amo_shift_bytes = Reg{ UInt() } + val xact_op_code = Reg{ UInt() } + val xact_addr_byte = Reg{ UInt() } + val xact_op_size = Reg{ UInt() } + val xact_addr_beat = Wire(UInt()) + val xact_iacq = Wire(new SecondaryMissInfo) +} + +trait HasVoluntaryReleaseMetadataBuffer extends HasBlockAddressBuffer + with HasPendingBitHelpers + with HasXactTrackerStates { + def io: HierarchicalXactTrackerIO + + val xact_vol_ir_r_type = Reg{ UInt() } + val xact_vol_ir_src = Reg{ UInt() } + val xact_vol_ir_client_xact_id = Reg{ UInt() } + + def xact_vol_irel = Release( + src = xact_vol_ir_src, + voluntary = Bool(true), + r_type = xact_vol_ir_r_type, + client_xact_id = xact_vol_ir_client_xact_id, + addr_block = xact_addr_block) + (p.alterPartial({ case TLId => p(InnerTLId) })) +} + +trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { + def inner_coh: ManagerMetadata + + val pending_irel_data = Reg(init=Bits(0, width = innerDataBeats)) + val vol_ignt_counter = Wire(new TwoWayBeatCounterStatus) + + def irel_can_merge: Bool + def irel_same_xact: Bool + def irel_is_allocating: Bool = state === s_idle && io.alloc.irel.should && io.inner.release.valid + def irel_is_merging: Bool = (irel_can_merge || irel_same_xact) && io.inner.release.valid + + def innerRelease(block_vol_ignt: Bool = Bool(false), next: UInt = s_busy) { + connectTwoWayBeatCounters( + status = vol_ignt_counter, + up = io.inner.release, + down = io.inner.grant, + trackUp = (r: Release) => { + Mux(state === s_idle, io.alloc.irel.should, io.alloc.irel.matches) && r.isVoluntary() && r.requiresAck() + }, + trackDown = (g: Grant) => (state =/= s_idle) && g.isVoluntary()) + + + when(irel_is_allocating) { + xact_addr_block := io.irel().addr_block + // Set all of them to pending in the beginning as a precaution + // If it turns out we don't need some or all of the beats, they will + // be overridden below + pending_irel_data := ~UInt(0, innerDataBeats) + state := next + } + + val irel_fire = (irel_is_allocating || irel_is_merging) && io.inner.release.ready + when (irel_fire) { + when (io.irel().first()) { + when (io.irel().isVoluntary()) { + xact_vol_ir_r_type := io.irel().r_type + xact_vol_ir_src := io.irel().client_id + xact_vol_ir_client_xact_id := io.irel().client_xact_id + } + // If this release has data, set all the pending bits except the first. 
+ // Otherwise, clear all the pending bits + pending_irel_data := Mux(io.irel().hasMultibeatData(), + dropPendingBitWhenBeatHasData(io.inner.release), + UInt(0)) + } .otherwise { + pending_irel_data := (pending_irel_data & dropPendingBitWhenBeatHasData(io.inner.release)) + } + if (p(EnableL2Logging)) { + when (io.irel().hasData()) { + printf("[release] addr_block=%x addr_beat=%d data=%x\n", + io.irel().addr_block, io.irel().addr_beat, io.irel().data) + } + } + } + + io.inner.grant.valid := state.isOneOf(s_wb_req, s_wb_resp, s_inner_probe, s_busy) && + vol_ignt_counter.pending && + !(pending_irel_data.orR || block_vol_ignt) + + io.inner.grant.bits := inner_coh.makeGrant(xact_vol_irel) + + scoreboard += (pending_irel_data.orR, vol_ignt_counter.pending) + } + +} + +trait EmitsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { + val pending_orel_send = Reg(init=Bool(false)) + val pending_orel_data = Reg(init=Bits(0, width = innerDataBeats)) + val vol_ognt_counter = Wire(new TwoWayBeatCounterStatus) + val pending_orel = pending_orel_send || pending_orel_data.orR || vol_ognt_counter.pending + val sending_orel = Reg(init = Bool(false)) + + // Block acceptance of inner releases if we have already started sending + // outer releases, but have not yet sent out the beat corresponding to the + // inner release. This function must be included in io.inner.release.ready + // if it is possible to start accepting a new inner release as the previous + // outer release is still being sent. DO NOT include this in the + // io.inner.release.ready if the releases are not buffered + // (i.e. io.inner.release and io.outer.release combinationally linked). + def blockInnerRelease(rel: ReleaseMetadata = io.irel()): Bool = { + val waiting_to_send = sending_orel && pending_orel_data(rel.addr_beat) + val sending_now = io.outer.release.fire() && rel.addr_beat === io.orel().addr_beat + rel.hasData() && (waiting_to_send || sending_now) + } + + def outerRelease( + coh: ClientMetadata, + buffering: Bool = Bool(true), + data: UInt = io.irel().data, + add_pending_data_bits: UInt = UInt(0), + add_pending_send_bit: Bool = Bool(false), + block_orel: Bool = Bool(false)) { + + when (state =/= s_idle || io.alloc.irel.should) { + pending_orel_data := (pending_orel_data | + addPendingBitWhenBeatHasData(io.inner.release) | + add_pending_data_bits) & + dropPendingBitWhenBeatHasData(io.outer.release) + } + when (add_pending_send_bit) { pending_orel_send := Bool(true) } + when (io.outer.release.fire()) { + when (io.outer.release.bits.first()) { sending_orel := Bool(true) } + when (io.outer.release.bits.last()) { sending_orel := Bool(false) } + pending_orel_send := Bool(false) + } + + connectTwoWayBeatCounters( + status = vol_ognt_counter, + up = io.outer.release, + down = io.outer.grant, + trackUp = (r: Release) => r.isVoluntary() && r.requiresAck(), + trackDown = (g: Grant) => g.isVoluntary()) + + io.outer.release.valid := !block_orel && Mux(buffering, + (state === s_busy) && Mux(io.orel().hasData(), + pending_orel_data(vol_ognt_counter.up.idx), + pending_orel_send), + // only writebacks need to be forwarded to the outer interface + state =/= s_idle && io.alloc.irel.matches && + io.irel().hasData() && io.inner.release.valid) + + io.outer.release.bits := coh.makeVoluntaryWriteback( + client_xact_id = UInt(0), // TODO was tracker id, but not needed? 
+ addr_block = xact_addr_block, + addr_beat = vol_ognt_counter.up.idx, + data = data) + + when (vol_ognt_counter.pending) { io.outer.grant.ready := Bool(true) } + + scoreboard += (pending_orel, vol_ognt_counter.pending) + } +} + +trait EmitsInnerProbes extends HasBlockAddressBuffer + with HasXactTrackerStates + with HasPendingBitHelpers { + def io: HierarchicalXactTrackerIO + + val needs_probes = (innerNCachingClients > 0) + val pending_iprbs = Reg(UInt(width = max(innerNCachingClients, 1))) + val curr_probe_dst = PriorityEncoder(pending_iprbs) + + def full_representation: UInt + def initializeProbes() { + if (needs_probes) + pending_iprbs := full_representation & ~io.incoherent.toBits + else + pending_iprbs := UInt(0) + } + def irel_same_xact = io.irel().conflicts(xact_addr_block) && + !io.irel().isVoluntary() && + state === s_inner_probe + + def innerProbe(prb: Probe, next: UInt) { + if (needs_probes) { + val irel_counter = Wire(new TwoWayBeatCounterStatus) + + pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe) + io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR + io.inner.probe.bits := prb + + connectTwoWayBeatCounters( + status = irel_counter, + up = io.inner.probe, + down = io.inner.release, + max = innerNCachingClients, + trackDown = (r: Release) => (state =/= s_idle) && !r.isVoluntary()) + + when(state === s_inner_probe && !(pending_iprbs.orR || irel_counter.pending)) { + state := next + } + } else { + when (state === s_inner_probe) { state := next } + } + + //N.B. no pending bits added to scoreboard because all handled in s_inner_probe + } +} + +trait RoutesInParent extends HasBlockAddressBuffer + with HasXactTrackerStates { + def io: HierarchicalXactTrackerIO + type AddrComparison = HasCacheBlockAddress => Bool + def exactAddrMatch(a: HasCacheBlockAddress): Bool = a.conflicts(xact_addr_block) + def routeInParent(iacqMatches: AddrComparison = exactAddrMatch, + irelMatches: AddrComparison = exactAddrMatch, + oprbMatches: AddrComparison = exactAddrMatch, + iacqCanAlloc: Bool = Bool(false), + irelCanAlloc: Bool = Bool(false), + oprbCanAlloc: Bool = Bool(false)) { + io.alloc.iacq.matches := (state =/= s_idle) && iacqMatches(io.iacq()) + io.alloc.irel.matches := (state =/= s_idle) && irelMatches(io.irel()) + io.alloc.oprb.matches := (state =/= s_idle) && oprbMatches(io.oprb()) + io.alloc.iacq.can := state === s_idle && iacqCanAlloc + io.alloc.irel.can := state === s_idle && irelCanAlloc + io.alloc.oprb.can := state === s_idle && oprbCanAlloc + io.alloc.addr_block := xact_addr_block + io.alloc.idle := state === s_idle + } +} + +trait AcceptsInnerAcquires extends HasAcquireMetadataBuffer + with AcceptsVoluntaryReleases + with HasXactTrackerStates + with HasPendingBitHelpers { + def io: HierarchicalXactTrackerIO + def nSecondaryMisses: Int + def alwaysWriteFullBeat: Boolean + def inner_coh: ManagerMetadata + def trackerId: Int + + // Secondary miss queue holds transaction metadata used to make grants + lazy val ignt_q = Module(new Queue( + new SecondaryMissInfo()(p.alterPartial({ case TLId => p(InnerTLId) })), + 1 + nSecondaryMisses)) + + val pending_ignt = Wire(Bool()) + val ignt_data_idx = Wire(UInt()) + val ignt_data_done = Wire(Bool()) + val ifin_counter = Wire(new TwoWayBeatCounterStatus) + val pending_put_data = Reg(init=Bits(0, width = innerDataBeats)) + val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats)) + + def iacq_same_xact: Bool = + (xact_iacq.client_xact_id === io.iacq().client_xact_id) && + (xact_iacq.client_id === 
io.iacq().client_id) &&
+ pending_ignt
+ def iacq_same_xact_multibeat = iacq_same_xact && io.iacq().hasMultibeatData()
+ def iacq_can_merge: Bool
+ def iacq_is_allocating: Bool = state === s_idle && io.alloc.iacq.should && io.inner.acquire.valid
+ def iacq_is_merging: Bool = (iacq_can_merge || iacq_same_xact) && io.inner.acquire.valid
+
+ def innerAcquire(can_alloc: Bool, next: UInt) {
+ val iacq_matches_head = iacq_same_xact && xact_iacq.addr_beat === io.iacq().addr_beat
+
+ // Enqueue some metadata information that we'll use to make coherence updates later
+ ignt_q.io.enq.valid := iacq_is_allocating ||
+ (!iacq_matches_head && pending_ignt &&
+ io.inner.acquire.fire() && io.iacq().first())
+ ignt_q.io.enq.bits := io.iacq()
+
+ // Use the outputs of the queue to make further messages
+ xact_iacq := Mux(ignt_q.io.deq.valid, ignt_q.io.deq.bits, ignt_q.io.enq.bits)
+ xact_addr_beat := xact_iacq.addr_beat
+ pending_ignt := ignt_q.io.count > UInt(0)
+
+ // Track whether any beats are missing from a PutBlock
+ when (state =/= s_idle || io.alloc.iacq.should) {
+ pending_put_data := (pending_put_data &
+ dropPendingBitWhenBeatHasData(io.inner.acquire)) |
+ addPendingBitsOnFirstBeat(io.inner.acquire)
+ }
+
+ // Initialize transaction metadata for the accepted Acquire
+ when(iacq_is_allocating) {
+ xact_addr_block := io.iacq().addr_block
+ xact_allocate := io.iacq().allocate() && can_alloc
+ xact_amo_shift_bytes := io.iacq().amo_shift_bytes()
+ xact_op_code := io.iacq().op_code()
+ xact_addr_byte := io.iacq().addr_byte()
+ xact_op_size := io.iacq().op_size()
+ // Make sure to collect all data from a PutBlock
+ pending_put_data := Mux(
+ io.iacq().isBuiltInType(Acquire.putBlockType),
+ dropPendingBitWhenBeatHasData(io.inner.acquire),
+ UInt(0))
+ pending_ignt_data := UInt(0)
+ state := next
+ }
+
+ scoreboard += (pending_put_data.orR)
+ }
+
+ def innerGrant(
+ data: UInt = io.ognt().data,
+ external_pending: Bool = Bool(false),
+ buffering: Bool = Bool(true),
+ add_pending_bits: UInt = UInt(0)) {
+ // Track the number of outstanding inner.finishes
+ connectTwoWayBeatCounters(
+ status = ifin_counter,
+ up = io.inner.grant,
+ down = io.inner.finish,
+ max = nSecondaryMisses,
+ trackUp = (g: Grant) => g.requiresAck())
+
+ // Track which beats are ready for response
+ when(!iacq_is_allocating) {
+ pending_ignt_data := (pending_ignt_data & dropPendingBitWhenBeatHasData(io.inner.grant)) |
+ addPendingBitWhenBeatHasData(io.inner.release) |
+ addPendingBitWhenBeatHasData(io.outer.grant) |
+ add_pending_bits
+ }
+
+ if (p(EnableL2Logging)) {
+ when (io.inner.grant.fire() && io.ignt().hasData()) {
+ printf("[get] addr_block=%x addr_beat=%d data=%x\n",
+ xact_addr_block, io.ignt().addr_beat, io.ignt().data)
+ }
+ }
+
+ // Have we finished receiving the complete inner acquire transaction?
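+ // (i.e. the transaction has been allocated and, for Puts, all of its
+ // data beats have been collected)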
+ val iacq_finished = !(state === s_idle || + state === s_meta_read || + pending_put_data.orR) + + val ignt_from_iacq = inner_coh.makeGrant( + sec = ignt_q.io.deq.bits, + manager_xact_id = UInt(trackerId), + data = data) + + // Make the Grant message using the data stored in the secondary miss queue + val (cnt, done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat) + ignt_data_idx := cnt + ignt_data_done := done + ignt_q.io.deq.ready := Bool(false) + when(!vol_ignt_counter.pending) { + ignt_q.io.deq.ready := ignt_data_done + io.inner.grant.bits := ignt_from_iacq + io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter + when (state === s_busy && pending_ignt) { + io.inner.grant.valid := !external_pending && + Mux(io.ignt().hasData(), + Mux(buffering, + pending_ignt_data(ignt_data_idx), + io.outer.grant.valid), + iacq_finished) + } + } + + // We must wait for as many Finishes as we sent Grants + io.inner.finish.ready := state === s_busy + + scoreboard += (pending_ignt, ifin_counter.pending) + } + +} + +trait EmitsOuterAcquires extends AcceptsInnerAcquires { + val ognt_counter = Wire(new TwoWayBeatCounterStatus) + + // Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory: + // + // If we're allocating in this cache, we can use the current metadata + // to make an appropriate custom Acquire, otherwise we copy over the + // built-in Acquire from the inner TL to the outer TL + def outerAcquire( + caching: Bool, + coh: ClientMetadata, + block_outer_acquire: Bool = Bool(false), + buffering: Bool = Bool(true), + data: UInt = io.iacq().data, + wmask: UInt = io.iacq().wmask(), + next: UInt = s_busy) { + + // Tracks outstanding Acquires, waiting for their matching Grant. + connectTwoWayBeatCounters( + status = ognt_counter, + up = io.outer.acquire, + down = io.outer.grant, + beat = xact_addr_beat, + trackDown = (g: Grant) => !g.isVoluntary()) + + io.outer.acquire.valid := + state === s_outer_acquire && !block_outer_acquire && + (xact_allocate || + Mux(buffering, + !pending_put_data(ognt_counter.up.idx), + // If not buffering, we should only send an outer acquire if + // the ignt_q is not empty (pending_ignt) and the enqueued + // transaction does not have data or we are receiving the + // inner acquire and it is the same transaction as the one enqueued. 
+ pending_ignt && (!xact_iacq.hasData() || + (io.inner.acquire.valid && iacq_same_xact)))) + + io.outer.acquire.bits := + Mux(caching, + coh.makeAcquire( + op_code = xact_op_code, + client_xact_id = UInt(0), + addr_block = xact_addr_block), + BuiltInAcquireBuilder( + a_type = xact_iacq.a_type, + client_xact_id = UInt(0), + addr_block = xact_addr_block, + addr_beat = ognt_counter.up.idx, + data = data, + addr_byte = xact_addr_byte, + operand_size = xact_op_size, + opcode = xact_op_code, + wmask = wmask, + alloc = Bool(false)) + (p.alterPartial({ case TLId => p(OuterTLId)}))) + + when(state === s_outer_acquire && ognt_counter.up.done) { state := next } + + when (ognt_counter.pending) { io.outer.grant.ready := Bool(true) } + + scoreboard += ognt_counter.pending + } +} + +abstract class VoluntaryReleaseTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p) + with AcceptsVoluntaryReleases + with RoutesInParent { + def irel_can_merge = Bool(false) + def irel_same_xact = io.irel().conflicts(xact_addr_block) && + io.irel().isVoluntary() && + pending_irel_data.orR +} + +abstract class AcquireTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p) + with AcceptsInnerAcquires + with EmitsOuterAcquires + with EmitsInnerProbes + with RoutesInParent { +} diff --git a/uncore/src/main/scala/coherence/Directory.scala b/uncore/src/main/scala/coherence/Directory.scala new file mode 100644 index 00000000..86e4fde5 --- /dev/null +++ b/uncore/src/main/scala/coherence/Directory.scala @@ -0,0 +1,43 @@ +// See LICENSE for license details. + +package uncore.coherence +import Chisel._ + +// This class encapsulates transformations on different directory information +// storage formats +abstract class DirectoryRepresentation(val width: Int) { + def pop(prev: UInt, id: UInt): UInt + def push(prev: UInt, id: UInt): UInt + def flush: UInt + def none(s: UInt): Bool + def one(s: UInt): Bool + def count(s: UInt): UInt + def next(s: UInt): UInt + def full(s: UInt): UInt +} + +abstract trait HasDirectoryRepresentation { + val dir: DirectoryRepresentation +} + +class NullRepresentation(nClients: Int) extends DirectoryRepresentation(1) { + def pop(prev: UInt, id: UInt) = UInt(0) + def push(prev: UInt, id: UInt) = UInt(0) + def flush = UInt(0) + def none(s: UInt) = Bool(false) + def one(s: UInt) = Bool(false) + def count(s: UInt) = UInt(nClients) + def next(s: UInt) = UInt(0) + def full(s: UInt) = SInt(-1, width = nClients).toUInt +} + +class FullRepresentation(nClients: Int) extends DirectoryRepresentation(nClients) { + def pop(prev: UInt, id: UInt) = prev & ~UIntToOH(id) + def push(prev: UInt, id: UInt) = prev | UIntToOH(id) + def flush = UInt(0, width = width) + def none(s: UInt) = s === UInt(0) + def one(s: UInt) = PopCount(s) === UInt(1) + def count(s: UInt) = PopCount(s) + def next(s: UInt) = PriorityEncoder(s) + def full(s: UInt) = s +} diff --git a/uncore/src/main/scala/coherence/Metadata.scala b/uncore/src/main/scala/coherence/Metadata.scala new file mode 100644 index 00000000..c0d7a6bf --- /dev/null +++ b/uncore/src/main/scala/coherence/Metadata.scala @@ -0,0 +1,344 @@ +// See LICENSE for license details. 
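+
+// A minimal client-side lifecycle sketch for the metadata API defined below,
+// assuming an implicit Parameters in scope (`gnt` and `blk` are illustrative):
+//
+//   val meta = ClientMetadata.onReset          // block starts invalid
+//   val miss = meta.isMiss(M_XRD)              // no read permission yet
+//   val acq = meta.makeAcquire(                // so request it
+//     op_code = M_XRD,
+//     client_xact_id = UInt(0),
+//     addr_block = blk)
+//   val refilled = meta.onGrant(gnt, M_XRD)    // upgraded by the response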
+
+package uncore.coherence
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import cde.{Parameters, Field}
+
+/** Identifies the TLId of the inner network in a hierarchical cache controller */
+case object InnerTLId extends Field[String]
+/** Identifies the TLId of the outer network in a hierarchical cache controller */
+case object OuterTLId extends Field[String]
+
+/** Base class to represent coherence information in clients and managers */
+abstract class CoherenceMetadata(implicit p: Parameters) extends TLBundle()(p) {
+ val co = tlCoh
+}
+
+/** Stores the client-side coherence information,
+ * such as permissions on the data and whether the data is dirty.
+ * Its API can be used to make TileLink messages in response to
+ * memory operations or [[uncore.Probe]] messages.
+ */
+class ClientMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
+ /** Actual state information stored in this bundle */
+ val state = UInt(width = co.clientStateWidth)
+
+ /** Metadata equality */
+ def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
+ def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)
+
+ /** Is the block's data present in this cache */
+ def isValid(dummy: Int = 0): Bool = co.isValid(this)
+ /** Does this cache have permissions on this block sufficient to perform op */
+ def isHit(op_code: UInt): Bool = co.isHit(op_code, this)
+ /** Does this cache lack permissions on this block sufficient to perform op */
+ def isMiss(op_code: UInt): Bool = !co.isHit(op_code, this)
+ /** Does a secondary miss on the block require another Acquire message */
+ def requiresAcquireOnSecondaryMiss(first_op: UInt, second_op: UInt): Bool =
+ co.requiresAcquireOnSecondaryMiss(first_op, second_op, this)
+ /** Does op require a Release to be made to outer memory */
+ def requiresReleaseOnCacheControl(op_code: UInt): Bool =
+ co.requiresReleaseOnCacheControl(op_code: UInt, this)
+ /** Does an eviction require a Release to be made to outer memory */
+ def requiresVoluntaryWriteback(dummy: Int = 0): Bool =
+ co.requiresReleaseOnCacheControl(M_FLUSH, this)
+
+ /** Constructs an Acquire message based on this metadata and a memory operation
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+ */
+ def makeAcquire(
+ op_code: UInt,
+ client_xact_id: UInt,
+ addr_block: UInt): Acquire = {
+ Acquire(
+ is_builtin_type = Bool(false),
+ a_type = co.getAcquireType(op_code, this),
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ union = Cat(op_code, Bool(true)))(p)
+ }
+
+ /** Constructs a Release message based on this metadata on a cache control op
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param data data being written back
+ */
+ def makeVoluntaryRelease(
+ op_code: UInt,
+ client_xact_id: UInt,
+ addr_block: UInt,
+ addr_beat: UInt = UInt(0),
+ data: UInt = UInt(0)): Release =
+ Release(
+ voluntary = Bool(true),
+ r_type = co.getReleaseType(op_code, this),
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ data = data)(p)
+
+ /** Constructs a Release message based on this metadata on an eviction
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param data data being written back
+ */
+    */
+  def makeVoluntaryWriteback(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release =
+    makeVoluntaryRelease(
+      op_code = M_FLUSH,
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      data = data)
+
+  /** Constructs a Release message based on this metadata and a [[uncore.Probe]]
+    *
+    * @param prb the incoming [[uncore.Probe]]
+    * @param addr_beat sub-block address (which beat)
+    * @param data data being released
+    */
+  def makeRelease(
+      prb: Probe,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release =
+    Release(
+      voluntary = Bool(false),
+      r_type = co.getReleaseType(prb, this),
+      client_xact_id = UInt(0),
+      addr_block = prb.addr_block,
+      addr_beat = addr_beat,
+      data = data)(p)
+
+  /** New metadata after receiving a [[uncore.Grant]]
+    *
+    * @param incoming the incoming [[uncore.Grant]]
+    * @param pending the mem op that triggered this transaction
+    */
+  def onGrant(incoming: Grant, pending: UInt): ClientMetadata =
+    co.clientMetadataOnGrant(incoming, pending, this)
+
+  /** New metadata after receiving a [[uncore.Probe]]
+    *
+    * @param incoming the incoming [[uncore.Probe]]
+    */
+  def onProbe(incoming: Probe): ClientMetadata =
+    co.clientMetadataOnProbe(incoming, this)
+
+  /** New metadata after an op_code hits this block
+    *
+    * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+    */
+  def onHit(op_code: UInt): ClientMetadata =
+    co.clientMetadataOnHit(op_code, this)
+
+  /** New metadata after op_code releases permissions on this block
+    *
+    * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+    */
+  def onCacheControl(op_code: UInt): ClientMetadata =
+    co.clientMetadataOnCacheControl(op_code, this)
+}
+
+/** Factories for ClientMetadata, including on reset */
+object ClientMetadata {
+  def apply(state: UInt)(implicit p: Parameters) = {
+    val meta = Wire(new ClientMetadata)
+    meta.state := state
+    meta
+  }
+  def onReset(implicit p: Parameters) = ClientMetadata(UInt(0))(p) // TODO: assumes clientInvalid === 0
+}
+
+/** Stores manager-side information about the status
+  * of a cache block, including whether it has any known sharers.
+  *
+  * Its API can be used to create [[uncore.Probe]] and [[uncore.Grant]] messages.
+  */
+class ManagerMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
+  // Currently no coherence policies assume manager-side state information
+  // val state = UInt(width = co.masterStateWidth) TODO: Fix 0-width wires in Chisel
+
+  /** The directory information for this block */
+  val sharers = UInt(width = co.dir.width)
+
+  /** Metadata equality */
+  def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
+    this.sharers === rhs.sharers
+  def =/=(rhs: ManagerMetadata): Bool = !this.===(rhs)
+
+  /** Converts the directory info into an N-hot sharer bitvector (i.e.
full representation) */ + def full(dummy: Int = 0): UInt = co.dir.full(this.sharers) + + /** Does this [[uncore.Acquire]] require [[uncore.Probe Probes]] to be sent */ + def requiresProbes(acq: HasAcquireType): Bool = co.requiresProbes(acq, this) + /** Does this memory op require [[uncore.Probe Probes]] to be sent */ + def requiresProbes(op_code: UInt): Bool = co.requiresProbes(op_code, this) + /** Does an eviction require [[uncore.Probe Probes]] to be sent */ + def requiresProbesOnVoluntaryWriteback(dummy: Int = 0): Bool = + co.requiresProbes(M_FLUSH, this) + + /** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]] + * + * @param dst Destination client id for this Probe + * @param acq Acquire message triggering this Probe + * @param addr_block address of the cache block being probed + */ + def makeProbe(dst: UInt, acq: HasAcquireType, addr_block: UInt): ProbeToDst = + Probe(dst, co.getProbeType(acq, this), addr_block)(p) + + /** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]] + * + * @param dst Destination client id for this Probe + * @param acq Acquire message triggering this Probe + */ + def makeProbe(dst: UInt, acq: AcquireMetadata): ProbeToDst = + Probe(dst, co.getProbeType(acq, this), acq.addr_block)(p) + + /** Construct an appropriate [[uncore.ProbeToDst]] for a given mem op + * + * @param dst Destination client id for this Probe + * @param op_code memory operation triggering this Probe + * @param addr_block address of the cache block being probed + */ + def makeProbe(dst: UInt, op_code: UInt, addr_block: UInt): ProbeToDst = + Probe(dst, co.getProbeType(op_code, this), addr_block)(p) + + /** Construct an appropriate [[uncore.ProbeToDst]] for an eviction + * + * @param dst Destination client id for this Probe + * @param addr_block address of the cache block being probed prior to eviction + */ + def makeProbeForVoluntaryWriteback(dst: UInt, addr_block: UInt): ProbeToDst = + makeProbe(dst, M_FLUSH, addr_block) + + /** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]] + * + * @param rel Release message being acknowledged by this Grant + */ + def makeGrant(rel: ReleaseMetadata with HasClientId): GrantToDst = + Grant( + dst = rel.client_id, + is_builtin_type = Bool(true), + g_type = Grant.voluntaryAckType, + client_xact_id = rel.client_xact_id, + manager_xact_id = UInt(0))(p) + + /** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] + * + * May contain single or multiple beats of data, or just be a permissions upgrade. + * + * @param acq Acquire message being responded to by this Grant + * @param manager_xact_id manager's transaction id + * @param addr_beat beat id of the data + * @param data data being refilled to the original requestor + */ + def makeGrant( + acq: AcquireMetadata with HasClientId, + manager_xact_id: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)): GrantToDst = + Grant( + dst = acq.client_id, + is_builtin_type = acq.isBuiltInType(), + g_type = co.getGrantType(acq, this), + client_xact_id = acq.client_xact_id, + manager_xact_id = manager_xact_id, + addr_beat = addr_beat, + data = data)(p) + + /** Construct an [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] with some overrides + * + * Used to respond to secondary misses merged into this transaction. + * May contain single or multiple beats of data. 
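+   * Each call constructs a single beat; a multibeat response is assembled
+   * by calling this factory once per beat with the corresponding data.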
+ * + * @param sec Secondary miss info + * @param manager_xact_id manager's transaction id + * @param data data being refilled to the original requestor + */ + def makeGrant( + sec: SecondaryMissInfo, + manager_xact_id: UInt, + data: UInt): GrantToDst = { + Grant( + dst = sec.client_id, + is_builtin_type = sec.isBuiltInType(), + g_type = co.getGrantType(sec, this), + client_xact_id = sec.client_xact_id, + manager_xact_id = manager_xact_id, + addr_beat = sec.addr_beat, + data = data)(p) + } + + /** New metadata after receiving a [[uncore.ReleaseFromSrc]] + * + * @param incoming the incoming [[uncore.ReleaseFromSrc]] + */ + def onRelease(incoming: ReleaseMetadata with HasClientId): ManagerMetadata = + co.managerMetadataOnRelease(incoming, incoming.client_id, this) + + /** New metadata after sending a [[uncore.GrantToDst]] + * + * @param outgoing the outgoing [[uncore.GrantToDst]] + */ + def onGrant(outgoing: GrantMetadata with HasClientId): ManagerMetadata = + co.managerMetadataOnGrant(outgoing, outgoing.client_id, this) +} + +/** Factories for ManagerMetadata, including on reset */ +object ManagerMetadata { + def apply(sharers: UInt, state: UInt = UInt(width = 0))(implicit p: Parameters) = { + val meta = Wire(new ManagerMetadata) + //meta.state := state TODO: Fix 0-width wires in Chisel + meta.sharers := sharers + meta + } + def apply(implicit p: Parameters) = { + val meta = Wire(new ManagerMetadata) + //meta.state := UInt(width = 0) TODO: Fix 0-width wires in Chisel + meta.sharers := meta.co.dir.flush + meta + } + def onReset(implicit p: Parameters) = ManagerMetadata(p) +} + +/** HierarchicalMetadata is used in a cache in a multi-level memory hierarchy + * that is a manager with respect to some inner caches and a client with + * respect to some outer cache. + * + * This class makes use of two different sets of TileLink parameters, which are + * applied by contextually mapping [[uncore.TLId]] to one of + * [[uncore.InnerTLId]] or [[uncore.OuterTLId]]. + */ +class HierarchicalMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) { + val inner: ManagerMetadata = new ManagerMetadata()(p.alterPartial({case TLId => p(InnerTLId)})) + val outer: ClientMetadata = new ClientMetadata()(p.alterPartial({case TLId => p(OuterTLId)})) + def ===(rhs: HierarchicalMetadata): Bool = + this.inner === rhs.inner && this.outer === rhs.outer + def =/=(rhs: HierarchicalMetadata): Bool = !this.===(rhs) +} + +/** Factories for HierarchicalMetadata, including on reset */ +object HierarchicalMetadata { + def apply(inner: ManagerMetadata, outer: ClientMetadata) + (implicit p: Parameters): HierarchicalMetadata = { + val m = Wire(new HierarchicalMetadata) + m.inner := inner + m.outer := outer + m + } + def onReset(implicit p: Parameters): HierarchicalMetadata = + apply(ManagerMetadata.onReset, ClientMetadata.onReset) +} diff --git a/uncore/src/main/scala/coherence/Policies.scala b/uncore/src/main/scala/coherence/Policies.scala new file mode 100644 index 00000000..744b8e7d --- /dev/null +++ b/uncore/src/main/scala/coherence/Policies.scala @@ -0,0 +1,696 @@ +// See LICENSE for license details. 
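+
+// A policy is instantiated with a DirectoryRepresentation sized for the
+// clients it must track, e.g. (illustrative sketch only; nClients is a
+// placeholder for whatever the enclosing agent knows about its clients):
+//
+//   val policy = new MESICoherence(new FullRepresentation(nClients))
+//   policy.grantTypesWithData  // the custom Grant types that carry data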
+
+package uncore.coherence
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.Util._
+
+/** The entire CoherencePolicy API consists of the following three traits:
+  * HasCustomTileLinkMessageTypes, used to define custom messages
+  * HasClientSideCoherencePolicy, for client coherence agents
+  * HasManagerSideCoherencePolicy, for manager coherence agents
+  */
+abstract class CoherencePolicy(val dir: DirectoryRepresentation)
+  extends HasCustomTileLinkMessageTypes
+  with HasClientSideCoherencePolicy
+  with HasManagerSideCoherencePolicy
+
+/** This API defines the custom, coherence-policy-defined message types,
+  * as opposed to the built-in ones found in tilelink.scala.
+  * Policies must enumerate the custom messages to be sent over each
+  * channel, as well as which of them have associated data.
+  */
+trait HasCustomTileLinkMessageTypes {
+  val nAcquireTypes: Int
+  def acquireTypeWidth = log2Up(nAcquireTypes)
+  val nProbeTypes: Int
+  def probeTypeWidth = log2Up(nProbeTypes)
+  val nReleaseTypes: Int
+  def releaseTypeWidth = log2Up(nReleaseTypes)
+  val nGrantTypes: Int
+  def grantTypeWidth = log2Up(nGrantTypes)
+
+  val acquireTypesWithData = Nil // Only built-in Acquire types have data for now
+  def releaseTypesWithData: Seq[UInt]
+  def grantTypesWithData: Seq[UInt]
+}
+
+/** This API contains all functions required for client coherence agents.
+  * Policies must enumerate the number of client states and define their
+  * permissions with respect to memory operations. Policies must fill in functions
+  * to control which messages are sent and how metadata is updated in response
+  * to coherence events. These functions are generally called from within the
+  * ClientMetadata class in metadata.scala
+  */
+trait HasClientSideCoherencePolicy {
+  // Client coherence states and their permissions
+  val nClientStates: Int
+  def clientStateWidth = log2Ceil(nClientStates)
+  def clientStatesWithReadPermission: Seq[UInt]
+  def clientStatesWithWritePermission: Seq[UInt]
+  def clientStatesWithDirtyData: Seq[UInt]
+
+  // Transaction initiation logic
+  def isValid(meta: ClientMetadata): Bool
+  def isHit(cmd: UInt, meta: ClientMetadata): Bool = {
+    Mux(isWriteIntent(cmd),
+      meta.state isOneOf clientStatesWithWritePermission,
+      meta.state isOneOf clientStatesWithReadPermission)
+  }
+  //TODO: Assumes all states with write permissions also have read permissions
+  def requiresAcquireOnSecondaryMiss(
+      first_cmd: UInt,
+      second_cmd: UInt,
+      meta: ClientMetadata): Bool = {
+    isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
+  }
+  //TODO: Assumes all cache ctrl ops writeback dirty data, and
+  //      doesn't issue a transaction when e.g. downgrading Exclusive to Shared:
+  def requiresReleaseOnCacheControl(cmd: UInt, meta: ClientMetadata): Bool =
+    meta.state isOneOf clientStatesWithDirtyData
+
+  // Determine which custom message type to use
+  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt
+  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt
+  def getReleaseType(p: HasProbeType, meta: ClientMetadata): UInt
+
+  // Mutate ClientMetadata based on messages or cmds
+  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata): ClientMetadata
+}
+
+/** This API contains all functions required for manager coherence agents.
+  * Policies must enumerate the number of manager states. Policies must fill
+  * in functions to control which Probe and Grant messages are sent and how
+  * metadata should be updated in response to coherence events. These functions
+  * are generally called from within the ManagerMetadata class in metadata.scala
+  */
+trait HasManagerSideCoherencePolicy extends HasDirectoryRepresentation {
+  val nManagerStates: Int
+  def masterStateWidth = log2Ceil(nManagerStates)
+
+  // Transaction probing logic
+  def requiresProbes(acq: HasAcquireType, meta: ManagerMetadata): Bool
+  def requiresProbes(cmd: UInt, meta: ManagerMetadata): Bool
+
+  // Determine which custom message type to use in response
+  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt
+  def getProbeType(acq: HasAcquireType, meta: ManagerMetadata): UInt
+  def getGrantType(acq: HasAcquireType, meta: ManagerMetadata): UInt
+  def getExclusiveGrantType(): UInt
+
+  // Mutate ManagerMetadata based on messages or cmds
+  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata): ManagerMetadata
+  def managerMetadataOnGrant(outgoing: HasGrantType, dst: UInt, meta: ManagerMetadata) =
+    ManagerMetadata(sharers = Mux(outgoing.isBuiltInType(), // Assumes all built-ins are uncached
+                                  meta.sharers,
+                                  dir.push(meta.sharers, dst)))(meta.p)
+                  //state = meta.state) TODO: Fix 0-width wires in Chisel
+}
+
+/** The following concrete implementations of CoherencePolicy each provide the
+  * functionality of one particular protocol.
+  */
+
+/** A simple protocol with only two Client states.
+  * Data is always assumed to be dirty.
+  * Only a single client may ever have a copy of a block at a time.
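+  * For example, if client A holds the block and client B Acquires it, the
+  * manager must Probe A and collect its Release before Granting to B.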
+ */ +class MICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 1 + val nProbeTypes = 2 + val nReleaseTypes = 4 + val nGrantTypes = 1 + + val acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseCopyData :: releaseInvalidateAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantExclusive :: Nil = Enum(UInt(), nGrantTypes) + + def releaseTypesWithData = Seq(releaseInvalidateData, releaseCopyData) + def grantTypesWithData = Seq(grantExclusive) + + // Client states and functions + val nClientStates = 2 + val clientInvalid :: clientValid :: Nil = Enum(UInt(), nClientStates) + + def clientStatesWithReadPermission = Seq(clientValid) + def clientStatesWithWritePermission = Seq(clientValid) + def clientStatesWithDirtyData = Seq(clientValid) + + def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = acquireExclusive + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = meta.state isOneOf clientStatesWithDirtyData + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseCopyData, releaseCopyAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeCopy -> getReleaseType(M_FLUSH, meta))) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = meta + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(cmd === M_FLUSH, clientInvalid, meta.state))(meta.p) + + def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(incoming.isBuiltInType(), clientInvalid, clientValid))(meta.p) + + def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = + ClientMetadata(Mux(incoming.p_type === probeInvalidate, + clientInvalid, meta.state))(meta.p) + + // Manager states and functions: + val nManagerStates = 0 // We don't actually need any states for this protocol + + def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate)) + + def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.getPrefetchType -> probeCopy, + Acquire.putPrefetchType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + probeInvalidate) + + def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive) + def getExclusiveGrantType(): UInt = grantExclusive + + def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p) + MuxCase(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + 
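+      // releaseCopyData/releaseCopyAck fall through to the MuxCase default,
+      // leaving meta.sharers unchanged; only invalidations shrink the directory.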
incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A simple protocol with only three Client states. + * Data is marked as dirty when written. + * Only a single client may ever have a copy of a block at a time. + */ +class MEICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 1 + + val acquireExclusiveClean :: acquireExclusiveDirty :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantExclusive :: Nil = Enum(UInt(), nGrantTypes) + + def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + def grantTypesWithData = Seq(grantExclusive) + + // Client states and functions + val nClientStates = 3 + val clientInvalid :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + def clientStatesWithReadPermission = Seq(clientExclusiveClean, clientExclusiveDirty) + def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty) + def clientStatesWithDirtyData = Seq(clientExclusiveDirty) + + def isValid (meta: ClientMetadata) = meta.state =/= clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusiveDirty, acquireExclusiveClean) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = meta.state isOneOf clientStatesWithDirtyData + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_FLUSH, meta), + probeCopy -> getReleaseType(M_FLUSH, meta))) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state))))(meta.p) + + def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean)))(meta.p) + + def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientInvalid, + probeCopy -> clientInvalid)))(meta.p) + + // Manager states and functions: + val nManagerStates = 0 // We don't actually need any states for this protocol + + def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers) + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) 
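+
+  // For Acquires: built-in reads only need a copy of the data, built-in
+  // writes must invalidate the current owner, and custom (cached) Acquires
+  // always invalidate, since MEI has no shared state.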
+ + def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.getPrefetchType -> probeCopy, + Acquire.putPrefetchType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + probeInvalidate) + + def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive) + def getExclusiveGrantType(): UInt = grantExclusive + + def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p) + MuxCase(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A protocol with only three Client states. + * Data is always assumed to be dirty. + * Multiple clients may share read permissions on a block at the same time. + */ +class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 3 + + val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes) + + def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + def grantTypesWithData = Seq(grantShared, grantExclusive) + + // Client states and functions + val nClientStates = 3 + val clientInvalid :: clientShared :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveDirty) + def clientStatesWithWritePermission = Seq(clientExclusiveDirty) + def clientStatesWithDirtyData = Seq(clientExclusiveDirty) + + def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusive, acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = meta.state isOneOf clientStatesWithDirtyData + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_PRODUCE, meta))) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission, + clientShared, meta.state))))(meta.p) + + def clientMetadataOnGrant(incoming: 
HasGrantType, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> clientExclusiveDirty, + grantExclusiveAck -> clientExclusiveDirty))))(meta.p) + + def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientShared, + probeCopy -> clientShared)))(meta.p) + + // Manager states and functions: + val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing + // only a single sharer (also would need + // notification msg to track clean drops) + // Also could avoid probes on outer WBs. + + def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.getPrefetchType -> probeCopy, + Acquire.putPrefetchType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate))) + + def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), + Mux(a.a_type === acquireShared, + Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + grantExclusive)) + def getExclusiveGrantType(): UInt = grantExclusive + + def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p) + MuxCase(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A protocol with four Client states. + * Data is marked as dirty when written. + * Multiple clients may share read permissions on a block at the same time. 
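+ * A sole owner in clientExclusiveClean may write without issuing another
+ * Acquire and may evict without writing any data back.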
+ */ +class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 3 + + val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes) + + def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + def grantTypesWithData = Seq(grantShared, grantExclusive) + + // Client states and functions + val nClientStates = 4 + val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty) + def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty) + def clientStatesWithDirtyData = Seq(clientExclusiveDirty) + + def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusive, acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = meta.state isOneOf clientStatesWithDirtyData + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_PRODUCE, meta))) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission, + clientShared, meta.state), + M_CLEAN -> Mux(meta.state === clientExclusiveDirty, + clientExclusiveClean, meta.state))))(meta.p) + + def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean), + grantExclusiveAck -> clientExclusiveDirty))))(meta.p) + + def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientShared, + probeCopy -> clientShared)))(meta.p) + + // Manager states and functions: + val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing + // only a single sharer (also would need + // notification msg to track clean drops) + // Also could avoid probes on outer WBs. 
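+
+  // Probe decision summary for requiresProbes(acq, meta):
+  //   no sharers       -> no probes
+  //   exactly one      -> always probe (assumed Exclusive, see TODO below)
+  //   multiple sharers -> probe only if the Acquire needs exclusivity
+  //                       (custom non-Shared Acquires, builtins with data)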
+ + def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.getPrefetchType -> probeCopy, + Acquire.putPrefetchType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate))) + + def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), + Mux(a.a_type === acquireShared, + Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + grantExclusive)) + def getExclusiveGrantType(): UInt = grantExclusive + + def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p) + MuxCase(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +class MigratoryCoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 3 + val nProbeTypes = 4 + val nReleaseTypes = 10 + val nGrantTypes = 4 + + val acquireShared :: acquireExclusive :: acquireInvalidateOthers :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: probeInvalidateOthers :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: releaseDowngradeDataMigratory :: releaseDowngradeAckHasCopy :: releaseInvalidateDataMigratory :: releaseInvalidateAckMigratory :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: grantReadMigratory :: Nil = Enum(UInt(), nGrantTypes) + + def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData, releaseInvalidateDataMigratory, releaseDowngradeDataMigratory) + def grantTypesWithData = Seq(grantShared, grantExclusive, grantReadMigratory) + + // Client states and functions + val nClientStates = 7 + val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: clientSharedByTwo :: clientMigratoryClean :: clientMigratoryDirty :: Nil = Enum(UInt(), nClientStates) + + def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty, clientSharedByTwo, clientMigratoryClean, clientMigratoryDirty) + def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty, clientMigratoryClean, clientMigratoryDirty) + def clientStatesWithDirtyData = Seq(clientExclusiveDirty, clientMigratoryDirty) + + def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), + Mux(meta.state === clientInvalid, 
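+          // a valid copy is already held, so only the other sharers need to
+          // be invalidated; no data refill is required: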
acquireExclusive, acquireInvalidateOthers), + acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = meta.state isOneOf clientStatesWithDirtyData + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = { + val dirty = meta.state isOneOf clientStatesWithDirtyData + val with_data = MuxLookup(incoming.p_type, releaseInvalidateData, Array( + probeInvalidate -> Mux(meta.state isOneOf (clientExclusiveDirty, clientMigratoryDirty), + releaseInvalidateDataMigratory, releaseInvalidateData), + probeDowngrade -> Mux(meta.state === clientMigratoryDirty, + releaseDowngradeDataMigratory, releaseDowngradeData), + probeCopy -> releaseCopyData)) + val without_data = MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> Mux(clientExclusiveClean === meta.state, + releaseInvalidateAckMigratory, releaseInvalidateAck), + probeInvalidateOthers -> Mux(clientSharedByTwo === meta.state, + releaseInvalidateAckMigratory, releaseInvalidateAck), + probeDowngrade -> Mux(meta.state =/= clientInvalid, + releaseDowngradeAckHasCopy, releaseDowngradeAck), + probeCopy -> Mux(meta.state =/= clientInvalid, + releaseDowngradeAckHasCopy, releaseDowngradeAck))) + Mux(dirty, with_data, without_data) + } + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(isWrite(cmd), MuxLookup(meta.state, clientExclusiveDirty, Array( + clientExclusiveClean -> clientExclusiveDirty, + clientMigratoryClean -> clientMigratoryDirty)), + meta.state))(meta.p) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission, + clientShared, meta.state), + M_CLEAN -> MuxLookup(meta.state, meta.state, Array( + clientExclusiveDirty -> clientExclusiveClean, + clientMigratoryDirty -> clientMigratoryClean)))))(meta.p) + + def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean), + grantExclusiveAck -> clientExclusiveDirty, + grantReadMigratory -> Mux(isWrite(cmd), + clientMigratoryDirty, clientMigratoryClean)))))(meta.p) + + def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = { + val downgradeState = MuxLookup(meta.state, clientShared, Array( + clientExclusiveClean -> clientSharedByTwo, + clientExclusiveDirty -> clientSharedByTwo, + clientSharedByTwo -> clientShared, + clientMigratoryClean -> clientSharedByTwo, + clientMigratoryDirty -> clientInvalid)) + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeInvalidateOthers -> clientInvalid, + probeDowngrade -> downgradeState, + probeCopy -> downgradeState)))(meta.p) + } + + // Manager states and functions: + val nManagerStates = 0 // TODO: we could add some states to reduce the number of message types + + def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume 
it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.getPrefetchType -> probeCopy, + Acquire.putPrefetchType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate, + acquireInvalidateOthers -> probeInvalidateOthers))) + + def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), + MuxLookup(a.a_type, grantShared, Array( + acquireShared -> Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + acquireExclusive -> grantExclusive, + acquireInvalidateOthers -> grantExclusiveAck))) //TODO: add this to MESI for broadcast? + def getExclusiveGrantType(): UInt = grantExclusive + + def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p) + MuxCase(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped, + incoming.is(releaseInvalidateDataMigratory) -> popped, + incoming.is(releaseInvalidateAckMigratory) -> popped)) + } +} diff --git a/uncore/src/main/scala/converters/Ahb.scala b/uncore/src/main/scala/converters/Ahb.scala new file mode 100644 index 00000000..0fca9517 --- /dev/null +++ b/uncore/src/main/scala/converters/Ahb.scala @@ -0,0 +1,425 @@ +package uncore.converters + +import Chisel._ +import junctions._ +import uncore.tilelink._ +import uncore.util._ +import uncore.constants._ +import cde.{Parameters, Field} +import HastiConstants._ + +/* We need to translate TileLink requests into operations we can actually execute on AHB. 
 * The general plan of attack is:
 *   get         => one AHB=>TL read
 *   put         => [multiple AHB write fragments=>nil], one AHB write=>TL
 *   getBlock    => AHB burst reads =>TL
 *   putBlock    => AHB burst writes=>TL
 *   getPrefetch => noop=>TL
 *   putPrefetch => noop=>TL
 *   putAtomic   => one AHB=>TL read, one idle, one AHB atom_write=>nil, one idle
 *
 * This requires that we support a pipeline of optional AHB requests with optional TL responses
 */
+class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO
+    with HasGrantType
+    with HasClientTransactionId
+    with HasTileLinkBeatId {
+  val executeAHB = Bool()
+  val respondTL = Bool()
+  val latchAtom = Bool()
+  val firstBurst = Bool()
+  val finalBurst = Bool()
+  val cmd = Bits(width = M_SZ) // atomic op
+}
+
+// AHB stage1: translate TileLink Acquires into AHBRequests
+class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
+    with HasHastiParameters
+    with HasTileLinkParameters
+    with HasAddrMapParameters {
+  val io = new Bundle {
+    val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe
+    val request = new DecoupledIO(new AHBRequestIO)
+  }
+
+  // Match the AHB burst with a TileLink {Put,Get}Block
+  val burstSize = tlDataBeats match {
+    case 1 => HBURST_SINGLE
+    // case 2 not supported by AHB
+    case 4 => HBURST_WRAP4
+    case 8 => HBURST_WRAP8
+    case 16 => HBURST_WRAP16
+    case _ => throw new java.lang.AssertionError("TileLink beats unsupported by AHB")
+  }
+
+  // Bursts start at 0 and wrap around back to 0
+  val finalBurst = UInt(tlDataBeats-1, width = log2Up(tlDataBeats)).asUInt
+  val firstBurst = UInt(0, width = log2Up(tlDataBeats))
+  val next_wmask = Wire(UInt(width = tlDataBytes)) // calculated below
+
+  // State variables for processing more complicated TileLink Acquires
+  val s_atom_r :: s_atom_idle1 :: s_atom_w :: s_atom_idle2 :: Nil = Enum(UInt(), 4)
+  val atom_state = Reg(init = s_atom_r) // never changes if !supportAtomics
+  val done_wmask = Reg(init = UInt(0, width = tlDataBytes))
+  val burst = Reg(init = firstBurst)
+
+  // Grab some view of the TileLink acquire
+  val acq_wmask = io.acquire.bits.wmask()
+  val isReadBurst = io.acquire.bits.is(Acquire.getBlockType)
+  val isWriteBurst = io.acquire.bits.is(Acquire.putBlockType)
+  val isBurst = isWriteBurst || isReadBurst
+  val isAtomic = io.acquire.bits.is(Acquire.putAtomicType) && Bool(supportAtomics)
+  val isPut = io.acquire.bits.is(Acquire.putType)
+
+  // Final states?
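+  // For example (hypothetical values, not taken from this code): a Put with
+  // wmask 0b00111100 is consumed as two aligned halfword fragments, bytes 3:2
+  // first and bytes 5:4 second; last_wmask fires while the second fragment is
+  // issued, once next_wmask equals acq_wmask.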
+  val last_wmask = next_wmask === acq_wmask
+  val last_atom = atom_state === s_atom_idle2
+  val last_burst = burst === finalBurst
+
+  // Block the incoming request until we've fully consumed it
+  // NOTE: the outgoing grant.valid may happen while acquire.ready is still false;
+  // for this reason it is essential to have a Queue or a Pipe in front of acquire
+  io.acquire.ready := io.request.ready && MuxLookup(io.acquire.bits.a_type, Bool(true), Array(
+    Acquire.getType         -> Bool(true),
+    Acquire.getBlockType    -> last_burst, // hold it until the last beat of the burst
+    Acquire.putType         -> last_wmask, // only accept the put if we can fully consume its wmask
+    Acquire.putBlockType    -> Bool(true),
+    Acquire.putAtomicType   -> last_atom,  // atomic operation stages complete
+    Acquire.getPrefetchType -> Bool(true),
+    Acquire.putPrefetchType -> Bool(true)))
+
+  // Advance the fragment state
+  when (io.request.ready && io.acquire.valid && isPut) {
+    when (last_wmask) { // if this was the last fragment, restart FSM
+      done_wmask := UInt(0)
+    } .otherwise {
+      done_wmask := next_wmask
+    }
+  }
+
+  // Advance the burst state
+  // We assume here that TileLink gives us all putBlock beats with nothing between them
+  when (io.request.ready && io.acquire.valid && isBurst) {
+    when (last_burst) {
+      burst := UInt(0)
+    } .otherwise {
+      burst := burst + UInt(1)
+    }
+  }
+
+  // Advance the atomic state machine
+  when (io.request.ready && io.acquire.valid && isAtomic) {
+    switch (atom_state) {
+      is (s_atom_r) { atom_state := s_atom_idle1 }
+      is (s_atom_idle1) { atom_state := s_atom_w } // idle1 => AMOALU runs on a different clock than AHB slave read
+      is (s_atom_w) { atom_state := s_atom_idle2 }
+      is (s_atom_idle2) { atom_state := s_atom_r } // idle2 state is required by AHB after hmastlock is lowered
+    }
+  }
+
+  // Returns (range is all zeros, range is all ones, aligned wmask, log2 of size)
+  def mask_helper(in_0 : Bool, range : UInt): (Bool, Bool, UInt, UInt) = {
+    val len = range.getWidth
+    if (len == 1) {
+      (range === UInt(0), range === UInt(1), in_0.asUInt() & range, UInt(0))
+    } else {
+      val mid = len / 2
+      val lo = range(mid-1, 0)
+      val hi = range(len-1, mid)
+      val (lo_0, lo_1, lo_m, lo_s) = mask_helper(in_0, lo)
+      val (hi_0, hi_1, hi_m, hi_s) = mask_helper(in_0 && lo_0, hi)
+      val out_0 = lo_0 && hi_0
+      val out_1 = lo_1 && hi_1
+      val out_m = Cat(hi_m, lo_m) | Fill(len, (in_0 && out_1).asUInt())
+      val out_s = Mux(out_1, UInt(log2Up(len)), Mux(lo_0, hi_s, lo_s))
+      (out_0, out_1, out_m, out_s)
+    }
+  }
+
+  val pending_wmask = acq_wmask & ~done_wmask
+  val put_addr = PriorityEncoder(pending_wmask)
+  val (wmask_0, _, exec_wmask, put_size) = mask_helper(Bool(true), pending_wmask)
+  next_wmask := done_wmask | exec_wmask
+
+  // Calculate the address, with consideration to put fragments and bursts
+  val addr_block = io.acquire.bits.addr_block
+  val addr_beatin = io.acquire.bits.addr_beat
+  val addr_burst = Mux(isReadBurst, addr_beatin + burst, addr_beatin)
+  val addr_byte = Mux(isPut, put_addr, io.acquire.bits.addr_byte())
+  val addr_beat = Mux(isWriteBurst, UInt(0), addr_burst)
+  val ahbAddr = Cat(addr_block, addr_burst, addr_byte)
+  val ahbSize = Mux(isPut, put_size, Mux(isBurst, UInt(log2Ceil(tlDataBytes)), io.acquire.bits.op_size()))
+
+  val ahbBurst = MuxLookup(io.acquire.bits.a_type, HBURST_SINGLE, Array(
+    Acquire.getType -> HBURST_SINGLE,
+    Acquire.getBlockType -> burstSize,
+    Acquire.putType -> HBURST_SINGLE,
+    Acquire.putBlockType -> burstSize,
+    Acquire.putAtomicType -> HBURST_SINGLE,
+    Acquire.getPrefetchType -> HBURST_SINGLE,
Acquire.putPrefetchType -> HBURST_SINGLE)) + + val ahbWrite = MuxLookup(io.acquire.bits.a_type, Bool(false), Array( + Acquire.getType -> Bool(false), + Acquire.getBlockType -> Bool(false), + Acquire.putType -> Bool(true), + Acquire.putBlockType -> Bool(true), + Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array( + s_atom_r -> Bool(false), + s_atom_idle1 -> Bool(false), // don't care + s_atom_w -> Bool(true), + s_atom_idle2 -> Bool(true))), // don't care + Acquire.getPrefetchType -> Bool(false), // don't care + Acquire.putPrefetchType -> Bool(true))) // don't care + + val ahbExecute = MuxLookup(io.acquire.bits.a_type, Bool(false), Array( + Acquire.getType -> Bool(true), + Acquire.getBlockType -> Bool(true), + Acquire.putType -> !wmask_0, // handle the case of a Put with no bytes! + Acquire.putBlockType -> Bool(true), + Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array( + s_atom_r -> Bool(true), + s_atom_idle1 -> Bool(false), + s_atom_w -> Bool(true), + s_atom_idle2 -> Bool(false))), + Acquire.getPrefetchType -> Bool(false), + Acquire.putPrefetchType -> Bool(false))) + + val respondTL = MuxLookup(io.acquire.bits.a_type, Bool(false), Array( + Acquire.getType -> Bool(true), + Acquire.getBlockType -> Bool(true), + Acquire.putType -> last_wmask, + Acquire.putBlockType -> last_burst, + Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array( + s_atom_r -> Bool(true), // they want the old data + s_atom_idle1 -> Bool(false), + s_atom_w -> Bool(false), + s_atom_idle2 -> Bool(false))), + Acquire.getPrefetchType -> Bool(true), + Acquire.putPrefetchType -> Bool(true))) + + io.request.valid := io.acquire.valid + io.request.bits.htrans := HTRANS_IDLE // unused/ignored + io.request.bits.haddr := ahbAddr + io.request.bits.hmastlock := isAtomic && atom_state =/= s_atom_idle2 + io.request.bits.hwrite := ahbWrite + io.request.bits.hburst := ahbBurst + io.request.bits.hsize := ahbSize + io.request.bits.hprot := HPROT_DATA | HPROT_PRIVILEGED + io.request.bits.hwdata := io.acquire.bits.data + io.request.bits.executeAHB := ahbExecute + io.request.bits.respondTL := respondTL + io.request.bits.latchAtom := isAtomic && atom_state === s_atom_r + io.request.bits.firstBurst := burst === firstBurst + io.request.bits.finalBurst := burst === finalBurst || !isBurst + io.request.bits.cmd := io.acquire.bits.op_code() + io.request.bits.is_builtin_type := Bool(true) + io.request.bits.g_type := io.acquire.bits.getBuiltInGrantType() + io.request.bits.client_xact_id := io.acquire.bits.client_xact_id + io.request.bits.addr_beat := addr_beat + + val debugBurst = Reg(UInt()) + when (io.request.valid) { + debugBurst := addr_burst - burst + } + + // We only support built-in TileLink requests + assert(!io.acquire.valid || io.acquire.bits.is_builtin_type, "AHB bridge only supports builtin TileLink types") + // Ensure alignment of address to size + assert(!io.acquire.valid || (ahbAddr & ((UInt(1) << ahbSize) - UInt(1))) === UInt(0), "TileLink operation misaligned") + // If this is a putBlock, make sure it moves properly + assert(!io.acquire.valid || !isBurst || burst === firstBurst || debugBurst === addr_burst - burst, "TileLink putBlock beats not sequential") + // We better not get an incomplete TileLink acquire + assert(!io.acquire.valid || isBurst || burst === firstBurst, "TileLink never completed a putBlock") + // If we disabled atomic support, we better not see a request + assert(!io.acquire.bits.is(Acquire.putAtomicType) || Bool(supportAtomics)) +} + +// AHB stage2: execute AHBRequests 
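+// (The paired *0/*1 registers in this module shadow the two AHB pipeline
+// phases: the *0 copies describe the request currently in the address phase,
+// the *1 copies the one in the data phase, advancing only when hready allows.)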
+class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
+    with HasHastiParameters
+    with HasTileLinkParameters
+    with HasAddrMapParameters {
+  val io = new Bundle {
+    val request = new DecoupledIO(new AHBRequestIO).flip
+    val grant = new DecoupledIO(new Grant)
+    val ahb = new HastiMasterIO()
+  }
+
+  // All AHB outputs are registered (they might be IOs)
+  val midBurst = Reg(init = Bool(false))
+  val htrans = Reg(init = HTRANS_IDLE)
+  val haddr = Reg(UInt())
+  val hmastlock = Reg(init = Bool(false))
+  val hwrite = Reg(Bool())
+  val hburst = Reg(UInt())
+  val hsize = Reg(init = UInt(0, width = SZ_HSIZE))
+  val hprot = Reg(UInt())
+  val hwdata0 = Reg(Bits())
+  val hwdata1 = Reg(Bits())
+  val hrdata = Reg(Bits())
+
+  io.ahb.htrans := htrans
+  io.ahb.haddr := haddr
+  io.ahb.hmastlock := hmastlock
+  io.ahb.hwrite := hwrite
+  io.ahb.hburst := hburst
+  io.ahb.hsize := hsize
+  io.ahb.hprot := hprot
+  io.ahb.hwdata := hwdata1 // one cycle after the address phase
+
+  // TileLink response data needed in data phase
+  val respondTL0 = Reg(init = Bool(false))
+  val respondTL1 = Reg(init = Bool(false))
+  val latchAtom0 = Reg(init = Bool(false))
+  val latchAtom1 = Reg(init = Bool(false))
+  val executeAHB0 = Reg(init = Bool(false))
+  val executeAHB1 = Reg(init = Bool(false))
+  val bubble = Reg(init = Bool(true)) // nothing useful in address phase
+  val cmd = Reg(Bits())
+  val g_type0 = Reg(UInt())
+  val g_type1 = Reg(UInt())
+  val client_xact_id0 = Reg(Bits())
+  val client_xact_id1 = Reg(Bits())
+  val addr_beat0 = Reg(UInt())
+  val addr_beat1 = Reg(UInt())
+  val grant1 = Reg(new Grant)
+
+  // It is allowed to progress from Idle/Busy during a wait state
+  val addrReady = io.ahb.hready || bubble || (!executeAHB1 && !executeAHB0)
+  val dataReady = io.ahb.hready || !executeAHB1
+
+  // Only accept a new AHBRequest if we have enough buffer space in the pad
+  // to accommodate a persistent drop in TileLink's grant.ready
+  io.request.ready := addrReady && io.grant.ready
+
+  // htrans must be updated even if no request is valid
+  when (addrReady) {
+    when (io.request.fire() && io.request.bits.executeAHB) {
+      midBurst := !io.request.bits.finalBurst
+      when (io.request.bits.firstBurst) {
+        htrans := HTRANS_NONSEQ
+      } .otherwise {
+        htrans := HTRANS_SEQ
+      }
+    } .otherwise {
+      when (midBurst) {
+        htrans := HTRANS_BUSY
+      } .otherwise {
+        htrans := HTRANS_IDLE
+      }
+    }
+  }
+
+  // Address phase, clear respondTL when we have nothing to do
+  when (addrReady) {
+    when (io.request.fire()) {
+      respondTL0 := io.request.bits.respondTL
+      latchAtom0 := io.request.bits.latchAtom
+      executeAHB0 := io.request.bits.executeAHB
+      bubble := Bool(false)
+    } .otherwise {
+      respondTL0 := Bool(false)
+      latchAtom0 := Bool(false)
+      executeAHB0 := Bool(false)
+      bubble := Bool(true) // an atom-injected Idle is not a bubble!
+ } + } + + // Transfer bulk address phase + when (io.request.fire()) { + haddr := io.request.bits.haddr + hmastlock := io.request.bits.hmastlock + hwrite := io.request.bits.hwrite + hburst := io.request.bits.hburst + hsize := io.request.bits.hsize + hprot := io.request.bits.hprot + hwdata0 := io.request.bits.hwdata + cmd := io.request.bits.cmd + g_type0 := io.request.bits.g_type + client_xact_id0 := io.request.bits.client_xact_id + addr_beat0 := io.request.bits.addr_beat + } + + // Execute Atomic ops; unused and optimized away if !supportAtomics + val amo_p = p.alterPartial({ + case CacheBlockOffsetBits => hastiAddrBits + case AmoAluOperandBits => hastiDataBits + }) + val alu = Module(new AMOALU(rhsIsAligned = true)(amo_p)) + alu.io.addr := haddr + alu.io.cmd := cmd + alu.io.typ := hsize + alu.io.rhs := hwdata0 + alu.io.lhs := hrdata + + // Transfer bulk data phase + when (dataReady) { + when (addrReady) { + respondTL1 := respondTL0 + latchAtom1 := latchAtom0 + executeAHB1 := executeAHB0 + } .otherwise { + respondTL1 := Bool(false) + latchAtom1 := Bool(false) + executeAHB1 := Bool(false) + } + hwdata1 := Mux(Bool(supportAtomics), alu.io.out, hwdata0) + g_type1 := g_type0 + client_xact_id1 := client_xact_id0 + addr_beat1 := addr_beat0 + } + + // Latch the read result for an atomic operation + when (dataReady && latchAtom1) { + hrdata := io.ahb.hrdata + } + + // Only issue TL grant when the slave has provided data + io.grant.valid := dataReady && respondTL1 + io.grant.bits := Grant( + is_builtin_type = Bool(true), + g_type = g_type1, + client_xact_id = client_xact_id1, + manager_xact_id = UInt(0), + addr_beat = addr_beat1, + data = io.ahb.hrdata) + + // We cannot support errors from AHB to TileLink + assert(!io.ahb.hresp, "AHB hresp error detected and cannot be reported via TileLink") +} + +class AHBBridge(supportAtomics: Boolean = true)(implicit val p: Parameters) extends Module + with HasHastiParameters + with HasTileLinkParameters + with HasAddrMapParameters { + val io = new Bundle { + val tl = new ClientUncachedTileLinkIO().flip + val ahb = new HastiMasterIO() + } + + // Hasti and TileLink widths must agree at this point in the topology + require (tlDataBits == hastiDataBits) + require (p(PAddrBits) == hastiAddrBits) + + // AHB does not permit bursts to cross a 1KB boundary + require (tlDataBits * tlDataBeats <= 1024*8) + // tlDataBytes must be a power of 2 + require (1 << log2Ceil(tlDataBytes) == tlDataBytes) + + // Create the sub-blocks + val fsm = Module(new AHBTileLinkIn(supportAtomics)) + val bus = Module(new AHBBusMaster(supportAtomics)) + val pad = Module(new Queue(new Grant, 4)) + + fsm.io.acquire <> Queue(io.tl.acquire, 2) // Pipe is also acceptable + bus.io.request <> fsm.io.request + io.ahb <> bus.io.ahb + io.tl.grant <> pad.io.deq + + // The pad is needed to absorb AHB progress while !grant.ready + // We are only 'ready' if the pad has at least 3 cycles of space + bus.io.grant.ready := pad.io.count <= UInt(1) + pad.io.enq.bits := bus.io.grant.bits + pad.io.enq.valid := bus.io.grant.valid +} diff --git a/uncore/src/main/scala/converters/Nasti.scala b/uncore/src/main/scala/converters/Nasti.scala new file mode 100644 index 00000000..fa090083 --- /dev/null +++ b/uncore/src/main/scala/converters/Nasti.scala @@ -0,0 +1,399 @@ +package uncore.converters + +import Chisel._ +import junctions._ +import uncore.tilelink._ +import uncore.constants._ +import cde.Parameters +import scala.math.min + +class IdMapper(val inIdBits: Int, val outIdBits: Int, + val forceMapping: Boolean = false) 
+ (implicit val p: Parameters) extends Module { + + val io = new Bundle { + val req = new Bundle { + val valid = Bool(INPUT) + val ready = Bool(OUTPUT) + val in_id = UInt(INPUT, inIdBits) + val out_id = UInt(OUTPUT, outIdBits) + } + val resp = new Bundle { + val valid = Bool(INPUT) + val matches = Bool(OUTPUT) + val out_id = UInt(INPUT, outIdBits) + val in_id = UInt(OUTPUT, inIdBits) + } + } + val maxInXacts = 1 << inIdBits + + if (inIdBits <= outIdBits && !forceMapping) { + io.req.ready := Bool(true) + io.req.out_id := io.req.in_id + io.resp.matches := Bool(true) + io.resp.in_id := io.resp.out_id + } else { + val nInXacts = 1 << inIdBits + // No point in allowing more out xacts than in xacts + val nOutXacts = min(1 << outIdBits, nInXacts) + + val out_id_free = Reg(init = Vec.fill(nOutXacts){Bool(true)}) + val in_id_free = Reg(init = Vec.fill(nInXacts){Bool(true)}) + val next_out_id = PriorityEncoder(out_id_free) + val id_mapping = Reg(Vec(nOutXacts, UInt(0, inIdBits))) + + val req_fire = io.req.valid && io.req.ready + when (req_fire) { + out_id_free(io.req.out_id) := Bool(false) + in_id_free(io.req.in_id) := Bool(false) + id_mapping(io.req.out_id) := io.req.in_id + } + when (io.resp.valid) { + out_id_free(io.resp.out_id) := Bool(true) + in_id_free(io.resp.in_id) := Bool(true) + } + + io.req.ready := out_id_free.reduce(_ || _) && in_id_free(io.req.in_id) + io.req.out_id := next_out_id + + io.resp.in_id := id_mapping(io.resp.out_id) + io.resp.matches := !out_id_free(io.resp.out_id) + } +} + +class NastiIOTileLinkIOConverterInfo(implicit p: Parameters) extends TLBundle()(p) { + val addr_beat = UInt(width = tlBeatAddrBits) + val subblock = Bool() +} + +class NastiIOTileLinkIOConverter(implicit p: Parameters) extends TLModule()(p) + with HasNastiParameters { + val io = new Bundle { + val tl = new ClientUncachedTileLinkIO().flip + val nasti = new NastiIO + } + + private def opSizeToXSize(ops: UInt) = MuxLookup(ops, UInt("b111"), Seq( + MT_B -> UInt(0), + MT_BU -> UInt(0), + MT_H -> UInt(1), + MT_HU -> UInt(1), + MT_W -> UInt(2), + MT_WU -> UInt(2), + MT_D -> UInt(3), + MT_Q -> UInt(log2Up(tlDataBytes)))) + + val dataBits = tlDataBits*tlDataBeats + require(tlDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree") // TODO: remove this restriction + require(tlDataBeats < (1 << nastiXLenBits), "Can't have that many beats") + + val has_data = io.tl.acquire.bits.hasData() + + val is_subblock = io.tl.acquire.bits.isSubBlockType() + val is_multibeat = io.tl.acquire.bits.hasMultibeatData() + val (tl_cnt_out, tl_wrap_out) = Counter( + io.tl.acquire.fire() && is_multibeat, tlDataBeats) + + val get_valid = io.tl.acquire.valid && !has_data + val put_valid = io.tl.acquire.valid && has_data + + // Reorder queue saves extra information needed to send correct + // grant back to TL client + val roqIdBits = min(tlClientXactIdBits, nastiXIdBits) + val roq = Module(new ReorderQueue( + new NastiIOTileLinkIOConverterInfo, roqIdBits)) + + val get_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits)) + val put_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits)) + + val get_id_ready = get_id_mapper.io.req.ready + val put_id_mask = is_subblock || io.tl.acquire.bits.addr_beat === UInt(0) + val put_id_ready = put_id_mapper.io.req.ready || !put_id_mask + + // For Get/GetBlock, make sure Reorder queue can accept new entry + val get_helper = DecoupledHelper( + get_valid, + roq.io.enq.ready, + io.nasti.ar.ready, + get_id_ready) + + val w_inflight = Reg(init = Bool(false)) + val w_id 
= Reg(init = UInt(0, nastiXIdBits)) + + // For Put/PutBlock, make sure aw and w channel are both ready before + // we send the first beat + val aw_ready = w_inflight || io.nasti.aw.ready + val put_helper = DecoupledHelper( + put_valid, + aw_ready, + io.nasti.w.ready, + put_id_ready) + + val (nasti_cnt_out, nasti_wrap_out) = Counter( + io.nasti.r.fire() && !roq.io.deq.data.subblock, tlDataBeats) + + roq.io.enq.valid := get_helper.fire(roq.io.enq.ready) + roq.io.enq.bits.tag := io.nasti.ar.bits.id + roq.io.enq.bits.data.addr_beat := io.tl.acquire.bits.addr_beat + roq.io.enq.bits.data.subblock := is_subblock + roq.io.deq.valid := io.nasti.r.fire() && (nasti_wrap_out || roq.io.deq.data.subblock) + roq.io.deq.tag := io.nasti.r.bits.id + + get_id_mapper.io.req.valid := get_helper.fire(get_id_ready) + get_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id + get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last + get_id_mapper.io.resp.out_id := io.nasti.r.bits.id + + put_id_mapper.io.req.valid := put_helper.fire(put_id_ready, put_id_mask) + put_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id + put_id_mapper.io.resp.valid := io.nasti.b.fire() + put_id_mapper.io.resp.out_id := io.nasti.b.bits.id + + // Decompose outgoing TL Acquires into Nasti address and data channels + io.nasti.ar.valid := get_helper.fire(io.nasti.ar.ready) + io.nasti.ar.bits := NastiReadAddressChannel( + id = get_id_mapper.io.req.out_id, + addr = io.tl.acquire.bits.full_addr(), + size = Mux(is_subblock, + opSizeToXSize(io.tl.acquire.bits.op_size()), + UInt(log2Ceil(tlDataBytes))), + len = Mux(is_subblock, UInt(0), UInt(tlDataBeats - 1))) + + def mask_helper(all_inside_0: Seq[Bool], defsize: Int): (Seq[Bool], UInt, UInt) = { + val len = all_inside_0.size + if (len == 1) { + (Seq(Bool(true)), UInt(0), UInt(defsize)) + } else { + val sub_inside_0 = Seq.tabulate (len/2) { i => all_inside_0(2*i) && all_inside_0(2*i+1) } + val (sub_outside_0, sub_offset, sub_size) = mask_helper(sub_inside_0, defsize+1) + val all_outside_0 = Seq.tabulate (len) { i => sub_outside_0(i/2) && all_inside_0(i^1) } + val odd_outside_0 = Seq.tabulate (len/2) { i => all_outside_0(2*i+1) } + val odd_outside = odd_outside_0.reduce (_ || _) + val all_outside = all_outside_0.reduce (_ || _) + val offset = Cat(sub_offset, odd_outside.toBits) + val size = Mux(all_outside, UInt(defsize), sub_size) + (all_outside_0, offset, size) + } + } + + val all_inside_0 = (~io.tl.acquire.bits.wmask()).toBools + val (_, put_offset, put_size) = mask_helper(all_inside_0, 0) + + io.nasti.aw.valid := put_helper.fire(aw_ready, !w_inflight) + io.nasti.aw.bits := NastiWriteAddressChannel( + id = put_id_mapper.io.req.out_id, + addr = io.tl.acquire.bits.full_addr() | + Mux(is_multibeat, UInt(0), put_offset), + size = Mux(is_multibeat, UInt(log2Ceil(tlDataBytes)), put_size), + len = Mux(is_multibeat, UInt(tlDataBeats - 1), UInt(0))) + + io.nasti.w.valid := put_helper.fire(io.nasti.w.ready) + io.nasti.w.bits := NastiWriteDataChannel( + id = w_id, + data = io.tl.acquire.bits.data, + strb = Some(io.tl.acquire.bits.wmask()), + last = Mux(w_inflight, + tl_cnt_out === UInt(tlDataBeats - 1), !is_multibeat)) + + io.tl.acquire.ready := Mux(has_data, + put_helper.fire(put_valid), + get_helper.fire(get_valid)) + + when (!w_inflight && io.tl.acquire.fire() && is_multibeat) { + w_inflight := Bool(true) + w_id := put_id_mapper.io.req.out_id + } + + when (w_inflight) { + when (tl_wrap_out) { w_inflight := Bool(false) } + } + + // Aggregate incoming NASTI responses 
into TL Grants
+ val (tl_cnt_in, tl_wrap_in) = Counter(
+ io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
+ val gnt_arb = Module(new LockingArbiter(new GrantToDst, 2,
+ tlDataBeats, Some((gnt: GrantToDst) => gnt.hasMultibeatData())))
+ io.tl.grant <> gnt_arb.io.out
+
+ gnt_arb.io.in(0).valid := io.nasti.r.valid
+ io.nasti.r.ready := gnt_arb.io.in(0).ready
+ gnt_arb.io.in(0).bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = Mux(roq.io.deq.data.subblock,
+ Grant.getDataBeatType, Grant.getDataBlockType),
+ client_xact_id = get_id_mapper.io.resp.in_id,
+ manager_xact_id = UInt(0),
+ addr_beat = Mux(roq.io.deq.data.subblock, roq.io.deq.data.addr_beat, tl_cnt_in),
+ data = io.nasti.r.bits.data)
+
+ assert(!roq.io.deq.valid || roq.io.deq.matches,
+ "TL -> NASTI converter ReorderQueue: NASTI tag error")
+ assert(!gnt_arb.io.in(0).valid || get_id_mapper.io.resp.matches,
+ "TL -> NASTI ID Mapper: NASTI tag error")
+
+ gnt_arb.io.in(1).valid := io.nasti.b.valid
+ io.nasti.b.ready := gnt_arb.io.in(1).ready
+ gnt_arb.io.in(1).bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.putAckType,
+ client_xact_id = put_id_mapper.io.resp.in_id,
+ manager_xact_id = UInt(0),
+ addr_beat = UInt(0),
+ data = Bits(0))
+ assert(!gnt_arb.io.in(1).valid || put_id_mapper.io.resp.matches, "NASTI tag error")
+
+ assert(!io.nasti.r.valid || io.nasti.r.bits.resp === UInt(0), "NASTI read error")
+ assert(!io.nasti.b.valid || io.nasti.b.bits.resp === UInt(0), "NASTI write error")
+}
+
+class TileLinkIONastiIOConverter(implicit p: Parameters) extends TLModule()(p)
+ with HasNastiParameters {
+ val io = new Bundle {
+ val nasti = (new NastiIO).flip
+ val tl = new ClientUncachedTileLinkIO
+ }
+
+ val (s_idle :: s_put :: Nil) = Enum(Bits(), 2)
+ val state = Reg(init = s_idle)
+
+ private val blockOffset = tlByteAddrBits + tlBeatAddrBits
+
+ val aw_req = Reg(new NastiWriteAddressChannel)
+ val w_tl_id = Reg(io.tl.acquire.bits.client_xact_id)
+
+ def is_singlebeat(chan: NastiAddressChannel): Bool =
+ chan.len === UInt(0)
+
+ def is_multibeat(chan: NastiAddressChannel): Bool =
+ chan.len === UInt(tlDataBeats - 1) && chan.size === UInt(log2Up(tlDataBytes))
+
+ def nasti_addr_block(chan: NastiAddressChannel): UInt =
+ chan.addr(nastiXAddrBits - 1, blockOffset)
+
+ def nasti_addr_beat(chan: NastiAddressChannel): UInt =
+ chan.addr(blockOffset - 1, tlByteAddrBits)
+
+ def nasti_addr_byte(chan: NastiAddressChannel): UInt =
+ chan.addr(tlByteAddrBits - 1, 0)
+
+ def nasti_operand_size(chan: NastiAddressChannel): UInt =
+ MuxLookup(chan.size, MT_Q, Seq(
+ UInt(0) -> MT_BU,
+ UInt(1) -> MT_HU,
+ UInt(2) -> MT_WU,
+ UInt(3) -> MT_D))
+
+ def size_mask(size: UInt): UInt =
+ (UInt(1) << (UInt(1) << size)) - UInt(1)
+
+ def nasti_wmask(aw: NastiWriteAddressChannel, w: NastiWriteDataChannel): UInt = {
+ val base = w.strb & size_mask(aw.size)
+ val addr_byte = nasti_addr_byte(aw)
+ w.strb & (size_mask(aw.size) << addr_byte)
+ }
+
+ def tl_last(gnt: GrantMetadata): Bool =
+ !gnt.hasMultibeatData() || gnt.addr_beat === UInt(tlDataBeats - 1)
+
+ def tl_b_grant(gnt: GrantMetadata): Bool =
+ gnt.g_type === Grant.putAckType
+
+ assert(!io.nasti.ar.valid ||
+ is_singlebeat(io.nasti.ar.bits) || is_multibeat(io.nasti.ar.bits),
+ "NASTI read transaction cannot convert to TileLink")
+
+ assert(!io.nasti.aw.valid ||
+ is_singlebeat(io.nasti.aw.bits) || is_multibeat(io.nasti.aw.bits),
+ "NASTI write transaction cannot convert to TileLink")
+
+ val put_count = Reg(init = UInt(0, tlBeatAddrBits))
+ val get_id_mapper =
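+ // Note on the two mappers below: both run with forceMapping = true, so every
+ // NASTI id is renamed to a freshly allocated TileLink client_xact_id (see
+ // IdMapper above), and a request whose id is still in flight stalls until
+ // the matching response frees it again.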
Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true)) + val put_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true)) + + when (io.nasti.aw.fire()) { + aw_req := io.nasti.aw.bits + w_tl_id := put_id_mapper.io.req.out_id + state := s_put + } + + when (io.nasti.w.fire()) { + put_count := put_count + UInt(1) + when (io.nasti.w.bits.last) { + put_count := UInt(0) + state := s_idle + } + } + + val get_acquire = Mux(is_multibeat(io.nasti.ar.bits), + GetBlock( + client_xact_id = get_id_mapper.io.req.out_id, + addr_block = nasti_addr_block(io.nasti.ar.bits)), + Get( + client_xact_id = get_id_mapper.io.req.out_id, + addr_block = nasti_addr_block(io.nasti.ar.bits), + addr_beat = nasti_addr_beat(io.nasti.ar.bits), + addr_byte = nasti_addr_byte(io.nasti.ar.bits), + operand_size = nasti_operand_size(io.nasti.ar.bits), + alloc = Bool(false))) + + val put_acquire = Mux(is_multibeat(aw_req), + PutBlock( + client_xact_id = w_tl_id, + addr_block = nasti_addr_block(aw_req), + addr_beat = put_count, + data = io.nasti.w.bits.data, + wmask = Some(io.nasti.w.bits.strb)), + Put( + client_xact_id = w_tl_id, + addr_block = nasti_addr_block(aw_req), + addr_beat = nasti_addr_beat(aw_req), + data = io.nasti.w.bits.data, + wmask = Some(nasti_wmask(aw_req, io.nasti.w.bits)))) + + val get_helper = DecoupledHelper( + io.nasti.ar.valid, + get_id_mapper.io.req.ready, + io.tl.acquire.ready) + + get_id_mapper.io.req.valid := get_helper.fire( + get_id_mapper.io.req.ready, state === s_idle) + get_id_mapper.io.req.in_id := io.nasti.ar.bits.id + get_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id + get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last + + val aw_ok = (state === s_idle && !io.nasti.ar.valid) + + put_id_mapper.io.req.valid := aw_ok && io.nasti.aw.valid + put_id_mapper.io.req.in_id := io.nasti.aw.bits.id + put_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id + put_id_mapper.io.resp.valid := io.nasti.b.fire() + + io.tl.acquire.bits := Mux(state === s_put, put_acquire, get_acquire) + io.tl.acquire.valid := get_helper.fire(io.tl.acquire.ready, state === s_idle) || + (state === s_put && io.nasti.w.valid) + + io.nasti.ar.ready := get_helper.fire(io.nasti.ar.valid, state === s_idle) + io.nasti.aw.ready := aw_ok && put_id_mapper.io.req.ready + io.nasti.w.ready := (state === s_put && io.tl.acquire.ready) + + val nXacts = tlMaxClientXacts * tlMaxClientsPerPort + + io.nasti.b.valid := io.tl.grant.valid && tl_b_grant(io.tl.grant.bits) + io.nasti.b.bits := NastiWriteResponseChannel( + id = put_id_mapper.io.resp.in_id) + + assert(!io.nasti.b.valid || put_id_mapper.io.resp.matches, + "Put ID does not match") + + io.nasti.r.valid := io.tl.grant.valid && !tl_b_grant(io.tl.grant.bits) + io.nasti.r.bits := NastiReadDataChannel( + id = get_id_mapper.io.resp.in_id, + data = io.tl.grant.bits.data, + last = tl_last(io.tl.grant.bits)) + + assert(!io.nasti.r.valid || get_id_mapper.io.resp.matches, + "Get ID does not match") + + io.tl.grant.ready := Mux(tl_b_grant(io.tl.grant.bits), + io.nasti.b.ready, io.nasti.r.ready) +} diff --git a/uncore/src/main/scala/converters/Smi.scala b/uncore/src/main/scala/converters/Smi.scala new file mode 100644 index 00000000..6ec47950 --- /dev/null +++ b/uncore/src/main/scala/converters/Smi.scala @@ -0,0 +1,32 @@ +// See LICENSE for details + +package uncore.converters + +import Chisel._ +import junctions._ +import uncore.tilelink._ +import cde.Parameters + +/** Convert TileLink protocol to Smi protocol */ +class SmiIOTileLinkIOConverter(val 
dataWidth: Int, val addrWidth: Int) + (implicit p: Parameters) extends Module { + val io = new Bundle { + val tl = (new ClientUncachedTileLinkIO).flip + val smi = new SmiIO(dataWidth, addrWidth) + } + + def decoupledNastiConnect(outer: NastiIO, inner: NastiIO) { + outer.ar <> Queue(inner.ar) + outer.aw <> Queue(inner.aw) + outer.w <> Queue(inner.w) + inner.r <> Queue(outer.r) + inner.b <> Queue(outer.b) + } + + val tl2nasti = Module(new NastiIOTileLinkIOConverter()) + val nasti2smi = Module(new SmiIONastiIOConverter(dataWidth, addrWidth)) + + tl2nasti.io.tl <> io.tl + decoupledNastiConnect(nasti2smi.io.nasti, tl2nasti.io.nasti) + io.smi <> nasti2smi.io.smi +} diff --git a/uncore/src/main/scala/converters/Tilelink.scala b/uncore/src/main/scala/converters/Tilelink.scala new file mode 100644 index 00000000..6cb15f91 --- /dev/null +++ b/uncore/src/main/scala/converters/Tilelink.scala @@ -0,0 +1,691 @@ +package uncore.converters + +import Chisel._ +import junctions._ +import uncore.tilelink._ +import uncore.util._ +import uncore.constants._ +import cde.Parameters + +/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */ +object TileLinkIOWrapper { + def apply(tl: ClientUncachedTileLinkIO)(implicit p: Parameters): ClientTileLinkIO = { + val conv = Module(new ClientTileLinkIOWrapper) + conv.io.in <> tl + conv.io.out + } + def apply(tl: UncachedTileLinkIO)(implicit p: Parameters): TileLinkIO = { + val conv = Module(new TileLinkIOWrapper) + conv.io.in <> tl + conv.io.out + } + def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl + def apply(tl: TileLinkIO): TileLinkIO = tl +} + +class TileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) { + val io = new Bundle { + val in = new UncachedTileLinkIO().flip + val out = new TileLinkIO + } + io.out.acquire <> io.in.acquire + io.in.grant <> io.out.grant + io.out.finish <> io.in.finish + io.out.probe.ready := Bool(true) + io.out.release.valid := Bool(false) +} + +class ClientTileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) { + val io = new Bundle { + val in = new ClientUncachedTileLinkIO().flip + val out = new ClientTileLinkIO + } + io.out.acquire <> io.in.acquire + io.in.grant <> io.out.grant + io.out.probe.ready := Bool(true) + io.out.release.valid := Bool(false) +} + +class ClientTileLinkIOUnwrapper(implicit p: Parameters) extends TLModule()(p) { + val io = new Bundle { + val in = new ClientTileLinkIO().flip + val out = new ClientUncachedTileLinkIO + } + + val acqArb = Module(new LockingRRArbiter(new Acquire, 2, tlDataBeats, + Some((acq: Acquire) => acq.hasMultibeatData()))) + + val acqRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits)) + val relRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits)) + + val iacq = io.in.acquire.bits + val irel = io.in.release.bits + val ognt = io.out.grant.bits + + val acq_roq_enq = iacq.first() + val rel_roq_enq = irel.first() + + val acq_roq_ready = !acq_roq_enq || acqRoq.io.enq.ready + val rel_roq_ready = !rel_roq_enq || relRoq.io.enq.ready + + val acq_helper = DecoupledHelper( + io.in.acquire.valid, + acq_roq_ready, + acqArb.io.in(0).ready) + + val rel_helper = DecoupledHelper( + io.in.release.valid, + rel_roq_ready, + acqArb.io.in(1).ready) + + acqRoq.io.enq.valid := acq_helper.fire(acq_roq_ready, acq_roq_enq) + acqRoq.io.enq.bits.data := iacq.isBuiltInType() + acqRoq.io.enq.bits.tag := iacq.client_xact_id + + acqArb.io.in(0).valid := acq_helper.fire(acqArb.io.in(0).ready) + acqArb.io.in(0).bits := Acquire( + is_builtin_type = 
Bool(true),
+ a_type = Mux(iacq.isBuiltInType(),
+ iacq.a_type, Acquire.getBlockType),
+ client_xact_id = iacq.client_xact_id,
+ addr_block = iacq.addr_block,
+ addr_beat = iacq.addr_beat,
+ data = iacq.data,
+ union = Mux(iacq.isBuiltInType(),
+ iacq.union, Cat(MT_Q, M_XRD, Bool(true))))
+ io.in.acquire.ready := acq_helper.fire(io.in.acquire.valid)
+
+ relRoq.io.enq.valid := rel_helper.fire(rel_roq_ready, rel_roq_enq)
+ relRoq.io.enq.bits.data := irel.isVoluntary()
+ relRoq.io.enq.bits.tag := irel.client_xact_id
+
+ acqArb.io.in(1).valid := rel_helper.fire(acqArb.io.in(1).ready)
+ acqArb.io.in(1).bits := PutBlock(
+ client_xact_id = irel.client_xact_id,
+ addr_block = irel.addr_block,
+ addr_beat = irel.addr_beat,
+ data = irel.data)
+ io.in.release.ready := rel_helper.fire(io.in.release.valid)
+
+ io.out.acquire <> acqArb.io.out
+
+ val grant_deq_roq = io.out.grant.fire() && ognt.last()
+
+ acqRoq.io.deq.valid := acqRoq.io.deq.matches && grant_deq_roq
+ acqRoq.io.deq.tag := ognt.client_xact_id
+
+ relRoq.io.deq.valid := !acqRoq.io.deq.matches && grant_deq_roq
+ relRoq.io.deq.tag := ognt.client_xact_id
+
+ assert(!grant_deq_roq || acqRoq.io.deq.matches || relRoq.io.deq.matches,
+ "TileLink Unwrapper: client_xact_id mismatch")
+
+ val gnt_builtin = acqRoq.io.deq.data
+ val gnt_voluntary = relRoq.io.deq.data
+
+ val acq_grant = Grant(
+ is_builtin_type = gnt_builtin,
+ g_type = Mux(gnt_builtin, ognt.g_type, tlCoh.getExclusiveGrantType),
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat,
+ data = ognt.data)
+
+ assert(!io.in.release.valid || io.in.release.bits.isVoluntary(),
+ "Unwrapper can only process voluntary releases.")
+ val rel_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.voluntaryAckType, // We should only ever be working with voluntary releases
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat,
+ data = ognt.data)
+
+ io.in.grant.valid := io.out.grant.valid
+ io.in.grant.bits := Mux(acqRoq.io.deq.matches, acq_grant, rel_grant)
+ io.out.grant.ready := io.in.grant.ready
+
+ io.in.probe.valid := Bool(false)
+}
+
+object TileLinkWidthAdapter {
+ def apply(in: ClientUncachedTileLinkIO, outerId: String)(implicit p: Parameters) = {
+ val outerDataBits = p(TLKey(outerId)).dataBitsPerBeat
+ if (outerDataBits > in.tlDataBits) {
+ val widener = Module(new TileLinkIOWidener(in.p(TLId), outerId))
+ widener.io.in <> in
+ widener.io.out
+ } else if (outerDataBits < in.tlDataBits) {
+ val narrower = Module(new TileLinkIONarrower(in.p(TLId), outerId))
+ narrower.io.in <> in
+ narrower.io.out
+ } else { in }
+ }
+ def apply(out: ClientUncachedTileLinkIO, in: ClientUncachedTileLinkIO)(implicit p: Parameters): Unit = {
+ require(out.tlDataBits * out.tlDataBeats == in.tlDataBits * in.tlDataBeats)
+ out <> apply(in, out.p(TLId))
+ }
+}
+
+class TileLinkIOWidener(innerTLId: String, outerTLId: String)
+ (implicit p: Parameters) extends TLModule()(p) {
+
+ val paddrBits = p(PAddrBits)
+ val innerParams = p(TLKey(innerTLId))
+ val outerParams = p(TLKey(outerTLId))
+ val innerDataBeats = innerParams.dataBeats
+ val innerDataBits = innerParams.dataBitsPerBeat
+ val innerWriteMaskBits = innerParams.writeMaskBits
+ val innerByteAddrBits = log2Up(innerWriteMaskBits)
+ val innerMaxXacts = innerParams.maxClientXacts * innerParams.maxClientsPerPort
+ val innerXactIdBits = log2Up(innerMaxXacts)
+ val outerDataBeats = outerParams.dataBeats
+ val outerDataBits =
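+ // (A hypothetical sizing example: an inner port of 8 beats x 64 bits feeding
+ // an outer port of 2 beats x 256 bits satisfies the requires below and gives
+ // factor = innerDataBeats / outerDataBeats = 4, i.e. four inner beats are
+ // packed into each outer beat.)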
outerParams.dataBitsPerBeat + val outerWriteMaskBits = outerParams.writeMaskBits + val outerByteAddrBits = log2Up(outerWriteMaskBits) + val outerBeatAddrBits = log2Up(outerDataBeats) + val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits + val outerMaxClients = outerParams.maxClientsPerPort + val outerClientIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients) + val outerManagerIdBits = log2Up(outerParams.maxManagerXacts) + val outerBlockAddrBits = paddrBits - outerBlockOffset + + require(outerDataBeats <= innerDataBeats) + require(outerDataBits >= innerDataBits) + require(outerDataBits % innerDataBits == 0) + require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats) + + val factor = innerDataBeats / outerDataBeats + + val io = new Bundle { + val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip + val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId})) + } + + val iacq = io.in.acquire.bits + val oacq = io.out.acquire.bits + val ognt = io.out.grant.bits + val ignt = io.in.grant.bits + + val shrink = iacq.a_type === Acquire.putBlockType + val stretch = ognt.g_type === Grant.getDataBlockType + val smallget = iacq.a_type === Acquire.getType + val smallput = iacq.a_type === Acquire.putType + val smallgnt = ognt.g_type === Grant.getDataBeatType + + val sending_put = Reg(init = Bool(false)) + val collecting = Reg(init = Bool(false)) + val put_block = Reg(UInt(width = outerBlockAddrBits)) + val put_id = Reg(UInt(width = outerClientIdBits)) + val put_data = Reg(Vec(factor, UInt(width = innerDataBits))) + val put_wmask = Reg(Vec(factor, UInt(width = innerWriteMaskBits))) + val put_allocate = Reg(Bool()) + val (put_beat, put_done) = Counter(io.out.acquire.fire() && oacq.hasMultibeatData(), outerDataBeats) + val (recv_idx, recv_done) = Counter(io.in.acquire.fire() && iacq.hasMultibeatData(), factor) + + val in_addr = iacq.full_addr() + val out_addr_block = in_addr(paddrBits - 1, outerBlockOffset) + val out_addr_beat = in_addr(outerBlockOffset - 1, outerByteAddrBits) + val out_addr_byte = in_addr(outerByteAddrBits - 1, 0) + + val switch_addr = in_addr(outerByteAddrBits - 1, innerByteAddrBits) + val smallget_switch = Reg(Vec(innerMaxXacts, switch_addr)) + + def align_data(addr: UInt, data: UInt): UInt = + data << Cat(addr, UInt(0, log2Up(innerDataBits))) + + def align_wmask(addr: UInt, wmask: UInt): UInt = + wmask << Cat(addr, UInt(0, log2Up(innerWriteMaskBits))) + + val outerConfig = p.alterPartial({ case TLId => outerTLId }) + + val get_acquire = Get( + client_xact_id = iacq.client_xact_id, + addr_block = out_addr_block, + addr_beat = out_addr_beat, + addr_byte = out_addr_byte, + operand_size = iacq.op_size(), + alloc = iacq.allocate())(outerConfig) + + val get_block_acquire = GetBlock( + client_xact_id = iacq.client_xact_id, + addr_block = out_addr_block, + alloc = iacq.allocate())(outerConfig) + + val put_acquire = Put( + client_xact_id = iacq.client_xact_id, + addr_block = out_addr_block, + addr_beat = out_addr_beat, + data = align_data(switch_addr, iacq.data), + wmask = Some(align_wmask(switch_addr, iacq.wmask())), + alloc = iacq.allocate())(outerConfig) + + val put_block_acquire = PutBlock( + client_xact_id = put_id, + addr_block = put_block, + addr_beat = put_beat, + data = put_data.toBits, + wmask = Some(put_wmask.toBits))(outerConfig) + + io.out.acquire.valid := sending_put || (!shrink && io.in.acquire.valid) + io.out.acquire.bits := MuxCase(get_block_acquire, Seq( + sending_put -> put_block_acquire, + 
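// MuxCase picks the first entry whose condition holds, so an in-progress
+ // PutBlock burst (sending_put) takes priority over decoding a new small
+ // request, and the full-block Get is the fall-through default.
+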
smallget -> get_acquire, + smallput -> put_acquire)) + io.in.acquire.ready := !sending_put && (shrink || io.out.acquire.ready) + + when (io.in.acquire.fire() && shrink) { + when (!collecting) { + put_block := out_addr_block + put_id := iacq.client_xact_id + put_allocate := iacq.allocate() + collecting := Bool(true) + } + put_data(recv_idx) := iacq.data + put_wmask(recv_idx) := iacq.wmask() + } + + when (io.in.acquire.fire() && smallget) { + smallget_switch(iacq.client_xact_id) := switch_addr + } + + when (recv_done) { sending_put := Bool(true) } + when (sending_put && io.out.acquire.ready) { sending_put := Bool(false) } + when (put_done) { collecting := Bool(false) } + + val returning_data = Reg(init = Bool(false)) + val (send_idx, send_done) = Counter( + io.in.grant.ready && returning_data, factor) + + val gnt_beat = Reg(UInt(width = outerBeatAddrBits)) + val gnt_client_id = Reg(UInt(width = outerClientIdBits)) + val gnt_manager_id = Reg(UInt(width = outerManagerIdBits)) + val gnt_data = Reg(UInt(width = outerDataBits)) + + when (io.out.grant.fire() && stretch) { + gnt_data := ognt.data + gnt_client_id := ognt.client_xact_id + gnt_manager_id := ognt.manager_xact_id + gnt_beat := ognt.addr_beat + returning_data := Bool(true) + } + + when (send_done) { returning_data := Bool(false) } + + def select_data(data: UInt, sel: UInt): UInt = + data >> (sel << log2Up(innerDataBits)) + + val gnt_switch = smallget_switch(ognt.client_xact_id) + + val innerConfig = p.alterPartial({ case TLId => innerTLId }) + + val get_block_grant = Grant( + is_builtin_type = Bool(true), + g_type = Grant.getDataBlockType, + client_xact_id = gnt_client_id, + manager_xact_id = gnt_manager_id, + addr_beat = Cat(gnt_beat, send_idx), + data = select_data(gnt_data, send_idx))(innerConfig) + + val get_grant = Grant( + is_builtin_type = Bool(true), + g_type = Grant.getDataBeatType, + client_xact_id = ognt.client_xact_id, + manager_xact_id = ognt.manager_xact_id, + addr_beat = Cat(ognt.addr_beat, gnt_switch), + data = select_data(ognt.data, gnt_switch))(innerConfig) + + val default_grant = Grant( + is_builtin_type = Bool(true), + g_type = ognt.g_type, + client_xact_id = ognt.client_xact_id, + manager_xact_id = ognt.manager_xact_id, + addr_beat = ognt.addr_beat, + data = ognt.data)(innerConfig) + + io.in.grant.valid := returning_data || (!stretch && io.out.grant.valid) + io.in.grant.bits := MuxCase(default_grant, Seq( + returning_data -> get_block_grant, + smallgnt -> get_grant)) + io.out.grant.ready := !returning_data && (stretch || io.in.grant.ready) +} + +class TileLinkIONarrower(innerTLId: String, outerTLId: String) + (implicit p: Parameters) extends TLModule()(p) { + + val innerParams = p(TLKey(innerTLId)) + val outerParams = p(TLKey(outerTLId)) + val innerDataBeats = innerParams.dataBeats + val innerDataBits = innerParams.dataBitsPerBeat + val innerWriteMaskBits = innerParams.writeMaskBits + val innerByteAddrBits = log2Up(innerWriteMaskBits) + val outerDataBeats = outerParams.dataBeats + val outerDataBits = outerParams.dataBitsPerBeat + val outerWriteMaskBits = outerParams.writeMaskBits + val outerByteAddrBits = log2Up(outerWriteMaskBits) + val outerBeatAddrBits = log2Up(outerDataBeats) + val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits + val outerMaxClients = outerParams.maxClientsPerPort + val outerIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients) + + require(outerDataBeats > innerDataBeats) + require(outerDataBeats % innerDataBeats == 0) + require(outerDataBits < innerDataBits) + 
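// For instance, narrowing an inner port of 4 beats x 128 bits onto an outer
+ // port of 16 beats x 32 bits passes these checks with factor = 16 / 4 = 4:
+ // each inner beat is sliced into four narrower outer beats.
+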
require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats) + + val factor = outerDataBeats / innerDataBeats + + val io = new Bundle { + val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip + val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId})) + } + + val iacq = io.in.acquire.bits + val ognt = io.out.grant.bits + + val stretch = iacq.a_type === Acquire.putBlockType + val shrink = iacq.a_type === Acquire.getBlockType + val smallput = iacq.a_type === Acquire.putType + val smallget = iacq.a_type === Acquire.getType + + val acq_data_buffer = Reg(UInt(width = innerDataBits)) + val acq_wmask_buffer = Reg(UInt(width = innerWriteMaskBits)) + val acq_client_id = Reg(iacq.client_xact_id) + val acq_addr_block = Reg(iacq.addr_block) + val acq_addr_beat = Reg(iacq.addr_beat) + val oacq_ctr = Counter(factor) + + val outer_beat_addr = iacq.full_addr()(outerBlockOffset - 1, outerByteAddrBits) + val outer_byte_addr = iacq.full_addr()(outerByteAddrBits - 1, 0) + + val mask_chunks = Vec.tabulate(factor) { i => + val lsb = i * outerWriteMaskBits + val msb = (i + 1) * outerWriteMaskBits - 1 + iacq.wmask()(msb, lsb) + } + + val data_chunks = Vec.tabulate(factor) { i => + val lsb = i * outerDataBits + val msb = (i + 1) * outerDataBits - 1 + iacq.data(msb, lsb) + } + + val beat_sel = Cat(mask_chunks.map(mask => mask.orR).reverse) + + val smallput_data = Mux1H(beat_sel, data_chunks) + val smallput_wmask = Mux1H(beat_sel, mask_chunks) + val smallput_beat = Cat(iacq.addr_beat, PriorityEncoder(beat_sel)) + + assert(!io.in.acquire.valid || !smallput || PopCount(beat_sel) <= UInt(1), + "Can't perform Put wider than outer width") + + val read_size_ok = MuxLookup(iacq.op_size(), Bool(false), Seq( + MT_B -> Bool(true), + MT_BU -> Bool(true), + MT_H -> Bool(outerDataBits >= 16), + MT_HU -> Bool(outerDataBits >= 16), + MT_W -> Bool(outerDataBits >= 32), + MT_WU -> Bool(outerDataBits >= 32), + MT_D -> Bool(outerDataBits >= 64), + MT_Q -> Bool(false))) + + assert(!io.in.acquire.valid || !smallget || read_size_ok, + "Can't perform Get wider than outer width") + + val outerConfig = p.alterPartial({ case TLId => outerTLId }) + val innerConfig = p.alterPartial({ case TLId => innerTLId }) + + val get_block_acquire = GetBlock( + client_xact_id = iacq.client_xact_id, + addr_block = iacq.addr_block, + alloc = iacq.allocate())(outerConfig) + + val put_block_acquire = PutBlock( + client_xact_id = acq_client_id, + addr_block = acq_addr_block, + addr_beat = if (factor > 1) + Cat(acq_addr_beat, oacq_ctr.value) + else acq_addr_beat, + data = acq_data_buffer(outerDataBits - 1, 0), + wmask = Some(acq_wmask_buffer(outerWriteMaskBits - 1, 0)))(outerConfig) + + val get_acquire = Get( + client_xact_id = iacq.client_xact_id, + addr_block = iacq.addr_block, + addr_beat = outer_beat_addr, + addr_byte = outer_byte_addr, + operand_size = iacq.op_size(), + alloc = iacq.allocate())(outerConfig) + + val put_acquire = Put( + client_xact_id = iacq.client_xact_id, + addr_block = iacq.addr_block, + addr_beat = smallput_beat, + data = smallput_data, + wmask = Some(smallput_wmask))(outerConfig) + + val sending_put = Reg(init = Bool(false)) + + val pass_valid = io.in.acquire.valid && !stretch + + io.out.acquire.bits := MuxCase(Wire(io.out.acquire.bits, init=iacq), Seq( + (sending_put, put_block_acquire), + (shrink, get_block_acquire), + (smallput, put_acquire), + (smallget, get_acquire))) + io.out.acquire.valid := sending_put || pass_valid + io.in.acquire.ready := !sending_put && 
(stretch || io.out.acquire.ready)
+
+ when (io.in.acquire.fire() && stretch) {
+ acq_data_buffer := iacq.data
+ acq_wmask_buffer := iacq.wmask()
+ acq_client_id := iacq.client_xact_id
+ acq_addr_block := iacq.addr_block
+ acq_addr_beat := iacq.addr_beat
+ sending_put := Bool(true)
+ }
+
+ when (sending_put && io.out.acquire.ready) {
+ acq_data_buffer := acq_data_buffer >> outerDataBits
+ acq_wmask_buffer := acq_wmask_buffer >> outerWriteMaskBits
+ when (oacq_ctr.inc()) { sending_put := Bool(false) }
+ }
+
+ val ognt_block = ognt.hasMultibeatData()
+ val gnt_data_buffer = Reg(Vec(factor, UInt(width = outerDataBits)))
+ val gnt_client_id = Reg(ognt.client_xact_id)
+ val gnt_manager_id = Reg(ognt.manager_xact_id)
+
+ val ignt_ctr = Counter(innerDataBeats)
+ val ognt_ctr = Counter(factor)
+ val sending_get = Reg(init = Bool(false))
+
+ val get_block_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.getDataBlockType,
+ client_xact_id = gnt_client_id,
+ manager_xact_id = gnt_manager_id,
+ addr_beat = ignt_ctr.value,
+ data = gnt_data_buffer.toBits)(innerConfig)
+
+ val smallget_grant = ognt.g_type === Grant.getDataBeatType
+
+ val get_grant = Grant(
+ is_builtin_type = Bool(true),
+ g_type = Grant.getDataBeatType,
+ client_xact_id = ognt.client_xact_id,
+ manager_xact_id = ognt.manager_xact_id,
+ addr_beat = ognt.addr_beat >> UInt(log2Up(factor)),
+ data = Fill(factor, ognt.data))(innerConfig)
+
+ io.in.grant.valid := sending_get || (io.out.grant.valid && !ognt_block)
+ io.out.grant.ready := !sending_get && (ognt_block || io.in.grant.ready)
+
+ io.in.grant.bits := MuxCase(Wire(io.in.grant.bits, init=ognt), Seq(
+ sending_get -> get_block_grant,
+ smallget_grant -> get_grant))
+
+ when (io.out.grant.valid && ognt_block && !sending_get) {
+ gnt_data_buffer(ognt_ctr.value) := ognt.data
+ when (ognt_ctr.inc()) {
+ gnt_client_id := ognt.client_xact_id
+ gnt_manager_id := ognt.manager_xact_id
+ sending_get := Bool(true)
+ }
+ }
+
+ when (io.in.grant.ready && sending_get) {
+ ignt_ctr.inc()
+ sending_get := Bool(false)
+ }
+}
+
+class TileLinkFragmenterSource(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = Decoupled(new Acquire).flip
+ val out = Decoupled(new Acquire)
+ val que = Decoupled(UInt(width = tlBeatAddrBits))
+ }
+
+ // Pipeline stage with acquire data; needed to ensure in.bits stays fixed when !in.ready
+ val acq_valid = RegInit(Bool(false))
+ val acq_bits = Reg(new Acquire)
+ // The last beat of the generated acquires to send
+ val acq_last_beat = Reg(UInt(width = tlBeatAddrBits))
+ val acq_last = acq_bits.addr_beat === acq_last_beat
+
+ // Does 'in' hold the first beat?
+ val in_multi_put = io.in.bits.isBuiltInType(Acquire.putBlockType)
+ val in_multi_get = io.in.bits.isBuiltInType(Acquire.getBlockType)
+ val in_first_beat = !in_multi_put || io.in.bits.addr_beat === UInt(0)
+
+ // Move stuff from acq to out whenever out is ready
+ io.out.valid := acq_valid
+ // When can acq accept a request?
+ val acq_ready = !acq_valid || (acq_last && io.out.ready)
+ // Move the first beat from in to acq only when both acq and que are ready
+ io.in.ready := (!in_first_beat || io.que.ready) && acq_ready
+ io.que.valid := (in_first_beat && io.in.valid) && acq_ready
+
+ // in.fire moves data from in to acq and (optionally) que
+ // out.fire moves data from acq to out
+
+ // Desired flow control results:
+ assert (!io.que.fire() || io.in.fire()) // 1. que.fire => in.fire
+ assert (!(io.in.fire() && in_first_beat) || io.que.fire()) // 2.
in.fire && in_first_beat => que.fire + assert (!io.out.fire() || acq_valid) // 3. out.fire => acq_valid + assert (!io.in.fire() || (!acq_valid || (io.out.fire() && acq_last))) // 4. in.fire => !acq_valid || (out.fire && acq_last) + // Proofs: + // 1. que.fire => que.ready && in.valid && acq_ready => in.ready && in.valid + // 2. in.fire && in_first_beat => in.valid && acq_ready && [(!in_first_beat || que.ready) && in_first_beat] => + // in.valid && acq_ready && que.ready && in_first_beat => que.valid && que.ready + // 3. out.fire => out.valid => acq_valid + // 4. in.fire => acq_ready => !acq_valid || (acq_last && out.ready) => + // !acq_valid || (acq_valid && acq_last && out.ready) => !acq_valid || (acq_last && out.fire) + + val multi_size = SInt(-1, width = tlBeatAddrBits).asUInt // TL2: use in.bits.size()/beatBits-1 + val in_sizeMinus1 = Mux(in_multi_get || in_multi_put, multi_size, UInt(0)) + val in_insertSizeMinus1 = Mux(in_multi_get, multi_size, UInt(0)) + + when (io.in.fire()) { + // Theorem 4 makes this safe; we overwrite garbage, or replace the final acq + acq_valid := Bool(true) + acq_bits := io.in.bits + acq_last_beat := io.in.bits.addr_beat + in_insertSizeMinus1 + // Replace this with size truncation in TL2: + acq_bits.a_type := Mux(in_multi_put, Acquire.putType, Mux(in_multi_get, Acquire.getType, io.in.bits.a_type)) + } .elsewhen (io.out.fire()) { + acq_valid := !acq_last // false => !in.valid || (!que.ready && in_first_beat) + acq_bits.addr_beat := acq_bits.addr_beat + UInt(1) + // acq_last && out.fire => acq_last && out.ready && acq_valid => acq_ready + // Suppose in.valid, then !in.fire => !in.ready => !(!in_first_beat || que.ready) => !que.ready && in_first_beat + } + + // Safe by theorem 3 + io.out.bits := acq_bits + // Safe by theorem 1 + io.que.bits := in_sizeMinus1 +} + +class TileLinkFragmenterSink(implicit p: Parameters) extends TLModule()(p) { + val io = new Bundle { + val in = Decoupled(new Grant).flip + val out = Decoupled(new Grant) + val que = Decoupled(UInt(width = tlBeatAddrBits)).flip + } + + val count_valid = RegInit(Bool(false)) + val multi_op = Reg(Bool()) + val count_bits = Reg(UInt(width = tlBeatAddrBits)) + val last = count_bits === UInt(0) + + val in_put = io.in.bits.isBuiltInType(Grant.putAckType) + val in_get = io.in.bits.isBuiltInType(Grant.getDataBeatType) + val deliver = last || in_get + + // Accept the input, discarding the non-final put grant + io.in.ready := count_valid && (io.out.ready || !deliver) + // Output the grant whenever we want delivery + io.out.valid := count_valid && io.in.valid && deliver + // Take a new number whenever we deliver the last beat + io.que.ready := !count_valid || (io.in.valid && io.out.ready && last) + + // Desired flow control results: + assert (!io.out.fire() || (count_valid && io.in.fire())) // 1. out.fire => in.fire && count_valid + assert (!(io.in.fire() && deliver) || io.out.fire()) // 2. in.fire && deliver => out.fire + assert (!(io.out.fire() && last) || io.que.ready) // 3. out.fire && last => que.ready + assert (!io.que.fire() || (!count_valid || io.out.fire())) // 4. que.fire => !count_valid || (out.fire && last) + // Proofs: + // 1. out.fire => out.ready && (count_valid && in.valid && deliver) => (count_valid && out.ready) && in.valid => in.fire + // 2. in.fire && deliver => in.valid && count_valid && [(out.ready || !deliver) && deliver] => + // in.valid && count_valid && deliver && out.ready => out.fire + // 3. 
out.fire && last => out.valid && out.ready && last => in.valid && out.ready && last => que.ready
+ // 4. que.fire => que.valid && (!count_valid || (in.valid && out.ready && last))
+ // => !count_valid || (count_valid && in.valid && out.ready && [last => deliver])
+ // => !count_valid || (out.valid && out.ready && last)
+
+ when (io.que.fire()) {
+ // Theorem 4 makes this safe; we overwrite garbage or last output
+ count_valid := Bool(true)
+ count_bits := io.que.bits
+ multi_op := io.que.bits =/= UInt(0)
+ } .elsewhen (io.in.fire()) {
+ count_valid := !last // false => !que.valid
+ count_bits := count_bits - UInt(1)
+ // Proof: in.fire && [last => deliver] =2=> out.fire && last =3=> que.ready
+ // !que.fire && que.ready => !que.valid
+ }
+
+ // Safe by Theorem 1
+ io.out.bits := io.in.bits
+ io.out.bits.g_type := Mux(multi_op, Mux(in_get, Grant.getDataBlockType, Grant.putAckType), io.in.bits.g_type)
+}
+
+class TileLinkFragmenter(depth: Int = 1)(implicit p: Parameters) extends TLModule()(p) {
+ val io = new Bundle {
+ val in = new ClientUncachedTileLinkIO().flip
+ val out = new ClientUncachedTileLinkIO
+ }
+
+ // TL2:
+ // supportsAcquire = false
+ // modify all outward managers to supportsMultibeat = true
+ // assert: all managers must behaveFIFO (not inspect duplicated id field)
+
+ val source = Module(new TileLinkFragmenterSource)
+ val sink = Module(new TileLinkFragmenterSink)
+ sink.io.que <> Queue(source.io.que, depth)
+
+ source.io.in <> io.in.acquire
+ io.out.acquire <> source.io.out
+ sink.io.in <> io.out.grant
+ io.in.grant <> sink.io.out
+}
+
+object TileLinkFragmenter {
+ // Pass the source/client to fragment
+ def apply(source: ClientUncachedTileLinkIO, depth: Int = 1)(implicit p: Parameters): ClientUncachedTileLinkIO = {
+ val fragmenter = Module(new TileLinkFragmenter(depth))
+ fragmenter.io.in <> source
+ fragmenter.io.out
+ }
+} diff --git a/uncore/src/main/scala/devices/Bram.scala b/uncore/src/main/scala/devices/Bram.scala new file mode 100644 index 00000000..1b8c5194 --- /dev/null +++ b/uncore/src/main/scala/devices/Bram.scala @@ -0,0 +1,160 @@ +package uncore.devices
+
+import Chisel._
+import cde.{Parameters, Field}
+import junctions._
+import uncore.tilelink._
+import uncore.util._
+import HastiConstants._
+
+class BRAMSlave(depth: Int)(implicit val p: Parameters) extends Module
+ with HasTileLinkParameters {
+ val io = new ClientUncachedTileLinkIO().flip
+
+ // For TL2:
+ // supportsAcquire = false
+ // supportsMultibeat = false
+ // supportsHint = false
+ // supportsAtomic = false
+
+ // Timing-wise, we assume the input is coming out of registers
+ // since you probably needed a TileLinkFragmenter in front of us
+
+ // Thus, only one pipeline stage: the grant result
+ val g_valid = RegInit(Bool(false))
+ val g_bits = Reg(new Grant)
+
+ // Just pass the pipeline straight through
+ io.grant.valid := g_valid
+ io.grant.bits := g_bits
+ io.acquire.ready := !g_valid || io.grant.ready
+
+ val acq_get = io.acquire.bits.isBuiltInType(Acquire.getType)
+ val acq_put = io.acquire.bits.isBuiltInType(Acquire.putType)
+ val acq_addr = Cat(io.acquire.bits.addr_block, io.acquire.bits.addr_beat)
+
+ val bram = Mem(depth, Bits(width = tlDataBits))
+
+ val ren = acq_get && io.acquire.fire()
+ val wen = acq_put && io.acquire.fire()
+
+ when (io.grant.fire()) {
+ g_valid := Bool(false)
+ }
+
+ when (io.acquire.fire()) {
+ g_valid := Bool(true)
+ g_bits := Grant(
+ is_builtin_type = Bool(true),
+ g_type = io.acquire.bits.getBuiltInGrantType(),
+ client_xact_id =
io.acquire.bits.client_xact_id, + manager_xact_id = UInt(0), + addr_beat = io.acquire.bits.addr_beat, + data = UInt(0)) + } + + when (wen) { + bram.write(acq_addr, io.acquire.bits.data) + assert(io.acquire.bits.wmask().andR, "BRAMSlave: partial write masks not supported") + } + io.grant.bits.data := RegEnable(bram.read(acq_addr), ren) +} + +class HastiRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) { + val io = new HastiSlaveIO + + val wdata = Vec.tabulate(hastiDataBytes)(i => io.hwdata(8*(i+1)-1,8*i)) + val waddr = Reg(UInt(width = hastiAddrBits)) + val wvalid = Reg(init = Bool(false)) + val wsize = Reg(UInt(width = SZ_HSIZE)) + val ram = SeqMem(depth, Vec(hastiDataBytes, Bits(width = 8))) + + val max_size = log2Ceil(hastiDataBytes) + val wmask_lut = MuxLookup(wsize, SInt(-1, hastiDataBytes).asUInt, + (0 until max_size).map(sz => (UInt(sz) -> UInt((1 << (1 << sz)) - 1)))) + val wmask = (wmask_lut << waddr(max_size - 1, 0))(hastiDataBytes - 1, 0) + + val is_trans = io.hsel && (io.htrans === HTRANS_NONSEQ || io.htrans === HTRANS_SEQ) + val raddr = io.haddr >> UInt(max_size) + val ren = is_trans && !io.hwrite + val bypass = Reg(init = Bool(false)) + + when (is_trans && io.hwrite) { + waddr := io.haddr + wsize := io.hsize + wvalid := Bool(true) + } .otherwise { wvalid := Bool(false) } + + when (ren) { bypass := wvalid && (waddr >> UInt(max_size)) === raddr } + + when (wvalid) { + ram.write(waddr >> UInt(max_size), wdata, wmask.toBools) + } + + val rdata = ram.read(raddr, ren) + io.hrdata := Cat(rdata.zip(wmask.toBools).zip(wdata).map { + case ((rbyte, wsel), wbyte) => Mux(wsel && bypass, wbyte, rbyte) + }.reverse) + + io.hready := Bool(true) + io.hresp := HRESP_OKAY +} + +/** + * This RAM is not meant to be particularly performant. + * It just supports the entire range of uncached TileLink operations in the + * simplest way possible. 
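+ * Writes read the addressed word, merge the new data in under the write
+ * mask (atomics are combined with the old data by an AMOALU first), and
+ * write the result back in the same cycle the acquire is accepted.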
+ */ +class TileLinkTestRAM(depth: Int)(implicit val p: Parameters) extends Module + with HasTileLinkParameters { + val io = new ClientUncachedTileLinkIO().flip + + val ram = Mem(depth, UInt(width = tlDataBits)) + + val responding = Reg(init = Bool(false)) + val acq = io.acquire.bits + val r_acq = Reg(io.acquire.bits) + val acq_addr = Cat(acq.addr_block, acq.addr_beat) + val r_acq_addr = Cat(r_acq.addr_block, r_acq.addr_beat) + + when (io.acquire.fire() && io.acquire.bits.last()) { + r_acq := io.acquire.bits + responding := Bool(true) + } + + when (io.grant.fire()) { + val is_getblk = r_acq.isBuiltInType(Acquire.getBlockType) + val last_beat = r_acq.addr_beat === UInt(tlDataBeats - 1) + when (is_getblk && !last_beat) { + r_acq.addr_beat := r_acq.addr_beat + UInt(1) + } .otherwise { responding := Bool(false) } + } + + io.acquire.ready := !responding + io.grant.valid := responding + io.grant.bits := Grant( + is_builtin_type = Bool(true), + g_type = r_acq.getBuiltInGrantType(), + client_xact_id = r_acq.client_xact_id, + manager_xact_id = UInt(0), + addr_beat = r_acq.addr_beat, + data = ram(r_acq_addr)) + + val old_data = ram(acq_addr) + val new_data = acq.data + + val amo_shift_bits = acq.amo_shift_bytes() << UInt(3) + val amoalu = Module(new AMOALU) + amoalu.io.addr := Cat(acq.addr_block, acq.addr_beat, acq.addr_byte()) + amoalu.io.cmd := acq.op_code() + amoalu.io.typ := acq.op_size() + amoalu.io.lhs := old_data >> amo_shift_bits + amoalu.io.rhs := new_data >> amo_shift_bits + + val result = Mux(acq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data) + val wmask = FillInterleaved(8, acq.wmask()) + + when (io.acquire.fire() && acq.hasData()) { + ram(acq_addr) := (old_data & ~wmask) | (result & wmask) + } +} diff --git a/uncore/src/main/scala/devices/Debug.scala b/uncore/src/main/scala/devices/Debug.scala new file mode 100644 index 00000000..dc9468b1 --- /dev/null +++ b/uncore/src/main/scala/devices/Debug.scala @@ -0,0 +1,1003 @@ +// See LICENSE for license details. + +package uncore.devices + +import Chisel._ +import uncore.tilelink._ +import junctions._ +import cde.{Parameters, Config, Field} + +// ***************************************** +// Constants which are interesting even +// outside of this module +// ***************************************** + +object DbRegAddrs{ + + def DMCONTROL = UInt(0x10) + + def DMINFO = UInt(0x11) + def AUTHDATA0 = UInt(0x12) + def AUTHDATA1 = UInt(0x13) + def SERDATA = UInt(0x14) + def SERSTATUS = UInt(0x15) + def SBUSADDRESS0 = UInt(0x16) + def SBUSADDRESS1 = UInt(0x17) + def SBDATA0 = UInt(0x18) + def SBDATA1 = UInt(0x19) + //1a + def HALTSUM = UInt(0x1B) + //1c - 3b are the halt notification registers. + def SBADDRESS2 = UInt(0x3d) + // 3c + def SBDATA2 = UInt(0x3e) + def SBDATA3 = UInt(0x3f) +} + +/** Constant values used by both Debug Bus Response & Request + */ + +object DbBusConsts{ + + def dbDataSize = 34 + + def dbRamWordBits = 32 + + def dbOpSize = 2 + def db_OP_NONE = UInt("b00") + def db_OP_READ = UInt("b01") + def db_OP_READ_WRITE = UInt("b10") + def db_OP_READ_COND_WRITE = UInt("b11") + + def dbRespSize = 2 + def db_RESP_SUCCESS = UInt("b00") + def db_RESP_FAILURE = UInt("b01") + def db_RESP_HW_FAILURE = UInt("b10") + // This is used outside this block + // to indicate 'busy'. + def db_RESP_RESERVED = UInt("b11") + +} + +object DsbBusConsts { + + def sbAddrWidth = 12 + def sbIdWidth = 10 + + //These are the default ROM contents, which support RV32 and RV64. 
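+ // (Each group of four bytes below encodes one little-endian 32-bit
+ // RISC-V instruction.)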
+ // See $RISCV/riscv-tools/riscv-isa-sim/debug_rom/debug_rom.h/S + // The code assumes 64 bytes of Debug RAM. + + def defaultRomContents : Array[Byte] = Array( + 0x6f, 0x00, 0xc0, 0x04, 0x6f, 0x00, 0xc0, 0x00, 0x13, 0x04, 0xf0, 0xff, + 0x6f, 0x00, 0x80, 0x00, 0x13, 0x04, 0x00, 0x00, 0x0f, 0x00, 0xf0, 0x0f, + 0xf3, 0x24, 0x00, 0xf1, 0x63, 0xc6, 0x04, 0x00, 0x83, 0x24, 0xc0, 0x43, + 0x6f, 0x00, 0x80, 0x00, 0x83, 0x34, 0x80, 0x43, 0x23, 0x2e, 0x80, 0x42, + 0x73, 0x24, 0x40, 0xf1, 0x23, 0x20, 0x80, 0x10, 0x73, 0x24, 0x00, 0x7b, + 0x13, 0x74, 0x84, 0x00, 0x63, 0x12, 0x04, 0x04, 0x73, 0x24, 0x20, 0x7b, + 0x73, 0x00, 0x20, 0x7b, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x00, 0x7b, + 0x13, 0x74, 0x04, 0x1c, 0x13, 0x04, 0x04, 0xf4, 0x63, 0x1e, 0x04, 0x00, + 0x73, 0x24, 0x00, 0xf1, 0x63, 0x46, 0x04, 0x00, 0x23, 0x2e, 0x90, 0x42, + 0x67, 0x00, 0x00, 0x40, 0x23, 0x3c, 0x90, 0x42, 0x67, 0x00, 0x00, 0x40, + 0x73, 0x24, 0x40, 0xf1, 0x23, 0x26, 0x80, 0x10, 0x73, 0x60, 0x04, 0x7b, + 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x02, 0xe3, 0x0c, 0x04, 0xfe, + 0x6f, 0xf0, 0x1f, 0xfd).map(_.toByte) + + // These ROM contents support only RV32 + // See $RISCV/riscv-tools/riscv-isa-sim/debug_rom/debug_rom.h/S + // The code assumes only 28 bytes of Debug RAM. + + def xlen32OnlyRomContents : Array[Byte] = Array( + 0x6f, 0x00, 0xc0, 0x03, 0x6f, 0x00, 0xc0, 0x00, 0x13, 0x04, 0xf0, 0xff, + 0x6f, 0x00, 0x80, 0x00, 0x13, 0x04, 0x00, 0x00, 0x0f, 0x00, 0xf0, 0x0f, + 0x83, 0x24, 0x80, 0x41, 0x23, 0x2c, 0x80, 0x40, 0x73, 0x24, 0x40, 0xf1, + 0x23, 0x20, 0x80, 0x10, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x84, 0x00, + 0x63, 0x1a, 0x04, 0x02, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b, + 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x1c, + 0x13, 0x04, 0x04, 0xf4, 0x63, 0x16, 0x04, 0x00, 0x23, 0x2c, 0x90, 0x40, + 0x67, 0x00, 0x00, 0x40, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x26, 0x80, 0x10, + 0x73, 0x60, 0x04, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x02, + 0xe3, 0x0c, 0x04, 0xfe, 0x6f, 0xf0, 0x1f, 0xfe).map(_.toByte) + + // These ROM contents support only RV64 + // See $RISCV/riscv-tools/riscv-isa-sim/debug_rom/debug_rom.h/S + // The code assumes 64 bytes of Debug RAM. + + def xlen64OnlyRomContents : Array[Byte] = Array( + 0x6f, 0x00, 0xc0, 0x03, 0x6f, 0x00, 0xc0, 0x00, 0x13, 0x04, 0xf0, 0xff, + 0x6f, 0x00, 0x80, 0x00, 0x13, 0x04, 0x00, 0x00, 0x0f, 0x00, 0xf0, 0x0f, + 0x83, 0x34, 0x80, 0x43, 0x23, 0x2e, 0x80, 0x42, 0x73, 0x24, 0x40, 0xf1, + 0x23, 0x20, 0x80, 0x10, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x84, 0x00, + 0x63, 0x1a, 0x04, 0x02, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b, + 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x1c, + 0x13, 0x04, 0x04, 0xf4, 0x63, 0x16, 0x04, 0x00, 0x23, 0x3c, 0x90, 0x42, + 0x67, 0x00, 0x00, 0x40, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x26, 0x80, 0x10, + 0x73, 0x60, 0x04, 0x7b, 0x73, 0x24, 0x00, 0x7b, 0x13, 0x74, 0x04, 0x02, + 0xe3, 0x0c, 0x04, 0xfe, 0x6f, 0xf0, 0x1f, 0xfe).map(_.toByte) +} + + + +object DsbRegAddrs{ + + def CLEARDEBINT = UInt(0x100) + def SETHALTNOT = UInt(0x10C) + def SERINFO = UInt(0x110) + def SERBASE = UInt(0x114) + // For each serial, there are + // 3 registers starting here: + // SERSEND0 + // SERRECEIVE0 + // SERSTATUS0 + // ... 
// SERSTATUS7
+ def SERTX_OFFSET = UInt(0)
+ def SERRX_OFFSET = UInt(4)
+ def SERSTAT_OFFSET = UInt(8)
+
+}
+
+
+// *****************************************
+// Configuration & Parameters for this Module
+//
+// *****************************************
+
+/** Enumerations used both in the hardware
+ * and in the configuration specification.
+ */
+
+object DebugModuleAuthType extends scala.Enumeration {
+ type DebugModuleAuthType = Value
+ val None, Password, ChallengeResponse, Reserved = Value
+}
+import DebugModuleAuthType._
+
+object DebugModuleAccessType extends scala.Enumeration {
+ type DebugModuleAccessType = Value
+ val Access8Bit, Access16Bit, Access32Bit, Access64Bit, Access128Bit = Value
+}
+import DebugModuleAccessType._
+
+
+/** Parameters exposed to the top-level design, set based on
+ * external requirements, etc.
+ *
+ * This object checks that the parameters conform to the
+ * full specification. The implementation which receives this
+ * object can perform more checks on what that implementation
+ * actually supports.
+ * nComponents : The number of components to support debugging.
+ * nDebugBusAddrSize : Size of the Debug Bus Address.
+ * nDebugRamBytes : Size of the Debug RAM (depends on the XLEN of the machine).
+ * debugRomContents : Optional sequence of bytes which form the Debug ROM contents.
+ * hasBusMaster : Whether or not a bus master should be included.
+ * hasAccess128 .. hasAccess8 : The sizes of the accesses supported by the Bus Master.
+ * nSerialPorts : Number of serial ports to instantiate.
+ * authType : The Authorization Type.
+ * nNDResetCycles : Number of cycles to assert ndreset when pulsed.
+ **/
+
+
+case class DebugModuleConfig (
+ nComponents : Int,
+ nDebugBusAddrSize : Int,
+ nDebugRamBytes : Int,
+ debugRomContents : Option[Seq[Byte]],
+ hasBusMaster : Boolean,
+ hasAccess128 : Boolean,
+ hasAccess64 : Boolean,
+ hasAccess32 : Boolean,
+ hasAccess16 : Boolean,
+ hasAccess8 : Boolean,
+ nSerialPorts : Int,
+ authType : DebugModuleAuthType,
+ nNDResetCycles : Int
+) {
+
+ if (hasBusMaster == false){
+ require (hasAccess128 == false)
+ require (hasAccess64 == false)
+ require (hasAccess32 == false)
+ require (hasAccess16 == false)
+ require (hasAccess8 == false)
+ }
+
+ require (nSerialPorts <= 8)
+
+ require ((nDebugBusAddrSize >= 5) && (nDebugBusAddrSize <= 7))
+
+ private val maxComponents = nDebugBusAddrSize match {
+ case 5 => (32*4)
+ case 6 => (32*32)
+ case 7 => (32*32)
+ }
+ require (nComponents > 0 && nComponents <= maxComponents)
+
+ private val maxRam = nDebugBusAddrSize match {
+ case 5 => (4 * 16)
+ case 6 => (4 * 16)
+ case 7 => (4 * 64)
+ }
+
+ require (nDebugRamBytes > 0 && nDebugRamBytes <= maxRam)
+
+ val hasHaltSum = (nComponents > 64) || (nSerialPorts > 0)
+
+ val hasDebugRom = debugRomContents match{
+ case Some(_) => true
+ case None => false
+ }
+
+ if (hasDebugRom) {
+ require (debugRomContents.get.size > 0)
+ require (debugRomContents.get.size <= 512)
+ }
+
+ require (nNDResetCycles > 0)
+
+}
+
+class DefaultDebugModuleConfig (val ncomponents : Int, val xlen:Int)
+ extends DebugModuleConfig(
+ nComponents = ncomponents,
+ nDebugBusAddrSize = 5,
+ // While smaller numbers are theoretically
+ // possible as noted in the Spec,
+ // the ROM image would need to be
+ // adjusted accordingly.
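+ // NOTE: the matches below provide a RAM size for XLEN 32/64/128 but a ROM
+ // image only for 32 and 64, so constructing this config with xlen = 128
+ // would fail with a MatchError.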
+ nDebugRamBytes = xlen match{ + case 32 => 28 + case 64 => 64 + case 128 => 64 + }, + debugRomContents = xlen match { + case 32 => Some(DsbBusConsts.xlen32OnlyRomContents) + case 64 => Some(DsbBusConsts.xlen64OnlyRomContents) + }, + hasBusMaster = false, + hasAccess128 = false, + hasAccess64 = false, + hasAccess32 = false, + hasAccess16 = false, + hasAccess8 = false, + nSerialPorts = 0, + authType = DebugModuleAuthType.None, + nNDResetCycles = 1) + +case object DMKey extends Field[DebugModuleConfig] + + +// ***************************************** +// Module Interfaces +// +// ***************************************** + + +/** Structure to define the contents of a Debug Bus Request + */ + +class DebugBusReq(addrBits : Int) extends Bundle { + val addr = UInt(width = addrBits) + val op = UInt(width = DbBusConsts.dbOpSize) + val data = UInt(width = DbBusConsts.dbDataSize) + + override def cloneType = new DebugBusReq(addrBits).asInstanceOf[this.type] +} + + +/** Structure to define the contents of a Debug Bus Response + */ +class DebugBusResp( ) extends Bundle { + val resp = UInt(width = DbBusConsts.dbRespSize) + val data = UInt(width = DbBusConsts.dbDataSize) +} + +/** Structure to define the top-level DebugBus interface + * of DebugModule. + * DebugModule is the consumer of this interface. + * Therefore it has the 'flipped' version of this. + */ + +class DebugBusIO(implicit val p: cde.Parameters) extends ParameterizedBundle()(p) { + val req = new DecoupledIO(new DebugBusReq(p(DMKey).nDebugBusAddrSize)) + val resp = new DecoupledIO(new DebugBusResp).flip() +} + +// ***************************************** +// The Module +// +// ***************************************** + +/** Parameterized version of the Debug Module defined in the + * RISC-V Debug Specification + * + * DebugModule is a slave to two masters: + * The Debug Bus -- implemented as a generic Decoupled IO with request + * and response channels + * The System Bus -- implemented as Uncached Tile Link. + * + * DebugModule is responsible for holding registers, RAM, and ROM + * to support debug interactions, as well as driving interrupts + * to a configurable number of components in the system. + * It is also responsible for some reset lines. + */ + +class DebugModule ()(implicit val p:cde.Parameters) + extends Module + with HasTileLinkParameters { + val cfg = p(DMKey) + + //-------------------------------------------------------------- + // Import constants for shorter variable names + //-------------------------------------------------------------- + + import DbRegAddrs._ + import DsbRegAddrs._ + import DsbBusConsts._ + import DbBusConsts._ + + //-------------------------------------------------------------- + // Sanity Check Configuration For this implementation. 
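+ // (The requires below pin this implementation to at most 128 components,
+ // no serial ports, no bus master, at most 64 bytes of Debug RAM, and no
+ // authentication.)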
+ //-------------------------------------------------------------- + + require (cfg.nComponents <= 128) + require (cfg.nSerialPorts == 0) + require (cfg.hasBusMaster == false) + require (cfg.nDebugRamBytes <= 64) + require (cfg.authType == DebugModuleAuthType.None) + + //-------------------------------------------------------------- + // Private Classes (Register Fields) + //-------------------------------------------------------------- + + class RAMFields() extends Bundle { + val interrupt = Bool() + val haltnot = Bool() + val data = Bits(width = 32) + + override def cloneType = new RAMFields().asInstanceOf[this.type] + } + + class CONTROLFields() extends Bundle { + val interrupt = Bool() + val haltnot = Bool() + val reserved0 = Bits(width = 31-22 + 1) + val buserror = Bits(width = 3) + val serial = Bits(width = 3) + val autoincrement = Bool() + val access = UInt(width = 3) + val hartid = Bits(width = 10) + val ndreset = Bool() + val fullreset = Bool() + + override def cloneType = new CONTROLFields().asInstanceOf[this.type] + + } + + class DMINFOFields() extends Bundle { + val reserved0 = Bits(width = 2) + val abussize = UInt(width = 7) + val serialcount = UInt(width = 4) + val access128 = Bool() + val access64 = Bool() + val access32 = Bool() + val access16 = Bool() + val accesss8 = Bool() + val dramsize = UInt(width = 6) + val haltsum = Bool() + val reserved1 = Bits(width = 3) + val authenticated = Bool() + val authbusy = Bool() + val authtype = UInt(width = 2) + val version = UInt(width = 2) + + override def cloneType = new DMINFOFields().asInstanceOf[this.type] + + } + + class HALTSUMFields() extends Bundle { + val serialfull = Bool() + val serialvalid = Bool() + val acks = Bits(width = 32) + + override def cloneType = new HALTSUMFields().asInstanceOf[this.type] + + } + + //-------------------------------------------------------------- + // Module I/O + //-------------------------------------------------------------- + + val io = new Bundle { + val db = new DebugBusIO()(p).flip() + val debugInterrupts = Vec(cfg.nComponents, Bool()).asOutput + val tl = new ClientUncachedTileLinkIO().flip + val ndreset = Bool(OUTPUT) + val fullreset = Bool(OUTPUT) + } + + //-------------------------------------------------------------- + // Register & Wire Declarations + //-------------------------------------------------------------- + + // --- Debug Bus Registers + val CONTROLReset = Wire(new CONTROLFields()) + val CONTROLWrEn = Wire(Bool()) + val CONTROLReg = Reg(new CONTROLFields()) + val CONTROLWrData = Wire (new CONTROLFields()) + val CONTROLRdData = Wire (new CONTROLFields()) + val ndresetCtrReg = Reg(UInt(cfg.nNDResetCycles)) + + val DMINFORdData = Wire (new DMINFOFields()) + + val HALTSUMRdData = Wire (new HALTSUMFields()) + + val RAMWrData = Wire (new RAMFields()) + val RAMRdData = Wire (new RAMFields()) + + // --- System Bus Registers + + val SETHALTNOTWrEn = Wire(Bool()) + val SETHALTNOTWrData = Wire(UInt(width = sbIdWidth)) + val CLEARDEBINTWrEn = Wire(Bool()) + val CLEARDEBINTWrData = Wire(UInt(width = sbIdWidth)) + + // --- Interrupt & Halt Notification Registers + + val interruptRegs = Reg(init=Vec.fill(cfg.nComponents){Bool(false)}) + + val haltnotRegs = Reg(init=Vec.fill(cfg.nComponents){Bool(false)}) + val numHaltnotStatus = ((cfg.nComponents - 1) / 32) + 1 + + val haltnotStatus = Wire(Vec(numHaltnotStatus, Bits(width = 32))) + val rdHaltnotStatus = Wire(Bits(width = 32)) + + val haltnotSummary = Cat(haltnotStatus.map(_.orR).reverse) + + // --- Debug RAM + + // Since the access size 
from Debug Bus and System Bus may not be consistent, + // use the maximum to build the RAM, and then select as needed for the smaller + // size. + + val dbRamDataWidth = DbBusConsts.dbRamWordBits + val sbRamDataWidth = tlDataBits + val dbRamAddrWidth = log2Up((cfg.nDebugRamBytes * 8) / dbRamDataWidth) + val sbRamAddrWidth = log2Up((cfg.nDebugRamBytes * 8) / sbRamDataWidth) + val sbRamAddrOffset = log2Up(tlDataBits/8) + + val ramDataWidth = dbRamDataWidth max sbRamDataWidth + val ramAddrWidth = dbRamAddrWidth min sbRamAddrWidth + val ramMem = Mem(1 << ramAddrWidth , UInt(width=ramDataWidth)) + val ramAddr = Wire(UInt(width=ramAddrWidth)) + val ramRdData = Wire(UInt(width=ramDataWidth)) + val ramWrData = Wire(UInt(width=ramDataWidth)) + val ramWrMask = Wire(UInt(width=ramDataWidth)) + val ramWrEn = Wire(Bool()) + + val dbRamAddr = Wire(UInt(width=dbRamAddrWidth)) + val dbRamRdData = Wire (UInt(width=dbRamDataWidth)) + val dbRamWrData = Wire(UInt(width=dbRamDataWidth)) + val dbRamWrEn = Wire(Bool()) + val dbRamRdEn = Wire(Bool()) + + val sbRamAddr = Wire(UInt(width=sbRamAddrWidth)) + val sbRamRdData = Wire (UInt(width=sbRamDataWidth)) + val sbRamWrData = Wire(UInt(width=sbRamDataWidth)) + val sbRamWrEn = Wire(Bool()) + val sbRamRdEn = Wire(Bool()) + + val sbRomRdData = Wire(UInt(width=tlDataBits)) + val sbRomAddrOffset = log2Up(tlDataBits/8) + + // --- Debug Bus Accesses + + val dbRdEn = Wire(Bool()) + val dbWrEn = Wire(Bool()) + val dbRdData = Wire(UInt(width = DbBusConsts.dbDataSize)) + + val s_DB_READY :: s_DB_RESP :: Nil = Enum(Bits(), 2) + val dbStateReg = Reg(init = s_DB_READY) + + val dbResult = Wire(io.db.resp.bits) + + val dbReq = Wire(io.db.req.bits) + val dbRespReg = Reg(io.db.resp.bits) + + val rdCondWrFailure = Wire(Bool()) + val dbWrNeeded = Wire(Bool()) + + // --- System Bus Access + val sbAddr = Wire(UInt(width=sbAddrWidth)) + val sbRdData = Wire(UInt(width=tlDataBits)) + val sbWrData = Wire(UInt(width=tlDataBits)) + val sbWrMask = Wire(UInt(width=tlDataBits)) + val sbWrEn = Wire(Bool()) + val sbRdEn = Wire(Bool()) + + val stallFromDb = Wire(Bool()) + val stallFromSb = Wire(Bool()) + //-------------------------------------------------------------- + // Interrupt Registers + //-------------------------------------------------------------- + + for (component <- 0 until cfg.nComponents) { + io.debugInterrupts(component) := interruptRegs(component) + } + + // Interrupt Registers are written by write to CONTROL or debugRAM addresses + // for Debug Bus, and cleared by writes to CLEARDEBINT by System Bus. + // It is "unspecified" what should happen if both + // SET and CLEAR happen at the same time. In this + // implementation, the SET wins. + + for (component <- 0 until cfg.nComponents) { + when (CONTROLWrEn) { + when (CONTROLWrData.hartid === UInt(component)) { + interruptRegs(component) := interruptRegs(component) | CONTROLWrData.interrupt + } + }.elsewhen (dbRamWrEn) { + when (CONTROLReg.hartid === UInt(component)){ + interruptRegs(component) := interruptRegs(component) | RAMWrData.interrupt + } + }.elsewhen (CLEARDEBINTWrEn){ + when (CLEARDEBINTWrData === UInt(component, width = sbIdWidth)) { + interruptRegs(component) := Bool(false) + } + } + } + + //-------------------------------------------------------------- + // Halt Notification Registers + //-------------------------------------------------------------- + + // Halt Notifications Registers are cleared by zero write to CONTROL or debugRAM addresses + // for Debug Bus, and set by write to SETHALTNOT by System Bus. 
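+  // (Note the mirrored polarity relative to the interrupt registers above:
+  // haltnot is set from the System Bus side and cleared from the Debug Bus side.)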
+ // It is "unspecified" what should happen if both + // SET and CLEAR happen at the same time. In this + // implementation, the SET wins. + + for (component <- 0 until cfg.nComponents) { + when (SETHALTNOTWrEn){ + when (SETHALTNOTWrData === UInt(component, width = sbIdWidth)) { + haltnotRegs(component) := Bool(true) + } + } .elsewhen (CONTROLWrEn) { + when (CONTROLWrData.hartid === UInt(component)) { + haltnotRegs(component) := haltnotRegs(component) & CONTROLWrData.haltnot + } + }.elsewhen (dbRamWrEn) { + when (CONTROLReg.hartid === UInt(component)){ + haltnotRegs(component) := haltnotRegs(component) & RAMWrData.haltnot + } + } + } + + for (ii <- 0 until numHaltnotStatus) { + haltnotStatus(ii) := Cat(haltnotRegs.slice(ii * 32, (ii + 1) * 32).reverse) + } + + //-------------------------------------------------------------- + // Other Registers + //-------------------------------------------------------------- + + CONTROLReset.interrupt := Bool(false) + CONTROLReset.haltnot := Bool(false) + CONTROLReset.reserved0 := Bits(0) + CONTROLReset.buserror := Bits(0) + CONTROLReset.serial := Bits(0) + CONTROLReset.autoincrement := Bool(false) + CONTROLReset.access := UInt(DebugModuleAccessType.Access32Bit.id) + CONTROLReset.hartid := Bits(0) + CONTROLReset.ndreset := Bool(false) + CONTROLReset.fullreset := Bool(false) + + // Because this version of DebugModule doesn't + // support authentication, this entire register is + // Read-Only constant wires. + DMINFORdData.reserved0 := Bits(0) + DMINFORdData.abussize := UInt(0) // Not Implemented. + DMINFORdData.serialcount := UInt(cfg.nSerialPorts) + DMINFORdData.access128 := Bool(cfg.hasAccess128) + DMINFORdData.access64 := Bool(cfg.hasAccess64) + DMINFORdData.access32 := Bool(cfg.hasAccess32) + DMINFORdData.access16 := Bool(cfg.hasAccess16) + DMINFORdData.accesss8 := Bool(cfg.hasAccess8) + DMINFORdData.dramsize := Bits((cfg.nDebugRamBytes >> 2) - 1) // Size in 32-bit words minus 1. + DMINFORdData.haltsum := Bool(cfg.hasHaltSum) + DMINFORdData.reserved1 := Bits(0) + DMINFORdData.authenticated := Bool(true) // Not Implemented. + DMINFORdData.authbusy := Bool(false) // Not Implemented. + DMINFORdData.authtype := UInt(cfg.authType.id) + DMINFORdData.version := UInt(1) // Conforms to RISC-V Debug Spec + + HALTSUMRdData.serialfull := Bool(false) // Not Implemented + HALTSUMRdData.serialvalid := Bool(false) // Not Implemented + HALTSUMRdData.acks := haltnotSummary + + //-------------------------------------------------------------- + // Debug RAM Access (Debug Bus & System Bus) + //-------------------------------------------------------------- + + dbReq := io.db.req.bits + // Debug Bus RAM Access + // From Specification: Debug RAM is 0x00 - 0x0F + // 0x40 - 0x6F Not Implemented + dbRamAddr := dbReq.addr( dbRamAddrWidth-1 , 0) + dbRamWrData := dbReq.data + sbRamAddr := sbAddr(sbRamAddrWidth + sbRamAddrOffset - 1, sbRamAddrOffset) + sbRamWrData := sbWrData + + require (dbRamAddrWidth >= ramAddrWidth) // SB accesses less than 32 bits Not Implemented. 
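+  // Worked example (illustrative values only; the real widths come from
+  // DbBusConsts and the TileLink configuration): with nDebugRamBytes = 64,
+  // a 32-bit Debug Bus word, and tlDataBits = 64, we get
+  // dbRamAddrWidth = log2Up(512/32) = 4 and sbRamAddrWidth = log2Up(512/64) = 3,
+  // so the RAM is built as 8 entries of 64 bits and each Debug Bus access
+  // selects one 32-bit half of an entry.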
+ val dbRamWrMask = Wire(init=Vec.fill(1 << (dbRamAddrWidth - ramAddrWidth)){Fill(dbRamDataWidth, UInt(1, width=1))}) + + if (dbRamDataWidth < ramDataWidth){ + + val dbRamSel = dbRamAddr(dbRamAddrWidth - ramAddrWidth - 1 , 0) + val rdDataWords = Vec.tabulate(1 << (dbRamAddrWidth - ramAddrWidth)){ ii => + ramRdData((ii+1)*dbRamDataWidth - 1 , ii*dbRamDataWidth)} + + dbRamWrMask := Vec.fill(1 << (dbRamAddrWidth - ramAddrWidth)){UInt(0, width = dbRamDataWidth)} + dbRamWrMask(dbRamSel) := Fill(dbRamDataWidth, UInt(1, width=1)) + dbRamRdData := rdDataWords(dbRamSel) + } else { + dbRamRdData := ramRdData + } + + sbRamRdData := ramRdData + + ramWrMask := Mux(sbRamWrEn, sbWrMask, dbRamWrMask.toBits()) + + assert (!((dbRamWrEn | dbRamRdEn) & (sbRamRdEn | sbRamWrEn)), "Stall logic should have prevented concurrent SB/DB RAM Access") + + // Make copies of DB RAM data before writing. + val dbRamWrDataVec = Fill(1 << (dbRamAddrWidth - ramAddrWidth), dbRamWrData) + ramWrData := Mux(sbRamWrEn, + (ramWrMask & sbRamWrData ) | (~ramWrMask & ramRdData), + (ramWrMask & dbRamWrDataVec.toBits) | (~ramWrMask & ramRdData)) + + ramAddr := Mux(sbRamWrEn | sbRamRdEn, sbRamAddr, + dbRamAddr >> (dbRamAddrWidth - ramAddrWidth)) + + ramRdData := ramMem(ramAddr) + when (ramWrEn) { ramMem(ramAddr) := ramWrData } + + ramWrEn := sbRamWrEn | dbRamWrEn + + //-------------------------------------------------------------- + // Debug Bus Access + //-------------------------------------------------------------- + + // 0x00 - 0x0F Debug RAM + // 0x10 - 0x1B Registers + // 0x1C - 0x3B Halt Notification Registers + // 0x3C - 0x3F Registers + // 0x40 - 0x6F Debug RAM + + + // ----------------------------------------- + // DB Access Write Decoder + + CONTROLWrData := new CONTROLFields().fromBits(dbReq.data) + RAMWrData := new RAMFields().fromBits(dbReq.data) + + dbRamWrEn := Bool(false) + CONTROLWrEn := Bool(false) + when ((dbReq.addr >> 4) === Bits(0)) { // 0x00 - 0x0F Debug RAM + dbRamWrEn := dbWrEn + }.elsewhen (dbReq.addr === DMCONTROL) { + CONTROLWrEn := dbWrEn + }.otherwise { + //Other registers/RAM are Not Implemented. + } + + when (reset) { + CONTROLReg := CONTROLReset + ndresetCtrReg := UInt(0) + }.elsewhen (CONTROLWrEn) { + // interrupt handled in other logic + // haltnot handled in other logic + if (cfg.hasBusMaster){ + // buserror is set 'until 0 is written to any bit in this field'. 
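+      // (i.e. writing all ones leaves the field unchanged, while a write with
+      // any zero bit clears the entire field at once.)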
+      CONTROLReg.buserror := Mux((CONTROLWrData.buserror === SInt(-1).toBits), CONTROLReg.buserror, UInt(0))
+      CONTROLReg.autoincrement := CONTROLWrData.autoincrement
+      CONTROLReg.access := CONTROLWrData.access
+    }
+    if (cfg.nSerialPorts > 0){
+      CONTROLReg.serial := CONTROLWrData.serial
+    }
+    CONTROLReg.hartid := CONTROLWrData.hartid
+    CONTROLReg.fullreset := CONTROLReg.fullreset | CONTROLWrData.fullreset
+    when (CONTROLWrData.ndreset){
+      ndresetCtrReg := UInt(cfg.nNDResetCycles)
+    }.otherwise {
+      ndresetCtrReg := Mux(ndresetCtrReg === UInt(0), UInt(0), ndresetCtrReg - UInt(1))
+    }
+  }.otherwise {
+    ndresetCtrReg := Mux(ndresetCtrReg === UInt(0), UInt(0), ndresetCtrReg - UInt(1))
+  }
+
+  // -----------------------------------------
+  // DB Access Read Mux
+
+  CONTROLRdData := CONTROLReg
+  CONTROLRdData.interrupt := interruptRegs(CONTROLReg.hartid)
+  CONTROLRdData.haltnot := haltnotRegs(CONTROLReg.hartid)
+  CONTROLRdData.ndreset := ndresetCtrReg.orR
+
+  RAMRdData.interrupt := interruptRegs(CONTROLReg.hartid)
+  RAMRdData.haltnot := haltnotRegs(CONTROLReg.hartid)
+  RAMRdData.data := dbRamRdData
+
+  dbRdData := UInt(0)
+
+  // Larger values of numHaltnotStatus are Not Implemented.
+  // This logic assumes up to 128 components, i.e. at most four status words,
+  // selected by the low two address bits of the 0x1C - 0x1F window.
+  rdHaltnotStatus := Bits(0)
+  for (ii <- 0 until numHaltnotStatus) {
+    when (dbReq.addr(1, 0) === UInt(ii)) {
+      rdHaltnotStatus := haltnotStatus(ii)
+    }
+  }
+
+  dbRamRdEn := Bool(false)
+  when ((dbReq.addr >> 4) === Bits(0)) { // 0x00 - 0x0F Debug RAM
+    dbRdData := RAMRdData.toBits()
+    dbRamRdEn := dbRdEn
+  }.elsewhen (dbReq.addr === DMCONTROL) {
+    dbRdData := CONTROLRdData.toBits()
+  }.elsewhen (dbReq.addr === DMINFO) {
+    dbRdData := DMINFORdData.toBits()
+  }.elsewhen (dbReq.addr === HALTSUM) {
+    if (cfg.hasHaltSum){
+      dbRdData := HALTSUMRdData.toBits()
+    } else {
+      dbRdData := UInt(0)
+    }
+  }.elsewhen ((dbReq.addr >> 2) === UInt(7)) { // 0x1C - 0x1F Haltnot
+    dbRdData := rdHaltnotStatus
+  }.otherwise {
+    // These registers are not implemented in this version of DebugModule:
+    //   AUTHDATA0
+    //   AUTHDATA1
+    //   SERDATA
+    //   SERSTATUS
+    //   SBUSADDRESS0
+    //   SBUSADDRESS1
+    //   SBDATA0
+    //   SBDATA1
+    //   SBADDRESS2
+    //   SBDATA2
+    //   SBDATA3
+    //   0x20 - 0x3B haltnot
+    //   Upper bytes of Debug RAM.
+    dbRdData := UInt(0)
+  }
+
+  // A conditional write fails if the MSB of the read data is set.
+  rdCondWrFailure := dbRdData(dbDataSize - 1) &&
+    (dbReq.op === db_OP_READ_COND_WRITE)
+
+  dbWrNeeded := (dbReq.op === db_OP_READ_WRITE) ||
+    ((dbReq.op === db_OP_READ_COND_WRITE) && ~rdCondWrFailure)
+
+  // This is only relevant at the end of s_DB_READY.
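+  // A failed db_OP_READ_COND_WRITE returns db_RESP_FAILURE and suppresses the
+  // write (dbWrNeeded above); because the interrupt flag reads back in the MSB
+  // of Debug RAM and DMCONTROL words, a debugger can use this to deposit a new
+  // command only once the hart has cleared the previous interrupt.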
+ dbResult.resp := Mux(rdCondWrFailure, + db_RESP_FAILURE, + db_RESP_SUCCESS) + dbResult.data := dbRdData + + // ----------------------------------------- + // DB Access State Machine Decode (Combo) + io.db.req.ready := !stallFromSb && ((dbStateReg === s_DB_READY) || + (dbStateReg === s_DB_RESP && io.db.resp.fire())) + + io.db.resp.valid := (dbStateReg === s_DB_RESP) + io.db.resp.bits := dbRespReg + + dbRdEn := io.db.req.fire() + dbWrEn := dbWrNeeded && io.db.req.fire() + + // ----------------------------------------- + // DB Access State Machine Update (Seq) + + when (dbStateReg === s_DB_READY){ + when (io.db.req.fire()){ + dbStateReg := s_DB_RESP + dbRespReg := dbResult + } + } .elsewhen (dbStateReg === s_DB_RESP){ + when (io.db.req.fire()){ + dbStateReg := s_DB_RESP + dbRespReg := dbResult + }.elsewhen (io.db.resp.fire()){ + dbStateReg := s_DB_READY + } + } + + + //-------------------------------------------------------------- + // Debug ROM + //-------------------------------------------------------------- + + sbRomRdData := UInt(0) + if (cfg.hasDebugRom) { + // Inspired by ROMSlave + val romContents = cfg.debugRomContents.get + val romByteWidth = tlDataBits / 8 + val romRows = (romContents.size + romByteWidth - 1)/romByteWidth + val romMem = Vec.tabulate(romRows) { ii => + val slice = romContents.slice(ii*romByteWidth, (ii+1)*romByteWidth) + UInt(slice.foldRight(BigInt(0)) { case (x,y) => ((y << 8) + (x.toInt & 0xFF))}, width = romByteWidth*8) + } + + val sbRomRdAddr = Wire(UInt()) + + if (romRows == 1) { + sbRomRdAddr := UInt(0) + } else { + sbRomRdAddr := sbAddr(log2Up(romRows) + sbRomAddrOffset - 1, sbRomAddrOffset) + } + sbRomRdData := romMem (sbRomRdAddr) + } + + //-------------------------------------------------------------- + // System Bus Access + //-------------------------------------------------------------- + + + // ----------------------------------------- + // SB Access Write Decoder + + sbRamWrEn := Bool(false) + SETHALTNOTWrEn := Bool(false) + CLEARDEBINTWrEn := Bool(false) + + if (tlDataBits == 32) { + SETHALTNOTWrData := sbWrData + CLEARDEBINTWrData := sbWrData + when (sbAddr(11, 8) === UInt(4)){ // 0x400-0x4ff is Debug RAM + sbRamWrEn := sbWrEn + sbRamRdEn := sbRdEn + }.elsewhen (sbAddr === SETHALTNOT){ + SETHALTNOTWrEn := sbWrEn + }.elsewhen (sbAddr === CLEARDEBINT){ + CLEARDEBINTWrEn := sbWrEn + }.otherwise { + //Other registers/RAM are Not Implemented. + } + } else { + + // Pick out the correct word based on the address. 
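+    // Hypothetical example: with tlDataBits = 128 each beat carries four
+    // 32-bit words, so sbWrSelTop = 3 and sbWrSelBottom = 2, and address bits
+    // (3,2) of SETHALTNOT/CLEARDEBINT select which word of the beat carries
+    // the hart id being written.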
+    val sbWrDataWords = Vec.tabulate(tlDataBits / 32) { ii => sbWrData((ii+1)*32 - 1, ii*32) }
+    val sbWrMaskWords = Vec.tabulate(tlDataBits / 32) { ii => sbWrMask((ii+1)*32 - 1, ii*32) }
+
+    val sbWrSelTop = log2Up(tlDataBits/8) - 1
+    val sbWrSelBottom = 2
+
+    SETHALTNOTWrData := sbWrDataWords(SETHALTNOT(sbWrSelTop, sbWrSelBottom))
+    CLEARDEBINTWrData := sbWrDataWords(CLEARDEBINT(sbWrSelTop, sbWrSelBottom))
+
+    when (sbAddr(11, 8) === UInt(4)){ // 0x400 - 0x4FF is Debug RAM
+      sbRamWrEn := sbWrEn
+      sbRamRdEn := sbRdEn
+    }
+
+    SETHALTNOTWrEn := sbAddr(sbAddrWidth - 1, sbWrSelTop + 1) === SETHALTNOT(sbAddrWidth - 1, sbWrSelTop + 1) &&
+      (sbWrMaskWords(SETHALTNOT(sbWrSelTop, sbWrSelBottom))).orR &&
+      sbWrEn
+
+    CLEARDEBINTWrEn := sbAddr(sbAddrWidth - 1, sbWrSelTop + 1) === CLEARDEBINT(sbAddrWidth - 1, sbWrSelTop + 1) &&
+      (sbWrMaskWords(CLEARDEBINT(sbWrSelTop, sbWrSelBottom))).orR &&
+      sbWrEn
+
+  }
+
+  // -----------------------------------------
+  // SB Access Read Mux
+
+  sbRdData := UInt(0)
+  sbRamRdEn := Bool(false)
+
+  when (sbAddr(11, 8) === UInt(4)) { // 0x400 - 0x4FF Debug RAM
+    sbRdData := sbRamRdData
+    sbRamRdEn := sbRdEn
+  }.elsewhen (sbAddr(11, 8) === UInt(8) || sbAddr(11, 8) === UInt(9)){ // 0x800 - 0x9FF Debug ROM
+    if (cfg.hasDebugRom) {
+      sbRdData := sbRomRdData
+    } else {
+      sbRdData := UInt(0)
+    }
+  }.otherwise {
+    // All readable registers are Not Implemented.
+    sbRdData := UInt(0)
+  }
+
+  // -----------------------------------------
+  // SB Access State Machine -- based on BRAM Slave
+
+  val sbAcqReg = Reg(io.tl.acquire.bits)
+  val sbAcqValidReg = Reg(init = Bool(false))
+
+  val (sbReg_get :: sbReg_getblk :: sbReg_put :: sbReg_putblk :: Nil) = Seq(
+    Acquire.getType, Acquire.getBlockType, Acquire.putType, Acquire.putBlockType
+  ).map(sbAcqReg.isBuiltInType _)
+
+  val sbMultibeat = sbReg_getblk & sbAcqValidReg
+
+  val sbBeatInc1 = sbAcqReg.addr_beat + UInt(1)
+
+  val sbLast = (sbAcqReg.addr_beat === UInt(tlDataBeats - 1))
+
+  sbAddr := sbAcqReg.full_addr()
+  sbRdEn := (sbAcqValidReg && (sbReg_get || sbReg_getblk))
+  sbWrEn := (sbAcqValidReg && (sbReg_put || sbReg_putblk))
+  sbWrData := sbAcqReg.data
+  sbWrMask := sbAcqReg.full_wmask()
+
+  // -----------------------------------------
+  // SB Access State Machine Update (Seq)
+
+  when (io.tl.acquire.fire()){
+    sbAcqReg := io.tl.acquire.bits
+    sbAcqValidReg := Bool(true)
+  }.elsewhen (io.tl.grant.fire()) {
+    when (sbMultibeat){
+      sbAcqReg.addr_beat := sbBeatInc1
+      when (sbLast) {
+        sbAcqValidReg := Bool(false)
+      }
+    }.otherwise {
+      sbAcqValidReg := Bool(false)
+    }
+  }
+
+  io.tl.grant.valid := sbAcqValidReg
+  io.tl.grant.bits := Grant(
+    is_builtin_type = Bool(true),
+    g_type = sbAcqReg.getBuiltInGrantType(),
+    client_xact_id = sbAcqReg.client_xact_id,
+    manager_xact_id = UInt(0),
+    addr_beat = sbAcqReg.addr_beat,
+    data = sbRdData
+  )
+
+  stallFromDb := Bool(false) // SB always wins, and DB latches its read data, so it is not necessary for SB to wait.
+
+  stallFromSb := sbRamRdEn || sbRamWrEn // Pessimistically assume that DB/SB are going to conflict on the RAM;
+                                        // SB doesn't latch its read data, so it is necessary for DB to hold
+                                        // off while SB is accessing the RAM and waiting to send its result.
+
+  val sbStall = (sbMultibeat & !sbLast) || (io.tl.grant.valid && !io.tl.grant.ready) || stallFromDb
+
+  io.tl.acquire.ready := !sbStall
+
+  //--------------------------------------------------------------
+  // Misc.
Outputs + //-------------------------------------------------------------- + + io.ndreset := ndresetCtrReg.orR + io.fullreset := CONTROLReg.fullreset + +} + +object AsyncDebugBusFrom { // OutsideClockDomain + def apply(from_clock: Clock, from_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2)(implicit p: Parameters): DebugBusIO = { + val sink = Wire(new DebugBusIO) + sink.req <> AsyncDecoupledFrom(from_clock, from_reset, source.req) + source.resp <> AsyncDecoupledTo(from_clock, from_reset, sink.resp) + sink + } +} + +object AsyncDebugBusTo { // OutsideClockDomain + def apply(to_clock: Clock, to_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2)(implicit p: Parameters): DebugBusIO = { + val sink = Wire(new DebugBusIO) + sink.req <> AsyncDecoupledTo(to_clock, to_reset, source.req) + source.resp <> AsyncDecoupledFrom(to_clock, to_reset, sink.resp) + sink + } +} diff --git a/uncore/src/main/scala/devices/Dma.scala b/uncore/src/main/scala/devices/Dma.scala new file mode 100644 index 00000000..036bf95b --- /dev/null +++ b/uncore/src/main/scala/devices/Dma.scala @@ -0,0 +1,534 @@ +package uncore.devices + +import Chisel._ +import cde.{Parameters, Field} +import junctions._ +import junctions.NastiConstants._ +import uncore.tilelink._ + +case object NDmaTransactors extends Field[Int] +case object NDmaXacts extends Field[Int] +case object NDmaClients extends Field[Int] + +trait HasDmaParameters { + implicit val p: Parameters + val nDmaTransactors = p(NDmaTransactors) + val nDmaXacts = p(NDmaXacts) + val nDmaClients = p(NDmaClients) + val dmaXactIdBits = log2Up(nDmaXacts) + val dmaClientIdBits = log2Up(nDmaClients) + val addrBits = p(PAddrBits) + val dmaStatusBits = 2 + val dmaWordSizeBits = 2 +} + +abstract class DmaModule(implicit val p: Parameters) extends Module with HasDmaParameters +abstract class DmaBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasDmaParameters + +class DmaRequest(implicit p: Parameters) extends DmaBundle()(p) { + val xact_id = UInt(width = dmaXactIdBits) + val client_id = UInt(width = dmaClientIdBits) + val cmd = UInt(width = DmaRequest.DMA_CMD_SZ) + val source = UInt(width = addrBits) + val dest = UInt(width = addrBits) + val length = UInt(width = addrBits) + val size = UInt(width = dmaWordSizeBits) +} + +class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) { + val xact_id = UInt(width = dmaXactIdBits) + val client_id = UInt(width = dmaClientIdBits) + val status = UInt(width = dmaStatusBits) +} + +object DmaRequest { + val DMA_CMD_SZ = 3 + + val DMA_CMD_COPY = UInt("b000") + val DMA_CMD_PFR = UInt("b010") + val DMA_CMD_PFW = UInt("b011") + val DMA_CMD_SIN = UInt("b100") + val DMA_CMD_SOUT = UInt("b101") + + def apply(xact_id: UInt = UInt(0), + client_id: UInt, + cmd: UInt, + source: UInt, + dest: UInt, + length: UInt, + size: UInt = UInt(0))(implicit p: Parameters): DmaRequest = { + val req = Wire(new DmaRequest) + req.xact_id := xact_id + req.client_id := client_id + req.cmd := cmd + req.source := source + req.dest := dest + req.length := length + req.size := size + req + } +} +import DmaRequest._ + +class DmaIO(implicit p: Parameters) extends DmaBundle()(p) { + val req = Decoupled(new DmaRequest) + val resp = Decoupled(new DmaResponse).flip +} + +class DmaTrackerIO(implicit p: Parameters) extends DmaBundle()(p) { + val dma = (new DmaIO).flip + val mem = new ClientUncachedTileLinkIO + val mmio = new NastiIO +} + +class DmaManager(outstandingCSR: Int)(implicit p: Parameters) + extends DmaModule()(p) + with 
HasNastiParameters + with HasAddrMapParameters { + + val io = new Bundle { + val ctrl = (new NastiIO).flip + val mmio = new NastiIO + val dma = new DmaIO + } + + private val wordBits = 1 << log2Up(addrBits) + private val wordBytes = wordBits / 8 + private val wordOff = log2Up(wordBytes) + private val wordMSB = wordOff + 2 + + val s_idle :: s_wdata :: s_dma_req :: s_wresp :: Nil = Enum(Bits(), 4) + val state = Reg(init = s_idle) + + val nCtrlWords = (addrBits * 4) / nastiXDataBits + val ctrl_regs = Reg(Vec(nCtrlWords, UInt(width = nastiXDataBits))) + val ctrl_idx = Reg(UInt(width = log2Up(nCtrlWords))) + val ctrl_done = Reg(Bool()) + val ctrl_blob = ctrl_regs.toBits + val ctrl_id = Reg(UInt(width = nastiXIdBits)) + + val sizeOffset = 3 * addrBits + val cmdOffset = sizeOffset + dmaWordSizeBits + + val dma_req = new DmaRequest().fromBits(ctrl_blob) + val dma_busy = Reg(init = UInt(0, nDmaXacts)) + val dma_xact_id = PriorityEncoder(~dma_busy) + + when (io.ctrl.aw.fire()) { + ctrl_id := io.ctrl.aw.bits.id + ctrl_idx := UInt(0) + ctrl_done := Bool(false) + state := s_wdata + } + + when (io.ctrl.w.fire()) { + when (!ctrl_done) { + ctrl_regs(ctrl_idx) := io.ctrl.w.bits.data + ctrl_idx := ctrl_idx + UInt(1) + } + when (ctrl_idx === UInt(nCtrlWords - 1)) { ctrl_done := Bool(true) } + when (io.ctrl.w.bits.last) { state := s_dma_req } + } + + dma_busy := (dma_busy | + Mux(io.dma.req.fire(), UIntToOH(dma_xact_id), UInt(0))) & + ~Mux(io.dma.resp.fire(), UIntToOH(io.dma.resp.bits.xact_id), UInt(0)) + + when (io.dma.req.fire()) { state := s_wresp } + when (io.ctrl.b.fire()) { state := s_idle } + + io.ctrl.ar.ready := Bool(false) + io.ctrl.aw.ready := (state === s_idle) + io.ctrl.w.ready := (state === s_wdata) + + io.ctrl.r.valid := Bool(false) + io.ctrl.b.valid := (state === s_wresp) + io.ctrl.b.bits := NastiWriteResponseChannel(id = ctrl_id) + + io.dma.req.valid := (state === s_dma_req) && !dma_busy.andR + io.dma.req.bits := dma_req + io.dma.req.bits.xact_id := dma_xact_id + + val resp_waddr_pending = Reg(init = Bool(false)) + val resp_wdata_pending = Reg(init = Bool(false)) + val resp_wresp_pending = Reg(init = Bool(false)) + val resp_pending = resp_waddr_pending || resp_wdata_pending || resp_wresp_pending + + val resp_client_id = Reg(UInt(width = dmaClientIdBits)) + val resp_status = Reg(UInt(width = dmaStatusBits)) + + io.dma.resp.ready := !resp_pending + + when (io.dma.resp.fire()) { + resp_client_id := io.dma.resp.bits.client_id + resp_status := io.dma.resp.bits.status + resp_waddr_pending := Bool(true) + resp_wdata_pending := Bool(true) + resp_wresp_pending := Bool(true) + } + + val addrTable = Vec.tabulate(nDmaClients) { i => + //UInt(addrMap(s"conf:csr$i").start + outstandingCSR * csrDataBytes) + require(false, "CSR MMIO ports no longer exist") + UInt(0) + } + + io.mmio.ar.valid := Bool(false) + io.mmio.aw.valid := resp_waddr_pending + io.mmio.aw.bits := NastiWriteAddressChannel( + id = UInt(0), + addr = addrTable(resp_client_id), + size = { require(false, "CSR MMIO ports no longer exist"); UInt(0) }) + io.mmio.w.valid := resp_wdata_pending + io.mmio.w.bits := NastiWriteDataChannel(data = resp_status) + io.mmio.b.ready := resp_wresp_pending + io.mmio.r.ready := Bool(false) + + when (io.mmio.aw.fire()) { resp_waddr_pending := Bool(false) } + when (io.mmio.w.fire()) { resp_wdata_pending := Bool(false) } + when (io.mmio.b.fire()) { resp_wresp_pending := Bool(false) } +} + +class DmaEngine(outstandingCSR: Int)(implicit p: Parameters) extends DmaModule()(p) { + val io = new Bundle { + val ctrl = (new 
NastiIO).flip + val mem = new ClientUncachedTileLinkIO + val mmio = new NastiIO + } + + val manager = Module(new DmaManager(outstandingCSR)) + val trackers = Module(new DmaTrackerFile) + + manager.io.ctrl <> io.ctrl + trackers.io.dma <> manager.io.dma + + val innerIOs = trackers.io.mem + val outerIOs = trackers.io.mmio :+ manager.io.mmio + + val innerArb = Module(new ClientUncachedTileLinkIOArbiter(innerIOs.size)) + innerArb.io.in <> innerIOs + io.mem <> innerArb.io.out + + val outerArb = Module(new NastiArbiter(outerIOs.size)) + outerArb.io.master <> outerIOs + io.mmio <> outerArb.io.slave + + assert(!io.mmio.b.valid || io.mmio.b.bits.resp === UInt(0), + "DmaEngine: NASTI write response error") + + assert(!io.mmio.r.valid || io.mmio.r.bits.resp === UInt(0), + "DmaEngine: NASTI read response error") +} + +class DmaTrackerFile(implicit p: Parameters) extends DmaModule()(p) { + val io = new Bundle { + val dma = (new DmaIO).flip + val mem = Vec(nDmaTransactors, new ClientUncachedTileLinkIO) + val mmio = Vec(nDmaTransactors, new NastiIO) + } + + val trackers = List.fill(nDmaTransactors) { Module(new DmaTracker) } + val reqReadys = Vec(trackers.map(_.io.dma.req.ready)).toBits + + io.mem <> trackers.map(_.io.mem) + io.mmio <> trackers.map(_.io.mmio) + + if (nDmaTransactors > 1) { + val resp_arb = Module(new RRArbiter(new DmaResponse, nDmaTransactors)) + resp_arb.io.in <> trackers.map(_.io.dma.resp) + io.dma.resp <> resp_arb.io.out + + val selection = PriorityEncoder(reqReadys) + trackers.zipWithIndex.foreach { case (tracker, i) => + tracker.io.dma.req.valid := io.dma.req.valid && selection === UInt(i) + tracker.io.dma.req.bits := io.dma.req.bits + } + io.dma.req.ready := reqReadys.orR + } else { + io.dma <> trackers.head.io.dma + } +} + +class DmaTracker(implicit p: Parameters) extends DmaModule()(p) + with HasTileLinkParameters with HasNastiParameters { + val io = new DmaTrackerIO + + private val blockOffset = tlBeatAddrBits + tlByteAddrBits + private val blockBytes = tlDataBeats * tlDataBytes + + val data_buffer = Reg(Vec(2 * tlDataBeats, Bits(width = tlDataBits))) + val get_inflight = Reg(UInt(2 * tlDataBeats)) + val put_inflight = Reg(Bool()) + val put_half = Reg(UInt(width = 1)) + val get_half = Reg(UInt(width = 1)) + val prefetch_put = Reg(Bool()) + val get_done = !get_inflight.orR + + val src_block = Reg(UInt(width = tlBlockAddrBits)) + val dst_block = Reg(UInt(width = tlBlockAddrBits)) + val offset = Reg(UInt(width = blockOffset)) + val alignment = Reg(UInt(width = blockOffset)) + val shift_dir = Reg(Bool()) + + val bytes_left = Reg(UInt(width = addrBits)) + val streaming = Reg(Bool()) + val stream_addr = Reg(UInt(width = nastiXAddrBits)) + val stream_len = Reg(UInt(width = nastiXLenBits)) + val stream_size = Reg(UInt(width = nastiXSizeBits)) + val stream_idx = Reg(UInt(width = blockOffset)) + val stream_bytesel = MuxLookup(stream_size, UInt("b11111111"), Seq( + UInt("b00") -> UInt("b00000001"), + UInt("b01") -> UInt("b00000011"), + UInt("b10") -> UInt("b00001111"))) + val stream_mask = FillInterleaved(8, stream_bytesel) + val stream_last = Reg(Bool()) + + val stream_word_bytes = UInt(1) << stream_size + val stream_beat_idx = stream_idx(blockOffset - 1, tlByteAddrBits) + val stream_byte_idx = stream_idx(tlByteAddrBits - 1, 0) + val stream_bitshift = Cat(stream_byte_idx, UInt(0, 3)) + val stream_in_beat = + (((io.mmio.r.bits.data & stream_mask) << stream_bitshift)) | + (data_buffer(stream_beat_idx) & ~(stream_mask << stream_bitshift)) + val stream_out_word = data_buffer(stream_beat_idx) 
>> stream_bitshift + val stream_out_last = bytes_left === stream_word_bytes + + val acq = io.mem.acquire.bits + val gnt = io.mem.grant.bits + + val (s_idle :: s_get :: s_put :: s_prefetch :: + s_stream_read_req :: s_stream_read_resp :: + s_stream_write_req :: s_stream_write_data :: s_stream_write_resp :: + s_wait :: s_resp :: Nil) = Enum(Bits(), 11) + val state = Reg(init = s_idle) + + val (put_beat, put_done) = Counter( + io.mem.acquire.fire() && acq.hasData(), tlDataBeats) + + val put_mask = Vec.tabulate(tlDataBytes) { i => + val byte_index = Cat(put_beat, UInt(i, tlByteAddrBits)) + byte_index >= offset && byte_index < bytes_left + }.toBits + + val prefetch_sent = io.mem.acquire.fire() && io.mem.acquire.bits.isPrefetch() + val prefetch_busy = Reg(init = UInt(0, tlMaxClientXacts)) + val (prefetch_id, _) = Counter(prefetch_sent, tlMaxClientXacts) + + val base_index = Cat(put_half, put_beat) + val put_data = Wire(init = Bits(0, tlDataBits)) + val beat_align = alignment(blockOffset - 1, tlByteAddrBits) + val bit_align = Cat(alignment(tlByteAddrBits - 1, 0), UInt(0, 3)) + val rev_align = UInt(tlDataBits) - bit_align + + def getBit(value: UInt, sel: UInt): Bool = + (value >> sel)(0) + + when (alignment === UInt(0)) { + put_data := data_buffer.read(base_index) + } .elsewhen (shift_dir) { + val shift_index = base_index - beat_align + when (bit_align === UInt(0)) { + put_data := data_buffer.read(shift_index) + } .otherwise { + val upper_bits = data_buffer.read(shift_index) + val lower_bits = data_buffer.read(shift_index - UInt(1)) + val upper_shifted = upper_bits << bit_align + val lower_shifted = lower_bits >> rev_align + put_data := upper_shifted | lower_shifted + } + } .otherwise { + val shift_index = base_index + beat_align + when (bit_align === UInt(0)) { + put_data := data_buffer.read(shift_index) + } .otherwise { + val upper_bits = data_buffer.read(shift_index + UInt(1)) + val lower_bits = data_buffer.read(shift_index) + val upper_shifted = upper_bits << rev_align + val lower_shifted = lower_bits >> bit_align + put_data := upper_shifted | lower_shifted + } + } + + val put_acquire = PutBlock( + client_xact_id = UInt(2), + addr_block = dst_block, + addr_beat = put_beat, + data = put_data, + wmask = Some(put_mask)) + + val get_acquire = GetBlock( + client_xact_id = get_half, + addr_block = src_block, + alloc = Bool(false)) + + val prefetch_acquire = Mux(prefetch_put, + PutPrefetch(client_xact_id = prefetch_id, addr_block = dst_block), + GetPrefetch(client_xact_id = prefetch_id, addr_block = dst_block)) + + val resp_xact_id = Reg(UInt(width = dmaXactIdBits)) + val resp_client_id = Reg(UInt(width = dmaClientIdBits)) + + io.mem.acquire.valid := (state === s_get) || + (state === s_put && get_done) || + (state === s_prefetch && !prefetch_busy(prefetch_id)) + io.mem.acquire.bits := MuxLookup( + state, prefetch_acquire, Seq( + s_get -> get_acquire, + s_put -> put_acquire)) + io.mem.grant.ready := Bool(true) + io.dma.req.ready := state === s_idle + io.dma.resp.valid := state === s_resp + io.dma.resp.bits.xact_id := resp_xact_id + io.dma.resp.bits.client_id := resp_client_id + io.dma.resp.bits.status := UInt(0) + io.mmio.ar.valid := (state === s_stream_read_req) + io.mmio.ar.bits := NastiReadAddressChannel( + id = UInt(0), + addr = stream_addr, + size = stream_size, + len = stream_len, + burst = BURST_FIXED) + io.mmio.r.ready := (state === s_stream_read_resp) + + io.mmio.aw.valid := (state === s_stream_write_req) + io.mmio.aw.bits := NastiWriteAddressChannel( + id = UInt(0), + addr = stream_addr, + 
size = stream_size, + len = stream_len, + burst = BURST_FIXED) + io.mmio.w.valid := (state === s_stream_write_data) && get_done + io.mmio.w.bits := NastiWriteDataChannel( + data = stream_out_word, + last = stream_out_last) + io.mmio.b.ready := (state === s_stream_write_resp) + + when (io.dma.req.fire()) { + val src_off = io.dma.req.bits.source(blockOffset - 1, 0) + val dst_off = io.dma.req.bits.dest(blockOffset - 1, 0) + val direction = src_off < dst_off + + resp_xact_id := io.dma.req.bits.xact_id + resp_client_id := io.dma.req.bits.client_id + src_block := io.dma.req.bits.source(addrBits - 1, blockOffset) + dst_block := io.dma.req.bits.dest(addrBits - 1, blockOffset) + alignment := Mux(direction, dst_off - src_off, src_off - dst_off) + shift_dir := direction + offset := dst_off + bytes_left := io.dma.req.bits.length + dst_off + get_inflight := UInt(0) + put_inflight := Bool(false) + get_half := UInt(0) + put_half := UInt(0) + streaming := Bool(false) + stream_len := (io.dma.req.bits.length >> io.dma.req.bits.size) - UInt(1) + stream_size := io.dma.req.bits.size + stream_last := Bool(false) + + when (io.dma.req.bits.cmd === DMA_CMD_COPY) { + state := s_get + } .elsewhen (io.dma.req.bits.cmd(2, 1) === UInt("b01")) { + prefetch_put := io.dma.req.bits.cmd(0) + state := s_prefetch + } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SIN) { + stream_addr := io.dma.req.bits.source + stream_idx := dst_off + streaming := Bool(true) + alignment := UInt(0) + state := s_stream_read_req + } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SOUT) { + stream_addr := io.dma.req.bits.dest + stream_idx := src_off + streaming := Bool(true) + bytes_left := io.dma.req.bits.length + state := s_stream_write_req + } + } + + when (io.mmio.ar.fire()) { state := s_stream_read_resp } + + when (io.mmio.r.fire()) { + data_buffer(stream_beat_idx) := stream_in_beat + stream_idx := stream_idx + stream_word_bytes + val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes + when (block_finished || io.mmio.r.bits.last) { state := s_put } + } + + when (io.mmio.aw.fire()) { state := s_get } + + when (io.mmio.w.fire()) { + stream_idx := stream_idx + stream_word_bytes + bytes_left := bytes_left - stream_word_bytes + val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes + when (stream_out_last) { + state := s_stream_write_resp + } .elsewhen (block_finished) { + state := s_get + } + } + + when (io.mmio.b.fire()) { state := s_resp } + + when (state === s_get && io.mem.acquire.ready) { + get_inflight := get_inflight | FillInterleaved(tlDataBeats, UIntToOH(get_half)) + src_block := src_block + UInt(1) + when (streaming) { + state := s_stream_write_data + } .otherwise { + val bytes_in_buffer = UInt(blockBytes) - alignment + val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off + get_half === UInt(0) && // this is the first block + bytes_in_buffer < bytes_left // there is still more data left to fetch + get_half := get_half + UInt(1) + when (!extra_read) { state := s_put } + } + } + + when (prefetch_sent) { + prefetch_busy := prefetch_busy | UIntToOH(prefetch_id) + when (bytes_left < UInt(blockBytes)) { + bytes_left := UInt(0) + state := s_resp + } .otherwise { + bytes_left := bytes_left - UInt(blockBytes) + dst_block := dst_block + UInt(1) + } + } + + when (io.mem.grant.fire()) { + when (gnt.g_type === Grant.prefetchAckType) { + prefetch_busy := prefetch_busy & ~UIntToOH(gnt.client_xact_id) + } .elsewhen (gnt.hasData()) { + val write_half = gnt.client_xact_id(0) + val write_idx = 
Cat(write_half, gnt.addr_beat) + get_inflight := get_inflight & ~UIntToOH(write_idx) + data_buffer.write(write_idx, gnt.data) + } .otherwise { + put_inflight := Bool(false) + } + } + + when (put_done) { // state === s_put + when (!streaming) { + put_half := put_half + UInt(1) + } + offset := UInt(0) + stream_idx := UInt(0) + when (bytes_left < UInt(blockBytes)) { + bytes_left := UInt(0) + } .otherwise { + bytes_left := bytes_left - UInt(blockBytes) + } + put_inflight := Bool(true) + dst_block := dst_block + UInt(1) + state := s_wait + } + + when (state === s_wait && get_done && !put_inflight) { + state := MuxCase(s_get, Seq( + (bytes_left === UInt(0)) -> s_resp, + streaming -> s_stream_read_resp)) + } + + when (io.dma.resp.fire()) { state := s_idle } +} diff --git a/uncore/src/main/scala/devices/Plic.scala b/uncore/src/main/scala/devices/Plic.scala new file mode 100644 index 00000000..776c581b --- /dev/null +++ b/uncore/src/main/scala/devices/Plic.scala @@ -0,0 +1,187 @@ +// See LICENSE for license details. + +package uncore.devices + +import Chisel._ +import Chisel.ImplicitConversions._ + +import junctions._ +import uncore.tilelink._ +import cde.Parameters + +class GatewayPLICIO extends Bundle { + val valid = Bool(OUTPUT) + val ready = Bool(INPUT) + val complete = Bool(INPUT) +} + +class LevelGateway extends Module { + val io = new Bundle { + val interrupt = Bool(INPUT) + val plic = new GatewayPLICIO + } + + val inFlight = Reg(init=Bool(false)) + when (io.interrupt && io.plic.ready) { inFlight := true } + when (io.plic.complete) { inFlight := false } + io.plic.valid := io.interrupt && !inFlight +} + +case class PLICConfig(nHartsIn: Int, supervisor: Boolean, nDevices: Int, nPriorities: Int) { + def contextsPerHart = if (supervisor) 2 else 1 + def nHarts = contextsPerHart * nHartsIn + def context(i: Int, mode: Char) = mode match { + case 'M' => i * contextsPerHart + case 'S' => require(supervisor); i * contextsPerHart + 1 + } + def claimAddr(i: Int, mode: Char) = hartBase + hartOffset(context(i, mode)) + claimOffset + def threshAddr(i: Int, mode: Char) = hartBase + hartOffset(context(i, mode)) + def enableAddr(i: Int, mode: Char) = enableBase + enableOffset(context(i, mode)) + def size = hartBase + hartOffset(maxHarts) + + def maxDevices = 1023 + def maxHarts = 15872 + def pendingBase = 0x1000 + def enableBase = 0x2000 + def hartBase = 0x200000 + require(hartBase >= enableBase + enableOffset(maxHarts)) + + def enableOffset(i: Int) = i * ((maxDevices+7)/8) + def hartOffset(i: Int) = i * 0x1000 + def claimOffset = 4 + def priorityBytes = 4 + + require(nDevices > 0 && nDevices <= maxDevices) + require(nHarts > 0 && nHarts <= maxHarts) + require(nPriorities >= 0 && nPriorities <= nDevices) +} + +/** Platform-Level Interrupt Controller */ +class PLIC(val cfg: PLICConfig)(implicit val p: Parameters) extends Module + with HasTileLinkParameters + with HasAddrMapParameters { + val io = new Bundle { + val devices = Vec(cfg.nDevices, new GatewayPLICIO).flip + val harts = Vec(cfg.nHarts, Bool()).asOutput + val tl = new ClientUncachedTileLinkIO().flip + } + + val priority = + if (cfg.nPriorities > 0) Reg(Vec(cfg.nDevices+1, UInt(width=log2Up(cfg.nPriorities+1)))) + else Wire(init=Vec.fill(cfg.nDevices+1)(UInt(1))) + val threshold = + if (cfg.nPriorities > 0) Reg(Vec(cfg.nHarts, UInt(width = log2Up(cfg.nPriorities+1)))) + else Wire(init=Vec.fill(cfg.nHarts)(UInt(0))) + val pending = Reg(init=Vec.fill(cfg.nDevices+1){Bool(false)}) + val enables = Reg(Vec(cfg.nHarts, Vec(cfg.nDevices+1, Bool()))) + + for 
((p, g) <- pending.tail zip io.devices) { + g.ready := !p + g.complete := false + when (g.valid) { p := true } + } + + def findMax(x: Seq[UInt]): (UInt, UInt) = { + if (x.length > 1) { + val half = 1 << (log2Ceil(x.length) - 1) + val lMax = findMax(x take half) + val rMax = findMax(x drop half) + val useLeft = lMax._1 >= rMax._1 + (Mux(useLeft, lMax._1, rMax._1), Mux(useLeft, lMax._2, UInt(half) + rMax._2)) + } else (x.head, UInt(0)) + } + + val maxDevs = Wire(Vec(cfg.nHarts, UInt(width = log2Up(pending.size)))) + for (hart <- 0 until cfg.nHarts) { + val effectivePriority = + for (((p, en), pri) <- (pending zip enables(hart) zip priority).tail) + yield Cat(p && en, pri) + val (maxPri, maxDev) = findMax((UInt(1) << priority(0).getWidth) +: effectivePriority) + + maxDevs(hart) := Reg(next = maxDev) + io.harts(hart) := Reg(next = maxPri) > Cat(UInt(1), threshold(hart)) + } + + val acq = Queue(io.tl.acquire, 1) + val read = acq.fire() && acq.bits.isBuiltInType(Acquire.getType) + val write = acq.fire() && acq.bits.isBuiltInType(Acquire.putType) + assert(!acq.fire() || read || write, "unsupported PLIC operation") + val addr = acq.bits.full_addr()(log2Up(cfg.size)-1,0) + + val claimant = + if (cfg.nHarts == 1) UInt(0) + else (addr - cfg.hartBase)(log2Up(cfg.hartOffset(cfg.nHarts))-1,log2Up(cfg.hartOffset(1))) + val hart = Wire(init = claimant) + val myMaxDev = maxDevs(claimant) + UInt(0) // XXX FIRRTL bug w/o the + UInt(0) + val myEnables = enables(hart) + val rdata = Wire(init = UInt(0, tlDataBits)) + val masked_wdata = (acq.bits.data & acq.bits.full_wmask()) | (rdata & ~acq.bits.full_wmask()) + + when (addr >= cfg.hartBase) { + val word = + if (tlDataBytes > cfg.claimOffset) UInt(0) + else addr(log2Up(cfg.claimOffset),log2Up(tlDataBytes)) + rdata := Cat(myMaxDev, UInt(0, 8*cfg.priorityBytes-threshold(0).getWidth), threshold(claimant)) >> (word * tlDataBits) + + when (read && addr(log2Ceil(cfg.claimOffset))) { + pending(myMaxDev) := false + } + when (write) { + when (if (tlDataBytes > cfg.claimOffset) acq.bits.wmask()(cfg.claimOffset) else addr(log2Ceil(cfg.claimOffset))) { + val dev = (acq.bits.data >> ((8 * cfg.claimOffset) % tlDataBits))(log2Up(pending.size)-1,0) + when (myEnables(dev)) { io.devices(dev-1).complete := true } + }.otherwise { + if (cfg.nPriorities > 0) threshold(claimant) := acq.bits.data + } + } + }.elsewhen (addr >= cfg.enableBase) { + val enableHart = + if (cfg.nHarts > 1) (addr - cfg.enableBase)(log2Up(cfg.enableOffset(cfg.nHarts))-1,log2Up(cfg.enableOffset(1))) + else UInt(0) + hart := enableHart + val word = + if (tlDataBits >= cfg.nHarts) UInt(0) + else addr(log2Up((cfg.nHarts+7)/8)-1,log2Up(tlDataBytes)) + for (i <- 0 until cfg.nHarts by tlDataBits) { + when (word === i/tlDataBits) { + rdata := Cat(myEnables.slice(i, i + tlDataBits).reverse) + for (j <- 0 until (tlDataBits min (myEnables.size - i))) { + when (write) { enables(enableHart)(i+j) := masked_wdata(j) } + } + } + } + }.elsewhen (addr >= cfg.pendingBase) { + val word = + if (tlDataBytes >= pending.size) UInt(0) + else addr(log2Up(pending.size)-1,log2Up(tlDataBytes)) + rdata := pending.toBits >> (word * tlDataBits) + }.otherwise { + val regsPerBeat = tlDataBytes >> log2Up(cfg.priorityBytes) + val word = + if (regsPerBeat >= priority.size) UInt(0) + else addr(log2Up(priority.size*cfg.priorityBytes)-1,log2Up(tlDataBytes)) + for (i <- 0 until priority.size by regsPerBeat) { + when (word === i/regsPerBeat) { + rdata := Cat(priority.slice(i, i + regsPerBeat).map(p => Cat(UInt(0, 8*cfg.priorityBytes-p.getWidth), 
p)).reverse) + for (j <- 0 until (regsPerBeat min (priority.size - i))) { + if (cfg.nPriorities > 0) when (write) { priority(i+j) := masked_wdata >> (j * 8 * cfg.priorityBytes) } + } + } + } + } + + priority(0) := 0 + pending(0) := false + for (e <- enables) + e(0) := false + + io.tl.grant.valid := acq.valid + acq.ready := io.tl.grant.ready + io.tl.grant.bits := Grant( + is_builtin_type = Bool(true), + g_type = acq.bits.getBuiltInGrantType(), + client_xact_id = acq.bits.client_xact_id, + manager_xact_id = UInt(0), + addr_beat = UInt(0), + data = rdata) +} diff --git a/uncore/src/main/scala/devices/Prci.scala b/uncore/src/main/scala/devices/Prci.scala new file mode 100644 index 00000000..19497aff --- /dev/null +++ b/uncore/src/main/scala/devices/Prci.scala @@ -0,0 +1,127 @@ +// See LICENSE for license details. + +package uncore.devices + +import Chisel._ +import Chisel.ImplicitConversions._ +import junctions._ +import junctions.NastiConstants._ +import uncore.tilelink._ +import cde.{Parameters, Field} + +/** Number of tiles */ +case object NTiles extends Field[Int] + +class PRCIInterrupts extends Bundle { + val meip = Bool() + val seip = Bool() + val debug = Bool() +} + +class PRCITileIO(implicit p: Parameters) extends Bundle { + val reset = Bool(OUTPUT) + val id = UInt(OUTPUT, log2Up(p(NTiles))) + val interrupts = new PRCIInterrupts { + val mtip = Bool() + val msip = Bool() + }.asOutput + + override def cloneType: this.type = new PRCITileIO().asInstanceOf[this.type] +} + +object PRCI { + def msip(hart: Int) = hart * msipBytes + def timecmp(hart: Int) = 0x4000 + hart * timecmpBytes + def time = 0xbff8 + def msipBytes = 4 + def timecmpBytes = 8 + def size = 0xc000 +} + +/** Power, Reset, Clock, Interrupt */ +class PRCI(implicit val p: Parameters) extends Module + with HasTileLinkParameters + with HasAddrMapParameters { + val io = new Bundle { + val interrupts = Vec(p(NTiles), new PRCIInterrupts).asInput + val tl = new ClientUncachedTileLinkIO().flip + val tiles = Vec(p(NTiles), new PRCITileIO) + val rtcTick = Bool(INPUT) + } + + val timeWidth = 64 + val timecmp = Reg(Vec(p(NTiles), UInt(width = timeWidth))) + val time = Reg(init=UInt(0, timeWidth)) + when (io.rtcTick) { time := time + UInt(1) } + + val ipi = Reg(init=Vec.fill(p(NTiles))(UInt(0, 32))) + + val acq = Queue(io.tl.acquire, 1) + val addr = acq.bits.full_addr()(log2Ceil(PRCI.size)-1,0) + val read = acq.bits.isBuiltInType(Acquire.getType) + val rdata = Wire(init=UInt(0)) + io.tl.grant.valid := acq.valid + acq.ready := io.tl.grant.ready + io.tl.grant.bits := Grant( + is_builtin_type = Bool(true), + g_type = acq.bits.getBuiltInGrantType(), + client_xact_id = acq.bits.client_xact_id, + manager_xact_id = UInt(0), + addr_beat = UInt(0), + data = rdata) + + when (addr(log2Floor(PRCI.time))) { + require(log2Floor(PRCI.timecmp(p(NTiles)-1)) < log2Floor(PRCI.time)) + rdata := load(Vec(time + UInt(0)), acq.bits) + }.elsewhen (addr >= PRCI.timecmp(0)) { + rdata := store(timecmp, acq.bits) + }.otherwise { + rdata := store(ipi, acq.bits) & Fill(tlDataBits/32, UInt(1, 32)) + } + + for ((tile, i) <- io.tiles zipWithIndex) { + tile.interrupts := io.interrupts(i) + tile.interrupts.msip := ipi(i)(0) + tile.interrupts.mtip := time >= timecmp(i) + tile.id := UInt(i) + } + + // TODO generalize these to help other TL slaves + def load(v: Vec[UInt], acq: Acquire): UInt = { + val w = v.head.getWidth + val a = acq.full_addr() + require(isPow2(w) && w >= 8) + if (w > tlDataBits) { + (v(a(log2Up(w/8*v.size)-1,log2Up(w/8))) >> 
a(log2Up(w/8)-1,log2Up(tlDataBytes)))(tlDataBits-1,0) + } else { + val row = for (i <- 0 until v.size by tlDataBits/w) + yield Cat(v.slice(i, i + tlDataBits/w).reverse) + if (row.size == 1) row.head + else Vec(row)(a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes))) + } + } + + def store(v: Vec[UInt], acq: Acquire): UInt = { + val w = v.head.getWidth + require(isPow2(w) && w >= 8) + val a = acq.full_addr() + val rdata = load(v, acq) + val wdata = (acq.data & acq.full_wmask()) | (rdata & ~acq.full_wmask()) + if (w <= tlDataBits) { + val word = + if (tlDataBits/w >= v.size) UInt(0) + else a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes)) + for (i <- 0 until v.size) { + when (acq.isBuiltInType(Acquire.putType) && word === i/(tlDataBits/w)) { + val base = i % (tlDataBits/w) + v(i) := wdata >> (w * (i % (tlDataBits/w))) + } + } + } else { + val i = a(log2Up(w/8*v.size)-1,log2Up(w/8)) + val mask = FillInterleaved(tlDataBits, UIntToOH(a(log2Up(w/8)-1,log2Up(tlDataBytes)))) + v(i) := (wdata & mask) | (v(i) & ~mask) + } + rdata + } +} diff --git a/uncore/src/main/scala/devices/Rom.scala b/uncore/src/main/scala/devices/Rom.scala new file mode 100644 index 00000000..0fd9dd3e --- /dev/null +++ b/uncore/src/main/scala/devices/Rom.scala @@ -0,0 +1,67 @@ +package uncore.devices + +import Chisel._ +import junctions._ +import uncore.tilelink._ +import uncore.util._ +import cde.{Parameters, Field} + +class ROMSlave(contents: Seq[Byte])(implicit val p: Parameters) extends Module + with HasTileLinkParameters + with HasAddrMapParameters { + val io = new ClientUncachedTileLinkIO().flip + + val acq = Queue(io.acquire, 1) + val single_beat = acq.bits.isBuiltInType(Acquire.getType) + val multi_beat = acq.bits.isBuiltInType(Acquire.getBlockType) + assert(!acq.valid || single_beat || multi_beat, "unsupported ROMSlave operation") + + val addr_beat = Reg(UInt()) + when (io.grant.fire()) { addr_beat := addr_beat + UInt(1) } + when (io.acquire.fire()) { addr_beat := io.acquire.bits.addr_beat } + + val byteWidth = tlDataBits / 8 + val rows = (contents.size + byteWidth - 1)/byteWidth + val rom = Vec.tabulate(rows) { i => + val slice = contents.slice(i*byteWidth, (i+1)*byteWidth) + UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8) + } + val raddr = Cat(acq.bits.addr_block, addr_beat) + val rdata = rom(if (rows == 1) UInt(0) else raddr(log2Up(rom.size)-1,0)) + + val last = !multi_beat || addr_beat === UInt(tlDataBeats-1) + io.grant.valid := acq.valid + acq.ready := io.grant.ready && last + io.grant.bits := Grant( + is_builtin_type = Bool(true), + g_type = acq.bits.getBuiltInGrantType(), + client_xact_id = acq.bits.client_xact_id, + manager_xact_id = UInt(0), + addr_beat = addr_beat, + data = rdata) +} + +class NastiROM(contents: Seq[Byte])(implicit p: Parameters) extends Module { + val io = new NastiIO().flip + val ar = Queue(io.ar, 1) + + // This assumes ROMs are in read-only parts of the address map. + // Reuse b_queue code from NastiErrorSlave if this assumption is bad. 
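+  // Packing sketch (hypothetical contents): bytes are folded little-endian
+  // into byteWidth-wide rows below, so contents = Seq(0x12, 0x34).map(_.toByte)
+  // on an 8-byte data channel becomes the single row 0x3412, with byte 0 in
+  // the least-significant bits.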
+ when (ar.valid) { assert(ar.bits.len === UInt(0), "Can't burst-read from NastiROM") } + assert(!(io.aw.valid || io.w.valid), "Can't write to NastiROM") + io.aw.ready := Bool(false) + io.w.ready := Bool(false) + io.b.valid := Bool(false) + + val byteWidth = io.r.bits.nastiXDataBits / 8 + val rows = (contents.size + byteWidth - 1)/byteWidth + val rom = Vec.tabulate(rows) { i => + val slice = contents.slice(i*byteWidth, (i+1)*byteWidth) + UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8) + } + val rdata_word = rom(if (rows == 1) UInt(0) else ar.bits.addr(log2Up(contents.size)-1,log2Up(byteWidth))) + val rdata = new LoadGen(Cat(UInt(1), ar.bits.size), ar.bits.addr, rdata_word, Bool(false), byteWidth).data + + io.r <> ar + io.r.bits := NastiReadDataChannel(ar.bits.id, rdata) +} diff --git a/uncore/src/main/scala/tilelink/Arbiters.scala b/uncore/src/main/scala/tilelink/Arbiters.scala new file mode 100644 index 00000000..ab1f05ae --- /dev/null +++ b/uncore/src/main/scala/tilelink/Arbiters.scala @@ -0,0 +1,196 @@ +package uncore.tilelink +import Chisel._ +import junctions._ +import cde.{Parameters, Field} + +/** Utility functions for constructing TileLinkIO arbiters */ +trait TileLinkArbiterLike extends HasTileLinkParameters { + // Some shorthand type variables + type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId + type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId + type ClientSourcedWithIdAndData = ClientToManagerChannel with HasClientTransactionId with HasTileLinkData + + val arbN: Int // The number of ports on the client side + + // These abstract funcs are filled in depending on whether the arbiter mucks with the + // outgoing client ids to track sourcing and then needs to revert them on the way back + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits + def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits + def arbIdx(in: ManagerSourcedWithId): UInt + + // The following functions are all wiring helpers for each of the different types of TileLink channels + + def hookupClientSource[M <: ClientSourcedWithIdAndData]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData() + val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _))) + clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.payload.client_xact_id := clientSourcedClientXactId(req.bits.payload, id) + req.ready := arb.ready + }} + mngr <> arb.io.out + } + + def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData]( + clts: Seq[DecoupledIO[M]], + mngr: DecoupledIO[M]) { + def hasData(m: M) = m.hasMultibeatData() + val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _))) + clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.client_xact_id := clientSourcedClientXactId(req.bits, id) + req.ready := arb.ready + }} + mngr <> arb.io.out + } + + def hookupManagerSourceWithHeader[M <: ManagerToClientChannel]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (mngr.bits.header.dst === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := 
mngr.bits + } + } + + def hookupManagerSourceWithId[M <: ManagerSourcedWithId]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (arbIdx(mngr.bits.payload) === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload) + } + } + + def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId]( + clts: Seq[DecoupledIO[M]], + mngr: DecoupledIO[M]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (arbIdx(mngr.bits) === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits) + } + } + + def hookupManagerSourceBroadcast[M <: Data](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { + clts.map{ _.valid := mngr.valid } + clts.map{ _.bits := mngr.bits } + mngr.ready := clts.map(_.ready).reduce(_&&_) + } + + def hookupFinish[M <: LogicalNetworkIO[Finish]]( clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { + val arb = Module(new RRArbiter(mngr.bits, arbN)) + arb.io.in <> clts + mngr <> arb.io.out + } +} + +/** Abstract base case for any Arbiters that have UncachedTileLinkIOs */ +abstract class UncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module + with TileLinkArbiterLike { + val io = new Bundle { + val in = Vec(arbN, new UncachedTileLinkIO).flip + val out = new UncachedTileLinkIO + } + hookupClientSource(io.in.map(_.acquire), io.out.acquire) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + +/** Abstract base case for any Arbiters that have cached TileLinkIOs */ +abstract class TileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module + with TileLinkArbiterLike { + val io = new Bundle { + val in = Vec(arbN, new TileLinkIO).flip + val out = new TileLinkIO + } + hookupClientSource(io.in.map(_.acquire), io.out.acquire) + hookupClientSource(io.in.map(_.release), io.out.release) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + +/** Appends the port index of the arbiter to the client_xact_id */ +trait AppendsArbiterId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = + Cat(in.client_xact_id, UInt(id, log2Up(arbN))) + def managerSourcedClientXactId(in: ManagerSourcedWithId) = { + /* This shouldn't be necessary, but Chisel3 doesn't emit correct Verilog + * when right shifting by too many bits. 
See
+   https://github.com/ucb-bar/firrtl/issues/69 */
+    if (in.client_xact_id.getWidth > log2Up(arbN))
+      in.client_xact_id >> log2Up(arbN)
+    else
+      UInt(0)
+  }
+  def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id(log2Up(arbN)-1,0).toUInt
+}
+
+/** Uses the client_xact_id as is (assumes it has been set to port index) */
+trait PassesId extends TileLinkArbiterLike {
+  def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id
+  def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id
+  def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
+}
+
+/** Overwrites some default client_xact_id with the port index */
+trait UsesNewId extends TileLinkArbiterLike {
+  def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN))
+  def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0)
+  def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
+}
+
+// Now we can mix in the various id-generation traits to make concrete arbiter classes
+class UncachedTileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with AppendsArbiterId
+class UncachedTileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with PassesId
+class UncachedTileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with UsesNewId
+class TileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with AppendsArbiterId
+class TileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with PassesId
+class TileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with UsesNewId
+
+/** Concrete uncached client-side arbiter that appends the arbiter's port id to client_xact_id */
+class ClientUncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
+  val io = new Bundle {
+    val in = Vec(arbN, new ClientUncachedTileLinkIO).flip
+    val out = new ClientUncachedTileLinkIO
+  }
+  if (arbN > 1) {
+    hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
+    hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
+  } else { io.out <> io.in.head }
+}
+
+/** Concrete client-side arbiter that appends the arbiter's port id to client_xact_id */
+class ClientTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
+  val io = new Bundle {
+    val in = Vec(arbN, new ClientTileLinkIO).flip
+    val out = new ClientTileLinkIO
+  }
+  if (arbN > 1) {
+    hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
+    hookupClientSourceHeaderless(io.in.map(_.release), io.out.release)
+    hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe)
+    hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
+  } else { io.out <> io.in.head }
+}
diff --git a/uncore/src/main/scala/tilelink/Definitions.scala b/uncore/src/main/scala/tilelink/Definitions.scala
new file mode 100644
index 00000000..86e59983
--- /dev/null
+++ b/uncore/src/main/scala/tilelink/Definitions.scala
@@ -0,0 +1,964 @@
+// See LICENSE for license details.
+ +package uncore.tilelink +import Chisel._ +import junctions._ +import uncore.coherence.CoherencePolicy +import uncore.Util._ +import scala.math.max +import uncore.constants._ +import cde.{Parameters, Field} + +case object CacheBlockOffsetBits extends Field[Int] +case object AmoAluOperandBits extends Field[Int] + +case object TLId extends Field[String] +case class TLKey(id: String) extends Field[TileLinkParameters] + +/** Parameters exposed to the top-level design, set based on + * external requirements or design space exploration + * + * Coherence policy used to define custom message types + * Number of manager agents + * Number of client agents that cache data and use custom [[uncore.Acquire]] types + * Number of client agents that do not cache data and use built-in [[uncore.Acquire]] types + * Maximum number of unique outstanding transactions per client + * Maximum number of clients multiplexed onto a single port + * Maximum number of unique outstanding transactions per manager + * Width of cache block addresses + * Total amount of data per cache block + * Number of data beats per cache block + **/ + +case class TileLinkParameters( + coherencePolicy: CoherencePolicy, + nManagers: Int, + nCachingClients: Int, + nCachelessClients: Int, + maxClientXacts: Int, + maxClientsPerPort: Int, + maxManagerXacts: Int, + dataBits: Int, + dataBeats: Int = 4, + overrideDataBitsPerBeat: Option[Int] = None + ) { + val nClients = nCachingClients + nCachelessClients + val writeMaskBits: Int = ((dataBits / dataBeats) - 1) / 8 + 1 + val dataBitsPerBeat: Int = overrideDataBitsPerBeat.getOrElse(dataBits / dataBeats) +} + + +/** Utility trait for building Modules and Bundles that use TileLink parameters */ +trait HasTileLinkParameters { + implicit val p: Parameters + val tlExternal = p(TLKey(p(TLId))) + val tlCoh = tlExternal.coherencePolicy + val tlNManagers = tlExternal.nManagers + val tlNCachingClients = tlExternal.nCachingClients + val tlNCachelessClients = tlExternal.nCachelessClients + val tlNClients = tlExternal.nClients + val tlClientIdBits = log2Up(tlNClients) + val tlManagerIdBits = log2Up(tlNManagers) + val tlMaxClientXacts = tlExternal.maxClientXacts + val tlMaxClientsPerPort = tlExternal.maxClientsPerPort + val tlMaxManagerXacts = tlExternal.maxManagerXacts + val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientsPerPort) + val tlManagerXactIdBits = log2Up(tlMaxManagerXacts) + val tlBlockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits) + val tlDataBeats = tlExternal.dataBeats + val tlDataBits = tlExternal.dataBitsPerBeat + val tlDataBytes = tlDataBits/8 + val tlWriteMaskBits = tlExternal.writeMaskBits + val tlBeatAddrBits = log2Up(tlDataBeats) + val tlByteAddrBits = log2Up(tlWriteMaskBits) + val tlMemoryOpcodeBits = M_SZ + val tlMemoryOperandSizeBits = MT_SZ + val tlAcquireTypeBits = max(log2Up(Acquire.nBuiltInTypes), + tlCoh.acquireTypeWidth) + val tlAcquireUnionBits = max(tlWriteMaskBits, + (tlByteAddrBits + + tlMemoryOperandSizeBits + + tlMemoryOpcodeBits)) + 1 + val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes), + tlCoh.grantTypeWidth) + 1 +/** Whether the underlying physical network preserves point-to-point ordering of messages */ + val tlNetworkPreservesPointToPointOrdering = false + val tlNetworkDoesNotInterleaveBeats = true + val amoAluOperandBits = p(AmoAluOperandBits) + val amoAluOperandBytes = amoAluOperandBits/8 +} + +abstract class TLModule(implicit val p: Parameters) extends Module + with HasTileLinkParameters +abstract class TLBundle(implicit val p: Parameters) extends 
junctions.ParameterizedBundle()(p) + with HasTileLinkParameters + +/** Base trait for all TileLink channels */ +abstract class TileLinkChannel(implicit p: Parameters) extends TLBundle()(p) { + def hasData(dummy: Int = 0): Bool + def hasMultibeatData(dummy: Int = 0): Bool +} +/** Directionality of message channel. Used to hook up logical network ports to physical network ports */ +abstract class ClientToManagerChannel(implicit p: Parameters) extends TileLinkChannel()(p) +/** Directionality of message channel. Used to hook up logical network ports to physical network ports */ +abstract class ManagerToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p) +/** Directionality of message channel. Used to hook up logical network ports to physical network ports */ +abstract class ClientToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p) // Unused for now + +/** Common signals that are used in multiple channels. + * These traits are useful for type parameterizing bundle wiring functions. + */ + +/** Address of a cache block. */ +trait HasCacheBlockAddress extends HasTileLinkParameters { + val addr_block = UInt(width = tlBlockAddrBits) + + def conflicts(that: HasCacheBlockAddress) = this.addr_block === that.addr_block + def conflicts(addr: UInt) = this.addr_block === addr +} + +/** Sub-block address or beat id of multi-beat data */ +trait HasTileLinkBeatId extends HasTileLinkParameters { + val addr_beat = UInt(width = tlBeatAddrBits) +} + +/* Client-side transaction id. Usually Miss Status Handling Register File index */ +trait HasClientTransactionId extends HasTileLinkParameters { + val client_xact_id = Bits(width = tlClientXactIdBits) +} + +/** Manager-side transaction id. Usually Transaction Status Handling Register File index. */ +trait HasManagerTransactionId extends HasTileLinkParameters { + val manager_xact_id = Bits(width = tlManagerXactIdBits) +} + +/** A single beat of cache block data */ +trait HasTileLinkData extends HasTileLinkBeatId { + val data = UInt(width = tlDataBits) + + def hasData(dummy: Int = 0): Bool + def hasMultibeatData(dummy: Int = 0): Bool + def first(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(0) + def last(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(tlDataBeats-1) +} + +/** An entire cache block of data */ +trait HasTileLinkBlock extends HasTileLinkParameters { + val data_buffer = Vec(tlDataBeats, UInt(width = tlDataBits)) + val wmask_buffer = Vec(tlDataBeats, UInt(width = tlWriteMaskBits)) +} + +/** The id of a client source or destination. Used in managers. 
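+ * For example, a manager-side module might compare this field against a particular port id to steer a response (an illustrative sketch; gnt is assumed to be a [[uncore.GrantToDst]] in scope): + * {{{ + * val forClientTwo = gnt.client_id === UInt(2) + * }}}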
*/ +trait HasClientId extends HasTileLinkParameters { + val client_id = UInt(width = tlClientIdBits) +} + +trait HasManagerId extends HasTileLinkParameters { + val manager_id = UInt(width = tlManagerIdBits) +} + +trait HasAcquireUnion extends HasTileLinkParameters { + val union = Bits(width = tlAcquireUnionBits) + + // Utility funcs for accessing subblock union: + def isBuiltInType(t: UInt): Bool + val opCodeOff = 1 + val opSizeOff = tlMemoryOpcodeBits + opCodeOff + val addrByteOff = tlMemoryOperandSizeBits + opSizeOff + val addrByteMSB = tlByteAddrBits + addrByteOff + /** Hint whether to allocate the block in any intervening caches */ + def allocate(dummy: Int = 0) = union(0) + /** Op code for [[uncore.PutAtomic]] operations */ + def op_code(dummy: Int = 0) = Mux( + isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType), + M_XWR, union(opSizeOff-1, opCodeOff)) + /** Operand size for [[uncore.PutAtomic]] */ + def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff) + /** Byte address for [[uncore.PutAtomic]] operand */ + def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff) + def amo_offset(dummy: Int = 0) = + if (tlByteAddrBits > log2Up(amoAluOperandBytes)) addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBytes)) + else UInt(0) + /** Bit offset of [[uncore.PutAtomic]] operand */ + def amo_shift_bytes(dummy: Int = 0) = UInt(amoAluOperandBytes)*amo_offset() + /** Write mask for [[uncore.Put]], [[uncore.PutBlock]], [[uncore.PutAtomic]] */ + def wmask(dummy: Int = 0): UInt = { + val is_amo = isBuiltInType(Acquire.putAtomicType) + val amo_mask = if (tlByteAddrBits > log2Up(amoAluOperandBytes)) + FillInterleaved(amoAluOperandBytes, UIntToOH(amo_offset())) + else Acquire.fullWriteMask + val is_put = isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType) + val put_mask = union(tlWriteMaskBits, 1) + Mux(is_amo, amo_mask, Mux(is_put, put_mask, UInt(0))) + } + /** Full, beat-sized writemask */ + def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask()) + + /** Does this beat carry only a partial writemask? */ + def hasPartialWritemask(dummy: Int = 0): Bool = wmask() =/= Acquire.fullWriteMask + +} + +trait HasAcquireType extends HasTileLinkParameters { + val is_builtin_type = Bool() + val a_type = UInt(width = tlAcquireTypeBits) + + /** Message type equality */ + def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===? + + /** Is this message a built-in or custom type */ + def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type + /** Is this message a particular built-in type */ + def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t + + /** Does this message refer to subblock operands using info in the Acquire.union subbundle */ + def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesOnSubBlocks) + + /** Is this message a built-in prefetch message */ + def isPrefetch(dummy: Int = 0): Bool = isBuiltInType() && + (is(Acquire.getPrefetchType) || is(Acquire.putPrefetchType)) + + /** Is this message a built-in atomic message */ + def isAtomic(dummy: Int = 0): Bool = isBuiltInType() && is(Acquire.putAtomicType) + + /** Is this message a built-in read message */ + def isGet(dummy: Int = 0): Bool = isBuiltInType() && (is(Acquire.getType) || is(Acquire.getBlockType)) + + /** Does this message contain data? Assumes that no custom message types have data. 
*/ + def hasData(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesWithData) + + /** Does this message contain multiple beats of data? Assumes that no custom message types have data. */ + def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() && + a_type.isOneOf(Acquire.typesWithMultibeatData) + + /** Mapping between each built-in Acquire type and a built-in Grant type. */ + def getBuiltInGrantType(dummy: Int = 0): UInt = Acquire.getBuiltInGrantType(this.a_type) +} + +trait HasProbeType extends HasTileLinkParameters { + val p_type = UInt(width = tlCoh.probeTypeWidth) + + def is(t: UInt) = p_type === t + def hasData(dummy: Int = 0) = Bool(false) + def hasMultibeatData(dummy: Int = 0) = Bool(false) +} + +trait MightBeVoluntary { + def isVoluntary(dummy: Int = 0): Bool +} + +trait HasReleaseType extends HasTileLinkParameters with MightBeVoluntary { + val voluntary = Bool() + val r_type = UInt(width = tlCoh.releaseTypeWidth) + + def is(t: UInt) = r_type === t + def hasData(dummy: Int = 0) = r_type.isOneOf(tlCoh.releaseTypesWithData) + def hasMultibeatData(dummy: Int = 0) = Bool(tlDataBeats > 1) && + r_type.isOneOf(tlCoh.releaseTypesWithData) + def isVoluntary(dummy: Int = 0) = voluntary + def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering) +} + +trait HasGrantType extends HasTileLinkParameters with MightBeVoluntary { + val is_builtin_type = Bool() + val g_type = UInt(width = tlGrantTypeBits) + + // Helper funcs + def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type + def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t + def is(t: UInt): Bool = g_type === t + def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(), + g_type.isOneOf(Grant.typesWithData), + g_type.isOneOf(tlCoh.grantTypesWithData)) + def hasMultibeatData(dummy: Int = 0): Bool = + Bool(tlDataBeats > 1) && Mux(isBuiltInType(), + g_type.isOneOf(Grant.typesWithMultibeatData), + g_type.isOneOf(tlCoh.grantTypesWithData)) + def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType) + def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary() +} + +/** TileLink channel bundle definitions */ + +/** The Acquire channel is used to initiate coherence protocol transactions in + * order to gain access to a cache block's data with certain permissions + * enabled. Messages sent over this channel may be custom types defined by + * a [[uncore.CoherencePolicy]] for cached data accesses or may be built-in types + * used for uncached data accesses. Acquires may contain data for Put or + * PutAtomic built-in types. After sending an Acquire, clients must + * wait for a manager to send them a [[uncore.Grant]] message in response. 
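+ * + * As a minimal sketch of the built-in flavor, a single-beat uncached read could be constructed with the [[uncore.Get]] factory (the address values here are hypothetical and a TileLink Parameters instance is assumed to be implicitly in scope): + * {{{ + * val acq = Get(client_xact_id = UInt(0), addr_block = UInt(0x100), addr_beat = UInt(0)) + * }}}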
+ */ +class AcquireMetadata(implicit p: Parameters) extends ClientToManagerChannel + with HasCacheBlockAddress + with HasClientTransactionId + with HasTileLinkBeatId + with HasAcquireType + with HasAcquireUnion { + /** Complete physical address for block, beat or operand */ + def full_addr(dummy: Int = 0) = + Cat(this.addr_block, this.addr_beat, + Mux(isBuiltInType() && this.a_type.isOneOf(Acquire.typesWithAddrByte), + this.addr_byte(), UInt(0, tlByteAddrBits))) +} + +/** [[uncore.AcquireMetadata]] with an extra field containing the data beat */ +class Acquire(implicit p: Parameters) extends AcquireMetadata + with HasTileLinkData + +/** [[uncore.AcquireMetadata]] with an extra field containing the entire cache block */ +class BufferedAcquire(implicit p: Parameters) extends AcquireMetadata + with HasTileLinkBlock + +/** [[uncore.Acquire]] with an extra field stating its source id */ +class AcquireFromSrc(implicit p: Parameters) extends Acquire + with HasClientId + +/** [[uncore.BufferedAcquire]] with an extra field stating its source id */ +class BufferedAcquireFromSrc(implicit p: Parameters) extends BufferedAcquire + with HasClientId + +/** Used to track metadata for transactions where multiple secondary misses have been merged + * and handled by a single transaction tracker. + */ +class SecondaryMissInfo(implicit p: Parameters) extends TLBundle + with HasClientTransactionId + with HasTileLinkBeatId + with HasClientId + with HasAcquireType + +/** Contains definitions of the built-in Acquire types and a factory + * for [[uncore.Acquire]] + * + * In general you should avoid using this factory directly and use + * [[uncore.ClientMetadata.makeAcquire]] for custom cached Acquires and + * [[uncore.Get]], [[uncore.Put]], etc. for built-in uncached Acquires. + * + * @param is_builtin_type built-in or custom type message? + * @param a_type built-in type enum or custom type enum + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat) + * @param data data being put outwards + * @param union additional fields used for uncached types + */ +object Acquire { + val nBuiltInTypes = 7 + //TODO: Use Enum + def getType = UInt("b000") // Get a single beat of data + def getBlockType = UInt("b001") // Get a whole block of data + def putType = UInt("b010") // Put a single beat of data + def putBlockType = UInt("b011") // Put a whole block of data + def putAtomicType = UInt("b100") // Perform an atomic memory op + def getPrefetchType = UInt("b101") // Prefetch a whole block of data + def putPrefetchType = UInt("b110") // Prefetch a whole block of data, with intent to write + def typesWithData = Vec(putType, putBlockType, putAtomicType) + def typesWithMultibeatData = Vec(putBlockType) + def typesOnSubBlocks = Vec(putType, getType, putAtomicType) + def typesWithAddrByte = Vec(getType, putAtomicType) + + /** Mapping between each built-in Acquire type and a built-in Grant type. 
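+ * For example, getType expects getDataBeatType in response, while all the Put variants are acknowledged with putAckType (see the MuxLookup below).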
*/ + def getBuiltInGrantType(a_type: UInt): UInt = { + MuxLookup(a_type, Grant.putAckType, Array( + Acquire.getType -> Grant.getDataBeatType, + Acquire.getBlockType -> Grant.getDataBlockType, + Acquire.putType -> Grant.putAckType, + Acquire.putBlockType -> Grant.putAckType, + Acquire.putAtomicType -> Grant.getDataBeatType, + Acquire.getPrefetchType -> Grant.prefetchAckType, + Acquire.putPrefetchType -> Grant.prefetchAckType)) + } + + def makeUnion( + a_type: UInt, + addr_byte: UInt, + operand_size: UInt, + opcode: UInt, + wmask: UInt, + alloc: Bool) + (implicit p: Parameters): UInt = { + + val tlExternal = p(TLKey(p(TLId))) + val tlWriteMaskBits = tlExternal.writeMaskBits + val tlByteAddrBits = log2Up(tlWriteMaskBits) + + // These had better be the right size when we cat them together! + val my_addr_byte = (UInt(0, tlByteAddrBits) | addr_byte)(tlByteAddrBits-1, 0) + val my_operand_size = (UInt(0, MT_SZ) | operand_size)(MT_SZ-1, 0) + val my_opcode = (UInt(0, M_SZ) | opcode)(M_SZ-1, 0) + val my_wmask = (UInt(0, tlWriteMaskBits) | wmask)(tlWriteMaskBits-1, 0) + + MuxLookup(a_type, UInt(0), Array( + Acquire.getType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc), + Acquire.getBlockType -> Cat(my_operand_size, my_opcode, alloc), + Acquire.putType -> Cat(my_wmask, alloc), + Acquire.putBlockType -> Cat(my_wmask, alloc), + Acquire.putAtomicType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc), + Acquire.getPrefetchType -> Cat(M_XRD, alloc), + Acquire.putPrefetchType -> Cat(M_XWR, alloc))) + } + + def fullWriteMask(implicit p: Parameters) = SInt(-1, width = p(TLKey(p(TLId))).writeMaskBits).toUInt + + // Most generic constructor + def apply( + is_builtin_type: Bool, + a_type: Bits, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0), + union: UInt = UInt(0)) + (implicit p: Parameters): Acquire = { + val acq = Wire(new Acquire) + acq.is_builtin_type := is_builtin_type + acq.a_type := a_type + acq.client_xact_id := client_xact_id + acq.addr_block := addr_block + acq.addr_beat := addr_beat + acq.data := data + acq.union := union + acq + } + + // Copy constructor + def apply(a: Acquire): Acquire = { + val acq = Wire(new Acquire()(a.p)) + acq := a + acq + } +} + +object BuiltInAcquireBuilder { + def apply( + a_type: UInt, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0), + addr_byte: UInt = UInt(0), + operand_size: UInt = MT_Q, + opcode: UInt = UInt(0), + wmask: UInt = UInt(0), + alloc: Bool = Bool(true)) + (implicit p: Parameters): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = a_type, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Acquire.makeUnion(a_type, addr_byte, operand_size, opcode, wmask, alloc)) + } +} + +/** Get a single beat of data from the outer memory hierarchy + * + * The client can hint whether the block containing this beat should be + * allocated in the intervening levels of the hierarchy. 
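+ * + * A hedged sketch of a sub-word read using the second factory below (blockAddr is a placeholder address and MT_W is the word operand size from the memory constants): + * {{{ + * val getWord = Get(client_xact_id = UInt(1), addr_block = blockAddr, addr_beat = UInt(2), addr_byte = UInt(4), operand_size = MT_W, alloc = Bool(false)) + * }}}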
+ * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat) + * @param addr_byte sub-block address (which byte) + * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]] + * @param alloc hint whether the block should be allocated in intervening caches + */ +object Get { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + alloc: Bool = Bool(true)) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.getType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + opcode = M_XRD, + alloc = alloc) + } + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + addr_byte: UInt, + operand_size: UInt, + alloc: Bool) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.getType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + addr_byte = addr_byte, + operand_size = operand_size, + opcode = M_XRD, + alloc = alloc) + } +} + +/** Get a whole cache block of data from the outer memory hierarchy + * + * The client can hint whether the block should be allocated in the + * intervening levels of the hierarchy. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param alloc hint whether the block should be allocated in intervening caches + */ +object GetBlock { + def apply( + client_xact_id: UInt = UInt(0), + addr_block: UInt, + alloc: Bool = Bool(true)) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.getBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + opcode = M_XRD, + alloc = alloc) + } +} + +/** Prefetch a cache block into the next-outermost level of the memory hierarchy + * with read permissions. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + */ +object GetPrefetch { + def apply( + client_xact_id: UInt, + addr_block: UInt) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.getPrefetchType, + client_xact_id = client_xact_id, + addr_block = addr_block) + } +} + +/** Put a single beat of data into the outer memory hierarchy + * + * The block will be allocated in the next-outermost level of the hierarchy. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat) + * @param data data being refilled to the original requestor + * @param wmask per-byte write mask for this beat + * @param alloc hint whether the block should be allocated in intervening caches + */ +object Put { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + wmask: Option[UInt]= None, + alloc: Bool = Bool(true)) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.putType, + addr_block = addr_block, + addr_beat = addr_beat, + client_xact_id = client_xact_id, + data = data, + wmask = wmask.getOrElse(Acquire.fullWriteMask), + alloc = alloc) + } +} + +/** Put a whole cache block of data into the outer memory hierarchy + * + * If the write mask is not full, the block will be allocated in the + * next-outermost level of the hierarchy. If the write mask is full, the + * client can hint whether the block should be allocated or not. 
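+ * + * As an illustrative sketch, a full-block write issues one PutBlock per beat under a shared transaction id (beat and writeData are assumed to come from the surrounding tracker logic): + * {{{ + * val put = PutBlock(client_xact_id = UInt(0), addr_block = blockAddr, addr_beat = beat, data = writeData) + * }}}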
+ * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat of several) + * @param data data being refilled to the original requestor + * @param wmask per-byte write mask for this beat + * @param alloc hint whether the block should be allocated in intervening caches + */ +object PutBlock { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + wmask: Option[UInt] = None, + alloc: Bool = Bool(true)) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.putBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + wmask = wmask.getOrElse(Acquire.fullWriteMask), + alloc = alloc) + } +} + +/** Prefetch a cache block into the next-outermost level of the memory hierarchy + * with write permissions. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + */ +object PutPrefetch { + def apply( + client_xact_id: UInt, + addr_block: UInt) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.putPrefetchType, + client_xact_id = client_xact_id, + addr_block = addr_block) + } +} + +/** Perform an atomic memory operation in the next-outermost level of the memory hierarchy + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (within which beat) + * @param addr_byte sub-block address (which byte) + * @param atomic_opcode {swap, add, xor, and, min, max, minu, maxu} from [[uncore.MemoryOpConstants]] + * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]] + * @param data source operand data + */ +object PutAtomic { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + addr_byte: UInt, + atomic_opcode: UInt, + operand_size: UInt, + data: UInt) + (implicit p: Parameters): Acquire = { + BuiltInAcquireBuilder( + a_type = Acquire.putAtomicType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + addr_byte = addr_byte, + operand_size = operand_size, + opcode = atomic_opcode) + } +} + +/** The Probe channel is used to force clients to release data or cede permissions + * on a cache block. Clients respond to Probes with [[uncore.Release]] messages. + * The available types of Probes are customized by a particular + * [[uncore.CoherencePolicy]]. + */ +class Probe(implicit p: Parameters) extends ManagerToClientChannel + with HasCacheBlockAddress + with HasProbeType + +/** [[uncore.Probe]] with an extra field stating its destination id */ +class ProbeToDst(implicit p: Parameters) extends Probe()(p) with HasClientId + +/** Contains factories for [[uncore.Probe]] and [[uncore.ProbeToDst]] + * + * In general you should avoid using these factories directly and use + * [[uncore.ManagerMetadata.makeProbe(UInt,Acquire)* makeProbe]] instead. 
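+ * + * A minimal sketch with placeholder operands (probeType would normally come from the coherence policy, and acq from an incoming Acquire): + * {{{ + * val prb = Probe(dst = UInt(0), p_type = probeType, addr_block = acq.addr_block) + * }}}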
+ * + * @param dst id of client to which probe should be sent + * @param p_type custom probe type + * @param addr_block address of the cache block + */ +object Probe { + def apply(p_type: UInt, addr_block: UInt)(implicit p: Parameters): Probe = { + val prb = Wire(new Probe) + prb.p_type := p_type + prb.addr_block := addr_block + prb + } + def apply(dst: UInt, p_type: UInt, addr_block: UInt)(implicit p: Parameters): ProbeToDst = { + val prb = Wire(new ProbeToDst) + prb.client_id := dst + prb.p_type := p_type + prb.addr_block := addr_block + prb + } +} + +/** The Release channel is used to release data or permission back to the manager + * in response to [[uncore.Probe]] messages. It can also be used to voluntarily + * write back data, for example in the event that dirty data must be evicted on + * a cache miss. The available types of Release messages are always customized by + * a particular [[uncore.CoherencePolicy]]. Releases may contain data or may be + * simple acknowledgements. Voluntary Releases are acknowledged with [[uncore.Grant Grants]]. + */ +class ReleaseMetadata(implicit p: Parameters) extends ClientToManagerChannel + with HasTileLinkBeatId + with HasCacheBlockAddress + with HasClientTransactionId + with HasReleaseType { + def full_addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, UInt(0, width = tlByteAddrBits)) +} + +/** [[uncore.ReleaseMetadata]] with an extra field containing the data beat */ +class Release(implicit p: Parameters) extends ReleaseMetadata + with HasTileLinkData + +/** [[uncore.ReleaseMetadata]] with an extra field containing the entire cache block */ +class BufferedRelease(implicit p: Parameters) extends ReleaseMetadata + with HasTileLinkBlock + +/** [[uncore.Release]] with an extra field stating its source id */ +class ReleaseFromSrc(implicit p: Parameters) extends Release + with HasClientId + +/** [[uncore.BufferedRelease]] with an extra field stating its source id */ +class BufferedReleaseFromSrc(implicit p: Parameters) extends BufferedRelease + with HasClientId + +/** Contains a [[uncore.Release]] factory + * + * In general you should avoid using this factory directly and use + * [[uncore.ClientMetadata.makeRelease]] instead. + * + * @param voluntary is this a voluntary writeback + * @param r_type type enum defined by coherence protocol + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat beat id of the data + * @param data data being written back + */ +object Release { + def apply( + voluntary: Bool, + r_type: UInt, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt) + (implicit p: Parameters): Release = { + val rel = Wire(new Release) + rel.r_type := r_type + rel.client_xact_id := client_xact_id + rel.addr_block := addr_block + rel.addr_beat := addr_beat + rel.data := data + rel.voluntary := voluntary + rel + } + + def apply( + src: UInt, + voluntary: Bool, + r_type: UInt, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)) + (implicit p: Parameters): ReleaseFromSrc = { + val rel = Wire(new ReleaseFromSrc) + rel.client_id := src + rel.voluntary := voluntary + rel.r_type := r_type + rel.client_xact_id := client_xact_id + rel.addr_block := addr_block + rel.addr_beat := addr_beat + rel.data := data + rel + } +} + +/** The Grant channel is used to refill data or grant permissions requested of the + * manager agent via an [[uncore.Acquire]] message. 
It is also used to acknowledge + * the receipt of voluntary writebacks from clients in the form of [[uncore.Release]] + * messages. There are built-in Grant messages used for Gets and Puts, and + * coherence policies may also define custom Grant types. Grants may contain data + * or may be simple acknowledgements. Grants are responded to with [[uncore.Finish]]. + */ +class GrantMetadata(implicit p: Parameters) extends ManagerToClientChannel + with HasTileLinkBeatId + with HasClientTransactionId + with HasManagerTransactionId + with HasGrantType { + def makeFinish(dummy: Int = 0): Finish = { + val f = Wire(new Finish) + f.manager_xact_id := this.manager_xact_id + f + } +} + +/** [[uncore.GrantMetadata]] with an extra field containing a single beat of data */ +class Grant(implicit p: Parameters) extends GrantMetadata + with HasTileLinkData + +/** [[uncore.Grant]] with an extra field stating its destination */ +class GrantToDst(implicit p: Parameters) extends Grant + with HasClientId + +/** [[uncore.Grant]] with an extra field stating the id of its source manager */ +class GrantFromSrc(implicit p: Parameters) extends Grant + with HasManagerId { + override def makeFinish(dummy: Int = 0): FinishToDst = { + val f = Wire(new FinishToDst) + f.manager_xact_id := this.manager_xact_id + f.manager_id := this.manager_id + f + } +} + +/** [[uncore.GrantMetadata]] with an extra field containing an entire cache block */ +class BufferedGrant(implicit p: Parameters) extends GrantMetadata + with HasTileLinkBlock + +/** [[uncore.BufferedGrant]] with an extra field stating its destination */ +class BufferedGrantToDst(implicit p: Parameters) extends BufferedGrant + with HasClientId + +/** Contains definitions of the built-in grant types and factories + * for [[uncore.Grant]] and [[uncore.GrantToDst]] + * + * In general you should avoid using these factories directly and use + * [[uncore.ManagerMetadata.makeGrant(uncore.AcquireFromSrc* makeGrant]] instead. + * + * @param dst id of client to which grant should be sent + * @param is_builtin_type built-in or custom type message? 
+ * @param g_type built-in type enum or custom type enum + * @param client_xact_id client's transaction id + * @param manager_xact_id manager's transaction id + * @param addr_beat beat id of the data + * @param data data being refilled to the original requestor + */ +object Grant { + val nBuiltInTypes = 5 + def voluntaryAckType = UInt("b000") // For acking Releases + def prefetchAckType = UInt("b001") // For acking any kind of Prefetch + def putAckType = UInt("b011") // For acking any kind of non-prefetch Put + def getDataBeatType = UInt("b100") // Supplying a single beat of Get + def getDataBlockType = UInt("b101") // Supplying all beats of a GetBlock + def typesWithData = Vec(getDataBlockType, getDataBeatType) + def typesWithMultibeatData = Vec(getDataBlockType) + + def apply( + is_builtin_type: Bool, + g_type: UInt, + client_xact_id: UInt, + manager_xact_id: UInt, + addr_beat: UInt, + data: UInt) + (implicit p: Parameters): Grant = { + val gnt = Wire(new Grant) + gnt.is_builtin_type := is_builtin_type + gnt.g_type := g_type + gnt.client_xact_id := client_xact_id + gnt.manager_xact_id := manager_xact_id + gnt.addr_beat := addr_beat + gnt.data := data + gnt + } + + def apply( + dst: UInt, + is_builtin_type: Bool, + g_type: UInt, + client_xact_id: UInt, + manager_xact_id: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)) + (implicit p: Parameters): GrantToDst = { + val gnt = Wire(new GrantToDst) + gnt.client_id := dst + gnt.is_builtin_type := is_builtin_type + gnt.g_type := g_type + gnt.client_xact_id := client_xact_id + gnt.manager_xact_id := manager_xact_id + gnt.addr_beat := addr_beat + gnt.data := data + gnt + } +} + +/** The Finish channel is used to provide a global ordering of transactions + * in networks that do not guarantee point-to-point ordering of messages. + * A Finish message is sent as acknowledgement of receipt of a [[uncore.Grant]]. + * When a Finish message is received, a manager knows it is safe to begin + * processing other transactions that touch the same cache block. + */ +class Finish(implicit p: Parameters) extends ClientToManagerChannel()(p) + with HasManagerTransactionId { + def hasData(dummy: Int = 0) = Bool(false) + def hasMultibeatData(dummy: Int = 0) = Bool(false) +} + +/** [[uncore.Finish]] with an extra field stating its destination */ +class FinishToDst(implicit p: Parameters) extends Finish + with HasManagerId + +/** Complete IO definition for incoherent TileLink, including networking headers */ +class UncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) { + val acquire = new DecoupledIO(new LogicalNetworkIO(new Acquire)) + val grant = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip + val finish = new DecoupledIO(new LogicalNetworkIO(new Finish)) +} + +/** Complete IO definition for coherent TileLink, including networking headers */ +class TileLinkIO(implicit p: Parameters) extends UncachedTileLinkIO()(p) { + val probe = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip + val release = new DecoupledIO(new LogicalNetworkIO(new Release)) +} + +/** This version of UncachedTileLinkIO does not contain network headers. + * It is intended for use within client agents. + * + * Headers are provided in the top-level that instantiates the clients and network, + * probably using a [[uncore.ClientTileLinkNetworkPort]] module. + * By eliding the header subbundles within the clients we can enable + * hierarchical P-and-R while minimizing unconnected port errors in GDS. 
+ * + * Secondly, this version of the interface elides [[uncore.Finish]] messages, with the + * assumption that a [[uncore.FinishUnit]] has been coupled to the TileLinkIO port + * to deal with acking received [[uncore.Grant Grants]]. + */ +class ClientUncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) { + val acquire = new DecoupledIO(new Acquire) + val grant = new DecoupledIO(new Grant).flip +} + +/** This version of TileLinkIO does not contain network headers. + * It is intended for use within client agents. + */ +class ClientTileLinkIO(implicit p: Parameters) extends TLBundle()(p) { + val acquire = new DecoupledIO(new Acquire) + val probe = new DecoupledIO(new Probe).flip + val release = new DecoupledIO(new Release) + val grant = new DecoupledIO(new GrantFromSrc).flip + val finish = new DecoupledIO(new FinishToDst) +} + +/** This version of TileLinkIO does not contain network headers, but + * every channel does include an extra client_id subbundle. + * It is intended for use within manager agents. + * + * Managers need to track where [[uncore.Acquire]] and [[uncore.Release]] messages + * originated so that they can send a [[uncore.Grant]] to the right place. + * Similarly they must be able to issue Probes to particular clients. + * However, we'd still prefer to have [[uncore.ManagerTileLinkNetworkPort]] fill in + * the header.src to enable hierarchical P-and-R of the managers. Additionally, + * coherent clients might be mapped to random network port ids, and we'll leave it to the + * [[uncore.ManagerTileLinkNetworkPort]] to apply the correct mapping. Managers do need to + * see Finish messages so they know when to allow new transactions on a cache + * block to proceed. + */ +class ManagerTileLinkIO(implicit p: Parameters) extends TLBundle()(p) { + val acquire = new DecoupledIO(new AcquireFromSrc).flip + val grant = new DecoupledIO(new GrantToDst) + val finish = new DecoupledIO(new Finish).flip + val probe = new DecoupledIO(new ProbeToDst) + val release = new DecoupledIO(new ReleaseFromSrc).flip +} diff --git a/uncore/src/main/scala/tilelink/Interconnect.scala b/uncore/src/main/scala/tilelink/Interconnect.scala new file mode 100644 index 00000000..353dbb80 --- /dev/null +++ b/uncore/src/main/scala/tilelink/Interconnect.scala @@ -0,0 +1,386 @@ +package uncore.tilelink + +import Chisel._ +import junctions._ +import scala.collection.mutable.ArraySeq +import uncore.util._ +import cde.{Parameters, Field} + + +/** PortedTileLinkNetworks combine a TileLink protocol with a particular physical + * network implementation. + * + * Specifically, they provide mappings between ClientTileLinkIO/ + * ManagerTileLinkIO channels and LogicalNetwork ports (i.e. generic + * TileLinkIO with networking headers). Channels coming into the network have + * appropriate networking headers appended and outgoing channels have their + * headers stripped. + * + * @constructor base class constructor for Ported TileLink NoC + * @param addrToManagerId a mapping from a physical address to the network + * id of a coherence manager + * @param sharerToClientId a mapping from the id of a particular coherent + * client (as determined by e.g. 
the directory) to the network id + * of that client + * @param clientDepths the depths of the queue that should be used to buffer + * each channel on the client side of the network + * @param managerDepths the depths of the queue that should be used to buffer + * each channel on the manager side of the network + */ +abstract class PortedTileLinkNetwork( + addrToManagerId: UInt => UInt, + sharerToClientId: UInt => UInt, + clientDepths: TileLinkDepths, + managerDepths: TileLinkDepths) + (implicit p: Parameters) extends TLModule()(p) { + val nClients = tlNClients + val nManagers = tlNManagers + val io = new Bundle { + val clients_cached = Vec(tlNCachingClients, new ClientTileLinkIO).flip + val clients_uncached = Vec(tlNCachelessClients, new ClientUncachedTileLinkIO).flip + val managers = Vec(nManagers, new ManagerTileLinkIO).flip + } + + val clients = (io.clients_cached ++ io.clients_uncached).zipWithIndex.map { + case (io, idx) => { + val qs = Module(new TileLinkEnqueuer(clientDepths)) + io match { + case c: ClientTileLinkIO => { + val port = Module(new ClientTileLinkNetworkPort(idx, addrToManagerId)) + port.io.client <> c + qs.io.client <> port.io.network + qs.io.manager + } + case u: ClientUncachedTileLinkIO => { + val port = Module(new ClientUncachedTileLinkNetworkPort(idx, addrToManagerId)) + port.io.client <> u + qs.io.client <> port.io.network + qs.io.manager + } + } + } + } + + val managers = io.managers.zipWithIndex.map { + case (m, i) => { + val port = Module(new ManagerTileLinkNetworkPort(i, sharerToClientId)) + val qs = Module(new TileLinkEnqueuer(managerDepths)) + port.io.manager <> m + port.io.network <> qs.io.manager + qs.io.client + } + } +} + +/** A simple arbiter for each channel that also deals with header-based routing. + * Assumes a single manager agent. */ +class PortedTileLinkArbiter( + sharerToClientId: UInt => UInt = (u: UInt) => u, + clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0), + managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0)) + (implicit p: Parameters) + extends PortedTileLinkNetwork(u => UInt(0), sharerToClientId, clientDepths, managerDepths)(p) + with TileLinkArbiterLike + with PassesId { + val arbN = nClients + require(nManagers == 1) + if(arbN > 1) { + hookupClientSource(clients.map(_.acquire), managers.head.acquire) + hookupClientSource(clients.map(_.release), managers.head.release) + hookupFinish(clients.map(_.finish), managers.head.finish) + hookupManagerSourceWithHeader(clients.map(_.probe), managers.head.probe) + hookupManagerSourceWithHeader(clients.map(_.grant), managers.head.grant) + } else { + managers.head <> clients.head + } +} + +/** Provides a separate physical crossbar for each channel. Assumes multiple manager + * agents. Managers are assigned to higher physical network port ids than + * clients, and translations between logical network id and physical crossbar + * port id are done automatically. 
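+ * + * A sketch of an instantiation that routes on one hypothetical address bit (bit 6 of the block address selects between two managers; the mapping is illustrative only): + * {{{ + * val net = Module(new PortedTileLinkCrossbar(addrToManagerId = addr => addr(6).toUInt)) + * }}}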
+ */ +class PortedTileLinkCrossbar( + addrToManagerId: UInt => UInt = u => UInt(0), + sharerToClientId: UInt => UInt = u => u, + clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0), + managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0)) + (implicit p: Parameters) + extends PortedTileLinkNetwork(addrToManagerId, sharerToClientId, clientDepths, managerDepths)(p) { + val n = p(LNEndpoints) + val phyHdrWidth = log2Up(n) + val count = tlDataBeats + // Actually instantiate the particular networks required for TileLink + val acqNet = Module(new BasicBus(CrossbarConfig(n, new Acquire, count, Some((a: PhysicalNetworkIO[Acquire]) => a.payload.hasMultibeatData())))) + val relNet = Module(new BasicBus(CrossbarConfig(n, new Release, count, Some((r: PhysicalNetworkIO[Release]) => r.payload.hasMultibeatData())))) + val prbNet = Module(new BasicBus(CrossbarConfig(n, new Probe))) + val gntNet = Module(new BasicBus(CrossbarConfig(n, new Grant, count, Some((g: PhysicalNetworkIO[Grant]) => g.payload.hasMultibeatData())))) + val ackNet = Module(new BasicBus(CrossbarConfig(n, new Finish))) + + // Aliases for the various network IO bundle types + type PNIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]] + type LNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]] + type FromCrossbar[T <: Data] = PNIO[T] => LNIO[T] + type ToCrossbar[T <: Data] = LNIO[T] => PNIO[T] + + // Shims for converting between logical network IOs and physical network IOs + def crossbarToManagerShim[T <: Data](in: PNIO[T]): LNIO[T] = { + val out = DefaultFromPhysicalShim(in) + out.bits.header.src := in.bits.header.src - UInt(nManagers) + out + } + def crossbarToClientShim[T <: Data](in: PNIO[T]): LNIO[T] = { + val out = DefaultFromPhysicalShim(in) + out.bits.header.dst := in.bits.header.dst - UInt(nManagers) + out + } + def managerToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = { + val out = DefaultToPhysicalShim(n, in) + out.bits.header.dst := in.bits.header.dst + UInt(nManagers, phyHdrWidth) + out + } + def clientToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = { + val out = DefaultToPhysicalShim(n, in) + out.bits.header.src := in.bits.header.src + UInt(nManagers, phyHdrWidth) + out + } + + // Make an individual connection between virtual and physical ports using + // a particular shim. Also pin the unused Decoupled control signal low. 
+ def doDecoupledInputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: ToCrossbar[T]) = { + val s = shim(log_io) + phys_in.valid := s.valid + phys_in.bits := s.bits + s.ready := phys_in.ready + phys_out.ready := Bool(false) + } + + def doDecoupledOutputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: FromCrossbar[T]) = { + val s = shim(phys_out) + log_io.valid := s.valid + log_io.bits := s.bits + s.ready := log_io.ready + phys_in.valid := Bool(false) + } + + //Hookup all instances of a particular subbundle of TileLink + def doDecoupledHookups[T <: Data](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => LNIO[T]) = { + physIO.in.head.bits.payload match { + case c: ClientToManagerChannel => { + managers.zipWithIndex.map { case (i, id) => + doDecoupledOutputHookup(physIO.in(id), physIO.out(id), getLogIO(i), crossbarToManagerShim[T]) + } + clients.zipWithIndex.map { case (i, id) => + doDecoupledInputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), clientToCrossbarShim[T]) + } + } + case m: ManagerToClientChannel => { + managers.zipWithIndex.map { case (i, id) => + doDecoupledInputHookup(physIO.in(id), physIO.out(id), getLogIO(i), managerToCrossbarShim[T]) + } + clients.zipWithIndex.map { case (i, id) => + doDecoupledOutputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), crossbarToClientShim[T]) + } + } + } + } + + doDecoupledHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire) + doDecoupledHookups(relNet.io, (tl: TileLinkIO) => tl.release) + doDecoupledHookups(prbNet.io, (tl: TileLinkIO) => tl.probe) + doDecoupledHookups(gntNet.io, (tl: TileLinkIO) => tl.grant) + doDecoupledHookups(ackNet.io, (tl: TileLinkIO) => tl.finish) +} + +class ClientUncachedTileLinkIORouter( + nOuter: Int, routeSel: UInt => UInt)(implicit p: Parameters) + extends TLModule { + + val io = new Bundle { + val in = (new ClientUncachedTileLinkIO).flip + val out = Vec(nOuter, new ClientUncachedTileLinkIO) + } + + val acq_route = routeSel(io.in.acquire.bits.full_addr()) + + io.in.acquire.ready := Bool(false) + + io.out.zipWithIndex.foreach { case (out, i) => + out.acquire.valid := io.in.acquire.valid && acq_route(i) + out.acquire.bits := io.in.acquire.bits + when (acq_route(i)) { io.in.acquire.ready := out.acquire.ready } + } + + val gnt_arb = Module(new LockingRRArbiter( + new Grant, nOuter, tlDataBeats, Some((gnt: Grant) => gnt.hasMultibeatData()))) + gnt_arb.io.in <> io.out.map(_.grant) + io.in.grant <> gnt_arb.io.out + + assert(!io.in.acquire.valid || acq_route.orR, "No valid route") +} + +class TileLinkInterconnectIO(val nInner: Int, val nOuter: Int) + (implicit p: Parameters) extends Bundle { + val in = Vec(nInner, new ClientUncachedTileLinkIO).flip + val out = Vec(nOuter, new ClientUncachedTileLinkIO) +} + +class ClientUncachedTileLinkIOCrossbar( + nInner: Int, nOuter: Int, routeSel: UInt => UInt) + (implicit p: Parameters) extends TLModule { + + val io = new TileLinkInterconnectIO(nInner, nOuter) + + if (nInner == 1) { + val router = Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel)) + router.io.in <> io.in.head + io.out <> router.io.out + } else { + val routers = List.fill(nInner) { + Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel)) } + val arbiters = List.fill(nOuter) { + Module(new ClientUncachedTileLinkIOArbiter(nInner)) } + + for (i <- 0 until nInner) { + routers(i).io.in <> io.in(i) + } + + for (i <- 0 until nOuter) { + arbiters(i).io.in <> routers.map(r => r.io.out(i)) + io.out(i) <> 
arbiters(i).io.out + } + } +} + +abstract class TileLinkInterconnect(implicit p: Parameters) extends TLModule()(p) { + val nInner: Int + val nOuter: Int + + lazy val io = new TileLinkInterconnectIO(nInner, nOuter) +} + +class TileLinkRecursiveInterconnect(val nInner: Int, addrMap: AddrMap) + (implicit p: Parameters) extends TileLinkInterconnect()(p) { + def port(name: String) = io.out(addrMap.port(name)) + val nOuter = addrMap.numSlaves + val routeSel = (addr: UInt) => + Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse) + + val xbar = Module(new ClientUncachedTileLinkIOCrossbar(nInner, addrMap.length, routeSel)) + xbar.io.in <> io.in + + io.out <> addrMap.entries.zip(xbar.io.out).flatMap { + case (entry, xbarOut) => { + entry.region match { + case submap: AddrMap if submap.isEmpty => + xbarOut.acquire.ready := Bool(false) + xbarOut.grant.valid := Bool(false) + None + case submap: AddrMap => + val ic = Module(new TileLinkRecursiveInterconnect(1, submap)) + ic.io.in.head <> xbarOut + ic.io.out + case _ => + Some(xbarOut) + } + } + } +} + +class TileLinkMemoryInterconnect( + nBanksPerChannel: Int, nChannels: Int) + (implicit p: Parameters) extends TileLinkInterconnect()(p) { + + val nBanks = nBanksPerChannel * nChannels + val nInner = nBanks + val nOuter = nChannels + + def connectChannel(outer: ClientUncachedTileLinkIO, inner: ClientUncachedTileLinkIO) { + outer <> inner + outer.acquire.bits.addr_block := inner.acquire.bits.addr_block >> UInt(log2Ceil(nChannels)) + } + + for (i <- 0 until nChannels) { + /* Bank assignments to channels are strided so that consecutive banks + * map to different channels. That way, consecutive cache lines also + * map to different channels */ + val banks = (i until nBanks by nChannels).map(j => io.in(j)) + + val channelArb = Module(new ClientUncachedTileLinkIOArbiter(nBanksPerChannel)) + channelArb.io.in <> banks + connectChannel(io.out(i), channelArb.io.out) + } +} + +/** Allows users to switch between various memory configurations. Note that + * this is a dangerous operation: not only does switching the select input to + * this module violate TileLink, it also causes the memory of the machine to + * become garbled. It's expected that select only changes at boot time, as + * part of the memory controller configuration. 
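+ * + * For example, a selector generated with configs = Seq(1, 2) presents four banks over one channel when select is 0 and over two channels when select is 1 (a sketch; the port counts are hypothetical): + * {{{ + * val sel = Module(new TileLinkMemorySelector(nBanks = 4, maxMemChannels = 2, configs = Seq(1, 2))) + * sel.io.select := UInt(0) // boot-time choice of the single-channel layout + * }}}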
*/ +class TileLinkMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int) + (implicit p: Parameters) + extends TileLinkInterconnectIO(nBanks, maxMemChannels) { + val select = UInt(INPUT, width = log2Up(nConfigs)) + override def cloneType = + new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs).asInstanceOf[this.type] +} + +class TileLinkMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int]) + (implicit p: Parameters) + extends TileLinkInterconnect()(p) { + val nInner = nBanks + val nOuter = maxMemChannels + val nConfigs = configs.size + + override lazy val io = new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs) + + def muxOnSelect[T <: Data](up: DecoupledIO[T], dn: DecoupledIO[T], active: Bool): Unit = { + when (active) { dn.bits := up.bits } + when (active) { up.ready := dn.ready } + when (active) { dn.valid := up.valid } + } + + def muxOnSelect(up: ClientUncachedTileLinkIO, dn: ClientUncachedTileLinkIO, active: Bool): Unit = { + muxOnSelect(up.acquire, dn.acquire, active) + muxOnSelect(dn.grant, up.grant, active) + } + + def muxOnSelect(up: Vec[ClientUncachedTileLinkIO], dn: Vec[ClientUncachedTileLinkIO], active: Bool) : Unit = { + for (i <- 0 until up.size) + muxOnSelect(up(i), dn(i), active) + } + + /* Disconnects a vector of TileLink ports, which involves setting them to + * invalid. Due to Chisel reasons, we need to also set the bits to 0 (since + * there can't be any unconnected inputs). */ + def disconnectOuter(outer: Vec[ClientUncachedTileLinkIO]) = { + outer.foreach{ m => + m.acquire.valid := Bool(false) + m.acquire.bits := m.acquire.bits.fromBits(UInt(0)) + m.grant.ready := Bool(false) + } + } + + def disconnectInner(inner: Vec[ClientUncachedTileLinkIO]) = { + inner.foreach { m => + m.grant.valid := Bool(false) + m.grant.bits := m.grant.bits.fromBits(UInt(0)) + m.acquire.ready := Bool(false) + } + } + + /* Provides default wires on all our outputs. */ + disconnectOuter(io.out) + disconnectInner(io.in) + + /* Constructs interconnects for each of the layouts suggested by the + * configuration and switches between them based on the select input. */ + configs.zipWithIndex.foreach{ case (nChannels, select) => + val nBanksPerChannel = nBanks / nChannels + val ic = Module(new TileLinkMemoryInterconnect(nBanksPerChannel, nChannels)) + disconnectInner(ic.io.out) + disconnectOuter(ic.io.in) + muxOnSelect(io.in, ic.io.in, io.select === UInt(select)) + muxOnSelect(ic.io.out, io.out, io.select === UInt(select)) + } +} diff --git a/uncore/src/main/scala/tilelink/Network.scala b/uncore/src/main/scala/tilelink/Network.scala new file mode 100644 index 00000000..1c094013 --- /dev/null +++ b/uncore/src/main/scala/tilelink/Network.scala @@ -0,0 +1,308 @@ +// See LICENSE for license details. 
+ +package uncore.tilelink + +import Chisel._ +import uncore.util._ +import cde.{Parameters, Field} + +case object LNEndpoints extends Field[Int] +case object LNHeaderBits extends Field[Int] + +class PhysicalHeader(n: Int) extends Bundle { + val src = UInt(width = log2Up(n)) + val dst = UInt(width = log2Up(n)) +} + +class PhysicalNetworkIO[T <: Data](n: Int, dType: T) extends Bundle { + val header = new PhysicalHeader(n) + val payload = dType.cloneType + override def cloneType = new PhysicalNetworkIO(n,dType).asInstanceOf[this.type] +} + +class BasicCrossbarIO[T <: Data](n: Int, dType: T) extends Bundle { + val in = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType))).flip + val out = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType))) +} + +abstract class PhysicalNetwork extends Module + +case class CrossbarConfig[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Option[PhysicalNetworkIO[T] => Bool] = None) + +abstract class AbstractCrossbar[T <: Data](conf: CrossbarConfig[T]) extends PhysicalNetwork { + val io = new BasicCrossbarIO(conf.n, conf.dType) +} + +class BasicBus[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) { + val arb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock)) + arb.io.in <> io.in + + arb.io.out.ready := io.out(arb.io.out.bits.header.dst).ready + for ((out, i) <- io.out zipWithIndex) { + out.valid := arb.io.out.valid && arb.io.out.bits.header.dst === UInt(i) + out.bits := arb.io.out.bits + } +} + +class BasicCrossbar[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) { + io.in.foreach { _.ready := Bool(false) } + + io.out.zipWithIndex.map{ case (out, i) => { + val rrarb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock)) + (rrarb.io.in, io.in).zipped.map{ case (arb, in) => { + val destined = in.bits.header.dst === UInt(i) + arb.valid := in.valid && destined + arb.bits := in.bits + when (arb.ready && destined) { in.ready := Bool(true) } + }} + out <> rrarb.io.out + }} +} + +abstract class LogicalNetwork extends Module + +class LogicalHeader(implicit p: Parameters) extends junctions.ParameterizedBundle()(p) { + val src = UInt(width = p(LNHeaderBits)) + val dst = UInt(width = p(LNHeaderBits)) +} + +class LogicalNetworkIO[T <: Data](dType: T)(implicit p: Parameters) extends Bundle { + val header = new LogicalHeader + val payload = dType.cloneType + override def cloneType = new LogicalNetworkIO(dType)(p).asInstanceOf[this.type] +} + +object DecoupledLogicalNetworkIOWrapper { + def apply[T <: Data]( + in: DecoupledIO[T], + src: UInt = UInt(0), + dst: UInt = UInt(0)) + (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = { + val out = Wire(Decoupled(new LogicalNetworkIO(in.bits))) + out.valid := in.valid + out.bits.payload := in.bits + out.bits.header.dst := dst + out.bits.header.src := src + in.ready := out.ready + out + } +} + +object DecoupledLogicalNetworkIOUnwrapper { + def apply[T <: Data](in: DecoupledIO[LogicalNetworkIO[T]]) + (implicit p: Parameters): DecoupledIO[T] = { + val out = Wire(Decoupled(in.bits.payload)) + out.valid := in.valid + out.bits := in.bits.payload + in.ready := out.ready + out + } +} + +object DefaultFromPhysicalShim { + def apply[T <: Data](in: DecoupledIO[PhysicalNetworkIO[T]]) + (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = { + val out = Wire(Decoupled(new LogicalNetworkIO(in.bits.payload))) + out.bits.header := in.bits.header + out.bits.payload := in.bits.payload + out.valid := in.valid + in.ready := out.ready + out + } +} 
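+ +/* Example usage (illustrative only): wrapping a client-local Decoupled channel into a + * LogicalNetworkIO headed for manager 0, and stripping a header off again; + * clientId and the tl/net bundles are assumed to exist in the surrounding module: + * + * val acqOut = DecoupledLogicalNetworkIOWrapper(tl.acquire, src = UInt(clientId), dst = UInt(0)) + * val gntIn = DecoupledLogicalNetworkIOUnwrapper(net.grant) + */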
+ +object DefaultToPhysicalShim { + def apply[T <: Data](n: Int, in: DecoupledIO[LogicalNetworkIO[T]]) + (implicit p: Parameters): DecoupledIO[PhysicalNetworkIO[T]] = { + val out = Wire(Decoupled(new PhysicalNetworkIO(n, in.bits.payload))) + out.bits.header := in.bits.header + out.bits.payload := in.bits.payload + out.valid := in.valid + in.ready := out.ready + out + } +} + +/** A helper module that automatically issues [[uncore.Finish]] messages in response + * to the [[uncore.Grant]] messages that it receives from a manager and forwards to a client + */ +class FinishUnit(srcId: Int = 0, outstanding: Int = 2)(implicit p: Parameters) extends TLModule()(p) + with HasDataBeatCounters { + val io = new Bundle { + val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip + val refill = Decoupled(new Grant) + val finish = Decoupled(new LogicalNetworkIO(new Finish)) + val ready = Bool(OUTPUT) + } + + val g = io.grant.bits.payload + + if(tlNetworkPreservesPointToPointOrdering) { + io.finish.valid := Bool(false) + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := io.refill.ready + io.ready := Bool(true) + } else { + // We only want to send Finishes after we have collected all beats of + // a multibeat Grant. But Grants from multiple managers or transactions may + // get interleaved, so we could need a counter for each. + val done = if(tlNetworkDoesNotInterleaveBeats) { + connectIncomingDataBeatCounterWithHeader(io.grant) + } else { + val entries = 1 << tlClientXactIdBits + def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id + assert(getId(io.grant.bits) <= UInt(entries), "Not enough grant beat counters, only " + entries + " entries.") + connectIncomingDataBeatCountersWithHeader(io.grant, entries, getId).reduce(_||_) + } + val q = Module(new FinishQueue(outstanding)) + q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done) + q.io.enq.bits := g.makeFinish() + q.io.enq.bits.manager_id := io.grant.bits.header.src + + io.finish.bits.header.src := UInt(srcId) + io.finish.bits.header.dst := q.io.deq.bits.manager_id + io.finish.bits.payload := q.io.deq.bits + io.finish.valid := q.io.deq.valid + q.io.deq.ready := io.finish.ready + + io.refill.valid := (q.io.enq.ready || !g.requiresAck()) && io.grant.valid + io.refill.bits := g + io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready + io.ready := q.io.enq.ready + } +} + +class FinishQueue(entries: Int)(implicit p: Parameters) extends Queue(new FinishToDst()(p), entries) + +/** A port to convert [[uncore.ClientTileLinkIO]].flip into [[uncore.TileLinkIO]] + * + * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages, + * calculating header.dst and filling in header.src. + * Strips headers from [[uncore.Probe Probes]]. + * Passes [[uncore.GrantFromSrc]] and accepts [[uncore.FinishToDst]] in response, + * setting up the headers for each. 
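+ * + * A typical hookup, sketched under the assumption of a single manager at network id 0 (myCache is a placeholder for a cached client): + * {{{ + * val port = Module(new ClientTileLinkNetworkPort(clientId = 1, addrConvert = (addr: UInt) => UInt(0))) + * port.io.client <> myCache.io.tl + * }}}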
+  *
+  * @param clientId the network port id of this agent
+  * @param addrConvert how a physical address maps to a destination manager port id
+  */
+class ClientTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
+                               (implicit p: Parameters) extends TLModule()(p) {
+  val io = new Bundle {
+    val client = new ClientTileLinkIO().flip
+    val network = new TileLinkIO
+  }
+
+  val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
+  val rel_with_header = ClientTileLinkHeaderCreator(io.client.release, clientId, addrConvert)
+  val fin_with_header = ClientTileLinkHeaderCreator(io.client.finish, clientId)
+  val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe)
+  val gnt_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.grant)
+
+  io.network.acquire <> acq_with_header
+  io.network.release <> rel_with_header
+  io.network.finish <> fin_with_header
+  io.client.probe <> prb_without_header
+  io.client.grant.bits.manager_id := io.network.grant.bits.header.src
+  io.client.grant <> gnt_without_header
+}
+
+/** A port to convert [[uncore.ClientUncachedTileLinkIO]].flip into [[uncore.TileLinkIO]]
+  *
+  * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
+  * calculating header.dst and filling in header.src.
+  * Responds to [[uncore.Grant]] by automatically issuing [[uncore.Finish]] to the granting managers.
+  *
+  * @param clientId the network port id of this agent
+  * @param addrConvert how a physical address maps to a destination manager port id
+  */
+class ClientUncachedTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
+                                       (implicit p: Parameters) extends TLModule()(p) {
+  val io = new Bundle {
+    val client = new ClientUncachedTileLinkIO().flip
+    val network = new TileLinkIO
+  }
+
+  val finisher = Module(new FinishUnit(clientId))
+  finisher.io.grant <> io.network.grant
+  io.network.finish <> finisher.io.finish
+
+  val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
+  val gnt_without_header = finisher.io.refill
+
+  io.network.acquire.bits := acq_with_header.bits
+  io.network.acquire.valid := acq_with_header.valid && finisher.io.ready
+  acq_with_header.ready := io.network.acquire.ready && finisher.io.ready
+  io.client.grant <> gnt_without_header
+  io.network.probe.ready := Bool(false)
+  io.network.release.valid := Bool(false)
+}
+
+object ClientTileLinkHeaderCreator {
+  def apply[T <: ClientToManagerChannel with HasManagerId](
+      in: DecoupledIO[T],
+      clientId: Int)
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
+    out.bits.payload := in.bits
+    out.bits.header.src := UInt(clientId)
+    out.bits.header.dst := in.bits.manager_id
+    out.valid := in.valid
+    in.ready := out.ready
+    out
+  }
+  def apply[T <: ClientToManagerChannel with HasCacheBlockAddress](
+      in: DecoupledIO[T],
+      clientId: Int,
+      addrConvert: UInt => UInt)
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
+    out.bits.payload := in.bits
+    out.bits.header.src := UInt(clientId)
+    out.bits.header.dst := addrConvert(in.bits.addr_block)
+    out.valid := in.valid
+    in.ready := out.ready
+    out
+  }
+}
+
+/** A port to convert [[uncore.ManagerTileLinkIO]].flip into [[uncore.TileLinkIO]].flip
+  *
+  * Creates network headers for [[uncore.Probe]] and [[uncore.Grant]] messages,
+  * calculating header.dst and filling in header.src.
+  * Strips headers from [[uncore.Acquire]], [[uncore.Release]] and [[uncore.Finish]],
+  * filling in each message's client_id from its header's source id instead.
+  *
+  * @param managerId the network port id of this agent
+  * @param idConvert how a sharer id maps to a destination client port id
+  */
+class ManagerTileLinkNetworkPort(managerId: Int, idConvert: UInt => UInt)
+                                (implicit p: Parameters) extends TLModule()(p) {
+  val io = new Bundle {
+    val manager = new ManagerTileLinkIO().flip
+    val network = new TileLinkIO().flip
+  }
+  io.network.grant <> ManagerTileLinkHeaderCreator(io.manager.grant, managerId, (u: UInt) => u)
+  io.network.probe <> ManagerTileLinkHeaderCreator(io.manager.probe, managerId, idConvert)
+  io.manager.acquire <> DecoupledLogicalNetworkIOUnwrapper(io.network.acquire)
+  io.manager.acquire.bits.client_id := io.network.acquire.bits.header.src
+  io.manager.release <> DecoupledLogicalNetworkIOUnwrapper(io.network.release)
+  io.manager.release.bits.client_id := io.network.release.bits.header.src
+  io.manager.finish <> DecoupledLogicalNetworkIOUnwrapper(io.network.finish)
+}
+
+object ManagerTileLinkHeaderCreator {
+  def apply[T <: ManagerToClientChannel with HasClientId](
+      in: DecoupledIO[T],
+      managerId: Int,
+      idConvert: UInt => UInt)
+      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
+    out.bits.payload := in.bits
+    out.bits.header.src := UInt(managerId)
+    out.bits.header.dst := idConvert(in.bits.client_id)
+    out.valid := in.valid
+    in.ready := out.ready
+    out
+  }
+}
diff --git a/uncore/src/main/scala/util/AmoAlu.scala b/uncore/src/main/scala/util/AmoAlu.scala
new file mode 100644
index 00000000..d6ff9ce8
--- /dev/null
+++ b/uncore/src/main/scala/util/AmoAlu.scala
@@ -0,0 +1,109 @@
+// See LICENSE for license details.
+
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+import uncore.constants._
+
+/** Computes the byte mask and lane-replicated write data for a store of
+  * 2^size bytes at a (possibly unaligned) address within a maxSize-byte word.
+  */
+class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) {
+  val size = typ(log2Up(log2Up(maxSize)+1)-1,0)
+  // True when the address is not aligned to the access size
+  def misaligned =
+    (addr & ((UInt(1) << size) - UInt(1))(log2Up(maxSize)-1,0)).orR
+
+  // Byte mask for the addressed 2^size-byte region, built one address bit at a time
+  def mask = {
+    var res = UInt(1)
+    for (i <- 0 until log2Up(maxSize)) {
+      val upper = Mux(addr(i), res, UInt(0)) | Mux(size >= UInt(i+1), UInt((BigInt(1) << (1 << i))-1), UInt(0))
+      val lower = Mux(addr(i), UInt(0), res)
+      res = Cat(upper, lower)
+    }
+    res
+  }
+
+  // Replicates the low bytes of dat across the word so every aligned lane carries the store data
+  protected def genData(i: Int): UInt =
+    if (i >= log2Up(maxSize)) dat
+    else Mux(size === UInt(i), Fill(1 << (log2Up(maxSize)-i), dat((8 << i)-1,0)), genData(i+1))
+
+  def data = genData(0)
+  def wordData = genData(2)
+}
+
+/** A StoreGen whose input data is already aligned, so it is passed through unreplicated */
+class StoreGenAligned(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) extends StoreGen(typ, addr, dat, maxSize) {
+  override def genData(i: Int) = dat
+}
+
+/** Shifts the addressed subword of a load result into place and sign- or
+  * zero-extends it to the full register width.
+  */
+class LoadGen(typ: UInt, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) {
+  private val t = new StoreGen(typ, addr, dat, maxSize)
+  private val signed = typ.toSInt >= SInt(0)
+
+  private def genData(logMinSize: Int): UInt = {
+    var res = dat
+    for (i <- log2Up(maxSize)-1 to logMinSize by -1) {
+      val pos = 8 << i
+      val shifted = Mux(addr(i), res(2*pos-1,pos), res(pos-1,0))
+      val doZero = Bool(i == 0) && zero
+      val zeroed = Mux(doZero, UInt(0), shifted)
+      res = Cat(Mux(t.size === UInt(i) || doZero, Fill(8*maxSize-pos, signed && zeroed(pos-1)), res(8*maxSize-1,pos)), zeroed)
+    }
+    res
+  }
+
+  def wordData = genData(2)
+  def data = genData(0)
+}
+
+/** Arithmetic and logic unit for atomic memory operations (AMOs) */
+class AMOALU(rhsIsAligned: Boolean = false)(implicit p: Parameters) extends Module {
+  val operandBits = p(AmoAluOperandBits)
+  val blockOffBits = p(CacheBlockOffsetBits)
+  require(operandBits == 32 || operandBits == 64)
+  val io = new Bundle {
+    val addr = Bits(INPUT, blockOffBits)
+    val cmd = Bits(INPUT, M_SZ)
+    val typ = Bits(INPUT, MT_SZ)
+    val lhs = Bits(INPUT, operandBits)
+    val rhs = Bits(INPUT, operandBits)
+    val out = Bits(OUTPUT, operandBits)
+  }
+
+  val storegen =
+    if(rhsIsAligned) new StoreGenAligned(io.typ, io.addr, io.rhs, operandBits/8)
+    else new StoreGen(io.typ, io.addr, io.rhs, operandBits/8)
+  val rhs = storegen.wordData
+
+  val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
+  val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
+  val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
+  val word = io.typ === MT_W || io.typ === MT_WU || // Logic minimization:
+             io.typ === MT_B || io.typ === MT_BU
+
+  // For a 32-bit op on the upper half of a 64-bit datapath, break the carry chain at bit 31
+  val adder_out =
+    if (operandBits == 32) io.lhs + rhs
+    else {
+      val mask = ~UInt(0,64) ^ (io.addr(2) << 31)
+      (io.lhs & mask).toUInt + (rhs & mask)
+    }
+
+  // Signed or unsigned comparison of the addressed word or doubleword
+  val less =
+    if (operandBits == 32) Mux(io.lhs(31) === rhs(31), io.lhs < rhs, Mux(sgned, io.lhs(31), io.rhs(31)))
+    else {
+      val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
+      val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
+      val lt_lo = io.lhs(31,0) < rhs(31,0)
+      val lt_hi = io.lhs(63,32) < rhs(63,32)
+      val eq_hi = io.lhs(63,32) === rhs(63,32)
+      val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
+      Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
+    }
+
+  val out = Mux(io.cmd === M_XA_ADD, adder_out,
+            Mux(io.cmd === M_XA_AND, io.lhs & rhs,
+            Mux(io.cmd === M_XA_OR,  io.lhs | rhs,
+            Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
+            Mux(Mux(less, min, max), io.lhs,
+            storegen.data)))))
+
+  // Merge the result into lhs under the store byte mask
+  val wmask = FillInterleaved(8, storegen.mask)
+  io.out := wmask & out | ~wmask & io.lhs
+}
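+
+// Illustrative sketch (an editorial addition, not original library code):
+// a hypothetical wrapper showing how StoreGen is typically driven. For a
+// halfword store (size encoding 1) at byte offset 2 of an 8-byte word, mask
+// evaluates to b00001100 and data replicates the low 16 bits across all lanes.
+class StoreGenExample extends Module {
+  val io = new Bundle {
+    val typ  = UInt(INPUT, MT_SZ)
+    val addr = UInt(INPUT, 3)
+    val dat  = UInt(INPUT, 64)
+    val mask = UInt(OUTPUT, 8)
+    val data = UInt(OUTPUT, 64)
+  }
+  val gen = new StoreGen(io.typ, io.addr, io.dat, 8)
+  io.mask := gen.mask
+  io.data := gen.data
+}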
diff --git a/uncore/src/main/scala/util/Counters.scala b/uncore/src/main/scala/util/Counters.scala
new file mode 100644
index 00000000..3bc2d85b
--- /dev/null
+++ b/uncore/src/main/scala/util/Counters.scala
@@ -0,0 +1,134 @@
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+
+// Produces a 0-width value when counting to 1
+class ZCounter(val n: Int) {
+  val value = Reg(init=UInt(0, log2Ceil(n)))
+  def inc(): Bool = {
+    if (n == 1) Bool(true)
+    else {
+      val wrap = value === UInt(n-1)
+      value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1))
+      wrap
+    }
+  }
+}
+
+object ZCounter {
+  def apply(n: Int) = new ZCounter(n)
+  def apply(cond: Bool, n: Int): (UInt, Bool) = {
+    val c = new ZCounter(n)
+    var wrap: Bool = null
+    when (cond) { wrap = c.inc() }
+    (c.value, cond && wrap)
+  }
+}
+
+object TwoWayCounter {
+  def apply(up: Bool, down: Bool, max: Int): UInt = {
+    val cnt = Reg(init = UInt(0, log2Up(max+1)))
+    when (up && !down) { cnt := cnt + UInt(1) }
+    when (down && !up) { cnt := cnt - UInt(1) }
+    cnt
+  }
+}
+
+class BeatCounterStatus extends Bundle {
+  val idx = UInt()
+  val done = Bool()
+}
+
+class TwoWayBeatCounterStatus extends Bundle {
+  val pending = Bool()
+  val up = new BeatCounterStatus()
+  val down = new BeatCounterStatus()
+}
+
+/** Utility trait containing wiring functions to keep track of how many data beats have
+  * been sent or received over a particular [[uncore.TileLinkChannel]] or pair of channels.
+  *
+  * Won't count message types that don't have data.
+  * Used in [[uncore.XactTracker]] and [[uncore.FinishUnit]].
+  */
+trait HasDataBeatCounters {
+  type HasBeat = TileLinkChannel with HasTileLinkBeatId
+  type HasId = TileLinkChannel with HasClientId
+
+  /** Returns the current beat count on this channel and whether the message is done
+    * @param inc increment the counter (usually .valid or .fire())
+    * @param data the actual channel data
+    * @param beat count to return for single-beat messages
+    */
+  def connectDataBeatCounter[S <: TileLinkChannel](inc: Bool, data: S, beat: UInt) = {
+    val multi = data.hasMultibeatData()
+    val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats)
+    val cnt = Mux(multi, multi_cnt, beat)
+    val done = Mux(multi, multi_done, inc)
+    (cnt, done)
+  }
+
+  /** Counter for beats on outgoing [[chisel.DecoupledIO]] */
+  def connectOutgoingDataBeatCounter[T <: TileLinkChannel](
+      out: DecoupledIO[T],
+      beat: UInt = UInt(0)): (UInt, Bool) =
+    connectDataBeatCounter(out.fire(), out.bits, beat)
+
+  /** Returns done but not cnt. Use the addr_beat subbundle instead of cnt for beats on
+    * incoming channels in case of network reordering.
+    */
+  def connectIncomingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T]): Bool =
+    connectDataBeatCounter(in.fire(), in.bits, UInt(0))._2
+
+  /** Counter for beats on incoming DecoupledIO[LogicalNetworkIO[]]s; returns done */
+  def connectIncomingDataBeatCounterWithHeader[T <: TileLinkChannel](in: DecoupledIO[LogicalNetworkIO[T]]): Bool =
+    connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2
+
+  /** If the network might interleave beats from different messages, we need a Vec of counters,
+    * one for every outstanding message id that might be interleaved.
+    *
+    * @param getId mapping from Message to counter id
+    */
+  def connectIncomingDataBeatCountersWithHeader[T <: TileLinkChannel with HasClientTransactionId](
+      in: DecoupledIO[LogicalNetworkIO[T]],
+      entries: Int,
+      getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = {
+    Vec((0 until entries).map { i =>
+      connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2
+    })
+  }
+
+  /** Provides counters on two channels, as well as a meta-counter that tracks how many
+    * messages have been sent over the up channel but not yet responded to over the down channel.
+    * The pending flag and both counters' indices and done signals are driven onto the
+    * `status` bundle rather than returned.
+    *
+    * @param status bundle of status of the counters
+    * @param up outgoing channel
+    * @param down incoming channel
+    * @param max max number of outstanding ups with no down
+    * @param beat overrides cnts on single-beat messages
+    * @param trackUp whether an up message should be counted
+    * @param trackDown whether a down message should be counted
+    */
+  def connectTwoWayBeatCounters[T <: TileLinkChannel, S <: TileLinkChannel](
+      status: TwoWayBeatCounterStatus,
+      up: DecoupledIO[T],
+      down: DecoupledIO[S],
+      max: Int = 1,
+      beat: UInt = UInt(0),
+      trackUp: T => Bool = (t: T) => Bool(true),
+      trackDown: S => Bool = (s: S) => Bool(true)) {
+    val (up_idx, up_done) = connectDataBeatCounter(up.fire() && trackUp(up.bits), up.bits, beat)
+    val (dn_idx, dn_done) = connectDataBeatCounter(down.fire() && trackDown(down.bits), down.bits, beat)
+    val cnt = TwoWayCounter(up_done, dn_done, max)
+    status.pending := cnt > UInt(0)
+    status.up.idx := up_idx
+    status.up.done := up_done
+    status.down.idx := dn_idx
+    status.down.done := dn_done
+  }
+}
+
diff --git a/uncore/src/main/scala/util/Enqueuer.scala b/uncore/src/main/scala/util/Enqueuer.scala
new file mode 100644
index 00000000..f6f3587a
--- /dev/null
+++ b/uncore/src/main/scala/util/Enqueuer.scala
@@ -0,0 +1,56 @@
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+import cde.Parameters
+
+/** Struct for describing per-channel queue depths */
+case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int)
+
+/** Optionally enqueues each [[uncore.TileLinkChannel]] individually */
+class TileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
+  val io = new Bundle {
+    val client = new TileLinkIO().flip
+    val manager = new TileLinkIO
+  }
+  io.manager.acquire <> (if(depths.acq > 0) Queue(io.client.acquire, depths.acq) else io.client.acquire)
+  io.client.probe <> (if(depths.prb > 0) Queue(io.manager.probe, depths.prb) else io.manager.probe)
+  io.manager.release <> (if(depths.rel > 0) Queue(io.client.release, depths.rel) else io.client.release)
+  io.client.grant <> (if(depths.gnt > 0) Queue(io.manager.grant, depths.gnt) else io.manager.grant)
+  io.manager.finish <> (if(depths.fin > 0) Queue(io.client.finish, depths.fin) else io.client.finish)
+}
+
+object TileLinkEnqueuer {
+  def apply(in: TileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): TileLinkIO = {
+    val t = Module(new TileLinkEnqueuer(depths))
+    t.io.client <> in
+    t.io.manager
+  }
+  def apply(in: TileLinkIO, depth: Int)(implicit p: Parameters): TileLinkIO = {
+    apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
+  }
+}
+
+/** Optionally enqueues each [[uncore.ClientTileLinkIO]] channel individually */
+class ClientTileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
+  val io = new Bundle {
+    val inner = new ClientTileLinkIO().flip
+    val outer = new ClientTileLinkIO
+  }
+
+  io.outer.acquire <> (if(depths.acq > 0) Queue(io.inner.acquire, depths.acq) else io.inner.acquire)
+  io.inner.probe <> (if(depths.prb > 0) Queue(io.outer.probe, depths.prb) else io.outer.probe)
+  io.outer.release <> (if(depths.rel > 0) Queue(io.inner.release, depths.rel) else io.inner.release)
+  io.inner.grant <> (if(depths.gnt > 0) Queue(io.outer.grant, depths.gnt) else io.outer.grant)
+  io.outer.finish <> (if(depths.fin > 0) Queue(io.inner.finish, depths.fin) else io.inner.finish)
+}
+
+object ClientTileLinkEnqueuer {
+  def apply(in: ClientTileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): ClientTileLinkIO = {
+    val t = Module(new ClientTileLinkEnqueuer(depths))
+    t.io.inner <> in
+    t.io.outer
+  }
+  def apply(in: ClientTileLinkIO, depth: Int)(implicit p: Parameters): ClientTileLinkIO = {
+    apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
+  }
+}
diff --git a/uncore/src/main/scala/util/Serializer.scala b/uncore/src/main/scala/util/Serializer.scala
new file mode 100644
index 00000000..8cc0caa2
--- /dev/null
+++ b/uncore/src/main/scala/util/Serializer.scala
@@ -0,0 +1,69 @@
+// See LICENSE for license details.
+
+package uncore.util
+
+import Chisel._
+import uncore.tilelink._
+
+/** Serializes the wide data field of a [[uncore.HasTileLinkData]] bundle over
+  * n consecutive beats, passing all other fields through unchanged on each beat.
+  * Messages without data flow through in a single beat.
+  */
+class FlowThroughSerializer[T <: Bundle with HasTileLinkData](gen: T, n: Int) extends Module {
+  val io = new Bundle {
+    val in = Decoupled(gen).flip
+    val out = Decoupled(gen)
+    val cnt = UInt(OUTPUT, log2Up(n))
+    val done = Bool(OUTPUT)
+  }
+  val narrowWidth = io.in.bits.data.getWidth / n
+  require(io.in.bits.data.getWidth % narrowWidth == 0)
+
+  if(n == 1) {
+    io.out <> io.in
+    io.cnt := UInt(0)
+    io.done := Bool(true)
+  } else {
+    val cnt = Reg(init=UInt(0, width = log2Up(n)))
+    val wrap = cnt === UInt(n-1)
+    val rbits = Reg{io.in.bits}
+    val active = Reg(init=Bool(false))
+
+    val shifter = Wire(Vec(n, Bits(width = narrowWidth)))
+    (0 until n).foreach {
+      i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth)
+    }
+
+    io.done := Bool(false)
+    io.cnt := cnt
+    io.in.ready := !active
+    io.out.valid := active || io.in.valid
+    io.out.bits := io.in.bits
+    when(!active && io.in.valid) {
+      when(io.in.bits.hasData()) {
+        cnt := Mux(io.out.ready, UInt(1), UInt(0))
+        rbits := io.in.bits
+        active := Bool(true)
+      }
+      io.done := !io.in.bits.hasData()
+    }
+    when(active) {
+      io.out.bits := rbits
+      io.out.bits.data := shifter(cnt)
+      when(io.out.ready) {
+        cnt := cnt + UInt(1)
+        when(wrap) {
+          cnt := UInt(0)
+          io.done := Bool(true)
+          active := Bool(false)
+        }
+      }
+    }
+  }
+}
+
+object FlowThroughSerializer {
+  def apply[T <: Bundle with HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = {
+    val fs = Module(new FlowThroughSerializer(in.bits, n))
+    fs.io.in.valid := in.valid
+    fs.io.in.bits := in.bits
+    in.ready := fs.io.in.ready
+    fs.io.out
+  }
+}
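+
+// Illustrative usage sketch (an editorial addition, not original library code).
+// Given some `wide: DecoupledIO[T]` where T mixes in HasTileLinkData, the
+// companion object splits each data payload into n beats, each n times
+// narrower, while the remaining fields repeat on every beat:
+//
+//   val narrow = FlowThroughSerializer(wide, 4) // data width divided by 4
+//   consumer <> narrow                          // `consumer` is hypothetical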