commit 1e05fc052510c2a5daea779564996d3986970082
Author: Henry Cook
Date:   Wed Apr 29 13:18:26 2015 -0700

    First pages commit

diff --git a/uncore/.gitignore b/uncore/.gitignore
new file mode 100644
index 00000000..555feb41
--- /dev/null
+++ b/uncore/.gitignore
@@ -0,0 +1,2 @@
+target/
+project/target/
diff --git a/uncore/LICENSE b/uncore/LICENSE
new file mode 100644
index 00000000..7cff15e4
--- /dev/null
+++ b/uncore/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2012-2014, The Regents of the University of California
+(Regents). All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. Neither the name of the Regents nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/uncore/README.md b/uncore/README.md
new file mode 100644
index 00000000..003f9caa
--- /dev/null
+++ b/uncore/README.md
@@ -0,0 +1,11 @@
+Uncore Library
+==============
+
+This is the repository for uncore components associated with the Rocket chip
+project. To use these modules, include this repo as a git submodule within
+your chip repository and add it as a Project in your chip's build.scala.
+These components depend only on Chisel, i.e.
+
+    lazy val uncore = Project("uncore", file("uncore"), settings = buildSettings) dependsOn(chisel)
+
+Documentation about the uncore library will come in the near future.
diff --git a/uncore/build.sbt b/uncore/build.sbt
new file mode 100644
index 00000000..d78d02ca
--- /dev/null
+++ b/uncore/build.sbt
@@ -0,0 +1,13 @@
+organization := "edu.berkeley.cs"
+
+version := "2.0"
+
+name := "uncore"
+
+scalaVersion := "2.10.2"
+
+site.settings
+
+ghpages.settings
+
+git.remoteRepo := "git@github.com:ucb-bar/uncore.git"
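As the README above sketches, a chip repository consumes this library as a git submodule plus an sbt subproject. A minimal build.scala wiring, hedged as a sketch (the chisel and chip project names and the use of default settings are assumptions, not part of this commit):

    import sbt._
    import Keys._

    object ChipBuild extends Build {
      val buildSettings = Defaults.defaultSettings
      // Chisel as a sibling subproject; uncore depends only on it
      lazy val chisel = Project("chisel", file("chisel"), settings = buildSettings)
      lazy val uncore = Project("uncore", file("uncore"), settings = buildSettings) dependsOn(chisel)
      // The top-level chip project pulls in uncore (and transitively chisel)
      lazy val chip = Project("chip", file("."), settings = buildSettings) dependsOn(uncore)
    }

The chisel-dependent.sbt below instead substitutes a published Chisel when one is named on the command line, e.g. sbt -DchiselVersion=2.2.27 compile (version number illustrative).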
diff --git a/uncore/chisel-dependent.sbt b/uncore/chisel-dependent.sbt
new file mode 100644
index 00000000..88eb615c
--- /dev/null
+++ b/uncore/chisel-dependent.sbt
@@ -0,0 +1,8 @@
+// Provide a managed dependency on chisel if -DchiselVersion="" is
+// supplied on the command line.
+
+val chiselVersion_u = System.getProperty("chiselVersion", "None")
+
+// _u is a temporary fix until sbt 0.13.6; see https://github.com/sbt/sbt/issues/1465
+
+libraryDependencies ++= ( if (chiselVersion_u != "None" ) ("edu.berkeley.cs" %% "chisel" % chiselVersion_u) :: Nil; else Nil)
diff --git a/uncore/doc/TileLink0.3.1Specification.pdf b/uncore/doc/TileLink0.3.1Specification.pdf
new file mode 100644
index 00000000..23666814
Binary files /dev/null and b/uncore/doc/TileLink0.3.1Specification.pdf differ
diff --git a/uncore/index.html b/uncore/index.html
new file mode 100644
index 00000000..03f98016
--- /dev/null
+++ b/uncore/index.html
@@ -0,0 +1 @@
+My GitHub Page
diff --git a/uncore/src/main/scala/bigmem.scala b/uncore/src/main/scala/bigmem.scala
new file mode 100644
index 00000000..fbbc4c07
--- /dev/null
+++ b/uncore/src/main/scala/bigmem.scala
@@ -0,0 +1,80 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+
+class BigMem[T <: Data](n: Int, preLatency: Int, postLatency: Int, leaf: Mem[UInt], noMask: Boolean = false)(gen: => T) extends Module
+{
+  class Inputs extends Bundle {
+    val addr = UInt(INPUT, log2Up(n))
+    val rw = Bool(INPUT)
+    val wdata = gen.asInput
+    val wmask = gen.asInput
+    override def clone = new Inputs().asInstanceOf[this.type]
+  }
+  val io = new Bundle {
+    val in = Valid(new Inputs).flip
+    val rdata = gen.asOutput
+  }
+  val data = gen
+  val colMux = if (2*data.getWidth <= leaf.data.getWidth && n > leaf.n) 1 << math.floor(math.log(leaf.data.getWidth/data.getWidth)/math.log(2)).toInt else 1
+  val nWide = if (data.getWidth > leaf.data.getWidth) 1+(data.getWidth-1)/leaf.data.getWidth else 1
+  val nDeep = if (n > colMux*leaf.n) 1+(n-1)/(colMux*leaf.n) else 1
+  if (nDeep > 1 || colMux > 1)
+    require(isPow2(n) && isPow2(leaf.n))
+
+  val rdataDeep = Vec.fill(nDeep){Bits()}
+  val rdataSel = Vec.fill(nDeep){Bool()}
+  for (i <- 0 until nDeep) {
+    val in = Pipe(io.in.valid && (if (nDeep == 1) Bool(true) else UInt(i) === io.in.bits.addr(log2Up(n)-1, log2Up(n/nDeep))), io.in.bits, preLatency)
+    val idx = in.bits.addr(log2Up(n/nDeep/colMux)-1, 0)
+    val wdata = in.bits.wdata.toBits
+    val wmask = in.bits.wmask.toBits
+    val ren = in.valid && !in.bits.rw
+    val reg_ren = Reg(next=ren)
+    val rdata = Vec.fill(nWide){Bits()}
+
+    val r = Pipe(ren, in.bits.addr, postLatency)
+
+    for (j <- 0 until nWide) {
+      val mem = leaf.clone
+      var dout: Bits = null
+      val ridx = if (postLatency > 0) Reg(Bits()) else null
+
+      var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.getWidth*(j+1))-1, leaf.data.getWidth*j))
+      if (colMux > 1)
+        wmask0 = wmask0 & FillInterleaved(gen.getWidth, UIntToOH(in.bits.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux)))
+      val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.getWidth*(j+1))-1, leaf.data.getWidth*j))
+      when (in.valid) {
+        when (in.bits.rw) {
+          if (noMask)
+            mem.write(idx, wdata0)
+          else
+            mem.write(idx, wdata0, wmask0)
+        }
+        .otherwise { if (postLatency > 0) ridx := idx }
+      }
+
+      if (postLatency == 0) {
+        dout = mem(idx)
+      } else if (postLatency == 1) {
+        dout = mem(ridx)
+      } else
+        dout = Pipe(reg_ren, mem(ridx), postLatency-1).bits
+
+      rdata(j) := dout
+    }
+    val rdataWide = rdata.reduceLeft((x, y) => Cat(y, x))
+
+    var colMuxOut = rdataWide
+    if (colMux > 1) {
+      val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.getWidth*(k+1)-1, gen.getWidth*k)))
+      colMuxOut = colMuxIn(r.bits(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)))
+    }
+
+    rdataDeep(i) := colMuxOut
+    rdataSel(i) := r.valid
+  }
+
+  io.rdata := Mux1H(rdataSel, rdataDeep)
+}
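BigMem composes a small "leaf" SRAM into a larger logical memory: nWide leaves side by side for width, nDeep banks selected by high address bits, and colMux columns sharing one leaf row. A hedged instantiation sketch (the sizes and the surrounding module are illustrative only, in the Chisel 2 style used here):

    class BigMemExample extends Module {
      val io = new Bundle { val rdata = Bits(OUTPUT, 128) }
      // 1024-entry x 64-bit leaf, composed into a 4096-entry x 128-bit memory
      val leaf = Mem(UInt(width = 64), 1024, seqRead = true)
      val bigmem = Module(new BigMem(4096, 0, 1, leaf)(Bits(width = 128)))
      bigmem.io.in.valid := Bool(false)   // request port tied off in this sketch
      bigmem.io.in.bits.rw := Bool(false)
      bigmem.io.in.bits.addr := UInt(0)
      bigmem.io.in.bits.wdata := Bits(0)
      bigmem.io.in.bits.wmask := Bits(0)
      io.rdata := bigmem.io.rdata         // read data returns a few cycles later,
                                          // set by preLatency and postLatency
    }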
diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala
new file mode 100644
index 00000000..b554b231
--- /dev/null
+++ b/uncore/src/main/scala/broadcast.scala
@@ -0,0 +1,387 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+
+case object L2StoreDataQueueDepth extends Field[Int]
+
+trait BroadcastHubParameters extends CoherenceAgentParameters {
+  val sdqDepth = params(L2StoreDataQueueDepth)*innerDataBeats
+  val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(sdqDepth))
+  val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases
+}
+
+class DataQueueLocation extends Bundle with BroadcastHubParameters {
+  val idx = UInt(width = dqIdxBits)
+  val loc = UInt(width = log2Ceil(nDataQueueLocations))
+}
+
+object DataQueueLocation {
+  def apply(idx: UInt, loc: UInt) = {
+    val d = new DataQueueLocation
+    d.idx := idx
+    d.loc := loc
+    d
+  }
+}
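DataQueueLocation packs a queue index together with a tag naming which of the three data sources a beat lives in, so the data field of an internal Acquire can carry a pointer instead of a payload. A round-trip sketch using the definitions above (sdq_alloc_id and inStoreQueue are names from the hub below):

    // pack: stash the SDQ slot number in the acquire's data field
    val dqloc = DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits
    // unpack at the outer memory port to recover the pointer
    val ptr = new DataQueueLocation().fromBits(dqloc)
    // ptr.loc selects among sdq / vwbdq / release bypass; ptr.idx picks the entry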
+
+class L2BroadcastHub extends ManagerCoherenceAgent
+    with BroadcastHubParameters {
+  val internalDataBits = new DataQueueLocation().getWidth
+  val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)
+
+  // Create SHRs for outstanding transactions
+  val trackerList = (0 until nReleaseTransactors).map(id =>
+    Module(new BroadcastVoluntaryReleaseTracker(id), {case TLDataBits => internalDataBits})) ++
+    (nReleaseTransactors until nTransactors).map(id =>
+      Module(new BroadcastAcquireTracker(id), {case TLDataBits => internalDataBits}))
+
+  // Propagate incoherence flags
+  trackerList.map(_.io.incoherent := io.incoherent.toBits)
+
+  // Queue to store impending Put data
+  val sdq = Vec.fill(sdqDepth){ Reg(io.iacq().data) }
+  val sdq_val = Reg(init=Bits(0, sdqDepth))
+  val sdq_alloc_id = PriorityEncoder(~sdq_val)
+  val sdq_rdy = !sdq_val.andR
+  val sdq_enq = io.inner.acquire.fire() && io.iacq().hasData()
+  when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data }
+
+  // Handle acquire transaction initiation
+  val trackerAcquireIOs = trackerList.map(_.io.inner.acquire)
+  val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits
+  val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits
+  val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits
+  val acquire_idx = Mux(acquireMatches.orR,
+                      PriorityEncoder(acquireMatches),
+                      PriorityEncoder(acquireReadys))
+
+  val block_acquires = acquireConflicts.orR || !sdq_rdy
+  io.inner.acquire.ready := acquireReadys.orR && !block_acquires
+  trackerAcquireIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.bits := io.inner.acquire.bits
+      tracker.bits.data := DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits
+      tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i))
+  }
+
+  // Queue to store impending Voluntary Release data
+  val voluntary = io.irel().isVoluntary()
+  val vwbdq_enq = io.inner.release.fire() && voluntary && io.irel().hasData()
+  val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, innerDataBeats) //TODO Zero width
+  val vwbdq = Vec.fill(innerDataBeats){ Reg(io.irel().data) } //TODO Assumes nReleaseTransactors == 1
+  when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data }
+
+  // Handle releases, which might be voluntary and might have data
+  val trackerReleaseIOs = trackerList.map(_.io.inner.release)
+  val releaseReadys = Vec(trackerReleaseIOs.map(_.ready)).toBits
+  val releaseMatches = Vec(trackerList.map(_.io.has_release_match)).toBits
+  val release_idx = PriorityEncoder(releaseMatches)
+  io.inner.release.ready := releaseReadys(release_idx)
+  trackerReleaseIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.valid := io.inner.release.valid && (release_idx === UInt(i))
+      tracker.bits := io.inner.release.bits
+      tracker.bits.data := DataQueueLocation(rel_data_cnt,
+                             (if(i < nReleaseTransactors) inVolWBQueue
+                              else inClientReleaseQueue)).toBits
+  }
+  assert(!(io.inner.release.valid && !releaseMatches.orR),
+    "Non-voluntary release should always have a Tracker waiting for it.")
+
+  // Wire probe requests and grant reply to clients, finish acks from clients
+  // Note that we bypass the Grant data subbundles
+  io.inner.grant.bits.data := io.outer.grant.bits.data
+  io.inner.grant.bits.addr_beat := io.outer.grant.bits.addr_beat
+  doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
+  doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
+  doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+  // Create an arbiter for the one memory port
+  val outer_arb = Module(new ClientUncachedTileLinkIOArbiter(trackerList.size),
+                    { case TLId => params(OuterTLId)
+                      case TLDataBits => internalDataBits })
+  outer_arb.io.in <> trackerList.map(_.io.outer)
+  // Get the pending data out of the store data queue
+  val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
+  val is_in_sdq = outer_data_ptr.loc === inStoreQueue
+  val free_sdq = io.outer.acquire.fire() &&
+                  io.outer.acquire.bits.hasData() &&
+                  outer_data_ptr.loc === inStoreQueue
+  io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
+                                  inStoreQueue -> sdq(outer_data_ptr.idx),
+                                  inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
+  io.outer <> outer_arb.io.out
+
+  // Update SDQ valid bits
+  when (io.outer.acquire.valid || sdq_enq) {
+    sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) |
+               PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
+  }
+}
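The sdq_val update above is a bit-vector freelist: PriorityEncoderOH claims the lowest free slot when a Put beat enqueues, and UIntToOH releases a slot when the outer Acquire that points at it fires. The idiom in isolation (a sketch; depth, do_alloc, do_free, and free_idx are hypothetical names):

    val slot_valid = Reg(init = Bits(0, width = depth))
    val alloc_oh = PriorityEncoderOH(~slot_valid)   // lowest free slot, one-hot
    val free_oh  = UIntToOH(free_idx)               // slot being retired, one-hot
    slot_valid := slot_valid & ~(free_oh & Fill(depth, do_free)) |
                  alloc_oh & Fill(depth, do_alloc)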
+
+class BroadcastXactTracker extends XactTracker {
+  val io = new ManagerXactTrackerIO
+}
+
+class BroadcastVoluntaryReleaseTracker(trackerId: Int) extends BroadcastXactTracker {
+  val s_idle :: s_outer :: s_grant :: s_ack :: Nil = Enum(UInt(), 4)
+  val state = Reg(init=s_idle)
+
+  val xact = Reg(Bundle(new ReleaseFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(io.irel().data.clone) }
+  val coh = ManagerMetadata.onReset
+
+  val collect_irel_data = Reg(init=Bool(false))
+  val irel_data_valid = Reg(init=Bits(0, width = innerDataBeats))
+  val irel_data_done = connectIncomingDataBeatCounter(io.inner.release)
+  val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire)
+
+  io.has_acquire_conflict := Bool(false)
+  io.has_release_match := io.irel().isVoluntary()
+  io.has_acquire_match := Bool(false)
+
+  io.outer.acquire.valid := Bool(false)
+  io.outer.grant.ready := Bool(false)
+  io.inner.acquire.ready := Bool(false)
+  io.inner.probe.valid := Bool(false)
+  io.inner.release.ready := Bool(false)
+  io.inner.grant.valid := Bool(false)
+  io.inner.finish.ready := Bool(false)
+
+  io.inner.grant.bits := coh.makeGrant(xact, UInt(trackerId))
+
+  //TODO: Use io.outer.release instead?
+  io.outer.acquire.bits := Bundle(
+    PutBlock(
+      client_xact_id = UInt(trackerId),
+      addr_block = xact.addr_block,
+      addr_beat = oacq_data_cnt,
+      data = data_buffer(oacq_data_cnt)))(outerTLParams)
+
+  when(collect_irel_data) {
+    io.inner.release.ready := Bool(true)
+    when(io.inner.release.valid) {
+      data_buffer(io.irel().addr_beat) := io.irel().data
+      irel_data_valid(io.irel().addr_beat) := Bool(true)
+    }
+    when(irel_data_done) { collect_irel_data := Bool(false) }
+  }
+
+  switch (state) {
+    is(s_idle) {
+      io.inner.release.ready := Bool(true)
+      when( io.inner.release.valid ) {
+        xact := io.irel()
+        data_buffer(UInt(0)) := io.irel().data
+        collect_irel_data := io.irel().hasMultibeatData()
+        irel_data_valid := io.irel().hasData() << io.irel().addr_beat
+        state := Mux(io.irel().hasData(), s_outer,
+                   Mux(io.irel().requiresAck(), s_ack, s_idle))
+      }
+    }
+    is(s_outer) {
+      io.outer.acquire.valid := !collect_irel_data || irel_data_valid(oacq_data_cnt)
+      when(oacq_data_done) {
+        state := s_grant // converted irel to oacq, so expect grant TODO: Mux(xact.requiresAck(), s_grant, s_idle) ?
+      }
+    }
+    is(s_grant) { // Forward the Grant.voluntaryAck
+      io.outer.grant.ready := io.inner.grant.ready
+      io.inner.grant.valid := io.outer.grant.valid
+      when(io.inner.grant.fire()) {
+        state := Mux(io.ignt().requiresAck(), s_ack, s_idle)
+      }
+    }
+    is(s_ack) {
+      // TODO: This state is unnecessary if no client will ever issue the
+      // pending Acquire that caused this writeback until it receives the
+      // Grant.voluntaryAck for this writeback
+      io.inner.finish.ready := Bool(true)
+      when(io.inner.finish.valid) { state := s_idle }
+    }
+  }
+}
+
+class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
+  val s_idle :: s_probe :: s_mem_read :: s_mem_write :: s_make_grant :: s_mem_resp :: s_ack :: Nil = Enum(UInt(), 7)
+  val state = Reg(init=s_idle)
+
+  val xact = Reg(Bundle(new AcquireFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(io.iacq().data.clone) }
+  val coh = ManagerMetadata.onReset
+
+  assert(!(state != s_idle && xact.isBuiltInType() &&
+      Vec(Acquire.getType, Acquire.putType, Acquire.putAtomicType,
+        Acquire.prefetchType).contains(xact.a_type)),
+    "Broadcast Hub does not support PutAtomics, subblock Gets/Puts, or prefetches") // TODO
+
+  val release_count = Reg(init=UInt(0, width = log2Up(io.inner.tlNCachingClients+1)))
+  val pending_probes = Reg(init=Bits(0, width = io.inner.tlNCachingClients))
+  val curr_p_id = PriorityEncoder(pending_probes)
+  val full_sharers = coh.full()
+  val probe_self = io.inner.acquire.bits.requiresSelfProbe()
+  val mask_self_true = UInt(UInt(1) << io.inner.acquire.bits.client_id, width = io.inner.tlNCachingClients)
+  val mask_self_false = ~UInt(UInt(1) << io.inner.acquire.bits.client_id, width = io.inner.tlNCachingClients)
+  val mask_self = Mux(probe_self, full_sharers | mask_self_true, full_sharers & mask_self_false)
+  val mask_incoherent = mask_self & ~io.incoherent.toBits
+
+  val collect_iacq_data = Reg(init=Bool(false))
+  val iacq_data_valid = Reg(init=Bits(0, width = innerDataBeats))
+  val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire)
+  val irel_data_done = connectIncomingDataBeatCounter(io.inner.release)
+  val (ignt_data_cnt, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant)
+  val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire)
+  val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant)
+  val pending_ognt_ack = Reg(init=Bool(false))
+  val pending_outer_write = xact.hasData()
+  val pending_outer_write_ = io.iacq().hasData()
+  val pending_outer_read = io.ignt().hasData()
+  val pending_outer_read_ = coh.makeGrant(io.iacq(), UInt(trackerId)).hasData()
+
+  io.has_acquire_conflict := xact.conflicts(io.iacq()) &&
+                               (state != s_idle) &&
+                               !collect_iacq_data
+  io.has_acquire_match := xact.conflicts(io.iacq()) &&
+                            collect_iacq_data
+  io.has_release_match := xact.conflicts(io.irel()) &&
+                            !io.irel().isVoluntary() &&
+                            (state === s_probe)
+
+  val outer_write_acq = Bundle(PutBlock(
+                          client_xact_id = UInt(trackerId),
+                          addr_block = xact.addr_block,
+                          addr_beat = oacq_data_cnt,
+                          data = data_buffer(oacq_data_cnt)))(outerTLParams)
+  val outer_write_rel = Bundle(PutBlock(
+                          client_xact_id = UInt(trackerId),
+                          addr_block = xact.addr_block,
+                          addr_beat = io.irel().addr_beat,
+                          data = io.irel().data))(outerTLParams)
+  val outer_read = Bundle(GetBlock(
+                     client_xact_id = UInt(trackerId),
+                     addr_block = xact.addr_block))(outerTLParams)
+
+  io.outer.acquire.valid := Bool(false)
+  io.outer.acquire.bits := outer_read //default
+  io.outer.grant.ready := Bool(false)
+
+  io.inner.probe.valid := Bool(false)
+  io.inner.probe.bits := coh.makeProbe(curr_p_id, xact)
+
+  io.inner.grant.valid := Bool(false)
+  io.inner.grant.bits := coh.makeGrant(xact, UInt(trackerId)) // Data bypassed in parent
+
+  io.inner.acquire.ready := Bool(false)
+  io.inner.release.ready := Bool(false)
+  io.inner.finish.ready := Bool(false)
+
+  assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() &&
+      io.iacq().client_id != xact.client_id),
+    "AcquireTracker accepted data beat from different network source than initial request.")
+
+  assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() &&
+      io.iacq().client_xact_id != xact.client_xact_id),
+    "AcquireTracker accepted data beat from different client transaction than initial request.")
+
+  assert(!(state === s_idle && io.inner.acquire.fire() &&
+      io.iacq().addr_beat != UInt(0)),
+    "AcquireTracker initialized with a tail data beat.")
+
+  when(collect_iacq_data) {
+    io.inner.acquire.ready := Bool(true)
+    when(io.inner.acquire.valid) {
+      data_buffer(io.iacq().addr_beat) := io.iacq().data
+      iacq_data_valid(io.iacq().addr_beat) := Bool(true)
+    }
+    when(iacq_data_done) { collect_iacq_data := Bool(false) }
+  }
+
+  when(pending_ognt_ack) {
+    io.outer.grant.ready := Bool(true)
+    when(io.outer.grant.valid) { pending_ognt_ack := Bool(false) }
+    //TODO add finish queue if this isn't the last level manager
+  }
+
+  switch (state) {
+    is(s_idle) {
+      io.inner.acquire.ready := Bool(true)
+      when(io.inner.acquire.valid) {
+        xact := io.iacq()
+        data_buffer(UInt(0)) := io.iacq().data
+        collect_iacq_data := io.iacq().hasMultibeatData()
+        iacq_data_valid := io.iacq().hasData() << io.iacq().addr_beat
+        val needs_probes = mask_incoherent.orR
+        when(needs_probes) {
+          pending_probes := mask_incoherent
+          release_count := PopCount(mask_incoherent)
+        }
+        state := Mux(needs_probes, s_probe,
+                   Mux(pending_outer_write_, s_mem_write,
+                     Mux(pending_outer_read_, s_mem_read, s_make_grant)))
+      }
+    }
+    is(s_probe) {
+      // Generate probes
+      io.inner.probe.valid := pending_probes.orR
+      when(io.inner.probe.ready) {
+        pending_probes := pending_probes & ~UIntToOH(curr_p_id)
+      }
+
+      // Handle releases, which may have data to be written back
+      io.inner.release.ready := !io.irel().hasData() || io.outer.acquire.ready
+      when(io.inner.release.valid) {
+        when(io.irel().hasData()) {
+          io.outer.acquire.valid := Bool(true)
+          io.outer.acquire.bits := outer_write_rel
+          when(io.outer.acquire.ready) {
+            when(oacq_data_done) {
+              pending_ognt_ack := Bool(true)
+              release_count := release_count - UInt(1)
+              when(release_count === UInt(1)) {
+                state := Mux(pending_outer_write, s_mem_write,
+                           Mux(pending_outer_read, s_mem_read, s_make_grant))
+              }
+            }
+          }
+        } .otherwise {
+          release_count := release_count - UInt(1)
+          when(release_count === UInt(1)) {
+            state := Mux(pending_outer_write, s_mem_write,
+                       Mux(pending_outer_read, s_mem_read, s_make_grant))
+          }
+        }
+      }
+    }
+    is(s_mem_write) { // Write data to outer memory
+      io.outer.acquire.valid := !pending_ognt_ack || !collect_iacq_data || iacq_data_valid(oacq_data_cnt)
+      io.outer.acquire.bits := outer_write_acq
+      when(oacq_data_done) {
+        pending_ognt_ack := Bool(true)
+        state := Mux(pending_outer_read, s_mem_read, s_mem_resp)
+      }
+    }
+    is(s_mem_read) { // Read data from outer memory (possibly what was just written)
+      io.outer.acquire.valid := !pending_ognt_ack
+      io.outer.acquire.bits := outer_read
+      when(io.outer.acquire.fire()) { state := s_mem_resp }
+    }
+    is(s_mem_resp) { // Wait to forward grants from outer memory
+      io.outer.grant.ready := io.inner.grant.ready
+      io.inner.grant.valid := io.outer.grant.valid
+      when(ignt_data_done) {
+        state := Mux(io.ignt().requiresAck(), s_ack, s_idle)
+      }
+    }
+    is(s_make_grant) { // Manufacture a local grant (some kind of permission upgrade)
+      io.inner.grant.valid := Bool(true)
+      when(io.inner.grant.ready) {
+        state := Mux(io.ignt().requiresAck(), s_ack, s_idle)
+      }
+    }
+    is(s_ack) { // Wait for transaction to complete
+      io.inner.finish.ready := Bool(true)
+      when(io.inner.finish.valid) { state := s_idle }
+    }
+  }
+}
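broadcast.scala thus forms the simplest complete coherence manager: no local data storage, probes broadcast to every relevant caching client, and all block data sourced from or sunk into outer memory. A hedged sketch of instantiating it, following the two-argument Module pattern used for the trackers above (the TLId strings, port names, and incoherent-vector shape are assumptions):

    val hub = Module(new L2BroadcastHub,
      { case InnerTLId => "L1toL2"
        case OuterTLId => "L2toMC" })
    hub.io.inner <> clientNetwork.io.manager   // TileLink toward the L1s (hypothetical)
    outerMemPort <> hub.io.outer               // TileLink toward memory (hypothetical)
    hub.io.incoherent := Vec.fill(nCachingClients){ Bool(false) }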
diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala
new file mode 100644
index 00000000..7fcd1408
--- /dev/null
+++ b/uncore/src/main/scala/cache.scala
@@ -0,0 +1,1078 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+import scala.reflect.ClassTag
+
+case object CacheName extends Field[String]
+case object NSets extends Field[Int]
+case object NWays extends Field[Int]
+case object RowBits extends Field[Int]
+case object Replacer extends Field[() => ReplacementPolicy]
+case object AmoAluOperandBits extends Field[Int]
+case object L2DirectoryRepresentation extends Field[DirectoryRepresentation]
+case object NPrimaryMisses extends Field[Int]
+case object NSecondaryMisses extends Field[Int]
+case object CacheBlockBytes extends Field[Int]
+case object CacheBlockOffsetBits extends Field[Int]
+case object ECCCode extends Field[Option[Code]]
+
+abstract trait CacheParameters extends UsesParameters {
+  val nSets = params(NSets)
+  val blockOffBits = params(CacheBlockOffsetBits)
+  val idxBits = log2Up(nSets)
+  val untagBits = blockOffBits + idxBits
+  val tagBits = params(PAddrBits) - untagBits
+  val nWays = params(NWays)
+  val wayBits = log2Up(nWays)
+  val isDM = nWays == 1
+  val rowBits = params(RowBits)
+  val rowBytes = rowBits/8
+  val rowOffBits = log2Up(rowBytes)
+  val code = params(ECCCode).getOrElse(new IdentityCode)
+}
+
+abstract class CacheBundle extends Bundle with CacheParameters
+abstract class CacheModule extends Module with CacheParameters
+
+class StoreGen(typ: Bits, addr: Bits, dat: Bits) {
+  val byte = typ === MT_B || typ === MT_BU
+  val half = typ === MT_H || typ === MT_HU
+  val word = typ === MT_W || typ === MT_WU
+  def mask =
+    Mux(byte, Bits(  1) <<     addr(2,0),
+    Mux(half, Bits(  3) << Cat(addr(2,1), Bits(0,1)),
+    Mux(word, Bits( 15) << Cat(addr(2),   Bits(0,2)),
+              Bits(255))))
+  def data =
+    Mux(byte, Fill(8, dat( 7,0)),
+    Mux(half, Fill(4, dat(15,0)),
+              wordData))
+  lazy val wordData =
+    Mux(word, Fill(2, dat(31,0)),
+              dat)
+}
+
+class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) {
+  val t = new StoreGen(typ, addr, dat)
+  val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D
+
+  val wordShift = Mux(addr(2), dat(63,32), dat(31,0))
+  val word = Cat(Mux(t.word, Fill(32, sign && wordShift(31)), dat(63,32)), wordShift)
+  val halfShift = Mux(addr(1), word(31,16), word(15,0))
+  val half = Cat(Mux(t.half, Fill(48, sign && halfShift(15)), word(63,16)), halfShift)
+  val byteShift = Mux(zero, UInt(0), Mux(addr(0), half(15,8), half(7,0)))
+  val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift)
+}
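StoreGen turns an access type and the low address bits into a byte mask and replicated write data; LoadGen inverts the process, muxing the addressed subword down and sign- or zero-extending it. A worked sketch for a halfword at byte offset 6 of a 64-bit row (wdata and rdata are hypothetical wires):

    val sg = new StoreGen(MT_H, UInt(6), wdata)
    // half: mask = Bits(3) << Cat(addr(2,1), Bits(0,1)) = 0x3 << 6, i.e. byte lanes 7:6
    // data = Fill(4, wdata(15,0)), replicated so lanes 7:6 see the halfword
    val lg = new LoadGen(MT_H, UInt(6), rdata, Bool(false))
    // lg.half ends in rdata(63,48), sign-extended because MT_H is a signed type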
+
+class AMOALU extends CacheModule {
+  val operandBits = params(AmoAluOperandBits)
+  require(operandBits == 64)
+  val io = new Bundle {
+    val addr = Bits(INPUT, blockOffBits)
+    val cmd = Bits(INPUT, M_SZ)
+    val typ = Bits(INPUT, MT_SZ)
+    val lhs = Bits(INPUT, operandBits)
+    val rhs = Bits(INPUT, operandBits)
+    val out = Bits(OUTPUT, operandBits)
+  }
+
+  val storegen = new StoreGen(io.typ, io.addr, io.rhs)
+  val rhs = storegen.wordData
+
+  val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
+  val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
+  val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
+  val word = io.typ === MT_W || io.typ === MT_WU || // Logic minimization:
+               io.typ === MT_B || io.typ === MT_BU
+
+  val mask = SInt(-1,64) ^ (io.addr(2) << UInt(31))
+  val adder_out = (io.lhs & mask).toUInt + (rhs & mask)
+
+  val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
+  val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
+  val lt_lo = io.lhs(31,0) < rhs(31,0)
+  val lt_hi = io.lhs(63,32) < rhs(63,32)
+  val eq_hi = io.lhs(63,32) === rhs(63,32)
+  val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
+  val less = Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
+
+  val out = Mux(io.cmd === M_XA_ADD, adder_out,
+            Mux(io.cmd === M_XA_AND, io.lhs & rhs,
+            Mux(io.cmd === M_XA_OR,  io.lhs | rhs,
+            Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
+            Mux(Mux(less, min, max), io.lhs,
+                storegen.data)))))
+
+  val wmask = FillInterleaved(8, storegen.mask)
+  io.out := wmask & out | ~wmask & io.lhs
+}
+
+abstract class ReplacementPolicy {
+  def way: UInt
+  def miss: Unit
+  def hit: Unit
+}
+
+class RandomReplacement(ways: Int) extends ReplacementPolicy {
+  private val replace = Bool()
+  replace := Bool(false)
+  val lfsr = LFSR16(replace)
+
+  def way = if(ways == 1) UInt(0) else lfsr(log2Up(ways)-1,0)
+  def miss = replace := Bool(true)
+  def hit = {}
+}
+
+abstract class Metadata extends CacheBundle {
+  val tag = Bits(width = tagBits)
+  val coh: CoherenceMetadata
+}
+
+class MetaReadReq extends CacheBundle {
+  val idx = Bits(width = idxBits)
+}
+
+class MetaWriteReq[T <: Metadata](gen: T) extends MetaReadReq {
+  val way_en = Bits(width = nWays)
+  val data = gen.clone
+  override def clone = new MetaWriteReq(gen).asInstanceOf[this.type]
+}
+
+class MetadataArray[T <: Metadata](makeRstVal: () => T) extends CacheModule {
+  val rstVal = makeRstVal()
+  val io = new Bundle {
+    val read = Decoupled(new MetaReadReq).flip
+    val write = Decoupled(new MetaWriteReq(rstVal.clone)).flip
+    val resp = Vec.fill(nWays){rstVal.clone.asOutput}
+  }
+  val rst_cnt = Reg(init=UInt(0, log2Up(nSets+1)))
+  val rst = rst_cnt < UInt(nSets)
+  val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
+  val wdata = Mux(rst, rstVal, io.write.bits.data).toBits
+  val wmask = Mux(rst, SInt(-1), io.write.bits.way_en).toUInt
+  when (rst) { rst_cnt := rst_cnt+UInt(1) }
+
+  val metabits = rstVal.getWidth
+  val tag_arr = Mem(UInt(width = metabits*nWays), nSets, seqRead = true)
+  when (rst || io.write.valid) {
+    tag_arr.write(waddr, Fill(nWays, wdata), FillInterleaved(metabits, wmask))
+  }
+
+  val tags = tag_arr(RegEnable(io.read.bits.idx, io.read.valid))
+  io.resp := io.resp.fromBits(tags)
+  io.read.ready := !rst && !io.write.valid // so really this could be a 6T RAM
+  io.write.ready := !rst
+}
+
+abstract trait L2HellaCacheParameters extends CacheParameters with CoherenceAgentParameters {
+  val idxMSB = idxBits-1
+  val idxLSB = 0
+  val blockAddrBits = params(TLBlockAddrBits)
+  val refillCyclesPerBeat = outerDataBits/rowBits
+  val refillCycles = refillCyclesPerBeat*outerDataBeats
+  val internalDataBeats = params(CacheBlockBytes)*8/rowBits
+  require(refillCyclesPerBeat == 1)
+  val amoAluOperandBits = params(AmoAluOperandBits)
+  require(amoAluOperandBits <= innerDataBits)
+  require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states
+  val nSecondaryMisses = params(NSecondaryMisses)
+  val isLastLevelCache = true
+  val ignoresWriteMask = !params(ECCCode).isEmpty
+}
+
+abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters
+abstract class L2HellaCacheModule extends Module with L2HellaCacheParameters {
+  def doInternalOutputArbitration[T <: Data : ClassTag](
+      out: DecoupledIO[T],
+      ins: Seq[DecoupledIO[T]]) {
+    val arb = Module(new RRArbiter(out.bits.clone, ins.size))
+    out <> arb.io.out
+    arb.io.in <> ins
+  }
+
+  def doInternalInputRouting[T <: HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) {
+    outs.map(_.bits := in.bits)
+    outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && in.bits.id === UInt(i) }
+  }
+}
+
+trait HasL2Id extends Bundle with CoherenceAgentParameters {
+  val id = UInt(width = log2Up(nTransactors + 1))
+}
+
+trait HasL2InternalRequestState extends L2HellaCacheBundle {
+  val tag_match = Bool()
+  val meta = new L2Metadata
+  val way_en = Bits(width = nWays)
+}
+
+trait HasL2BeatAddr extends L2HellaCacheBundle {
+  val addr_beat = UInt(width = log2Up(refillCycles))
+}
+
+trait HasL2Data extends L2HellaCacheBundle
+    with HasL2BeatAddr {
+  val data = UInt(width = rowBits)
+  def hasData(dummy: Int = 0) = Bool(true)
+  def hasMultibeatData(dummy: Int = 0) = Bool(refillCycles > 1)
+}
+
+class L2Metadata extends Metadata with L2HellaCacheParameters {
+  val coh = new HierarchicalMetadata
+}
+
+object L2Metadata {
+  def apply(tag: Bits, coh: HierarchicalMetadata) = {
+    val meta = new L2Metadata
+    meta.tag := tag
+    meta.coh := coh
+    meta
+  }
+}
+
+class L2MetaReadReq extends MetaReadReq with HasL2Id {
+  val tag = Bits(width = tagBits)
+}
+
+class L2MetaWriteReq extends MetaWriteReq[L2Metadata](new L2Metadata)
+    with HasL2Id {
+  override def clone = new L2MetaWriteReq().asInstanceOf[this.type]
+}
+
+class L2MetaResp extends L2HellaCacheBundle
+  with HasL2Id
+  with HasL2InternalRequestState
+
+trait HasL2MetaReadIO extends L2HellaCacheBundle {
+  val read = Decoupled(new L2MetaReadReq)
+  val resp = Valid(new L2MetaResp).flip
+}
+
+trait HasL2MetaWriteIO extends L2HellaCacheBundle {
+  val write = Decoupled(new L2MetaWriteReq)
+}
+
+class L2MetaRWIO extends L2HellaCacheBundle with HasL2MetaReadIO with HasL2MetaWriteIO
+
+class L2MetadataArray extends L2HellaCacheModule {
+  val io = new L2MetaRWIO().flip
+
+  def onReset = L2Metadata(UInt(0), HierarchicalMetadata.onReset)
+  val meta = Module(new MetadataArray(onReset _))
+  meta.io.read <> io.read
+  meta.io.write <> io.write
+
+  val s1_tag = RegEnable(io.read.bits.tag, io.read.valid)
+  val s1_id = RegEnable(io.read.bits.id, io.read.valid)
+  def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
+  val s1_clk_en = Reg(next = io.read.fire())
+  val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === s1_tag)
+  val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.outer.isValid()).toBits
+  val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
+  val s2_tag_match = s2_tag_match_way.orR
+  val s2_hit_coh = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
+
+  val replacer = params(Replacer)()
+  val s1_replaced_way_en = UIntToOH(replacer.way)
+  val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
+  val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) =>
+    RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
+  when(!s2_tag_match) { replacer.miss }
+
+  io.resp.valid := Reg(next = s1_clk_en)
+  io.resp.bits.id := RegEnable(s1_id, s1_clk_en)
+  io.resp.bits.tag_match := s2_tag_match
+  io.resp.bits.meta := Mux(s2_tag_match,
+    L2Metadata(s2_repl_meta.tag, s2_hit_coh),
+    s2_repl_meta)
+  io.resp.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
+}
+
+class L2DataReadReq extends L2HellaCacheBundle
+    with HasL2BeatAddr
+    with HasL2Id {
+  val addr_idx = UInt(width = idxBits)
+  val way_en = Bits(width = nWays)
+}
+
+class L2DataWriteReq extends L2DataReadReq
+    with HasL2Data {
+  val wmask = Bits(width = rowBits/8)
+}
+
+class L2DataResp extends L2HellaCacheBundle with HasL2Id with HasL2Data
+
+trait HasL2DataReadIO extends L2HellaCacheBundle {
+  val read = Decoupled(new L2DataReadReq)
+  val resp = Valid(new L2DataResp).flip
+}
+
+trait HasL2DataWriteIO extends L2HellaCacheBundle {
+  val write = Decoupled(new L2DataWriteReq)
+}
+
+class L2DataRWIO extends L2HellaCacheBundle with HasL2DataReadIO with HasL2DataWriteIO
+
+class L2DataArray(delay: Int) extends L2HellaCacheModule {
+  val io = new L2DataRWIO().flip
+
+  val wmask = FillInterleaved(8, io.write.bits.wmask)
+  val reg_raddr = Reg(UInt())
+  val array = Mem(Bits(width=rowBits), nWays*nSets*refillCycles, seqRead = true)
+  val waddr = Cat(OHToUInt(io.write.bits.way_en), io.write.bits.addr_idx, io.write.bits.addr_beat)
+  val raddr = Cat(OHToUInt(io.read.bits.way_en), io.read.bits.addr_idx, io.read.bits.addr_beat)
+
+  when (io.write.bits.way_en.orR && io.write.valid) {
+    array.write(waddr, io.write.bits.data, wmask)
+  }.elsewhen (io.read.bits.way_en.orR && io.read.valid) {
+    reg_raddr := raddr
+  }
+
+  io.resp.valid := ShiftRegister(io.read.fire(), delay+1)
+  io.resp.bits.id := ShiftRegister(io.read.bits.id, delay+1)
+  io.resp.bits.addr_beat := ShiftRegister(io.read.bits.addr_beat, delay+1)
+  io.resp.bits.data := ShiftRegister(array(reg_raddr), delay)
+  io.read.ready := !io.write.valid
+  io.write.ready := Bool(true)
+}
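Like BigMem, L2DataArray is built on a Chisel 2 Mem with seqRead = true: the read address is captured in a register and the Mem is indexed by that register, which the backend can map onto a synchronous-read SRAM; ShiftRegister then pads the response out to the configured delay. The core idiom in isolation (a sketch, assuming Chisel 2 semantics):

    class SeqReadSketch(depth: Int, w: Int) extends Module {
      val io = new Bundle {
        val ren   = Bool(INPUT)
        val raddr = UInt(INPUT, log2Up(depth))
        val rdata = Bits(OUTPUT, w)
      }
      val array = Mem(Bits(width = w), depth, seqRead = true)
      val reg_raddr = Reg(UInt())
      when (io.ren) { reg_raddr := io.raddr }  // capture address on read enable
      io.rdata := array(reg_raddr)             // data appears the following cycle
    }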
+
+class L2HellaCacheBank extends HierarchicalCoherenceAgent with L2HellaCacheParameters {
+  require(isPow2(nSets))
+  require(isPow2(nWays))
+
+  val meta = Module(new L2MetadataArray) // TODO: add delay knob
+  val data = Module(new L2DataArray(1))
+  val tshrfile = Module(new TSHRFile)
+  tshrfile.io.inner <> io.inner
+  io.outer <> tshrfile.io.outer
+  io.incoherent <> tshrfile.io.incoherent
+  tshrfile.io.meta <> meta.io
+  tshrfile.io.data <> data.io
+}
+
+class TSHRFileIO extends HierarchicalTLIO {
+  val meta = new L2MetaRWIO
+  val data = new L2DataRWIO
+}
+
+class TSHRFile extends L2HellaCacheModule with HasCoherenceAgentWiringHelpers {
+  val io = new TSHRFileIO
+
+  // Create TSHRs for outstanding transactions
+  val trackerList = (0 until nReleaseTransactors).map(id => Module(new L2VoluntaryReleaseTracker(id))) ++
+    (nReleaseTransactors until nTransactors).map(id => Module(new L2AcquireTracker(id)))
+
+  // WritebackUnit evicts data from L2, including invalidating L1s
+  val wb = Module(new L2WritebackUnit(nTransactors))
+  doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req))
+  doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp))
+
+  // Propagate incoherence flags
+  (trackerList.map(_.io.incoherent) :+ wb.io.incoherent) foreach { _ := io.incoherent.toBits }
+
+  // Handle acquire transaction initiation
+  val trackerAcquireIOs = trackerList.map(_.io.inner.acquire)
+  val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits
+  val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits
+  val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits
+  val acquire_idx = Mux(acquireMatches.orR,
+                      PriorityEncoder(acquireMatches),
+                      PriorityEncoder(acquireReadys))
+  val block_acquires = acquireConflicts.orR
+  io.inner.acquire.ready := acquireReadys.orR && !block_acquires
+  trackerAcquireIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.bits := io.inner.acquire.bits
+      tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i))
+  }
+
+  // Wire releases from clients
+  val trackerReleaseIOs = trackerList.map(_.io.inner.release) :+ wb.io.inner.release
+  val releaseReadys = Vec(trackerReleaseIOs.map(_.ready)).toBits
+  val releaseMatches = Vec(trackerList.map(_.io.has_release_match) :+ wb.io.has_release_match).toBits
+  val release_idx = PriorityEncoder(releaseMatches)
+  io.inner.release.ready := releaseReadys(release_idx)
+  trackerReleaseIOs.zipWithIndex.foreach {
+    case(tracker, i) =>
+      tracker.bits := io.inner.release.bits
+      tracker.valid := io.inner.release.valid && (release_idx === UInt(i))
+  }
+  assert(!(io.inner.release.valid && !releaseMatches.orR),
+    "Non-voluntary release should always have a Tracker waiting for it.")
+
+  // Wire probe requests and grant reply to clients, finish acks from clients
+  doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ wb.io.inner.probe)
+  doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
+  doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
+
+  // Create an arbiter for the one memory port
+  val outerList = trackerList.map(_.io.outer) :+ wb.io.outer
+  val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size))(outerTLParams)
+  outer_arb.io.in <> outerList
+  io.outer <> outer_arb.io.out
+
+  // Wire local memory arrays
+  doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read))
+  doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write))
+  doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read)
+  doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write))
+  doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp))
+  doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp)
+}
+
+
+class L2XactTrackerIO extends HierarchicalXactTrackerIO {
+  val data = new L2DataRWIO
+  val meta = new L2MetaRWIO
+  val wb = new L2WritebackIO
+}
+
+abstract class L2XactTracker extends XactTracker with L2HellaCacheParameters {
+  class CacheBlockBuffer { // TODO
+    val buffer = Reg(Bits(width = params(CacheBlockBytes)*8))
+
+    def internal = Vec.fill(internalDataBeats){ Bits(width = rowBits) }.fromBits(buffer)
+    def inner = Vec.fill(innerDataBeats){ Bits(width = innerDataBits) }.fromBits(buffer)
+    def outer = Vec.fill(outerDataBeats){ Bits(width = outerDataBits) }.fromBits(buffer)
+  }
+
+  def connectDataBeatCounter[S <: L2HellaCacheBundle](inc: Bool, data: S, beat: UInt, full_block: Bool) = {
+    if(data.refillCycles > 1) {
+      val (multi_cnt, multi_done) = Counter(full_block && inc, data.refillCycles)
+      (Mux(!full_block, beat, multi_cnt), Mux(!full_block, inc, multi_done))
+    } else { (UInt(0), inc) }
+  }
+
+  def connectInternalDataBeatCounter[T <: HasL2BeatAddr](
+      in: DecoupledIO[T],
+      beat: UInt = UInt(0),
+      full_block: Bool = Bool(true)) = {
+    connectDataBeatCounter(in.fire(), in.bits, beat, full_block)
+  }
+
+  def connectInternalDataBeatCounter[T <: HasL2Data](
+      in: ValidIO[T],
+      full_block: Bool = Bool(true)) = {
+    connectDataBeatCounter(in.valid, in.bits, UInt(0), full_block)._2
+  }
+
+  def addPendingBitInternal[T <: HasL2BeatAddr](in: DecoupledIO[T]) =
+    Fill(in.bits.refillCycles, in.fire()) & UIntToOH(in.bits.addr_beat)
+
+  def addPendingBitInternal[T <: HasL2BeatAddr](in: ValidIO[T]) =
+    Fill(in.bits.refillCycles, in.valid) & UIntToOH(in.bits.addr_beat)
+
+  def dropPendingBit[T <: HasL2BeatAddr] (in: DecoupledIO[T]) =
+    ~Fill(in.bits.refillCycles, in.fire()) | ~UIntToOH(in.bits.addr_beat)
+
+  def dropPendingBitInternal[T <: HasL2BeatAddr] (in: ValidIO[T]) =
+    ~Fill(in.bits.refillCycles, in.valid) | ~UIntToOH(in.bits.addr_beat)
+
+  def addPendingBitWhenBeatHasPartialWritemask(in: DecoupledIO[AcquireFromSrc]): UInt = {
+    val a = in.bits
+    val isPartial = a.wmask() != Acquire.fullWriteMask
+    addPendingBitWhenBeat(in.fire() && isPartial && Bool(ignoresWriteMask), a)
+  }
+
+  def pinAllReadyValidLow[T <: Data](b: Bundle) {
+    b.elements.foreach {
+      _._2 match {
+        case d: DecoupledIO[T] =>
+          if(d.ready.dir == OUTPUT) d.ready := Bool(false)
+          else if(d.valid.dir == OUTPUT) d.valid := Bool(false)
+        case v: ValidIO[T] => if(v.valid.dir == OUTPUT) v.valid := Bool(false)
+        case b: Bundle => pinAllReadyValidLow(b)
+        case _ =>
+      }
+    }
+  }
+}
+
+class L2VoluntaryReleaseTracker(trackerId: Int) extends L2XactTracker {
+  val io = new L2XactTrackerIO
+  pinAllReadyValidLow(io)
+
+  val s_idle :: s_meta_read :: s_meta_resp :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 5)
+  val state = Reg(init=s_idle)
+
+  val xact = Reg(Bundle(new ReleaseFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) }
+  val xact_way_en = Reg{ Bits(width = nWays) }
+  val xact_old_meta = Reg{ new L2Metadata }
+  val coh = xact_old_meta.coh
+
+  val pending_irels = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_writes = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_ignt = Reg(init=Bool(false))
+
+  val all_pending_done =
+    !(pending_writes.orR ||
+      pending_ignt)
+
+  // Accept a voluntary Release (and any further beats of data)
+  pending_irels := (pending_irels & dropPendingBitWhenBeatHasData(io.inner.release))
+  io.inner.release.ready := state === s_idle || pending_irels.orR
+  when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
+
+  // Begin a transaction by getting the current block metadata
+  io.meta.read.valid := state === s_meta_read
+  io.meta.read.bits.id := UInt(trackerId)
+  io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.read.bits.tag := xact.addr_block >> UInt(idxBits)
+
+  // Write the voluntarily written back data to this cache
+  pending_writes := (pending_writes & dropPendingBit(io.data.write)) |
+                      addPendingBitWhenBeatHasData(io.inner.release)
+  val curr_write_beat = PriorityEncoder(pending_writes)
+  io.data.write.valid := state === s_busy && pending_writes.orR
+  io.data.write.bits.id := UInt(trackerId)
+  io.data.write.bits.way_en := xact_way_en
+  io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
+  io.data.write.bits.addr_beat := curr_write_beat
+  io.data.write.bits.wmask := SInt(-1)
+  io.data.write.bits.data := data_buffer(curr_write_beat)
+
+  // Send an acknowledgement
+  io.inner.grant.valid := state === s_busy && pending_ignt && !pending_irels
+  io.inner.grant.bits := coh.inner.makeGrant(xact, UInt(trackerId))
+  when(io.inner.grant.fire()) { pending_ignt := Bool(false) }
+
+  // End a transaction by updating the block metadata
+  io.meta.write.valid := state === s_meta_write
+  io.meta.write.bits.id := UInt(trackerId)
+  io.meta.write.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.write.bits.way_en := xact_way_en
+  io.meta.write.bits.data.tag := xact.addr_block >> UInt(idxBits)
+  io.meta.write.bits.data.coh.inner := xact_old_meta.coh.inner.onRelease(xact)
+  io.meta.write.bits.data.coh.outer := Mux(xact.hasData(),
+                                         xact_old_meta.coh.outer.onHit(M_XWR),
+                                         xact_old_meta.coh.outer)
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.inner.release.valid) {
+    xact := io.irel()
+    when(io.irel().hasMultibeatData()) {
+      pending_irels := dropPendingBitWhenBeatHasData(io.inner.release)
+    }.otherwise {
+      pending_irels := UInt(0)
+    }
+    pending_writes := addPendingBitWhenBeatHasData(io.inner.release)
+    pending_ignt := io.irel().requiresAck()
+    state := s_meta_read
+  }
+  when(state === s_meta_read && io.meta.read.ready) { state := s_meta_resp }
+  when(state === s_meta_resp && io.meta.resp.valid) {
+    xact_old_meta := io.meta.resp.bits.meta
+    xact_way_en := io.meta.resp.bits.way_en
+    state := s_busy
+  }
+  when(state === s_busy && all_pending_done) { state := s_meta_write }
+  when(state === s_meta_write && io.meta.write.ready) { state := s_idle }
+
+  // These IOs are used for routing in the parent
+  io.has_release_match := io.irel().isVoluntary()
+  io.has_acquire_match := Bool(false)
+  io.has_acquire_conflict := Bool(false)
+
+  // Checks for illegal behavior
+  assert(!(state === s_meta_resp && io.meta.resp.valid && !io.meta.resp.bits.tag_match),
+    "VoluntaryReleaseTracker accepted Release for a block not resident in this cache!")
+  assert(!(state === s_idle && io.inner.release.fire() && !io.irel().isVoluntary()),
+    "VoluntaryReleaseTracker accepted Release that wasn't voluntary!")
+}
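The tracker below also leans on connectTwoWayBeatCounter (defined elsewhere in the library's tracker base classes) to know when every Probe has been answered by a Release, or every Grant by a Finish. The underlying idea is an up/down count over fire() events; a standalone sketch (names hypothetical):

    val pending = Reg(init = UInt(0, width = log2Up(nClients + 1)))
    val up   = io.inner.probe.fire()     // one more answer now outstanding
    val down = io.inner.release.fire()   // one answer arrived
    pending := pending + up.toUInt - down.toUInt
    val all_answered = pending === UInt(0) && !up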
+
+
+class L2AcquireTracker(trackerId: Int) extends L2XactTracker {
+  val io = new L2XactTrackerIO
+  pinAllReadyValidLow(io)
+
+  val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9)
+  val state = Reg(init=s_idle)
+
+  // State holding transaction metadata
+  val xact = Reg(Bundle(new AcquireFromSrc, { case TLId => params(InnerTLId) }))
+  val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) }
+  val wmask_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0,width = innerDataBits/8)) }
+  val xact_tag_match = Reg{ Bool() }
+  val xact_way_en = Reg{ Bits(width = nWays) }
+  val xact_old_meta = Reg{ new L2Metadata }
+  val pending_coh = Reg{ xact_old_meta.coh.clone }
+
+  // Secondary miss queue
+  val ignt_q = Module(new Queue(new SecondaryMissInfo, nSecondaryMisses))(innerTLParams)
+
+  // State holding progress made on processing this transaction
+  val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire)
+  val pending_irels = connectTwoWayBeatCounter(
+    max = io.inner.tlNCachingClients,
+    up = io.inner.probe,
+    down = io.inner.release)._1
+  val (pending_ognt, oacq_data_idx, oacq_data_done, ognt_data_idx, ognt_data_done) =
+    connectTwoWayBeatCounter(
+      max = 1,
+      up = io.outer.acquire,
+      down = io.outer.grant,
+      beat = xact.addr_beat)
+  val (ignt_data_idx, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat)
+  val pending_ifins = connectTwoWayBeatCounter(
+    max = nSecondaryMisses,
+    up = io.inner.grant,
+    down = io.inner.finish,
+    track = (g: Grant) => g.requiresAck())._1
+  val pending_puts = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCachingClients))
+  val pending_reads = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_writes = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_resps = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_ignt_data = Reg(init=Bits(0, width = io.inner.tlDataBeats))
+  val pending_meta_write = Reg{ Bool() }
+
+  val all_pending_done =
+    !(pending_reads.orR ||
+      pending_writes.orR ||
+      pending_resps.orR ||
+      pending_puts.orR ||
+      pending_ognt ||
+      ignt_q.io.count > UInt(0) ||
+      //pending_meta_write || // Has own state: s_meta_write
+      pending_ifins)
+
+  // Provide a single ALU per tracker to merge Puts and AMOs with data being
+  // refilled, written back, or extant in the cache
+  val amoalu = Module(new AMOALU)
+  amoalu.io.addr := xact.addr()
+  amoalu.io.cmd := xact.op_code()
+  amoalu.io.typ := xact.op_size()
+  amoalu.io.lhs := io.data.resp.bits.data // default, overwritten by calls to mergeData
+  amoalu.io.rhs := data_buffer.head       // default, overwritten by calls to mergeData
+  val amo_result = xact.data // Reuse xact buffer space to store AMO result
+
+  // Utility functions for updating the data and metadata that will be kept in
+  // the cache or granted to the original requestor after this transaction:
+
+  def updatePendingCohWhen(flag: Bool, next: HierarchicalMetadata) {
+    when(flag && pending_coh != next) {
+      pending_meta_write := Bool(true)
+      pending_coh := next
+    }
+  }
+
+  def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
+    val old_data = incoming         // Refilled, written back, or de-cached data
+    val new_data = data_buffer(beat) // Newly Put data is already in the buffer
+    amoalu.io.lhs := old_data >> xact.amo_shift_bits()
+    amoalu.io.rhs := new_data >> xact.amo_shift_bits()
+    val wmask = FillInterleaved(8, wmask_buffer(beat))
+    data_buffer(beat) := ~wmask & old_data |
+                          wmask & Mux(xact.isBuiltInType(Acquire.putAtomicType),
+                                    amoalu.io.out << xact.amo_shift_bits(),
+                                    new_data)
+    wmask_buffer(beat) := SInt(-1)
+    when(xact.is(Acquire.putAtomicType) && xact.addr_beat === beat) { amo_result := old_data }
+  }
+  def mergeDataInternal[T <: HasL2Data with HasL2BeatAddr](in: ValidIO[T]) {
+    when(in.valid) { mergeData(rowBits)(in.bits.addr_beat, in.bits.data) }
+  }
+  def mergeDataInner[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[T]) {
+    when(in.fire() && in.bits.hasData()) {
+      mergeData(innerDataBits)(in.bits.addr_beat, in.bits.data)
+    }
+  }
+  def mergeDataOuter[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[T]) {
+    when(in.fire() && in.bits.hasData()) {
+      mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data)
+    }
+  }
+
+  // Actual transaction processing logic begins here:
+  //
+  // First, take care of accepting new requests or secondary misses
+  // For now, the only allowed secondary miss types are Gets-under-Get
+  // and Puts-under-Put from the same client
+  val can_merge_iacq_get = (xact.isBuiltInType(Acquire.getType) &&
+                             io.iacq().isBuiltInType(Acquire.getType)) &&
+                           xact.client_id === io.iacq().client_id && //TODO remove
+                           xact.conflicts(io.iacq()) &&
+                           state != s_idle && state != s_meta_write &&
+                           !all_pending_done &&
+                           xact.allocate() &&
+                           !io.inner.release.fire() &&
+                           !io.outer.grant.fire() &&
+                           !io.data.resp.valid &&
+                           ignt_q.io.enq.ready
+
+  // This logic also allows the tail beats of a PutBlock to be merged in
+  val can_merge_iacq_put = ((xact.isBuiltInType(Acquire.putType) &&
+                              io.iacq().isBuiltInType(Acquire.putType)) ||
+                             (xact.isBuiltInType(Acquire.putBlockType) &&
+                              io.iacq().isBuiltInType(Acquire.putBlockType))) &&
+                           xact.client_id === io.iacq().client_id && //TODO remove
+                           xact.conflicts(io.iacq()) &&
+                           state != s_idle && state != s_meta_write &&
+                           !all_pending_done &&
+                           xact.allocate() &&
+                           !io.inner.release.fire() &&
+                           !io.outer.grant.fire() &&
+                           !io.data.resp.valid &&
+                           ignt_q.io.enq.ready
+
+  io.inner.acquire.ready := state === s_idle ||
+                              can_merge_iacq_put ||
+                              can_merge_iacq_get
+
+  // Enqueue secondary miss information
+  ignt_q.io.enq.valid := iacq_data_done
+  ignt_q.io.enq.bits.client_xact_id := io.iacq().client_xact_id
+  ignt_q.io.enq.bits.addr_beat := io.iacq().addr_beat
+  // TODO add ignt.dst <- iacq.src
+
+  // Track whether any beats are missing from a PutBlock
+  pending_puts := (pending_puts & dropPendingBitWhenBeatHasData(io.inner.acquire))
+
+  // Begin a transaction by getting the current block metadata
+  io.meta.read.valid := state === s_meta_read
+  io.meta.read.bits.id := UInt(trackerId)
+  io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.read.bits.tag := xact.addr_block >> UInt(idxBits)
+
+  // Issue a request to the writeback unit
+  io.wb.req.valid := state === s_wb_req
+  io.wb.req.bits.id := UInt(trackerId)
+  io.wb.req.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.wb.req.bits.tag := xact_old_meta.tag
+  io.wb.req.bits.coh := xact_old_meta.coh
+  io.wb.req.bits.way_en := xact_way_en
+
+  // Track which clients yet need to be probed and make Probe message
+  pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe)
+  val curr_probe_dst = PriorityEncoder(pending_iprbs)
+  io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR
+  io.inner.probe.bits := pending_coh.inner.makeProbe(curr_probe_dst, xact)
+
+  // Handle incoming releases from clients, which may reduce sharer counts
+  // and/or write back dirty data
+  io.inner.release.ready := state === s_inner_probe
+  val pending_coh_on_irel = HierarchicalMetadata(
+    pending_coh.inner.onRelease(io.irel()), // Drop sharer
+    Mux(io.irel().hasData(),                // Dirty writeback
+      pending_coh.outer.onHit(M_XWR),
+      pending_coh.outer))
+  updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+  mergeDataInner(io.inner.release)
+
+  // Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory:
+  //
+  // If we're allocating in this cache, we can use the current metadata
+  // to make an appropriate custom Acquire, otherwise we copy over the
+  // built-in Acquire from the inner TL to the outer TL
+  io.outer.acquire.valid := state === s_outer_acquire
+  io.outer.acquire.bits := Mux(
+    xact.allocate(),
+    xact_old_meta.coh.outer.makeAcquire(
+      client_xact_id = UInt(0),
+      addr_block = xact.addr_block,
+      op_code = xact.op_code()),
+    Bundle(Acquire(xact))(outerTLParams))
+
+  // Handle the response from outer memory
+  io.outer.grant.ready := state === s_busy
+  val pending_coh_on_ognt = HierarchicalMetadata(
+    ManagerMetadata.onReset,
+    pending_coh.outer.onGrant(io.outer.grant.bits, xact.op_code()))
+  updatePendingCohWhen(ognt_data_done, pending_coh_on_ognt)
+  mergeDataOuter(io.outer.grant)
+
+  // Going back to the original inner transaction, we can issue a Grant as
+  // soon as the data is released, granted, put, or read from the cache
+  pending_ignt_data := pending_ignt_data |
+                         addPendingBitWhenBeatHasData(io.inner.release) |
+                         addPendingBitWhenBeatHasData(io.outer.grant) |
+                         addPendingBitInternal(io.data.resp)
+  ignt_q.io.deq.ready := ignt_data_done
+  io.inner.grant.valid := state === s_busy &&
+                            ignt_q.io.deq.valid &&
+                            (!io.ignt().hasData() || pending_ignt_data(ignt_data_idx))
+  // Make the Grant message using the data stored in the secondary miss queue
+  io.inner.grant.bits := pending_coh.inner.makeGrant(
+    pri = xact,
+    sec = ignt_q.io.deq.bits,
+    manager_xact_id = UInt(trackerId),
+    data = Mux(xact.is(Acquire.putAtomicType),
+             amo_result,
+             data_buffer(ignt_data_idx)))
+  io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter
+
+  val pending_coh_on_ignt = HierarchicalMetadata(
+    pending_coh.inner.onGrant(io.ignt()),
+    Mux(ognt_data_done,
+      pending_coh_on_ognt.outer,
+      pending_coh.outer))
+  updatePendingCohWhen(io.inner.grant.fire(), pending_coh_on_ignt)
+
+  // We must wait for as many Finishes as we sent Grants
+  io.inner.finish.ready := state === s_busy
+
+  // We read from the cache at this level if data wasn't written back or refilled.
+  // We may merge Gets, requiring further beats to be read.
+  // If ECC requires a full writemask, we'll read out data on partial writes as well.
+  pending_reads := (pending_reads &
+                     dropPendingBit(io.data.read) &
+                     dropPendingBitWhenBeatHasData(io.inner.release) &
+                     dropPendingBitWhenBeatHasData(io.outer.grant)) |
+                   addPendingBitWhenBeatIsGetOrAtomic(io.inner.acquire) |
+                   addPendingBitWhenBeatHasPartialWritemask(io.inner.acquire)
+  val curr_read_beat = PriorityEncoder(pending_reads)
+  io.data.read.valid := state === s_busy && pending_reads.orR && !pending_ognt
+  io.data.read.bits.id := UInt(trackerId)
+  io.data.read.bits.way_en := xact_way_en
+  io.data.read.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
+  io.data.read.bits.addr_beat := curr_read_beat
+
+  pending_resps := (pending_resps & dropPendingBitInternal(io.data.resp)) |
+                     addPendingBitInternal(io.data.read)
+  mergeDataInternal(io.data.resp)
+
+  // We write data to the cache at this level if it was Put here with allocate flag,
+  // written back dirty, or refilled from outer memory.
+  pending_writes := (pending_writes & dropPendingBit(io.data.write)) |
+                      addPendingBitWhenBeatHasData(io.inner.acquire) |
+                      addPendingBitWhenBeatHasData(io.inner.release) |
+                      addPendingBitWhenBeatHasData(io.outer.grant)
+  val curr_write_beat = PriorityEncoder(pending_writes)
+  io.data.write.valid := state === s_busy &&
+                           pending_writes.orR &&
+                           !pending_ognt &&
+                           !pending_reads(curr_write_beat) &&
+                           !pending_resps(curr_write_beat)
+  io.data.write.bits.id := UInt(trackerId)
+  io.data.write.bits.way_en := xact_way_en
+  io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
+  io.data.write.bits.addr_beat := curr_write_beat
+  io.data.write.bits.wmask := wmask_buffer(curr_write_beat)
+  io.data.write.bits.data := data_buffer(curr_write_beat)
+
+  // End a transaction by updating the block metadata
+  io.meta.write.valid := state === s_meta_write
+  io.meta.write.bits.id := UInt(trackerId)
+  io.meta.write.bits.idx := xact.addr_block(idxMSB,idxLSB)
+  io.meta.write.bits.way_en := xact_way_en
+  io.meta.write.bits.data.tag := xact.addr_block >> UInt(idxBits)
+  io.meta.write.bits.data.coh := pending_coh
+
+  // Handling of secondary misses (Gets and Puts only for now)
+  when(io.inner.acquire.fire() && io.iacq().hasData()) { // state <= s_meta_write
+    val beat = io.iacq().addr_beat
+    val wmask = io.iacq().wmask()
+    val full = FillInterleaved(8, wmask)
+    data_buffer(beat) := (~full & data_buffer(beat)) | (full & io.iacq().data)
+    wmask_buffer(beat) := wmask | Mux(state === s_idle, Bits(0), wmask_buffer(beat))
+  }
+
+  // Defined here because of Chisel default wire demands, used in s_meta_resp
+  val pending_coh_on_hit = HierarchicalMetadata(
+    io.meta.resp.bits.meta.coh.inner,
+    io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code()))
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.inner.acquire.valid) {
+    xact := io.iacq()
+    xact.data := UInt(0)
+    pending_puts := Mux( // Make sure to collect all data from a PutBlock
+      io.iacq().isBuiltInType(Acquire.putBlockType),
+      dropPendingBitWhenBeatHasData(io.inner.acquire),
+      UInt(0))
+    pending_reads := Mux( // GetBlocks and custom types read all beats
+      io.iacq().isBuiltInType(Acquire.getBlockType) || !io.iacq().isBuiltInType(),
+      SInt(-1, width = innerDataBeats),
(addPendingBitWhenBeatIsGetOrAtomic(io.inner.acquire) | + addPendingBitWhenBeatHasPartialWritemask(io.inner.acquire)).toUInt) + pending_writes := addPendingBitWhenBeatHasData(io.inner.acquire) + pending_resps := UInt(0) + pending_ignt_data := UInt(0) + pending_meta_write := UInt(0) + state := s_meta_read + } + when(state === s_meta_read && io.meta.read.ready) { state := s_meta_resp } + when(state === s_meta_resp && io.meta.resp.valid) { + xact_tag_match := io.meta.resp.bits.tag_match + xact_old_meta := io.meta.resp.bits.meta + xact_way_en := io.meta.resp.bits.way_en + val coh = io.meta.resp.bits.meta.coh + val tag_match = io.meta.resp.bits.tag_match + val is_hit = (if(!isLastLevelCache) tag_match && coh.outer.isHit(xact.op_code()) + else xact.isBuiltInType(Acquire.putBlockType) || + tag_match && coh.outer.isValid()) + val needs_writeback = !tag_match && + xact.allocate() && + (coh.outer.requiresVoluntaryWriteback() || + coh.inner.requiresProbesOnVoluntaryWriteback()) + val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact) + when(!tag_match || is_hit && pending_coh_on_hit != coh) { pending_meta_write := Bool(true) } + pending_coh := Mux(is_hit, pending_coh_on_hit, coh) + when(needs_inner_probes) { + val full_sharers = coh.inner.full() + val mask_self = Mux( + xact.requiresSelfProbe(), + coh.inner.full() | UIntToOH(xact.client_id), + coh.inner.full() & ~UIntToOH(xact.client_id)) + val mask_incoherent = mask_self & ~io.incoherent.toBits + pending_iprbs := mask_incoherent + } + state := Mux(needs_writeback, s_wb_req, + Mux(needs_inner_probes, s_inner_probe, + Mux(!is_hit, s_outer_acquire, s_busy))) + } + when(state === s_wb_req && io.wb.req.ready) { state := s_wb_resp } + when(state === s_wb_resp && io.wb.resp.valid) { + // If we're overwriting the whole block in a last level cache we can + // just do it without fetching any data from memory + val skip_outer_acquire = Bool(isLastLevelCache) && xact.isBuiltInType(Acquire.putBlockType) + state := Mux(!skip_outer_acquire, s_outer_acquire, s_busy) + } + when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels)) { + // Tag matches, so if this is the last level cache we can use the data without upgrading permissions + val skip_outer_acquire = + (if(!isLastLevelCache) xact_old_meta.coh.outer.isHit(xact.op_code()) + else xact.isBuiltInType(Acquire.putBlockType) || xact_old_meta.coh.outer.isValid()) + state := Mux(!skip_outer_acquire, s_outer_acquire, s_busy) + } + when(state === s_outer_acquire && oacq_data_done) { state := s_busy } + when(state === s_busy && all_pending_done) { state := s_meta_write } + when(state === s_meta_write && (io.meta.write.ready || !pending_meta_write)) { + wmask_buffer.foreach { w => w := UInt(0) } + state := s_idle + } + + // These IOs are used for routing in the parent + val in_same_set = xact.addr_block(idxMSB,idxLSB) === io.iacq().addr_block(idxMSB,idxLSB) + io.has_release_match := xact.conflicts(io.irel()) && !io.irel().isVoluntary() && io.inner.release.ready + io.has_acquire_match := can_merge_iacq_put || can_merge_iacq_get + io.has_acquire_conflict := in_same_set && (state != s_idle) && !io.has_acquire_match + //TODO: relax from in_same_set to xact.conflicts(io.iacq())? 
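+
+  // All of the bookkeeping above follows a single idiom: each flavor of
+  // per-beat work is a bitvector updated every cycle as
+  // (pending & ~drop) | add, with the lowest-numbered pending beat issued
+  // first. A minimal self-contained sketch of that idiom (BeatScoreboard is
+  // a hypothetical name, not part of this library; Chisel 2 assumed):
+  //
+  //   class BeatScoreboard(nBeats: Int) extends Module {
+  //     val io = new Bundle {
+  //       val add  = UInt(INPUT, nBeats)          // beats that become pending
+  //       val fire = Bool(INPUT)                  // current beat was accepted
+  //       val beat = UInt(OUTPUT, log2Up(nBeats)) // beat to issue next
+  //       val busy = Bool(OUTPUT)                 // any work outstanding?
+  //     }
+  //     val pending = Reg(init = UInt(0, nBeats))
+  //     val curr = PriorityEncoder(pending)
+  //     val drop = Mux(io.fire, UIntToOH(curr, nBeats), UInt(0, nBeats))
+  //     pending := (pending & ~drop) | io.add
+  //     io.beat := curr
+  //     io.busy := pending.orR
+  //   }
+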
+ + // Checks for illegal behavior + assert(!(state != s_idle && io.inner.acquire.fire() && + io.inner.acquire.bits.client_id != xact.client_id), + "AcquireTracker accepted data beat from different network source than initial request.") +} + +class L2WritebackReq extends L2Metadata with HasL2Id { + val idx = Bits(width = idxBits) + val way_en = Bits(width = nWays) +} + +class L2WritebackResp extends L2HellaCacheBundle with HasL2Id + +class L2WritebackIO extends L2HellaCacheBundle { + val req = Decoupled(new L2WritebackReq) + val resp = Valid(new L2WritebackResp).flip +} + +class L2WritebackUnitIO extends HierarchicalXactTrackerIO { + val wb = new L2WritebackIO().flip + val data = new L2DataRWIO +} + +class L2WritebackUnit(trackerId: Int) extends L2XactTracker { + val io = new L2WritebackUnitIO + pinAllReadyValidLow(io) + + val s_idle :: s_inner_probe :: s_busy :: s_outer_grant :: s_wb_resp :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_idle) + + val xact = Reg(new L2WritebackReq) + val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) } + val xact_addr_block = Cat(xact.tag, xact.idx) + + val pending_irels = + connectTwoWayBeatCounter(max = io.inner.tlNCachingClients, up = io.inner.probe, down = io.inner.release)._1 + val (pending_ognt, orel_data_idx, orel_data_done, ognt_data_idx, ognt_data_done) = + connectTwoWayBeatCounter(max = 1, up = io.outer.release, down = io.outer.grant) + val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCachingClients)) + val pending_reads = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_resps = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_orel_data = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + + // Start the writeback sub-transaction + io.wb.req.ready := state === s_idle + + // Track which clients yet need to be probed and make Probe message + pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe) + val curr_probe_dst = PriorityEncoder(pending_iprbs) + io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR + io.inner.probe.bits := xact.coh.inner.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block) + + // Handle incoming releases from clients, which may reduce sharer counts + // and/or write back dirty data + val inner_coh_on_irel = xact.coh.inner.onRelease(io.irel()) + val outer_coh_on_irel = xact.coh.outer.onHit(M_XWR) + io.inner.release.ready := state === s_inner_probe || state === s_busy + when(io.inner.release.fire()) { + xact.coh.inner := inner_coh_on_irel + when(io.irel().hasData()) { xact.coh.outer := outer_coh_on_irel } // WB is a write + data_buffer(io.inner.release.bits.addr_beat) := io.inner.release.bits.data + } + + // If a release didn't write back data, have to read it from data array + pending_reads := (pending_reads & + dropPendingBit(io.data.read) & + dropPendingBitWhenBeatHasData(io.inner.release)) + val curr_read_beat = PriorityEncoder(pending_reads) + io.data.read.valid := state === s_busy && pending_reads.orR + io.data.read.bits.id := UInt(trackerId) + io.data.read.bits.way_en := xact.way_en + io.data.read.bits.addr_idx := xact.idx + io.data.read.bits.addr_beat := curr_read_beat + io.data.write.valid := Bool(false) + + pending_resps := (pending_resps & dropPendingBitInternal(io.data.resp)) | + addPendingBitInternal(io.data.read) + when(io.data.resp.valid) { + data_buffer(io.data.resp.bits.addr_beat) := io.data.resp.bits.data + } + + // Once the data is buffered we can write it back to outer memory + pending_orel_data := 
pending_orel_data |
+                       addPendingBitWhenBeatHasData(io.inner.release) |
+                       addPendingBitInternal(io.data.resp)
+  io.outer.release.valid := state === s_busy &&
+                              (!io.orel().hasData() || pending_orel_data(orel_data_idx))
+  io.outer.release.bits := xact.coh.outer.makeVoluntaryWriteback(
+                             client_xact_id = UInt(trackerId),
+                             addr_block = xact_addr_block,
+                             addr_beat = orel_data_idx,
+                             data = data_buffer(orel_data_idx))
+
+  // Wait for an acknowledgement
+  io.outer.grant.ready := state === s_outer_grant
+
+  // Respond to the initiating transaction handler signalling completion of the writeback
+  io.wb.resp.valid := state === s_wb_resp
+  io.wb.resp.bits.id := xact.id
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.wb.req.valid) {
+    xact := io.wb.req.bits
+    val coh = io.wb.req.bits.coh
+    val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
+    when(needs_inner_probes) { pending_iprbs := coh.inner.full() & ~io.incoherent.toBits }
+    pending_reads := SInt(-1, width = innerDataBeats)
+    pending_resps := UInt(0)
+    pending_orel_data := UInt(0)
+    state := Mux(needs_inner_probes, s_inner_probe, s_busy)
+  }
+  when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels)) {
+    state := Mux(xact.coh.outer.requiresVoluntaryWriteback(), s_busy, s_wb_resp)
+  }
+  when(state === s_busy && orel_data_done) {
+    state := Mux(io.orel().requiresAck(), s_outer_grant, s_wb_resp)
+  }
+  when(state === s_outer_grant && ognt_data_done) { state := s_wb_resp }
+  when(state === s_wb_resp) { state := s_idle }
+
+  // These IOs are used for routing in the parent
+  io.has_release_match := io.irel().conflicts(xact_addr_block) && !io.irel().isVoluntary() && io.inner.release.ready
+  io.has_acquire_match := Bool(false)
+  io.has_acquire_conflict := Bool(false)
+}
diff --git a/uncore/src/main/scala/coherence.scala b/uncore/src/main/scala/coherence.scala
new file mode 100644
index 00000000..862eb484
--- /dev/null
+++ b/uncore/src/main/scala/coherence.scala
@@ -0,0 +1,688 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+
+/** The entire CoherencePolicy API consists of the following three traits:
+ * HasCustomTileLinkMessageTypes, used to define custom messages
+ * HasClientSideCoherencePolicy, for client coherence agents
+ * HasManagerSideCoherencePolicy, for manager coherence agents
+ */
+abstract class CoherencePolicy(val dir: DirectoryRepresentation) extends
+  HasCustomTileLinkMessageTypes with
+  HasClientSideCoherencePolicy with
+  HasManagerSideCoherencePolicy
+
+/** This API defines the custom, coherence-policy-defined message types,
+ * as opposed to the built-in ones found in tilelink.scala.
+ * Policies must enumerate the custom messages to be sent over each
+ * channel, as well as which of them have associated data.
+ */
+trait HasCustomTileLinkMessageTypes {
+  val nAcquireTypes: Int
+  def acquireTypeWidth = log2Up(nAcquireTypes)
+  val nProbeTypes: Int
+  def probeTypeWidth = log2Up(nProbeTypes)
+  val nReleaseTypes: Int
+  def releaseTypeWidth = log2Up(nReleaseTypes)
+  val nGrantTypes: Int
+  def grantTypeWidth = log2Up(nGrantTypes)
+
+  val acquireTypesWithData = Nil // Only built-in Acquire types have data for now
+  val releaseTypesWithData: Vec[UInt]
+  val grantTypesWithData: Vec[UInt]
+}
+
+/** This API contains all functions required for client coherence agents.
+ * Policies must enumerate the number of client states and define their
+ * permissions with respect to memory operations. 
Policies must fill in functions
+ * to control which messages are sent and how metadata is updated in response
+ * to coherence events. These functions are generally called from within the
+ * ClientMetadata class in metadata.scala
+ */
+trait HasClientSideCoherencePolicy {
+  // Client coherence states and their permissions
+  val nClientStates: Int
+  def clientStateWidth = log2Ceil(nClientStates)
+  val clientStatesWithReadPermission: Vec[UInt]
+  val clientStatesWithWritePermission: Vec[UInt]
+  val clientStatesWithDirtyData: Vec[UInt]
+
+  // Transaction initiation logic
+  def isValid(meta: ClientMetadata): Bool
+  def isHit(cmd: UInt, meta: ClientMetadata): Bool = {
+    Mux(isWriteIntent(cmd),
+      clientStatesWithWritePermission.contains(meta.state),
+      clientStatesWithReadPermission.contains(meta.state))
+  }
+  //TODO: Assumes all states with write permissions also have read permissions
+  def requiresAcquireOnSecondaryMiss(
+      first_cmd: UInt,
+      second_cmd: UInt,
+      meta: ClientMetadata): Bool = {
+    isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
+  }
+  //TODO: Assumes all cache ctrl ops writeback dirty data, and
+  //      doesn't issue transaction when e.g. downgrading Exclusive to Shared:
+  def requiresReleaseOnCacheControl(cmd: UInt, meta: ClientMetadata): Bool =
+    clientStatesWithDirtyData.contains(meta.state)
+
+  // Determine which custom message type to use
+  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt
+  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt
+  def getReleaseType(p: Probe, meta: ClientMetadata): UInt
+
+  // Mutate ClientMetadata based on messages or cmds
+  def clientMetadataOnReset: ClientMetadata
+  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata): ClientMetadata
+  def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata): ClientMetadata
+}
+
+/** This API contains all functions required for manager coherence agents.
+ * Policies must enumerate the number of manager states. Policies must fill
+ * in functions to control which Probe and Grant messages are sent and how
+ * metadata should be updated in response to coherence events. 
These functions
+ * are generally called from within the ManagerMetadata class in metadata.scala
+ */
+trait HasManagerSideCoherencePolicy extends HasDirectoryRepresentation {
+  val nManagerStates: Int
+  def masterStateWidth = log2Ceil(nManagerStates)
+
+  // Transaction probing logic
+  def requiresProbes(acq: Acquire, meta: ManagerMetadata): Bool
+  def requiresProbes(cmd: UInt, meta: ManagerMetadata): Bool
+
+  // Determine which custom message type to use in response
+  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt
+  def getProbeType(acq: Acquire, meta: ManagerMetadata): UInt
+  def getGrantType(acq: Acquire, meta: ManagerMetadata): UInt
+
+  // Mutate ManagerMetadata based on messages or cmds
+  def managerMetadataOnReset: ManagerMetadata
+  def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata): ManagerMetadata
+  def managerMetadataOnGrant(outgoing: Grant, dst: UInt, meta: ManagerMetadata) =
+    ManagerMetadata(sharers=Mux(outgoing.isBuiltInType(), // Assumes all built-ins are uncached
+                      meta.sharers,
+                      dir.push(meta.sharers, dst)))
+                    //state = meta.state) TODO: Fix 0-width wires in Chisel
+}
+
+/** The following concrete implementations of CoherencePolicy each provide the
+ * functionality of one particular protocol.
+ */
+
+/** A simple protocol with only two Client states.
+ * Data is always assumed to be dirty.
+ * Only a single client may ever have a copy of a block at a time.
+ */
+class MICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
+  // Message types
+  val nAcquireTypes = 1
+  val nProbeTypes = 2
+  val nReleaseTypes = 4
+  val nGrantTypes = 1
+
+  val acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
+  val probeInvalidate :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
+  val releaseInvalidateData :: releaseCopyData :: releaseInvalidateAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
+  val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)
+
+  val releaseTypesWithData = Vec(releaseInvalidateData, releaseCopyData)
+  val grantTypesWithData = Vec(grantExclusive)
+
+  // Client states and functions
+  val nClientStates = 2
+  val clientInvalid :: clientValid :: Nil = Enum(UInt(), nClientStates)
+
+  val clientStatesWithReadPermission = Vec(clientValid)
+  val clientStatesWithWritePermission = Vec(clientValid)
+  val clientStatesWithDirtyData = Vec(clientValid)
+
+  def isValid (meta: ClientMetadata): Bool = meta.state != clientInvalid
+
+  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = acquireExclusive
+
+  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
+    val dirty = clientStatesWithDirtyData.contains(meta.state)
+    MuxLookup(cmd, releaseCopyAck, Array(
+      M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
+      M_PRODUCE -> Mux(dirty, releaseCopyData, releaseCopyAck),
+      M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
+  }
+
+  def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt =
+    MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
+      probeInvalidate -> getReleaseType(M_FLUSH, meta),
+      probeCopy -> getReleaseType(M_CLEAN, meta)))
+
+  def clientMetadataOnReset = ClientMetadata(clientInvalid)
+
+  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = meta
+
+  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
+    ClientMetadata(Mux(cmd === M_FLUSH, clientInvalid, meta.state))
+
+  def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) =
+    ClientMetadata(Mux(incoming.isBuiltInType(), clientInvalid, clientValid))
+
+  def 
clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata(Mux(incoming.p_type === probeInvalidate, + clientInvalid, meta.state)) + + // Manager states and functions: + val nManagerStates = 0 // We don't actually need any states for this protocol + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + probeInvalidate) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = grantExclusive + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A simple protocol with only three Client states. + * Data is marked as dirty when written. + * Only a single client may ever have a copy of a block at a time. + */ +class MEICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 1 + + val acquireExclusiveClean :: acquireExclusiveDirty :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantExclusive :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + val grantTypesWithData = Vec(grantExclusive) + + // Client states and functions + val nClientStates = 3 + val clientInvalid :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty) + + def isValid (meta: ClientMetadata) = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusiveDirty, acquireExclusiveClean) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_CLEAN, meta))) + + def clientMetadataOnReset = 
ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state)))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientExclusiveClean, + probeCopy -> meta.state))) + + // Manager states and functions: + val nManagerStates = 0 // We don't actually need any states for this protocol + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = !dir.none(meta.sharers) + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + probeInvalidate) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = grantExclusive + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A protocol with only three Client states. + * Data is always assumed to be dirty. + * Multiple clients may share read permissions on a block at the same time. 
+ */ +class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 3 + + val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + val grantTypesWithData = Vec(grantShared, grantExclusive) + + // Client states and functions + val nClientStates = 3 + val clientInvalid :: clientShared :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientShared, clientExclusiveDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty) + + def isValid(meta: ClientMetadata): Bool = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusive, acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_CLEAN, meta))) + + def clientMetadataOnReset = ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(clientStatesWithWritePermission.contains(meta.state), + clientShared, meta.state)))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> clientExclusiveDirty, + grantExclusiveAck -> clientExclusiveDirty)))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientShared, + probeCopy -> meta.state))) + + // Manager states and functions: + val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing + // only a single sharer (also would need + // notification msg to track clean drops) + // Also could avoid probes on outer WBs. 
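+
+  // Worked example of the client-side functions above: a client holding a
+  // block in clientExclusiveDirty that receives a probeDowngrade must write
+  // its dirty copy back and fall to clientShared. A sketch of how an agent
+  // queries the policy (nClients and prb are assumed to be in scope):
+  //
+  //   val policy = new MSICoherence(new FullRepresentation(nClients))
+  //   val meta   = ClientMetadata(policy.clientExclusiveDirty)
+  //   policy.getReleaseType(prb, meta)        // releaseDowngradeData
+  //   policy.clientMetadataOnProbe(prb, meta) // state becomes clientShared
+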
+ + def requiresProbes(a: Acquire, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type != acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate))) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.a_type === acquireShared, + Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + grantExclusive) + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +/** A protocol with four Client states. + * Data is marked as dirty when written. + * Multiple clients may share read permissions on a block at the same time. + */ +class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 2 + val nProbeTypes = 3 + val nReleaseTypes = 6 + val nGrantTypes = 3 + + val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData) + val grantTypesWithData = Vec(grantShared, grantExclusive) + + // Client states and functions + val nClientStates = 4 + val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientShared, clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveClean, clientExclusiveDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty) + + def isValid (meta: ClientMetadata): Bool = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), acquireExclusive, acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = + MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> 
getReleaseType(M_FLUSH, meta), + probeDowngrade -> getReleaseType(M_PRODUCE, meta), + probeCopy -> getReleaseType(M_CLEAN, meta))) + + def clientMetadataOnReset = ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(clientStatesWithWritePermission.contains(meta.state), + clientShared, meta.state), + M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state)))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean), + grantExclusiveAck -> clientExclusiveDirty)))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeDowngrade -> clientShared, + probeCopy -> meta.state))) + + // Manager states and functions: + val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing + // only a single sharer (also would need + // notification msg to track clean drops) + // Also could avoid probes on outer WBs. + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type != acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate))) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.a_type === acquireShared, + Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + grantExclusive) + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped)) + } +} + +class MigratoryCoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { + // Message types + val nAcquireTypes = 3 + val nProbeTypes = 4 + val nReleaseTypes = 10 + val nGrantTypes = 4 + + val acquireShared :: acquireExclusive :: acquireInvalidateOthers :: Nil = Enum(UInt(), nAcquireTypes) + val probeInvalidate :: probeDowngrade :: probeCopy :: probeInvalidateOthers :: Nil = Enum(UInt(), nProbeTypes) + val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: releaseDowngradeDataMigratory 
:: releaseDowngradeAckHasCopy :: releaseInvalidateDataMigratory :: releaseInvalidateAckMigratory :: Nil = Enum(UInt(), nReleaseTypes) + val grantShared :: grantExclusive :: grantExclusiveAck :: grantReadMigratory :: Nil = Enum(UInt(), nGrantTypes) + + val releaseTypesWithData = Vec(releaseInvalidateData, releaseDowngradeData, releaseCopyData, releaseInvalidateDataMigratory, releaseDowngradeDataMigratory) + val grantTypesWithData = Vec(grantShared, grantExclusive, grantReadMigratory) + + // Client states and functions + val nClientStates = 7 + val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: clientSharedByTwo :: clientMigratoryClean :: clientMigratoryDirty :: Nil = Enum(UInt(), nClientStates) + + val clientStatesWithReadPermission = Vec(clientShared, clientExclusiveClean, clientExclusiveDirty, clientSharedByTwo, clientMigratoryClean, clientMigratoryDirty) + val clientStatesWithWritePermission = Vec(clientExclusiveClean, clientExclusiveDirty, clientMigratoryClean, clientMigratoryDirty) + val clientStatesWithDirtyData = Vec(clientExclusiveDirty, clientMigratoryDirty) + + def isValid (meta: ClientMetadata): Bool = meta.state != clientInvalid + + def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = + Mux(isWriteIntent(cmd), + Mux(meta.state === clientInvalid, acquireExclusive, acquireInvalidateOthers), + acquireShared) + + def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + MuxLookup(cmd, releaseCopyAck, Array( + M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck), + M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck), + M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck))) + } + + def getReleaseType(incoming: Probe, meta: ClientMetadata): UInt = { + val dirty = clientStatesWithDirtyData.contains(meta.state) + val with_data = MuxLookup(incoming.p_type, releaseInvalidateData, Array( + probeInvalidate -> Mux(Vec(clientExclusiveDirty, clientMigratoryDirty).contains(meta.state), + releaseInvalidateDataMigratory, releaseInvalidateData), + probeDowngrade -> Mux(meta.state === clientMigratoryDirty, + releaseDowngradeDataMigratory, releaseDowngradeData), + probeCopy -> releaseCopyData)) + val without_data = MuxLookup(incoming.p_type, releaseInvalidateAck, Array( + probeInvalidate -> Mux(clientExclusiveClean === meta.state, + releaseInvalidateAckMigratory, releaseInvalidateAck), + probeInvalidateOthers -> Mux(clientSharedByTwo === meta.state, + releaseInvalidateAckMigratory, releaseInvalidateAck), + probeDowngrade -> Mux(meta.state != clientInvalid, + releaseDowngradeAckHasCopy, releaseDowngradeAck), + probeCopy -> releaseCopyAck)) + Mux(dirty, with_data, without_data) + } + + def clientMetadataOnReset = ClientMetadata(clientInvalid) + + def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + Mux(isWrite(cmd), MuxLookup(meta.state, clientExclusiveDirty, Array( + clientExclusiveClean -> clientExclusiveDirty, + clientMigratoryClean -> clientMigratoryDirty)), + meta.state)) + + def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(cmd, meta.state, Array( + M_FLUSH -> clientInvalid, + M_PRODUCE -> Mux(clientStatesWithWritePermission.contains(meta.state), + clientShared, meta.state), + M_CLEAN -> MuxLookup(meta.state, meta.state, Array( + clientExclusiveDirty -> clientExclusiveClean, + clientMigratoryDirty -> clientMigratoryClean))))) + + def clientMetadataOnGrant(incoming: Grant, cmd: UInt, 
meta: ClientMetadata) = + ClientMetadata( + Mux(incoming.isBuiltInType(), clientInvalid, + MuxLookup(incoming.g_type, clientInvalid, Array( + grantShared -> clientShared, + grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean), + grantExclusiveAck -> clientExclusiveDirty, + grantReadMigratory -> Mux(isWrite(cmd), clientMigratoryDirty, clientMigratoryClean))))) + + def clientMetadataOnProbe(incoming: Probe, meta: ClientMetadata) = + ClientMetadata( + MuxLookup(incoming.p_type, meta.state, Array( + probeInvalidate -> clientInvalid, + probeInvalidateOthers -> clientInvalid, + probeCopy -> meta.state, + probeDowngrade -> MuxLookup(meta.state, clientShared, Array( + clientExclusiveClean -> clientSharedByTwo, + clientExclusiveDirty -> clientSharedByTwo, + clientSharedByTwo -> clientShared, + clientMigratoryClean -> clientSharedByTwo, + clientMigratoryDirty -> clientInvalid))))) + + // Manager states and functions: + val nManagerStates = 0 // TODO: we could add some states to reduce the number of message types + + def requiresProbes(a: Acquire, meta: ManagerMetadata) = + Mux(dir.none(meta.sharers), Bool(false), + Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive + Mux(a.isBuiltInType(), a.hasData(), a.a_type != acquireShared))) + + def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers) + + def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt = + MuxLookup(cmd, probeCopy, Array( + M_FLUSH -> probeInvalidate, + M_PRODUCE -> probeDowngrade)) + + def getProbeType(a: Acquire, meta: ManagerMetadata): UInt = + Mux(a.isBuiltInType(), + MuxLookup(a.a_type, probeCopy, Array( + Acquire.getBlockType -> probeCopy, + Acquire.putBlockType -> probeInvalidate, + Acquire.getType -> probeCopy, + Acquire.putType -> probeInvalidate, + Acquire.putAtomicType -> probeInvalidate)), + MuxLookup(a.a_type, probeCopy, Array( + acquireShared -> probeDowngrade, + acquireExclusive -> probeInvalidate, + acquireInvalidateOthers -> probeInvalidateOthers))) + + def getGrantType(a: Acquire, meta: ManagerMetadata): UInt = + MuxLookup(a.a_type, grantShared, Array( + acquireShared -> Mux(!dir.none(meta.sharers), grantShared, grantExclusive), + acquireExclusive -> grantExclusive, + acquireInvalidateOthers -> grantExclusiveAck)) //TODO: add this to MESI for broadcast? + + def managerMetadataOnReset = ManagerMetadata() + + def managerMetadataOnRelease(incoming: Release, src: UInt, meta: ManagerMetadata) = { + val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src)) + MuxBundle(meta, Array( + incoming.is(releaseInvalidateData) -> popped, + incoming.is(releaseInvalidateAck) -> popped, + incoming.is(releaseInvalidateDataMigratory) -> popped, + incoming.is(releaseInvalidateAckMigratory) -> popped)) + } +} diff --git a/uncore/src/main/scala/consts.scala b/uncore/src/main/scala/consts.scala new file mode 100644 index 00000000..69cdc41f --- /dev/null +++ b/uncore/src/main/scala/consts.scala @@ -0,0 +1,48 @@ +// See LICENSE for license details. 
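+// Example of decoding one memory operation with the predicates defined
+// below (a sketch; each predicate yields a hardware Bool):
+//
+//   import uncore.constants.MemoryOpConstants._
+//   val cmd = M_XA_ADD          // "b01000", an atomic fetch-and-add
+//   isAMO(cmd)                  // true: bit 3 marks the atomic ops
+//   isRead(cmd) && isWrite(cmd) // true: AMOs both read and write memory
+//   isPrefetch(cmd)             // false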
+ +package uncore +package constants + +import Chisel._ + +object MemoryOpConstants extends MemoryOpConstants +trait MemoryOpConstants { + val MT_SZ = 3 + val MT_X = Bits("b???") + val MT_B = Bits("b000") + val MT_H = Bits("b001") + val MT_W = Bits("b010") + val MT_D = Bits("b011") + val MT_BU = Bits("b100") + val MT_HU = Bits("b101") + val MT_WU = Bits("b110") + + val NUM_XA_OPS = 9 + val M_SZ = 5 + val M_X = Bits("b?????"); + val M_XRD = Bits("b00000"); // int load + val M_XWR = Bits("b00001"); // int store + val M_PFR = Bits("b00010"); // prefetch with intent to read + val M_PFW = Bits("b00011"); // prefetch with intent to write + val M_XA_SWAP = Bits("b00100"); + val M_NOP = Bits("b00101"); + val M_XLR = Bits("b00110"); + val M_XSC = Bits("b00111"); + val M_XA_ADD = Bits("b01000"); + val M_XA_XOR = Bits("b01001"); + val M_XA_OR = Bits("b01010"); + val M_XA_AND = Bits("b01011"); + val M_XA_MIN = Bits("b01100"); + val M_XA_MAX = Bits("b01101"); + val M_XA_MINU = Bits("b01110"); + val M_XA_MAXU = Bits("b01111"); + val M_FLUSH = Bits("b10000") // write back dirty data and cede R/W permissions + val M_PRODUCE = Bits("b10001") // write back dirty data and cede W permissions + val M_CLEAN = Bits("b10011") // write back dirty data and retain R/W permissions + + def isAMO(cmd: Bits) = cmd(3) || cmd === M_XA_SWAP + def isPrefetch(cmd: Bits) = cmd === M_PFR || cmd === M_PFW + def isRead(cmd: Bits) = cmd === M_XRD || cmd === M_XLR || isAMO(cmd) + def isWrite(cmd: Bits) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd) + def isWriteIntent(cmd: Bits) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR +} diff --git a/uncore/src/main/scala/directory.scala b/uncore/src/main/scala/directory.scala new file mode 100644 index 00000000..db555ad3 --- /dev/null +++ b/uncore/src/main/scala/directory.scala @@ -0,0 +1,43 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +// This class encapsulates transformations on different directory information +// storage formats +abstract class DirectoryRepresentation(val width: Int) { + def pop(prev: UInt, id: UInt): UInt + def push(prev: UInt, id: UInt): UInt + def flush: UInt + def none(s: UInt): Bool + def one(s: UInt): Bool + def count(s: UInt): UInt + def next(s: UInt): UInt + def full(s: UInt): UInt +} + +abstract trait HasDirectoryRepresentation { + val dir: DirectoryRepresentation +} + +class NullRepresentation(nClients: Int) extends DirectoryRepresentation(1) { + def pop(prev: UInt, id: UInt) = UInt(0) + def push(prev: UInt, id: UInt) = UInt(0) + def flush = UInt(0) + def none(s: UInt) = Bool(false) + def one(s: UInt) = Bool(false) + def count(s: UInt) = UInt(nClients) + def next(s: UInt) = UInt(0) + def full(s: UInt) = SInt(-1, width = nClients).toUInt +} + +class FullRepresentation(nClients: Int) extends DirectoryRepresentation(nClients) { + def pop(prev: UInt, id: UInt) = prev & ~UIntToOH(id) + def push(prev: UInt, id: UInt) = prev | UIntToOH(id) + def flush = UInt(0, width = width) + def none(s: UInt) = s === UInt(0) + def one(s: UInt) = PopCount(s) === UInt(1) + def count(s: UInt) = PopCount(s) + def next(s: UInt) = PriorityEncoder(s) + def full(s: UInt) = s +} diff --git a/uncore/src/main/scala/ecc.scala b/uncore/src/main/scala/ecc.scala new file mode 100644 index 00000000..b5864b2c --- /dev/null +++ b/uncore/src/main/scala/ecc.scala @@ -0,0 +1,146 @@ +// See LICENSE for license details. 
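+// Usage sketch for the Code interface defined below. For k = 8 data bits,
+// (new SECCode).width(8) == 12 and (new SECDEDCode).width(8) == 13: a
+// Hamming code plus one overall parity bit. Here data8 is an assumed
+// 8-bit Bits signal:
+//
+//   val code = new SECDEDCode
+//   val enc  = code.encode(data8)                       // 13-bit codeword
+//   val dec  = code.decode(enc ^ UIntToOH(UInt(3), 13)) // inject a 1-bit error
+//   // dec.corrected recovers data8 and dec.correctable is asserted;
+//   // flipping a second bit would assert dec.uncorrectable instead.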
+ +package uncore + +import Chisel._ + +abstract class Decoding +{ + def uncorrected: Bits + def corrected: Bits + def correctable: Bool + def uncorrectable: Bool + def error = correctable || uncorrectable +} + +abstract class Code +{ + def width(w0: Int): Int + def encode(x: Bits): Bits + def decode(x: Bits): Decoding +} + +class IdentityCode extends Code +{ + def width(w0: Int) = w0 + def encode(x: Bits) = x + def decode(y: Bits) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = Bool(false) + def uncorrectable = Bool(false) + } +} + +class ParityCode extends Code +{ + def width(w0: Int) = w0+1 + def encode(x: Bits) = Cat(x.xorR, x) + def decode(y: Bits) = new Decoding { + def uncorrected = y(y.getWidth-2,0) + def corrected = uncorrected + def correctable = Bool(false) + def uncorrectable = y.xorR + } +} + +class SECCode extends Code +{ + def width(k: Int) = { + val m = new Unsigned(k).log2 + 1 + k + m + (if((1 << m) < m+k+1) 1 else 0) + } + def encode(x: Bits) = { + val k = x.getWidth + require(k > 0) + val n = width(k) + + val y = for (i <- 1 to n) yield { + if (isPow2(i)) { + val r = for (j <- 1 to n; if j != i && (j & i) != 0) + yield x(mapping(j)) + r reduce (_^_) + } else + x(mapping(i)) + } + Vec(y).toBits + } + def decode(y: Bits) = new Decoding { + val n = y.getWidth + require(n > 0 && !isPow2(n)) + + val p2 = for (i <- 0 until log2Up(n)) yield 1 << i + val syndrome = p2 map { i => + val r = for (j <- 1 to n; if (j & i) != 0) + yield y(j-1) + r reduce (_^_) + } + val s = Vec(syndrome).toBits + + private def swizzle(z: Bits) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))).toBits + def uncorrected = swizzle(y) + def corrected = swizzle(((y.toUInt << UInt(1)) ^ UIntToOH(s)) >> UInt(1)) + def correctable = s.orR + def uncorrectable = Bool(false) + } + private def mapping(i: Int) = i-1-log2Up(i) +} + +class SECDEDCode extends Code +{ + private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def encode(x: Bits) = par.encode(sec.encode(x)) + def decode(x: Bits) = new Decoding { + val secdec = sec.decode(x(x.getWidth-2,0)) + val pardec = par.decode(x) + + def uncorrected = secdec.uncorrected + def corrected = secdec.corrected + def correctable = pardec.uncorrectable + def uncorrectable = !pardec.uncorrectable && secdec.correctable + } +} + +object ErrGen +{ + // generate a 1-bit error with approximate probability 2^-f + def apply(width: Int, f: Int): Bits = { + require(width > 0 && f >= 0 && log2Up(width) + f <= 16) + UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) + } + def apply(x: Bits, f: Int): Bits = x ^ apply(x.getWidth, f) +} + +class SECDEDTest extends Module +{ + val code = new SECDEDCode + val k = 4 + val n = code.width(k) + + val io = new Bundle { + val original = Bits(OUTPUT, k) + val encoded = Bits(OUTPUT, n) + val injected = Bits(OUTPUT, n) + val uncorrected = Bits(OUTPUT, k) + val corrected = Bits(OUTPUT, k) + val correctable = Bool(OUTPUT) + val uncorrectable = Bool(OUTPUT) + } + + val c = Counter(Bool(true), 1 << k) + val numErrors = Counter(c._2, 3)._1 + val e = code.encode(c._1) + val i = e ^ Mux(numErrors < UInt(1), UInt(0), ErrGen(n, 1)) ^ Mux(numErrors < UInt(2), UInt(0), ErrGen(n, 1)) + val d = code.decode(i) + + io.original := c._1 + io.encoded := e + io.injected := i + io.uncorrected := d.uncorrected + io.corrected := d.corrected + io.correctable := d.correctable + io.uncorrectable := d.uncorrectable +} diff --git a/uncore/src/main/scala/htif.scala 
b/uncore/src/main/scala/htif.scala new file mode 100644 index 00000000..8a79cda1 --- /dev/null +++ b/uncore/src/main/scala/htif.scala @@ -0,0 +1,255 @@ +// See LICENSE for license details. + +package uncore + +import Chisel._ +import Node._ +import uncore._ + +case object HTIFWidth extends Field[Int] +case object HTIFNSCR extends Field[Int] +case object HTIFOffsetBits extends Field[Int] +case object HTIFNCores extends Field[Int] + +abstract trait HTIFParameters extends UsesParameters { + val dataBits = params(TLDataBits) + val dataBeats = params(TLDataBeats) + val w = params(HTIFWidth) + val nSCR = params(HTIFNSCR) + val offsetBits = params(HTIFOffsetBits) + val nCores = params(HTIFNCores) +} + +abstract class HTIFBundle extends Bundle with HTIFParameters + +class HostIO extends HTIFBundle +{ + val clk = Bool(OUTPUT) + val clk_edge = Bool(OUTPUT) + val in = Decoupled(Bits(width = w)).flip + val out = Decoupled(Bits(width = w)) + val debug_stats_pcr = Bool(OUTPUT) +} + +class PCRReq extends Bundle +{ + val rw = Bool() + val addr = Bits(width = 12) + val data = Bits(width = 64) +} + +class HTIFIO extends HTIFBundle { + val reset = Bool(INPUT) + val id = UInt(INPUT, log2Up(nCores)) + val pcr_req = Decoupled(new PCRReq).flip + val pcr_rep = Decoupled(Bits(width = 64)) + val ipi_req = Decoupled(Bits(width = log2Up(nCores))) + val ipi_rep = Decoupled(Bool()).flip + val debug_stats_pcr = Bool(OUTPUT) + // wired directly to stats register + // expected to be used to quickly indicate to testbench to do logging b/c in 'interesting' work +} + +class SCRIO extends HTIFBundle { + val rdata = Vec.fill(nSCR){Bits(INPUT, 64)} + val wen = Bool(OUTPUT) + val waddr = UInt(OUTPUT, log2Up(nSCR)) + val wdata = Bits(OUTPUT, 64) +} + +class HTIFModuleIO extends HTIFBundle { + val host = new HostIO + val cpu = Vec.fill(nCores){new HTIFIO}.flip + val mem = new ClientUncachedTileLinkIO + val scr = new SCRIO +} + +class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { + val io = new HTIFModuleIO + + io.host.debug_stats_pcr := io.cpu.map(_.debug_stats_pcr).reduce(_||_) + // system is 'interesting' if any tile is 'interesting' + + val short_request_bits = 64 + val long_request_bits = short_request_bits + dataBits*dataBeats + require(short_request_bits % w == 0) + + val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits + val rx_count = Reg(init=UInt(0,rx_count_w)) + val rx_shifter = Reg(Bits(width = short_request_bits)) + val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) + val next_cmd = rx_shifter_in(3,0) + val cmd = Reg(Bits()) + val size = Reg(Bits()) + val pos = Reg(Bits()) + val seqno = Reg(Bits()) + val addr = Reg(Bits()) + when (io.host.in.valid && io.host.in.ready) { + rx_shifter := rx_shifter_in + rx_count := rx_count + UInt(1) + when (rx_count === UInt(short_request_bits/w-1)) { + cmd := next_cmd + size := rx_shifter_in(15,4) + pos := rx_shifter_in(15,4+offsetBits-3) + seqno := rx_shifter_in(23,16) + addr := rx_shifter_in(63,24) + } + } + + val rx_word_count = (rx_count >> UInt(log2Up(short_request_bits/w))) + val rx_word_done = io.host.in.valid && rx_count(log2Up(short_request_bits/w)-1,0).andR + val packet_ram_depth = long_request_bits/short_request_bits-1 + val packet_ram = Mem(Bits(width = short_request_bits), packet_ram_depth) + when (rx_word_done && io.host.in.ready) { + packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1)) := rx_shifter_in + } + + val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: 
Nil = Enum(UInt(), 6) + + val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.getWidth-1, 0) + val pcr_coreid = addr(log2Up(nCores)-1+20+1,20) + val pcr_wdata = packet_ram(0) + + val bad_mem_packet = size(offsetBits-1-3,0).orR || addr(offsetBits-1-3,0).orR + val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, bad_mem_packet, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UInt(1), + Bool(true))) + + val tx_count = Reg(init=UInt(0, rx_count_w)) + val tx_subword_count = tx_count(log2Up(short_request_bits/w)-1,0) + val tx_word_count = tx_count(rx_count_w-1, log2Up(short_request_bits/w)) + val packet_ram_raddr = tx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1) + when (io.host.out.valid && io.host.out.ready) { + tx_count := tx_count + UInt(1) + } + + val rx_done = rx_word_done && Mux(rx_word_count === UInt(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UInt(0)) + val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UInt(0)) + val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UInt(0) && packet_ram_raddr.andR) + + val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_rreq :: state_mem_wreq :: state_mem_rresp :: state_mem_wresp :: state_tx :: Nil = Enum(UInt(), 8) + val state = Reg(init=state_rx) + + val (cnt, cnt_done) = Counter((state === state_mem_wreq && io.mem.acquire.ready) || + (state === state_mem_rresp && io.mem.grant.valid), dataBeats) + val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) + when (state === state_rx && rx_done) { + state := Mux(rx_cmd === cmd_readmem, state_mem_rreq, + Mux(rx_cmd === cmd_writemem, state_mem_wreq, + Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, + state_tx))) + } + when (state === state_mem_wreq) { + when (cnt_done) { state := state_mem_wresp } + } + when (state === state_mem_rreq) { + when(io.mem.acquire.ready) { state := state_mem_rresp } + } + when (state === state_mem_wresp && io.mem.grant.valid) { + state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) + pos := pos - UInt(1) + addr := addr + UInt(1 << offsetBits-3) + } + when (state === state_mem_rresp && cnt_done) { + state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) + pos := pos - UInt(1) + addr := addr + UInt(1 << offsetBits-3) + } + when (state === state_tx && tx_done) { + when (tx_word_count === tx_size) { + rx_count := UInt(0) + tx_count := UInt(0) + } + state := Mux(cmd === cmd_readmem && pos != UInt(0), state_mem_rreq, state_rx) + } + + val n = dataBits/short_request_bits + val mem_req_data = (0 until n).map { i => + val ui = UInt(i, log2Up(n)) + when (state === state_mem_rresp && io.mem.grant.valid) { + packet_ram(Cat(io.mem.grant.bits.addr_beat, ui)) := + io.mem.grant.bits.data((i+1)*short_request_bits-1, i*short_request_bits) + } + packet_ram(Cat(cnt, ui)) + }.reverse.reduce(_##_) + + val init_addr = addr.toUInt >> UInt(offsetBits-3) + io.mem.acquire.valid := state === state_mem_rreq || state === state_mem_wreq + io.mem.acquire.bits := Mux(cmd === cmd_writemem, + PutBlock( + addr_block = init_addr, + addr_beat = cnt, + client_xact_id = UInt(0), + data = mem_req_data), + GetBlock(addr_block = init_addr)) + io.mem.grant.ready := Bool(true) + + val pcrReadData = Reg(Bits(width = io.cpu(0).pcr_rep.bits.getWidth)) + for (i <- 0 until nCores) { + val my_reset = Reg(init=Bool(true)) + val my_ipi = Reg(init=Bool(false)) 
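+
+    // The wiring below forms an all-to-all IPI fabric: any core j may name
+    // core i on its ipi_req, which sets my_ipi here until core i drains it
+    // via the ipi_rep handshake. The same idea in isolation (a sketch with
+    // assumed ipi_req/ipi_rep Vecs of Decoupled interfaces and a my_ipi Vec
+    // of Regs):
+    //
+    //   for (i <- 0 until nCores) {
+    //     val hit = (0 until nCores).map(j => ipi_req(j).valid && ipi_req(j).bits === UInt(i))
+    //     when (ipi_rep(i).ready) { my_ipi(i) := Bool(false) }
+    //     ipi_rep(i).valid := my_ipi(i)
+    //     when (hit.reduce(_||_)) { my_ipi(i) := Bool(true) }
+    //   }
+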
+ + val cpu = io.cpu(i) + val me = pcr_coreid === UInt(i) + cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != UInt(pcr_RESET) + cpu.pcr_req.bits.rw := cmd === cmd_writecr + cpu.pcr_req.bits.addr := pcr_addr + cpu.pcr_req.bits.data := pcr_wdata + cpu.reset := my_reset + + when (cpu.ipi_rep.ready) { + my_ipi := Bool(false) + } + cpu.ipi_rep.valid := my_ipi + cpu.ipi_req.ready := Bool(true) + for (j <- 0 until nCores) { + when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UInt(i)) { + my_ipi := Bool(true) + } + } + + when (cpu.pcr_req.valid && cpu.pcr_req.ready) { + state := state_pcr_resp + } + when (state === state_pcr_req && me && pcr_addr === UInt(pcr_RESET)) { + when (cmd === cmd_writecr) { + my_reset := pcr_wdata(0) + } + pcrReadData := my_reset.toBits + state := state_tx + } + + cpu.pcr_rep.ready := Bool(true) + when (cpu.pcr_rep.valid) { + pcrReadData := cpu.pcr_rep.bits + state := state_tx + } + } + + val scr_addr = addr(log2Up(nSCR)-1, 0) + val scr_rdata = Vec.fill(io.scr.rdata.size){Bits(width = 64)} + for (i <- 0 until scr_rdata.size) + scr_rdata(i) := io.scr.rdata(i) + scr_rdata(0) := UInt(nCores) + scr_rdata(1) := UInt((BigInt(dataBits*dataBeats/8) << params(TLBlockAddrBits)) >> 20) + + io.scr.wen := Bool(false) + io.scr.wdata := pcr_wdata + io.scr.waddr := scr_addr.toUInt + when (state === state_pcr_req && pcr_coreid === SInt(-1)) { + io.scr.wen := cmd === cmd_writecr + pcrReadData := scr_rdata(scr_addr) + state := state_tx + } + + val tx_cmd = Mux(nack, cmd_nack, cmd_ack) + val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) + val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) + val tx_data = Mux(tx_word_count === UInt(0), tx_header, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData, + packet_ram(packet_ram_raddr))) + + io.host.in.ready := state === state_rx + io.host.out.valid := state === state_tx + io.host.out.bits := tx_data >> Cat(tx_count(log2Up(short_request_bits/w)-1,0), Bits(0, log2Up(w))) +} diff --git a/uncore/src/main/scala/memserdes.scala b/uncore/src/main/scala/memserdes.scala new file mode 100644 index 00000000..7497e2e4 --- /dev/null +++ b/uncore/src/main/scala/memserdes.scala @@ -0,0 +1,584 @@ +// See LICENSE for license details. 
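+// The serializers below all size their shift registers and counters with the
+// same ceiling division: a b-bit message crosses a w-bit link in ceil(b/w)
+// beats. A sketch of the arithmetic (plain Scala, no hardware; beatsOf is a
+// hypothetical helper):
+//
+//   def beatsOf(b: Int, w: Int) = (b + w - 1) / w
+//   beatsOf(70, 16)  // == 5: a 70-bit command needs five 16-bit beats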
+ +package uncore +import Chisel._ +import scala.math._ + +case object PAddrBits extends Field[Int] +case object VAddrBits extends Field[Int] +case object PgIdxBits extends Field[Int] +case object PgLevels extends Field[Int] +case object PgLevelBits extends Field[Int] +case object ASIdBits extends Field[Int] +case object PPNBits extends Field[Int] +case object VPNBits extends Field[Int] + +case object MIFAddrBits extends Field[Int] +case object MIFDataBits extends Field[Int] +case object MIFTagBits extends Field[Int] +case object MIFDataBeats extends Field[Int] + +trait MIFParameters extends UsesParameters { + val mifTagBits = params(MIFTagBits) + val mifAddrBits = params(MIFAddrBits) + val mifDataBits = params(MIFDataBits) + val mifDataBeats = params(MIFDataBeats) +} + +abstract class MIFBundle extends Bundle with MIFParameters +abstract class MIFModule extends Module with MIFParameters + +trait HasMemData extends MIFBundle { + val data = Bits(width = mifDataBits) +} + +trait HasMemAddr extends MIFBundle { + val addr = UInt(width = mifAddrBits) +} + +trait HasMemTag extends MIFBundle { + val tag = UInt(width = mifTagBits) +} + +class MemReqCmd extends HasMemAddr with HasMemTag { + val rw = Bool() +} + +class MemTag extends HasMemTag +class MemData extends HasMemData +class MemResp extends HasMemData with HasMemTag + +class MemIO extends Bundle { + val req_cmd = Decoupled(new MemReqCmd) + val req_data = Decoupled(new MemData) + val resp = Decoupled(new MemResp).flip +} + +class MemPipeIO extends Bundle { + val req_cmd = Decoupled(new MemReqCmd) + val req_data = Decoupled(new MemData) + val resp = Valid(new MemResp).flip +} + +class MemSerializedIO(w: Int) extends Bundle +{ + val req = Decoupled(Bits(width = w)) + val resp = Valid(Bits(width = w)).flip +} + +class MemSerdes(w: Int) extends MIFModule +{ + val io = new Bundle { + val wide = new MemIO().flip + val narrow = new MemSerializedIO(w) + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + val out_buf = Reg(Bits()) + val in_buf = Reg(Bits()) + + val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_idle) + val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w))) + val data_send_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w) + val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w) + + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := send_cnt + UInt(1) + out_buf := out_buf >> UInt(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + when (state === s_idle && io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) + } + when (state === s_read_addr && adone) { + state := s_idle + send_cnt := UInt(0) + } + when (state === s_write_addr && adone) { + state := s_write_idle + send_cnt := UInt(0) + } + when (state === s_write_idle && io.wide.req_data.valid) { + state := s_write_data + } + when (state === s_write_data && ddone) { + data_send_cnt := 
data_send_cnt + UInt(1) + state := Mux(data_send_cnt === UInt(mifDataBeats-1), s_idle, s_write_idle) + send_cnt := UInt(0) + } + + val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w))) + val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val resp_val = Reg(init=Bool(false)) + + resp_val := Bool(false) + when (io.narrow.resp.valid) { + recv_cnt := recv_cnt + UInt(1) + when (recv_cnt === UInt((rbits-1)/w)) { + recv_cnt := UInt(0) + data_recv_cnt := data_recv_cnt + UInt(1) + resp_val := Bool(true) + } + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + + io.wide.resp.valid := resp_val + io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf) +} + +class MemDesserIO(w: Int) extends Bundle { + val narrow = new MemSerializedIO(w).flip + val wide = new MemIO +} + +class MemDesser(w: Int) extends Module // test rig side +{ + val io = new MemDesserIO(w) + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + val mifDataBeats = params(MIFDataBeats) + + require(dbits >= abits && rbits >= dbits) + val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w))) + val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats))) + val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w) + val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w) + val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w) + + val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5) + val state = Reg(init=s_cmd_recv) + + val in_buf = Reg(Bits()) + when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { + recv_cnt := recv_cnt + UInt(1) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv + + when (state === s_cmd_recv && adone) { + state := s_cmd + recv_cnt := UInt(0) + } + when (state === s_cmd && io.wide.req_cmd.ready) { + state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply) + } + when (state === s_data_recv && ddone) { + state := s_data + recv_cnt := UInt(0) + } + when (state === s_data && io.wide.req_data.ready) { + state := s_data_recv + when (data_recv_cnt === UInt(mifDataBeats-1)) { + state := s_cmd_recv + } + data_recv_cnt := data_recv_cnt + UInt(1) + } + when (rdone) { // state === s_reply + when (data_recv_cnt === UInt(mifDataBeats-1)) { + state := s_cmd_recv + } + recv_cnt := UInt(0) + data_recv_cnt := data_recv_cnt + UInt(1) + } + + val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w) + io.wide.req_cmd.valid := state === s_cmd + io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd) + + io.wide.req_data.valid := state === s_data + io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w) + + val dataq = Module(new Queue(new MemResp, mifDataBeats)) + dataq.io.enq <> io.wide.resp + dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w) + + io.narrow.resp.valid := dataq.io.deq.valid + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UInt(w)) +} + +//Adapter betweewn an UncachedTileLinkIO and a mem controller MemIO +class MemIOTileLinkIOConverter(qDepth: Int) extends TLModule with MIFParameters { + val io = new Bundle { + val tl = new ManagerTileLinkIO + val mem = new MemIO + } + val dataBits = tlDataBits*tlDataBeats + val dstIdBits = params(LNHeaderBits) + require(tlDataBits*tlDataBeats == mifDataBits*mifDataBeats, "Data sizes between LLC and MC don't agree") + require(dstIdBits + 
tlClientXactIdBits < mifTagBits, "MemIO converter is going truncate tags: " + dstIdBits + " + " + tlClientXactIdBits + " >= " + mifTagBits) + + io.tl.acquire.ready := Bool(false) + io.tl.probe.valid := Bool(false) + io.tl.release.ready := Bool(false) + io.tl.finish.ready := Bool(true) + io.mem.resp.ready := Bool(false) + + val gnt_arb = Module(new Arbiter(new GrantToDst, 2)) + io.tl.grant <> gnt_arb.io.out + + val dst_off = dstIdBits + tlClientXactIdBits + val acq_has_data = io.tl.acquire.bits.hasData() + val rel_has_data = io.tl.release.bits.hasData() + + // Decompose outgoing TL Acquires into MemIO cmd and data + val active_out = Reg(init=Bool(false)) + val cmd_sent_out = Reg(init=Bool(false)) + val tag_out = Reg(UInt(width = mifTagBits)) + val addr_out = Reg(UInt(width = mifAddrBits)) + val has_data = Reg(init=Bool(false)) + val data_from_rel = Reg(init=Bool(false)) + val (tl_cnt_out, tl_wrap_out) = + Counter((io.tl.acquire.fire() && acq_has_data) || + (io.tl.release.fire() && rel_has_data), tlDataBeats) + val tl_done_out = Reg(init=Bool(false)) + val make_grant_ack = Reg(init=Bool(false)) + + gnt_arb.io.in(1).valid := Bool(false) + gnt_arb.io.in(1).bits := Grant( + dst = (if(dstIdBits > 0) tag_out(dst_off, tlClientXactIdBits + 1) else UInt(0)), + is_builtin_type = Bool(true), + g_type = Mux(data_from_rel, Grant.voluntaryAckType, Grant.putAckType), + client_xact_id = tag_out >> UInt(1), + manager_xact_id = UInt(0)) + + if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) { + val mem_cmd_q = Module(new Queue(new MemReqCmd, qDepth)) + val mem_data_q = Module(new Queue(new MemData, qDepth)) + mem_cmd_q.io.enq.valid := Bool(false) + mem_data_q.io.enq.valid := Bool(false) + val (mif_cnt_out, mif_wrap_out) = Counter(mem_data_q.io.enq.fire(), mifDataBeats) + val mif_done_out = Reg(init=Bool(false)) + val tl_buf_out = Vec.fill(tlDataBeats){ Reg(io.tl.acquire.bits.data.clone) } + val mif_buf_out = Vec.fill(mifDataBeats){ new MemData } + mif_buf_out := mif_buf_out.fromBits(tl_buf_out.toBits) + val mif_prog_out = (mif_cnt_out+UInt(1, width = log2Up(mifDataBeats+1)))*UInt(mifDataBits) + val tl_prog_out = tl_cnt_out*UInt(tlDataBits) + + when(!active_out){ + io.tl.release.ready := Bool(true) + io.tl.acquire.ready := !io.tl.release.valid + when(io.tl.release.valid) { + active_out := Bool(true) + cmd_sent_out := Bool(false) + tag_out := Cat(io.tl.release.bits.client_id, + io.tl.release.bits.client_xact_id, + io.tl.release.bits.isVoluntary()) + addr_out := io.tl.release.bits.addr_block + has_data := rel_has_data + data_from_rel := Bool(true) + make_grant_ack := io.tl.release.bits.requiresAck() + tl_done_out := tl_wrap_out + tl_buf_out(tl_cnt_out) := io.tl.release.bits.data + } .elsewhen(io.tl.acquire.valid) { + active_out := Bool(true) + cmd_sent_out := Bool(false) + tag_out := Cat(io.tl.release.bits.client_id, + io.tl.acquire.bits.client_xact_id, + io.tl.acquire.bits.isBuiltInType()) + addr_out := io.tl.acquire.bits.addr_block + has_data := acq_has_data + data_from_rel := Bool(false) + make_grant_ack := acq_has_data + tl_done_out := tl_wrap_out + tl_buf_out(tl_cnt_out) := io.tl.acquire.bits.data + } + } + when(active_out) { + mem_cmd_q.io.enq.valid := !cmd_sent_out + cmd_sent_out := cmd_sent_out || mem_cmd_q.io.enq.fire() + when(has_data) { + when(!tl_done_out) { + io.tl.acquire.ready := Bool(true) + when(io.tl.acquire.valid) { + tl_buf_out(tl_cnt_out) := Mux(data_from_rel, + io.tl.release.bits.data, + io.tl.acquire.bits.data) + } + } + when(!mif_done_out) { + mem_data_q.io.enq.valid := 
tl_done_out || mif_prog_out <= tl_prog_out + } + } + when(tl_wrap_out) { tl_done_out := Bool(true) } + when(mif_wrap_out) { mif_done_out := Bool(true) } + when(tl_done_out && make_grant_ack) { + gnt_arb.io.in(1).valid := Bool(true) + when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) } + } + when(cmd_sent_out && (!has_data || mif_done_out) && !make_grant_ack) { + active_out := Bool(false) + } + } + + mem_cmd_q.io.enq.bits.rw := has_data + mem_cmd_q.io.enq.bits.tag := tag_out + mem_cmd_q.io.enq.bits.addr := addr_out + mem_data_q.io.enq.bits.data := mif_buf_out(mif_cnt_out).data + io.mem.req_cmd <> mem_cmd_q.io.deq + io.mem.req_data <> mem_data_q.io.deq + } else { // Don't make the data buffers and try to flow cmd and data + io.mem.req_cmd.valid := Bool(false) + io.mem.req_data.valid := Bool(false) + io.mem.req_cmd.bits.rw := has_data + io.mem.req_cmd.bits.tag := tag_out + io.mem.req_cmd.bits.addr := addr_out + io.mem.req_data.bits.data := Mux(data_from_rel, + io.tl.release.bits.data, + io.tl.acquire.bits.data) + when(!active_out){ + io.tl.release.ready := io.mem.req_data.ready + io.tl.acquire.ready := io.mem.req_data.ready && !io.tl.release.valid + io.mem.req_data.valid := (io.tl.release.valid && rel_has_data) || + (io.tl.acquire.valid && acq_has_data) + when(io.mem.req_data.ready && (io.tl.release.valid || io.tl.acquire.valid)) { + active_out := !io.mem.req_cmd.ready || io.mem.req_data.valid + io.mem.req_cmd.valid := Bool(true) + cmd_sent_out := io.mem.req_cmd.ready + tl_done_out := tl_wrap_out + when(io.tl.release.valid) { + data_from_rel := Bool(true) + make_grant_ack := io.tl.release.bits.requiresAck() + io.mem.req_data.bits.data := io.tl.release.bits.data + val tag = Cat(io.tl.release.bits.client_id, + io.tl.release.bits.client_xact_id, + io.tl.release.bits.isVoluntary()) + val addr = io.tl.release.bits.addr_block + io.mem.req_cmd.bits.tag := tag + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := rel_has_data + tag_out := tag + addr_out := addr + has_data := rel_has_data + } .elsewhen(io.tl.acquire.valid) { + data_from_rel := Bool(false) + make_grant_ack := acq_has_data // i.e. is it a Put + io.mem.req_data.bits.data := io.tl.acquire.bits.data + io.mem.req_cmd.bits.rw := acq_has_data + val tag = Cat(io.tl.acquire.bits.client_id, + io.tl.acquire.bits.client_xact_id, + io.tl.acquire.bits.isBuiltInType()) + val addr = io.tl.acquire.bits.addr_block + io.mem.req_cmd.bits.tag := tag + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := acq_has_data + tag_out := tag + addr_out := addr + has_data := acq_has_data + } + } + } + when(active_out) { + io.mem.req_cmd.valid := !cmd_sent_out + cmd_sent_out := cmd_sent_out || io.mem.req_cmd.fire() + when(has_data && !tl_done_out) { + when(data_from_rel) { + io.tl.release.ready := io.mem.req_data.ready + io.mem.req_data.valid := io.tl.release.valid + } .otherwise { + io.tl.acquire.ready := io.mem.req_data.ready + io.mem.req_data.valid := io.tl.acquire.valid + } + } + when(tl_wrap_out) { tl_done_out := Bool(true) } + when(tl_done_out && make_grant_ack) { + gnt_arb.io.in(1).valid := Bool(true) // TODO: grants for voluntary acks? 
+ when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) } + } + when(cmd_sent_out && (!has_data || tl_done_out) && !make_grant_ack) { + active_out := Bool(false) + } + } + } + + // Aggregate incoming MemIO responses into TL Grants + val active_in = Reg(init=Bool(false)) + val (tl_cnt_in, tl_wrap_in) = Counter(io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats) + val tag_in = Reg(UInt(width = mifTagBits)) + + if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) { + val (mif_cnt_in, mif_wrap_in) = Counter(io.mem.resp.fire(), mifDataBeats) // TODO: Assumes all resps have data + val mif_done_in = Reg(init=Bool(false)) + val mif_buf_in = Vec.fill(mifDataBeats){ Reg(new MemData) } + val tl_buf_in = Vec.fill(tlDataBeats){ io.tl.acquire.bits.data.clone } + tl_buf_in := tl_buf_in.fromBits(mif_buf_in.toBits) + val tl_prog_in = (tl_cnt_in+UInt(1, width = log2Up(tlDataBeats+1)))*UInt(tlDataBits) + val mif_prog_in = mif_cnt_in*UInt(mifDataBits) + gnt_arb.io.in(0).bits := Grant( + dst = (if(dstIdBits > 0) tag_in(dst_off, tlClientXactIdBits + 1) else UInt(0)), + is_builtin_type = tag_in(0), + g_type = Mux(tag_in(0), Grant.getDataBlockType, UInt(0)), // TODO: Assumes MI or MEI protocol + client_xact_id = tag_in >> UInt(1), + manager_xact_id = UInt(0), + addr_beat = tl_cnt_in, + data = tl_buf_in(tl_cnt_in)) + + when(!active_in) { + io.mem.resp.ready := Bool(true) + when(io.mem.resp.valid) { + active_in := Bool(true) + mif_done_in := mif_wrap_in + tag_in := io.mem.resp.bits.tag + mif_buf_in(tl_cnt_in).data := io.mem.resp.bits.data + } + } + when(active_in) { + gnt_arb.io.in(0).valid := mif_done_in || tl_prog_in <= mif_prog_in + when(!mif_done_in) { + io.mem.resp.ready := Bool(true) + when(io.mem.resp.valid) { + mif_buf_in(mif_cnt_in).data := io.mem.resp.bits.data + } + } + when(mif_wrap_in) { mif_done_in := Bool(true) } + when(tl_wrap_in) { active_in := Bool(false) } + } + } else { // Don't generate all the uneeded data buffers and flow resp + gnt_arb.io.in(0).valid := io.mem.resp.valid + io.mem.resp.ready := gnt_arb.io.in(0).ready + gnt_arb.io.in(0).bits := Grant( + dst = (if(dstIdBits > 0) io.mem.resp.bits.tag(dst_off, tlClientXactIdBits + 1) else UInt(0)), + is_builtin_type = io.mem.resp.bits.tag(0), + g_type = Mux(io.mem.resp.bits.tag(0), Grant.getDataBlockType, UInt(0)), // TODO: Assumes MI or MEI protocol + client_xact_id = io.mem.resp.bits.tag >> UInt(1), + manager_xact_id = UInt(0), + addr_beat = tl_cnt_in, + data = io.mem.resp.bits.data) + } +} + +class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module +{ + val io = new QueueIO(data, entries) + require(entries > 1) + + val do_flow = Bool() + val do_enq = io.enq.fire() && !do_flow + val do_deq = io.deq.fire() && !do_flow + + val maybe_full = Reg(init=Bool(false)) + val enq_ptr = Counter(do_enq, entries)._1 + val (deq_ptr, deq_done) = Counter(do_deq, entries) + when (do_enq != do_deq) { maybe_full := do_enq } + + val ptr_match = enq_ptr === deq_ptr + val empty = ptr_match && !maybe_full + val full = ptr_match && maybe_full + val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2) + do_flow := empty && io.deq.ready + + val ram = Mem(data, entries, seqRead = true) + val ram_addr = Reg(Bits()) + val ram_out_valid = Reg(Bool()) + ram_out_valid := Bool(false) + when (do_enq) { ram(enq_ptr) := io.enq.bits } + when (io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)) { + ram_out_valid := Bool(true) + ram_addr := Mux(io.deq.valid, Mux(deq_done, UInt(0), deq_ptr + UInt(1)), deq_ptr) + } + + 
io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) + io.enq.ready := !full + io.deq.bits := Mux(empty, io.enq.bits, ram(ram_addr)) +} + +class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module +{ + val io = new QueueIO(data, entries) + + val fq = Module(new HellaFlowQueue(entries)(data)) + io.enq <> fq.io.enq + io.deq <> Queue(fq.io.deq, 1, pipe = true) +} + +object HellaQueue +{ + def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = { + val q = Module((new HellaQueue(entries)) { enq.bits.clone }) + q.io.enq.valid := enq.valid // not using <> so that override is allowed + q.io.enq.bits := enq.bits + enq.ready := q.io.enq.ready + q.io.deq + } +} + +class MemPipeIOMemIOConverter(numRequests: Int) extends MIFModule { + val io = new Bundle { + val cpu = new MemIO().flip + val mem = new MemPipeIO + } + + val numEntries = numRequests * mifDataBeats + val size = log2Down(numEntries) + 1 + + val inc = Bool() + val dec = Bool() + val count = Reg(init=UInt(numEntries, size)) + val watermark = count >= UInt(mifDataBeats) + + when (inc && !dec) { + count := count + UInt(1) + } + when (!inc && dec) { + count := count - UInt(mifDataBeats) + } + when (inc && dec) { + count := count - UInt(mifDataBeats-1) + } + + val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark + + io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask + io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask + io.mem.req_cmd.bits := io.cpu.req_cmd.bits + + io.mem.req_data <> io.cpu.req_data + + // Have separate queues to allow for different mem implementations + val resp_data_q = Module((new HellaQueue(numEntries)) { new MemData }) + resp_data_q.io.enq.valid := io.mem.resp.valid + resp_data_q.io.enq.bits.data := io.mem.resp.bits.data + + val resp_tag_q = Module((new HellaQueue(numEntries)) { new MemTag }) + resp_tag_q.io.enq.valid := io.mem.resp.valid + resp_tag_q.io.enq.bits.tag := io.mem.resp.bits.tag + + io.cpu.resp.valid := resp_data_q.io.deq.valid && resp_tag_q.io.deq.valid + io.cpu.resp.bits.data := resp_data_q.io.deq.bits.data + io.cpu.resp.bits.tag := resp_tag_q.io.deq.bits.tag + resp_data_q.io.deq.ready := io.cpu.resp.ready + resp_tag_q.io.deq.ready := io.cpu.resp.ready + + inc := resp_data_q.io.deq.fire() && resp_tag_q.io.deq.fire() + dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw +} + +class MemPipeIOTileLinkIOConverter(outstanding: Int) extends MIFModule { + val io = new Bundle { + val tl = new ManagerTileLinkIO + val mem = new MemPipeIO + } + + val a = Module(new MemIOTileLinkIOConverter(1)) + val b = Module(new MemPipeIOMemIOConverter(outstanding)) + a.io.tl <> io.tl + b.io.cpu.req_cmd <> Queue(a.io.mem.req_cmd, 2, pipe=true) + b.io.cpu.req_data <> Queue(a.io.mem.req_data, mifDataBeats, pipe=true) + a.io.mem.resp <> b.io.cpu.resp + b.io.mem <> io.mem +} diff --git a/uncore/src/main/scala/metadata.scala b/uncore/src/main/scala/metadata.scala new file mode 100644 index 00000000..b8d4446c --- /dev/null +++ b/uncore/src/main/scala/metadata.scala @@ -0,0 +1,315 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +/** Base class to represent coherence information in clients and managers */ +abstract class CoherenceMetadata extends Bundle { + val co = params(TLCoherencePolicy) + val id = params(TLId) +} + +/** Stores the client-side coherence information, + * such as permissions on the data and whether the data is dirty. + * Its API can be used to make TileLink messages in response to + * memory operations or [[uncore.Probe]] messages. 
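+ *
+ * For illustration, a minimal sketch (meta, op, and the io/xact signals are
+ * hypothetical; the method calls are the API defined below):
+ * {{{
+ *   // sketch: turn a miss into a custom Acquire using this metadata
+ *   when (meta.isMiss(op)) {
+ *     io.acquire.valid := Bool(true)
+ *     io.acquire.bits := meta.makeAcquire(xact_id, miss_block, op)
+ *   }
+ * }}}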
+ */
+class ClientMetadata extends CoherenceMetadata {
+  /** Actual state information stored in this bundle */
+  val state = UInt(width = co.clientStateWidth)
+
+  /** Metadata equality */
+  def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
+  def !=(rhs: ClientMetadata): Bool = !this.===(rhs)
+
+  /** Is the block's data present in this cache */
+  def isValid(dummy: Int = 0): Bool = co.isValid(this)
+  /** Does this cache have permissions on this block sufficient to perform op */
+  def isHit(op_code: UInt): Bool = co.isHit(op_code, this)
+  /** Does this cache lack permissions on this block sufficient to perform op */
+  def isMiss(op_code: UInt): Bool = !co.isHit(op_code, this)
+  /** Does a secondary miss on the block require another Acquire message */
+  def requiresAcquireOnSecondaryMiss(first_op: UInt, second_op: UInt): Bool =
+    co.requiresAcquireOnSecondaryMiss(first_op, second_op, this)
+  /** Does op require a Release to be made to outer memory */
+  def requiresReleaseOnCacheControl(op_code: UInt): Bool =
+    co.requiresReleaseOnCacheControl(op_code: UInt, this)
+  /** Does an eviction require a Release to be made to outer memory */
+  def requiresVoluntaryWriteback(dummy: Int = 0): Bool =
+    co.requiresReleaseOnCacheControl(M_FLUSH, this)
+
+  /** Constructs an Acquire message based on this metadata and a memory operation
+   *
+   * @param client_xact_id client's transaction id
+   * @param addr_block address of the cache block
+   * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+   */
+  def makeAcquire(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      op_code: UInt): Acquire = {
+    Bundle(Acquire(
+      is_builtin_type = Bool(false),
+      a_type = co.getAcquireType(op_code, this),
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      union = Cat(op_code, Bool(true))),
+      { case TLId => id })
+  }
+
+  /** Constructs a Release message based on this metadata on an eviction
+   *
+   * @param client_xact_id client's transaction id
+   * @param addr_block address of the cache block
+   * @param addr_beat sub-block address (which beat)
+   * @param data data being written back
+   */
+  def makeVoluntaryWriteback(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release = {
+    Bundle(Release(
+      voluntary = Bool(true),
+      r_type = co.getReleaseType(M_FLUSH, this),
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      data = data), { case TLId => id })
+  }
+
+  /** Constructs a Release message based on this metadata and a [[uncore.Probe]]
+   *
+   * @param prb the incoming [[uncore.Probe]]
+   * @param addr_beat sub-block address (which beat)
+   * @param data data being released
+   */
+  def makeRelease(
+      prb: Probe,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): Release = {
+    Bundle(Release(
+      voluntary = Bool(false),
+      r_type = co.getReleaseType(prb, this),
+      client_xact_id = UInt(0),
+      addr_block = prb.addr_block,
+      addr_beat = addr_beat,
+      data = data), { case TLId => id })
+  }
+
+  /** New metadata after receiving a [[uncore.Grant]]
+   *
+   * @param incoming the incoming [[uncore.Grant]]
+   * @param pending the mem op that triggered this transaction
+   */
+  def onGrant(incoming: Grant, pending: UInt): ClientMetadata =
+    Bundle(co.clientMetadataOnGrant(incoming, pending, this), { case TLId => id })
+
+  /** New metadata after receiving a [[uncore.Probe]]
+   *
+   * @param incoming the incoming [[uncore.Probe]]
+   */
+  def onProbe(incoming: Probe): ClientMetadata =
+    Bundle(co.clientMetadataOnProbe(incoming, this), { case TLId => id })
+
+  /** New metadata after an op_code hits this block
+   *
+   * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+   */
+  def onHit(op_code: UInt): ClientMetadata =
+    Bundle(co.clientMetadataOnHit(op_code, this), { case TLId => id })
+
+  /** New metadata after a cache control operation on this block
+   *
+   * @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
+   */
+  def onCacheControl(op_code: UInt): ClientMetadata =
+    Bundle(co.clientMetadataOnCacheControl(op_code, this), { case TLId => id })
+}
+
+/** Factories for ClientMetadata, including on reset */
+object ClientMetadata {
+  def apply(state: UInt) = {
+    val meta = new ClientMetadata
+    meta.state := state
+    meta
+  }
+  def onReset = new ClientMetadata().co.clientMetadataOnReset
+}
+
+/** Stores manager-side information about the status
+ * of a cache block, including whether it has any known sharers.
+ *
+ * Its API can be used to create [[uncore.Probe]] and [[uncore.Grant]] messages.
+ */
+class ManagerMetadata extends CoherenceMetadata {
+  // Currently no coherence policies assume manager-side state information
+  // val state = UInt(width = co.masterStateWidth) TODO: Fix 0-width wires in Chisel
+
+  /** The directory information for this block */
+  val sharers = UInt(width = co.dir.width)
+
+  /** Metadata equality */
+  def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
+    this.sharers === rhs.sharers
+  def !=(rhs: ManagerMetadata): Bool = !this.===(rhs)
+
+  /** Converts the directory info into an N-hot sharer bitvector (i.e. full representation) */
+  def full(dummy: Int = 0): UInt = co.dir.full(this.sharers)
+
+  /** Does this [[uncore.Acquire]] require [[uncore.Probe Probes]] to be sent */
+  def requiresProbes(acq: Acquire): Bool = co.requiresProbes(acq, this)
+  /** Does this memory op require [[uncore.Probe Probes]] to be sent */
+  def requiresProbes(op_code: UInt): Bool = co.requiresProbes(op_code, this)
+  /** Does an eviction require [[uncore.Probe Probes]] to be sent */
+  def requiresProbesOnVoluntaryWriteback(dummy: Int = 0): Bool =
+    co.requiresProbes(M_FLUSH, this)
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
+   *
+   * @param dst Destination client id for this Probe
+   * @param acq Acquire message triggering this Probe
+   */
+  def makeProbe(dst: UInt, acq: Acquire): ProbeToDst =
+    Bundle(Probe(dst, co.getProbeType(acq, this), acq.addr_block), { case TLId => id })
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for a given mem op
+   *
+   * @param dst Destination client id for this Probe
+   * @param op_code memory operation triggering this Probe
+   * @param addr_block address of the cache block being probed
+   */
+  def makeProbe(dst: UInt, op_code: UInt, addr_block: UInt): ProbeToDst =
+    Bundle(Probe(dst, co.getProbeType(op_code, this), addr_block), { case TLId => id })
+
+  /** Construct an appropriate [[uncore.ProbeToDst]] for an eviction
+   *
+   * @param dst Destination client id for this Probe
+   * @param addr_block address of the cache block being probed prior to eviction
+   */
+  def makeProbeForVoluntaryWriteback(dst: UInt, addr_block: UInt): ProbeToDst =
+    makeProbe(dst, M_FLUSH, addr_block)
+
+  /** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]]
+   *
+   * @param rel Release message being acknowledged by this Grant
+   * @param manager_xact_id manager's transaction id
+   */
+  def makeGrant(rel: ReleaseFromSrc, manager_xact_id: UInt): GrantToDst = {
+
Bundle(Grant( + dst = rel.client_id, + is_builtin_type = Bool(true), + g_type = Grant.voluntaryAckType, + client_xact_id = rel.client_xact_id, + manager_xact_id = manager_xact_id), { case TLId => id }) + } + + /** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] + * + * May contain single or multiple beats of data, or just be a permissions upgrade. + * + * @param acq Acquire message being responded to by this Grant + * @param manager_xact_id manager's transaction id + * @param addr_beat beat id of the data + * @param data data being refilled to the original requestor + */ + def makeGrant( + acq: AcquireFromSrc, + manager_xact_id: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)): GrantToDst = { + Bundle(Grant( + dst = acq.client_id, + is_builtin_type = acq.isBuiltInType(), + g_type = Mux(acq.isBuiltInType(), + acq.getBuiltInGrantType(), + co.getGrantType(acq, this)), + client_xact_id = acq.client_xact_id, + manager_xact_id = manager_xact_id, + addr_beat = addr_beat, + data = data), { case TLId => id }) + } + + /** Construct an [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] with some overrides + * + * Used to respond to secondary misses merged into this transaction. + * May contain single or multiple beats of data. + * + * @param pri Primary miss's Acquire message, used to get g_type and dst + * @param sec Secondary miss info, used to get beat and client_xact_id + * @param manager_xact_id manager's transaction id + * @param data data being refilled to the original requestor + */ + def makeGrant( + pri: AcquireFromSrc, + sec: SecondaryMissInfo, + manager_xact_id: UInt, + data: UInt): GrantToDst = { + val g = makeGrant(pri, manager_xact_id, sec.addr_beat, data) + g.client_xact_id := sec.client_xact_id + g + } + + /** New metadata after receiving a [[uncore.ReleaseFromSrc]] + * + * @param incoming the incoming [[uncore.ReleaseFromSrc]] + */ + def onRelease(incoming: ReleaseFromSrc): ManagerMetadata = + Bundle(co.managerMetadataOnRelease(incoming, incoming.client_id, this), { case TLId => id }) + + /** New metadata after sending a [[uncore.GrantToDst]] + * + * @param outgoing the outgoing [[uncore.GrantToDst]] + */ + def onGrant(outgoing: GrantToDst): ManagerMetadata = + Bundle(co.managerMetadataOnGrant(outgoing, outgoing.client_id, this), { case TLId => id }) +} + +/** Factories for ManagerMetadata, including on reset */ +object ManagerMetadata { + def apply(sharers: UInt, state: UInt = UInt(width = 0)) = { + val meta = new ManagerMetadata + //meta.state := state TODO: Fix 0-width wires in Chisel + meta.sharers := sharers + meta + } + def apply() = { + val meta = new ManagerMetadata + //meta.state := UInt(width = 0) TODO: Fix 0-width wires in Chisel + meta.sharers := meta.co.dir.flush + meta + } + def onReset = new ManagerMetadata().co.managerMetadataOnReset +} + +/** HierarchicalMetadata is used in a cache in a multi-level memory hierarchy + * that is a manager with respect to some inner caches and a client with + * respect to some outer cache. + * + * This class makes use of two different sets of TileLink parameters, which are + * applied by contextually mapping [[uncore.TLId]] to one of + * [[uncore.InnerTLId]] or [[uncore.OuterTLId]]. 
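+ *
+ * For illustration, a sketch of how a hierarchical tracker might use the two
+ * views (the io and dst/xact signals are hypothetical):
+ * {{{
+ *   val meta = HierarchicalMetadata.onReset
+ *   // sketch: probe inner clients with the manager-side view
+ *   io.inner.probe.bits := meta.inner.makeProbe(dst, io.inner.acquire.bits)
+ *   // sketch: refill from the outer hierarchy with the client-side view
+ *   io.outer.acquire.bits := meta.outer.makeAcquire(xact_id, addr_block, op)
+ * }}}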
+ */ +class HierarchicalMetadata extends CoherenceMetadata { + val inner: ManagerMetadata = Bundle(new ManagerMetadata, {case TLId => params(InnerTLId)}) + val outer: ClientMetadata = Bundle(new ClientMetadata, {case TLId => params(OuterTLId)}) + def ===(rhs: HierarchicalMetadata): Bool = + this.inner === rhs.inner && this.outer === rhs.outer + def !=(rhs: HierarchicalMetadata): Bool = !this.===(rhs) +} + +/** Factories for HierarchicalMetadata, including on reset */ +object HierarchicalMetadata { + def apply(inner: ManagerMetadata, outer: ClientMetadata): HierarchicalMetadata = { + val m = new HierarchicalMetadata + m.inner := inner + m.outer := outer + m + } + def onReset: HierarchicalMetadata = apply(ManagerMetadata.onReset, ClientMetadata.onReset) +} + +/** Identifies the TLId of the inner network in a hierarchical cache controller */ +case object InnerTLId extends Field[String] +/** Identifies the TLId of the outer network in a hierarchical cache controller */ +case object OuterTLId extends Field[String] diff --git a/uncore/src/main/scala/network.scala b/uncore/src/main/scala/network.scala new file mode 100644 index 00000000..4b00091d --- /dev/null +++ b/uncore/src/main/scala/network.scala @@ -0,0 +1,104 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +case object LNEndpoints extends Field[Int] +case object LNHeaderBits extends Field[Int] + +class PhysicalHeader(n: Int) extends Bundle { + val src = UInt(width = log2Up(n)) + val dst = UInt(width = log2Up(n)) +} + +class PhysicalNetworkIO[T <: Data](n: Int, dType: T) extends Bundle { + val header = new PhysicalHeader(n) + val payload = dType.clone + override def clone = new PhysicalNetworkIO(n,dType).asInstanceOf[this.type] +} + +class BasicCrossbarIO[T <: Data](n: Int, dType: T) extends Bundle { + val in = Vec.fill(n){Decoupled(new PhysicalNetworkIO(n,dType))}.flip + val out = Vec.fill(n){Decoupled(new PhysicalNetworkIO(n,dType))} +} + +abstract class PhysicalNetwork extends Module + +class BasicCrossbar[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Option[PhysicalNetworkIO[T] => Bool] = None) extends PhysicalNetwork { + val io = new BasicCrossbarIO(n, dType) + + val rdyVecs = List.fill(n){Vec.fill(n)(Bool())} + + io.out.zip(rdyVecs).zipWithIndex.map{ case ((out, rdys), i) => { + val rrarb = Module(new LockingRRArbiter(io.in(0).bits, n, count, needsLock)) + (rrarb.io.in, io.in, rdys).zipped.map{ case (arb, in, rdy) => { + arb.valid := in.valid && (in.bits.header.dst === UInt(i)) + arb.bits := in.bits + rdy := arb.ready && (in.bits.header.dst === UInt(i)) + }} + out <> rrarb.io.out + }} + for(i <- 0 until n) { + io.in(i).ready := rdyVecs.map(r => r(i)).reduceLeft(_||_) + } +} + +abstract class LogicalNetwork extends Module + +class LogicalHeader extends Bundle { + val src = UInt(width = params(LNHeaderBits)) + val dst = UInt(width = params(LNHeaderBits)) +} + +class LogicalNetworkIO[T <: Data](dType: T) extends Bundle { + val header = new LogicalHeader + val payload = dType.clone + override def clone = { new LogicalNetworkIO(dType).asInstanceOf[this.type] } +} + +object DecoupledLogicalNetworkIOWrapper { + def apply[T <: Data]( + in: DecoupledIO[T], + src: UInt = UInt(0), + dst: UInt = UInt(0)): DecoupledIO[LogicalNetworkIO[T]] = { + val out = Decoupled(new LogicalNetworkIO(in.bits.clone)).asDirectionless + out.valid := in.valid + out.bits.payload := in.bits + out.bits.header.dst := dst + out.bits.header.src := src + in.ready := out.ready + out + } +} + +object DecoupledLogicalNetworkIOUnwrapper { 
+ def apply[T <: Data](in: DecoupledIO[LogicalNetworkIO[T]]): DecoupledIO[T] = { + val out = Decoupled(in.bits.payload.clone).asDirectionless + out.valid := in.valid + out.bits := in.bits.payload + in.ready := out.ready + out + } +} + +object DefaultFromPhysicalShim { + def apply[T <: Data](in: DecoupledIO[PhysicalNetworkIO[T]]): DecoupledIO[LogicalNetworkIO[T]] = { + val out = Decoupled(new LogicalNetworkIO(in.bits.payload)).asDirectionless + out.bits.header := in.bits.header + out.bits.payload := in.bits.payload + out.valid := in.valid + in.ready := out.ready + out + } +} + +object DefaultToPhysicalShim { + def apply[T <: Data](n: Int, in: DecoupledIO[LogicalNetworkIO[T]]): DecoupledIO[PhysicalNetworkIO[T]] = { + val out = Decoupled(new PhysicalNetworkIO(n, in.bits.payload)).asDirectionless + out.bits.header := in.bits.header + out.bits.payload := in.bits.payload + out.valid := in.valid + in.ready := out.ready + out + } +} diff --git a/uncore/src/main/scala/package.scala b/uncore/src/main/scala/package.scala new file mode 100644 index 00000000..2c6c4a5f --- /dev/null +++ b/uncore/src/main/scala/package.scala @@ -0,0 +1,6 @@ +// See LICENSE for license details. + +package object uncore extends uncore.constants.MemoryOpConstants +{ + implicit def toOption[A](a: A) = Option(a) +} diff --git a/uncore/src/main/scala/slowio.scala b/uncore/src/main/scala/slowio.scala new file mode 100644 index 00000000..95ca34e6 --- /dev/null +++ b/uncore/src/main/scala/slowio.scala @@ -0,0 +1,70 @@ +// See LICENSE for license details. + +package uncore +import Chisel._ + +class SlowIO[T <: Data](val divisor_max: Int)(data: => T) extends Module +{ + val io = new Bundle { + val out_fast = Decoupled(data).flip + val out_slow = Decoupled(data) + val in_fast = Decoupled(data) + val in_slow = Decoupled(data).flip + val clk_slow = Bool(OUTPUT) + val set_divisor = Valid(Bits(width = 32)).flip + val divisor = Bits(OUTPUT, 32) + } + + require(divisor_max >= 8 && divisor_max <= 65536 && isPow2(divisor_max)) + val divisor = Reg(init=UInt(divisor_max-1)) + val d_shadow = Reg(init=UInt(divisor_max-1)) + val hold = Reg(init=UInt(divisor_max/4-1)) + val h_shadow = Reg(init=UInt(divisor_max/4-1)) + when (io.set_divisor.valid) { + d_shadow := io.set_divisor.bits(log2Up(divisor_max)-1, 0).toUInt + h_shadow := io.set_divisor.bits(log2Up(divisor_max)-1+16, 16).toUInt + } + io.divisor := hold << UInt(16) | divisor + + val count = Reg{UInt(width = log2Up(divisor_max))} + val myclock = Reg{Bool()} + count := count + UInt(1) + + val rising = count === (divisor >> UInt(1)) + val falling = count === divisor + val held = count === (divisor >> UInt(1)) + hold + + when (falling) { + divisor := d_shadow + hold := h_shadow + count := UInt(0) + myclock := Bool(false) + } + when (rising) { + myclock := Bool(true) + } + + val in_slow_rdy = Reg(init=Bool(false)) + val out_slow_val = Reg(init=Bool(false)) + val out_slow_bits = Reg(data) + + val fromhost_q = Module(new Queue(data,1)) + fromhost_q.io.enq.valid := rising && (io.in_slow.valid && in_slow_rdy || this.reset) + fromhost_q.io.enq.bits := io.in_slow.bits + fromhost_q.io.deq <> io.in_fast + + val tohost_q = Module(new Queue(data,1)) + tohost_q.io.enq <> io.out_fast + tohost_q.io.deq.ready := rising && io.out_slow.ready && out_slow_val + + when (held) { + in_slow_rdy := fromhost_q.io.enq.ready + out_slow_val := tohost_q.io.deq.valid + out_slow_bits := Mux(this.reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits) + } + + io.in_slow.ready := in_slow_rdy + io.out_slow.valid := out_slow_val + 
io.out_slow.bits := out_slow_bits
+  io.clk_slow := myclock
+}
diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala
new file mode 100644
index 00000000..306c8731
--- /dev/null
+++ b/uncore/src/main/scala/tilelink.scala
@@ -0,0 +1,1221 @@
+// See LICENSE for license details.
+
+package uncore
+import Chisel._
+import scala.math.max
+
+/** Parameters exposed to the top-level design, set based on
+ * external requirements or design space exploration
+ */
+/** Unique name per TileLink network */
+case object TLId extends Field[String]
+/** Coherency policy used to define custom message types */
+case object TLCoherencePolicy extends Field[CoherencePolicy]
+/** Number of manager agents */
+case object TLNManagers extends Field[Int]
+/** Number of client agents */
+case object TLNClients extends Field[Int]
+/** Number of client agents that cache data and use custom [[uncore.Acquire]] types */
+case object TLNCachingClients extends Field[Int]
+/** Number of client agents that do not cache data and use built-in [[uncore.Acquire]] types */
+case object TLNCachelessClients extends Field[Int]
+/** Maximum number of unique outstanding transactions per client */
+case object TLMaxClientXacts extends Field[Int]
+/** Maximum number of clients multiplexed onto a single port */
+case object TLMaxClientsPerPort extends Field[Int]
+/** Maximum number of unique outstanding transactions per manager */
+case object TLMaxManagerXacts extends Field[Int]
+/** Width of cache block addresses */
+case object TLBlockAddrBits extends Field[Int]
+/** Width of data beats */
+case object TLDataBits extends Field[Int]
+/** Number of data beats per cache block */
+case object TLDataBeats extends Field[Int]
+/** Whether the underlying physical network preserves point-to-point ordering of messages */
+case object TLNetworkIsOrderedP2P extends Field[Boolean]
+
+/** Utility trait for building Modules and Bundles that use TileLink parameters */
+trait TileLinkParameters extends UsesParameters {
+  val tlCoh = params(TLCoherencePolicy)
+  val tlNManagers = params(TLNManagers)
+  val tlNClients = params(TLNClients)
+  val tlNCachingClients = params(TLNCachingClients)
+  val tlNCachelessClients = params(TLNCachelessClients)
+  val tlClientIdBits = log2Up(tlNClients)
+  val tlManagerIdBits = log2Up(tlNManagers)
+  val tlMaxClientXacts = params(TLMaxClientXacts)
+  val tlMaxClientsPerPort = params(TLMaxClientsPerPort)
+  val tlMaxManagerXacts = params(TLMaxManagerXacts)
+  val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientsPerPort)
+  val tlManagerXactIdBits = log2Up(tlMaxManagerXacts)
+  val tlBlockAddrBits = params(TLBlockAddrBits)
+  val tlDataBits = params(TLDataBits)
+  val tlDataBytes = tlDataBits/8
+  val tlDataBeats = params(TLDataBeats)
+  val tlWriteMaskBits = if(tlDataBits/8 < 1) 1 else tlDataBits/8
+  val tlBeatAddrBits = log2Up(tlDataBeats)
+  val tlByteAddrBits = log2Up(tlWriteMaskBits)
+  val tlMemoryOpcodeBits = M_SZ
+  val tlMemoryOperandSizeBits = MT_SZ
+  val tlAcquireTypeBits = max(log2Up(Acquire.nBuiltInTypes),
+                              tlCoh.acquireTypeWidth)
+  val tlAcquireUnionBits = max(tlWriteMaskBits,
+                               (tlByteAddrBits +
+                                tlMemoryOperandSizeBits +
+                                tlMemoryOpcodeBits)) + 1
+  val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes),
+                            tlCoh.grantTypeWidth) + 1
+  val tlNetworkPreservesPointToPointOrdering = params(TLNetworkIsOrderedP2P)
+  val tlNetworkDoesNotInterleaveBeats = true
+  val amoAluOperandBits = params(AmoAluOperandBits)
+}
+
+abstract class TLBundle extends Bundle with TileLinkParameters
+abstract 
class TLModule extends Module with TileLinkParameters
+
+/** Base trait for all TileLink channels */
+trait TileLinkChannel extends TLBundle {
+  def hasData(dummy: Int = 0): Bool
+  def hasMultibeatData(dummy: Int = 0): Bool
+}
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+trait ClientToManagerChannel extends TileLinkChannel
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+trait ManagerToClientChannel extends TileLinkChannel
+/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
+trait ClientToClientChannel extends TileLinkChannel // Unused for now
+
+/** Common signals that are used in multiple channels.
+ * These traits are useful for type parameterizing bundle wiring functions.
+ */
+
+/** Address of a cache block. */
+trait HasCacheBlockAddress extends TLBundle {
+  val addr_block = UInt(width = tlBlockAddrBits)
+
+  def conflicts(that: HasCacheBlockAddress) = this.addr_block === that.addr_block
+  def conflicts(addr: UInt) = this.addr_block === addr
+}
+
+/** Sub-block address or beat id of multi-beat data */
+trait HasTileLinkBeatId extends TLBundle {
+  val addr_beat = UInt(width = tlBeatAddrBits)
+}
+
+/** Client-side transaction id. Usually Miss Status Handling Register File index */
+trait HasClientTransactionId extends TLBundle {
+  val client_xact_id = Bits(width = tlClientXactIdBits)
+}
+
+/** Manager-side transaction id. Usually Transaction Status Handling Register File index. */
+trait HasManagerTransactionId extends TLBundle {
+  val manager_xact_id = Bits(width = tlManagerXactIdBits)
+}
+
+/** A single beat of cache block data */
+trait HasTileLinkData extends HasTileLinkBeatId {
+  val data = UInt(width = tlDataBits)
+
+  def hasData(dummy: Int = 0): Bool
+  def hasMultibeatData(dummy: Int = 0): Bool
+}
+
+/** The id of a client source or destination. Used in managers. */
+trait HasClientId extends TLBundle {
+  val client_id = UInt(width = tlClientIdBits)
+}
+
+/** TileLink channel bundle definitions */
+
+/** The Acquire channel is used to initiate coherence protocol transactions in
+ * order to gain access to a cache block's data with certain permissions
+ * enabled. Messages sent over this channel may be custom types defined by
+ * a [[uncore.CoherencePolicy]] for cached data accesses or may be built-in types
+ * used for uncached data accesses. Acquires may contain data for Put or
+ * PutAtomic built-in types. After sending an Acquire, clients must
+ * wait for a manager to send them a [[uncore.Grant]] message in response.
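+ *
+ * For illustration, a minimal sketch (the io and req_* signals here are
+ * hypothetical, but [[uncore.Get]] is the built-in factory defined below):
+ * {{{
+ *   // sketch: issue a built-in single-beat read on the Acquire channel
+ *   io.acquire.valid := req_valid
+ *   io.acquire.bits := Get(client_xact_id = UInt(0),
+ *                          addr_block = req_block,
+ *                          addr_beat = req_beat)
+ * }}}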
+ */
+class Acquire extends ClientToManagerChannel
+    with HasCacheBlockAddress
+    with HasClientTransactionId
+    with HasTileLinkData {
+  // Actual bundle fields:
+  val is_builtin_type = Bool()
+  val a_type = UInt(width = tlAcquireTypeBits)
+  val union = Bits(width = tlAcquireUnionBits)
+
+  // Utility funcs for accessing subblock union:
+  val opCodeOff = 1
+  val opSizeOff = tlMemoryOpcodeBits + opCodeOff
+  val addrByteOff = tlMemoryOperandSizeBits + opSizeOff
+  val addrByteMSB = tlByteAddrBits + addrByteOff
+  /** Hint whether to allocate the block in any intervening caches */
+  def allocate(dummy: Int = 0) = union(0)
+  /** Op code for [[uncore.PutAtomic]] operations */
+  def op_code(dummy: Int = 0) = Mux(
+    isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType),
+    M_XWR, union(opSizeOff-1, opCodeOff))
+  /** Operand size for [[uncore.PutAtomic]] */
+  def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
+  /** Byte address for [[uncore.PutAtomic]] operand */
+  def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
+  private def amo_offset(dummy: Int = 0) = addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBits/8))
+  /** Bit offset of [[uncore.PutAtomic]] operand */
+  def amo_shift_bits(dummy: Int = 0) = UInt(amoAluOperandBits)*amo_offset()
+  /** Write mask for [[uncore.Put]], [[uncore.PutBlock]], [[uncore.PutAtomic]] */
+  def wmask(dummy: Int = 0) =
+    Mux(isBuiltInType(Acquire.putAtomicType),
+      FillInterleaved(amoAluOperandBits/8, UIntToOH(amo_offset())),
+      Mux(isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType),
+        union(tlWriteMaskBits, 1),
+        UInt(0, width = tlWriteMaskBits)))
+  /** Full, beat-sized writemask */
+  def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask())
+  /** Complete physical address for block, beat or operand */
+  def addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, this.addr_byte())
+
+  // Other helper functions:
+  /** Message type equality */
+  def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===?
+
+  /** Is this message a built-in or custom type */
+  def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+  /** Is this message a particular built-in type */
+  def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t
+
+  /** Does this message refer to subblock operands using info in the Acquire.union subbundle */
+  def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesOnSubBlocks.contains(a_type)
+
+  /** Is this message a built-in prefetch message */
+  def isPrefetch(dummy: Int = 0): Bool = isBuiltInType() && is(Acquire.prefetchType)
+
+  /** Does this message contain data? Assumes that no custom message types have data. */
+  def hasData(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesWithData.contains(a_type)
+
+  /** Does this message contain multiple beats of data? Assumes that no custom message types have data. */
+  def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() &&
+                                               Acquire.typesWithMultibeatData.contains(a_type)
+
+  /** Does this message require the manager to probe the very client that sent it?
+   * Needed if multiple caches are attached to the same port.
+   */
+  def requiresSelfProbe(dummy: Int = 0) = Bool(false)
+
+  /** Mapping between each built-in Acquire type (defined in companion object)
+   * and a built-in Grant type.
+   */
+  def getBuiltInGrantType(dummy: Int = 0): UInt = {
+    MuxLookup(this.a_type, Grant.putAckType, Array(
+      Acquire.getType -> Grant.getDataBeatType,
+      Acquire.getBlockType -> Grant.getDataBlockType,
+      Acquire.putType -> Grant.putAckType,
+      Acquire.putBlockType -> Grant.putAckType,
+      Acquire.putAtomicType -> Grant.getDataBeatType,
+      Acquire.prefetchType -> Grant.prefetchAckType))
+  }
+}
+
+/** [[uncore.Acquire]] with an extra field stating its source id */
+class AcquireFromSrc extends Acquire with HasClientId
+
+/** Contains definitions of the built-in Acquire types and a factory
+ * for [[uncore.Acquire]]
+ *
+ * In general you should avoid using this factory directly and use
+ * [[uncore.ClientMetadata.makeAcquire]] for custom cached Acquires and
+ * [[uncore.Get]], [[uncore.Put]], etc. for built-in uncached Acquires.
+ *
+ * @param is_builtin_type built-in or custom type message?
+ * @param a_type built-in type enum or custom type enum
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param data data being put outwards
+ * @param union additional fields used for uncached types
+ */
+object Acquire {
+  val nBuiltInTypes = 5
+  //TODO: Use Enum
+  def getType = UInt("b000") // Get a single beat of data
+  def getBlockType = UInt("b001") // Get a whole block of data
+  def putType = UInt("b010") // Put a single beat of data
+  def putBlockType = UInt("b011") // Put a whole block of data
+  def putAtomicType = UInt("b100") // Perform an atomic memory op
+  def prefetchType = UInt("b101") // Prefetch a whole block of data
+  def typesWithData = Vec(putType, putBlockType, putAtomicType)
+  def typesWithMultibeatData = Vec(putBlockType)
+  def typesOnSubBlocks = Vec(putType, getType, putAtomicType)
+
+  def fullWriteMask = SInt(-1, width = new Acquire().tlWriteMaskBits).toUInt
+
+  // Most generic constructor
+  def apply(
+      is_builtin_type: Bool,
+      a_type: Bits,
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0),
+      union: UInt = UInt(0)): Acquire = {
+    val acq = new Acquire
+    acq.is_builtin_type := is_builtin_type
+    acq.a_type := a_type
+    acq.client_xact_id := client_xact_id
+    acq.addr_block := addr_block
+    acq.addr_beat := addr_beat
+    acq.data := data
+    acq.union := union
+    acq
+  }
+  // Copy constructor
+  def apply(a: Acquire): Acquire = {
+    val acq = new Acquire
+    acq := a
+    acq
+  }
+}
+
+/** Get a single beat of data from the outer memory hierarchy
+ *
+ * The client can hint whether the block containing this beat should be
+ * allocated in the intervening levels of the hierarchy.
+ *
+ * @param client_xact_id client's transaction id
+ * @param addr_block address of the cache block
+ * @param addr_beat sub-block address (which beat)
+ * @param alloc hint whether the block should be allocated in intervening caches
+ */
+object Get {
+  def apply(
+      client_xact_id: UInt,
+      addr_block: UInt,
+      addr_beat: UInt,
+      alloc: Bool = Bool(true)): Acquire = {
+    Acquire(
+      is_builtin_type = Bool(true),
+      a_type = Acquire.getType,
+      client_xact_id = client_xact_id,
+      addr_block = addr_block,
+      addr_beat = addr_beat,
+      union = Cat(M_XRD, alloc))
+  }
+}
+
+/** Get a whole cache block of data from the outer memory hierarchy
+ *
+ * The client can hint whether the block should be allocated in the
+ * intervening levels of the hierarchy.
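+ *
+ * A brief usage sketch (hypothetical miss-handling code; miss_block is an
+ * assumed signal holding the missing block's address):
+ * {{{
+ *   // sketch: request a full-block refill, hinting no allocation en route
+ *   val refill_acq = GetBlock(client_xact_id = UInt(0),
+ *                             addr_block = miss_block,
+ *                             alloc = Bool(false))
+ * }}}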
+ * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param alloc hint whether the block should be allocated in intervening caches + */ +object GetBlock { + def apply( + client_xact_id: UInt = UInt(0), + addr_block: UInt, + alloc: Bool = Bool(true)): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.getBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + union = Cat(M_XRD, alloc)) + } +} + +/** Prefetch a cache block into the next-outermost level of the memory hierarchy + * with read permissions. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + */ +object GetPrefetch { + def apply( + client_xact_id: UInt, + addr_block: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.prefetchType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = UInt(0), + union = Cat(M_XRD, Bool(true))) + } +} + +/** Put a single beat of data into the outer memory hierarchy + * + * The block will be allocated in the next-outermost level of the hierarchy. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat) + * @param data data being refilled to the original requestor + * @param wmask per-byte write mask for this beat + */ +object Put { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + wmask: UInt = Acquire.fullWriteMask): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putType, + addr_block = addr_block, + addr_beat = addr_beat, + client_xact_id = client_xact_id, + data = data, + union = Cat(wmask, Bool(true))) + } +} + +/** Put a whole cache block of data into the outer memory hierarchy + * + * If the write mask is not full, the block will be allocated in the + * next-outermost level of the hierarchy. If the write mask is full, the + * client can hint whether the block should be allocated or not. + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (which beat of several) + * @param data data being refilled to the original requestor + * @param wmask per-byte write mask for this beat + * @param alloc hint whether the block should be allocated in intervening caches + */ +object PutBlock { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + wmask: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Cat(wmask, (wmask != Acquire.fullWriteMask))) + } + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + data: UInt, + alloc: Bool = Bool(true)): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putBlockType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Cat(Acquire.fullWriteMask, alloc)) + } +} + +/** Prefetch a cache block into the next-outermost level of the memory hierarchy + * with write permissions. 
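+ *
+ * A brief usage sketch (hypothetical prefetcher code; predicted_block is an
+ * assumed signal):
+ * {{{
+ *   // sketch: hint that a store to predicted_block is coming
+ *   val pf_acq = PutPrefetch(client_xact_id = UInt(1), addr_block = predicted_block)
+ * }}}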
+ * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + */ +object PutPrefetch { + def apply( + client_xact_id: UInt, + addr_block: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.prefetchType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = UInt(0), + union = Cat(M_XWR, Bool(true))) + } +} + +/** Perform an atomic memory operation in the next-outermost level of the memory hierarchy + * + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat sub-block address (within which beat) + * @param addr_byte sub-block address (which byte) + * @param atomic_opcode {swap, add, xor, and, min, max, minu, maxu} from [[uncore.MemoryOpConstants]] + * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]] + * @param data source operand data + */ +object PutAtomic { + def apply( + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt, + addr_byte: UInt, + atomic_opcode: UInt, + operand_size: UInt, + data: UInt): Acquire = { + Acquire( + is_builtin_type = Bool(true), + a_type = Acquire.putAtomicType, + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data, + union = Cat(addr_byte, operand_size, atomic_opcode, Bool(true))) + } +} + +/** The Probe channel is used to force clients to release data or cede permissions + * on a cache block. Clients respond to Probes with [[uncore.Release]] messages. + * The available types of Probes are customized by a particular + * [[uncore.CoherencePolicy]]. + */ +class Probe extends ManagerToClientChannel + with HasCacheBlockAddress { + val p_type = UInt(width = tlCoh.probeTypeWidth) + + def is(t: UInt) = p_type === t + def hasData(dummy: Int = 0) = Bool(false) + def hasMultibeatData(dummy: Int = 0) = Bool(false) +} + +/** [[uncore.Probe]] with an extra field stating its destination id */ +class ProbeToDst extends Probe with HasClientId + +/** Contains factories for [[uncore.Probe]] and [[uncore.ProbeToDst]] + * + * In general you should avoid using these factories directly and use + * [[uncore.ManagerMetadata.makeProbe(UInt,Acquire)*]] instead. + * + * @param dst id of client to which probe should be sent + * @param p_type custom probe type + * @param addr_block address of the cache block + */ +object Probe { + def apply(p_type: UInt, addr_block: UInt): Probe = { + val prb = new Probe + prb.p_type := p_type + prb.addr_block := addr_block + prb + } + def apply(dst: UInt, p_type: UInt, addr_block: UInt): ProbeToDst = { + val prb = new ProbeToDst + prb.client_id := dst + prb.p_type := p_type + prb.addr_block := addr_block + prb + } +} + +/** The Release channel is used to release data or permission back to the manager + * in response to [[uncore.Probe]] messages. It can also be used to voluntarily + * write back data, for example in the event that dirty data must be evicted on + * a cache miss. The available types of Release messages are always customized by + * a particular [[uncore.CoherencePolicy]]. Releases may contain data or may be + * simple acknowledgements. Voluntary Releases are acknowledged with [[uncore.Grant Grants]]. 
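+ *
+ * For illustration, a sketch of an eviction writeback (the victim_* signals
+ * and beat counter are hypothetical; the factory is the one defined on
+ * [[uncore.ClientMetadata]]):
+ * {{{
+ *   // sketch: stream out one beat per cycle of a dirty victim block
+ *   io.release.bits := victim_meta.makeVoluntaryWriteback(
+ *                        client_xact_id = wb_xact_id,
+ *                        addr_block = victim_block,
+ *                        addr_beat = beat_cnt,
+ *                        data = victim_data(beat_cnt))
+ * }}}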
+ */ +class Release extends ClientToManagerChannel + with HasCacheBlockAddress + with HasClientTransactionId + with HasTileLinkData { + val r_type = UInt(width = tlCoh.releaseTypeWidth) + val voluntary = Bool() + + // Helper funcs + def is(t: UInt) = r_type === t + def hasData(dummy: Int = 0) = tlCoh.releaseTypesWithData.contains(r_type) + //TODO: Assumes all releases write back full cache blocks: + def hasMultibeatData(dummy: Int = 0) = Bool(tlDataBeats > 1) && tlCoh.releaseTypesWithData.contains(r_type) + def isVoluntary(dummy: Int = 0) = voluntary + def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering) +} + +/** [[uncore.Release]] with an extra field stating its source id */ +class ReleaseFromSrc extends Release with HasClientId + +/** Contains a [[uncore.Release]] factory + * + * In general you should avoid using this factory directly and use + * [[uncore.ClientMetadata.makeRelease]] instead. + * + * @param voluntary is this a voluntary writeback + * @param r_type type enum defined by coherence protocol + * @param client_xact_id client's transaction id + * @param addr_block address of the cache block + * @param addr_beat beat id of the data + * @param data data being written back + */ +object Release { + def apply( + voluntary: Bool, + r_type: UInt, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)): Release = { + val rel = new Release + rel.r_type := r_type + rel.client_xact_id := client_xact_id + rel.addr_block := addr_block + rel.addr_beat := addr_beat + rel.data := data + rel.voluntary := voluntary + rel + } +} + +/** The Grant channel is used to refill data or grant permissions requested of the + * manager agent via an [[uncore.Acquire]] message. It is also used to acknowledge + * the receipt of voluntary writeback from clients in the form of [[uncore.Release]] + * messages. There are built-in Grant messages used for Gets and Puts, and + * coherence policies may also define custom Grant types. Grants may contain data + * or may be simple acknowledgements. Grants are responded to with [[uncore.Finish]]. 
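+ *
+ * For illustration, a client-side sketch (io here is a hypothetical port;
+ * requiresAck and makeFinish are the helpers defined below):
+ * {{{
+ *   // sketch: ack a received Grant when the network requires it
+ *   when (io.grant.fire() && io.grant.bits.requiresAck()) {
+ *     io.finish.valid := Bool(true)
+ *     io.finish.bits := io.grant.bits.makeFinish()
+ *   }
+ * }}}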
+ */
+class Grant extends ManagerToClientChannel
+    with HasTileLinkData
+    with HasClientTransactionId
+    with HasManagerTransactionId {
+  val is_builtin_type = Bool()
+  val g_type = UInt(width = tlGrantTypeBits)
+
+  // Helper funcs
+  def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+  def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t
+  def is(t: UInt): Bool = g_type === t
+  def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(),
+    Grant.typesWithData.contains(g_type),
+    tlCoh.grantTypesWithData.contains(g_type))
+  def hasMultibeatData(dummy: Int = 0): Bool =
+    Bool(tlDataBeats > 1) && Mux(isBuiltInType(),
+      Grant.typesWithMultibeatData.contains(g_type),
+      tlCoh.grantTypesWithData.contains(g_type))
+  def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType)
+  def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary()
+  def makeFinish(dummy: Int = 0): Finish = {
+    val f = Bundle(new Finish, { case TLMaxManagerXacts => tlMaxManagerXacts })
+    f.manager_xact_id := this.manager_xact_id
+    f
+  }
+}
+
+/** [[uncore.Grant]] with an extra field stating its destination */
+class GrantToDst extends Grant with HasClientId
+
+/** Contains definitions of the built-in grant types and factories
+ * for [[uncore.Grant]] and [[uncore.GrantToDst]]
+ *
+ * In general you should avoid using these factories directly and use
+ * [[uncore.ManagerMetadata.makeGrant(uncore.AcquireFromSrc* makeGrant]] instead.
+ *
+ * @param dst id of client to which grant should be sent
+ * @param is_builtin_type built-in or custom type message?
+ * @param g_type built-in type enum or custom type enum
+ * @param client_xact_id client's transaction id
+ * @param manager_xact_id manager's transaction id
+ * @param addr_beat beat id of the data
+ * @param data data being refilled to the original requestor
+ */
+object Grant {
+  val nBuiltInTypes = 5
+  def voluntaryAckType = UInt("b000") // For acking Releases
+  def prefetchAckType = UInt("b001") // For acking any kind of Prefetch
+  def putAckType = UInt("b011") // For acking any kind of non-prefetch Put
+  def getDataBeatType = UInt("b100") // Supplying a single beat of Get
+  def getDataBlockType = UInt("b101") // Supplying all beats of a GetBlock
+  def typesWithData = Vec(getDataBlockType, getDataBeatType)
+  def typesWithMultibeatData = Vec(getDataBlockType)
+
+  def apply(
+      is_builtin_type: Bool,
+      g_type: UInt,
+      client_xact_id: UInt,
+      manager_xact_id: UInt,
+      addr_beat: UInt,
+      data: UInt): Grant = {
+    val gnt = new Grant
+    gnt.is_builtin_type := is_builtin_type
+    gnt.g_type := g_type
+    gnt.client_xact_id := client_xact_id
+    gnt.manager_xact_id := manager_xact_id
+    gnt.addr_beat := addr_beat
+    gnt.data := data
+    gnt
+  }
+
+  def apply(
+      dst: UInt,
+      is_builtin_type: Bool,
+      g_type: UInt,
+      client_xact_id: UInt,
+      manager_xact_id: UInt,
+      addr_beat: UInt = UInt(0),
+      data: UInt = UInt(0)): GrantToDst = {
+    val gnt = new GrantToDst
+    gnt.client_id := dst
+    gnt.is_builtin_type := is_builtin_type
+    gnt.g_type := g_type
+    gnt.client_xact_id := client_xact_id
+    gnt.manager_xact_id := manager_xact_id
+    gnt.addr_beat := addr_beat
+    gnt.data := data
+    gnt
+  }
+}
+
+/** The Finish channel is used to provide a global ordering of transactions
+ * in networks that do not guarantee point-to-point ordering of messages.
+ * A Finish message is sent as acknowledgement of receipt of a [[uncore.Grant]].
+ * When a Finish message is received, a manager knows it is safe to begin + * processing other transactions that touch the same cache block. + */ +class Finish extends ClientToManagerChannel with HasManagerTransactionId { + def hasData(dummy: Int = 0) = Bool(false) + def hasMultibeatData(dummy: Int = 0) = Bool(false) +} + +/** Complete IO definition for incoherent TileLink, including networking headers */ +class UncachedTileLinkIO extends TLBundle { + val acquire = new DecoupledIO(new LogicalNetworkIO(new Acquire)) + val grant = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip + val finish = new DecoupledIO(new LogicalNetworkIO(new Finish)) +} + +/** Complete IO definition for coherent TileLink, including networking headers */ +class TileLinkIO extends UncachedTileLinkIO { + val probe = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip + val release = new DecoupledIO(new LogicalNetworkIO(new Release)) +} + +/** This version of UncachedTileLinkIO does not contain network headers. + * It is intended for use within client agents. + * + * Headers are provided in the top-level that instantiates the clients and network, + * probably using a [[uncore.ClientTileLinkNetworkPort]] module. + * By eliding the header subbundles within the clients we can enable + * hierarchical P-and-R while minimizing unconnected port errors in GDS. + * + * Secondly, this version of the interface elides [[uncore.Finish]] messages, with the + * assumption that a [[uncore.FinishUnit]] has been coupled to the TileLinkIO port + * to deal with acking received [[uncore.Grant Grants]]. + */ +class ClientUncachedTileLinkIO extends TLBundle { + val acquire = new DecoupledIO(new Acquire) + val grant = new DecoupledIO(new Grant).flip +} + +/** This version of TileLinkIO does not contain network headers. + * It is intended for use within client agents. + */ +class ClientTileLinkIO extends ClientUncachedTileLinkIO { + val probe = new DecoupledIO(new Probe).flip + val release = new DecoupledIO(new Release) +} + +/** This version of TileLinkIO does not contain network headers, but + * every channel does include an extra client_id subbundle. + * It is intended for use within manager agents. + * + * Managers need to track where [[uncore.Acquire]] and [[uncore.Release]] messages + * originated so that they can send a [[uncore.Grant]] to the right place. + * Similarly they must be able to issue Probes to particular clients. + * However, we'd still prefer to have [[uncore.ManagerTileLinkNetworkPort]] fill in + * the header.src to enable hierarchical P-and-R of the managers. Additionally, + * coherent clients might be mapped to random network port ids, and we'll leave it to the + * [[uncore.ManagerTileLinkNetworkPort]] to apply the correct mapping. Managers do need to + * see Finish messages so they know when to allow new transactions on a cache + * block to proceed.
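+ * + * A wiring sketch at the level that instantiates the network; the module and port names (agent, network, managerId, idConvert) are hypothetical: + * {{{ + * val port = Module(new ManagerTileLinkNetworkPort(managerId, idConvert)) + * port.io.manager <> agent.io.inner + * port.io.network <> network.io.ports(managerId) + * }}}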
+ */ +class ManagerTileLinkIO extends TLBundle { + val acquire = new DecoupledIO(new AcquireFromSrc).flip + val grant = new DecoupledIO(new GrantToDst) + val finish = new DecoupledIO(new Finish).flip + val probe = new DecoupledIO(new ProbeToDst) + val release = new DecoupledIO(new ReleaseFromSrc).flip +} + +/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */ +object TileLinkIOWrapper { + def apply(utl: ClientUncachedTileLinkIO, p: Parameters): ClientTileLinkIO = { + val conv = Module(new ClientTileLinkIOWrapper)(p) + conv.io.in <> utl + conv.io.out + } + def apply(utl: ClientUncachedTileLinkIO): ClientTileLinkIO = { + val conv = Module(new ClientTileLinkIOWrapper) + conv.io.in <> utl + conv.io.out + } + def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl + def apply(utl: UncachedTileLinkIO, p: Parameters): TileLinkIO = { + val conv = Module(new TileLinkIOWrapper)(p) + conv.io.in <> utl + conv.io.out + } + def apply(utl: UncachedTileLinkIO): TileLinkIO = { + val conv = Module(new TileLinkIOWrapper) + conv.io.in <> utl + conv.io.out + } + def apply(tl: TileLinkIO): TileLinkIO = tl +} + +class TileLinkIOWrapper extends TLModule { + val io = new Bundle { + val in = new UncachedTileLinkIO().flip + val out = new TileLinkIO + } + io.out.acquire <> io.in.acquire + io.out.grant <> io.in.grant + io.out.finish <> io.in.finish + io.out.probe.ready := Bool(true) + io.out.release.valid := Bool(false) +} + +class ClientTileLinkIOWrapper extends TLModule { + val io = new Bundle { + val in = new ClientUncachedTileLinkIO().flip + val out = new ClientTileLinkIO + } + io.out.acquire <> io.in.acquire + io.out.grant <> io.in.grant + io.out.probe.ready := Bool(true) + io.out.release.valid := Bool(false) +} + +/** Used to track metadata for transactions where multiple secondary misses have been merged + * and handled by a single transaction tracker. + */ +class SecondaryMissInfo extends TLBundle // TODO: add a_type to merge e.g. Get+GetBlocks, and/or HasClientId + with HasTileLinkBeatId + with HasClientTransactionId + +/** A helper module that automatically issues [[uncore.Finish]] messages in response + * to [[uncore.Grant Grants]] that it receives from a manager and forwards to a client + */ +class FinishUnit(srcId: Int = 0, outstanding: Int = 2) extends TLModule with HasDataBeatCounters { + val io = new Bundle { + val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip + val refill = Decoupled(new Grant) + val finish = Decoupled(new LogicalNetworkIO(new Finish)) + val ready = Bool(OUTPUT) + } + + val g = io.grant.bits.payload + + if(tlNetworkPreservesPointToPointOrdering) { + io.finish.valid := Bool(false) + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := io.refill.ready + io.ready := Bool(true) + } else { + // We only want to send Finishes after we have collected all beats of + // a multibeat Grant. But Grants from multiple managers or transactions may + // get interleaved, so we may need a counter for each.
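+ // A single incoming-beat counter (the first branch below) suffices when the + // network never interleaves beats of different Grants; otherwise we keep one + // counter per possible client_xact_id and watch for any of them to finish.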
+ val done = if(tlNetworkDoesNotInterleaveBeats) { + connectIncomingDataBeatCounterWithHeader(io.grant) + } else { + val entries = 1 << tlClientXactIdBits + def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id + assert(getId(io.grant.bits) < UInt(entries), "Not enough grant beat counters, only " + entries + " entries.") + connectIncomingDataBeatCountersWithHeader(io.grant, entries, getId).reduce(_||_) + } + val q = Module(new FinishQueue(outstanding)) + q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done) + q.io.enq.bits.fin := g.makeFinish() + q.io.enq.bits.dst := io.grant.bits.header.src + + io.finish.bits.header.src := UInt(srcId) + io.finish.bits.header.dst := q.io.deq.bits.dst + io.finish.bits.payload := q.io.deq.bits.fin + io.finish.valid := q.io.deq.valid + q.io.deq.ready := io.finish.ready + + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready + io.ready := q.io.enq.ready + } +} + +class FinishQueueEntry extends TLBundle { + val fin = new Finish + val dst = UInt(width = log2Up(params(LNEndpoints))) +} + +class FinishQueue(entries: Int) extends Queue(new FinishQueueEntry, entries) + +/** A port to convert [[uncore.ClientTileLinkIO]].flip into [[uncore.TileLinkIO]] + * + * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages, + * calculating header.dst and filling in header.src. + * Strips headers from [[uncore.Probe Probes]]. + * Responds to [[uncore.Grant]] by automatically issuing [[uncore.Finish]] to the granting managers. + * + * @param clientId network port id of this agent + * @param addrConvert how a physical address maps to a destination manager port id + */ +class ClientTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt) extends TLModule { + val io = new Bundle { + val client = new ClientTileLinkIO().flip + val network = new TileLinkIO + } + + val finisher = Module(new FinishUnit(clientId)) + finisher.io.grant <> io.network.grant + io.network.finish <> finisher.io.finish + + val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert) + val rel_with_header = ClientTileLinkHeaderCreator(io.client.release, clientId, addrConvert) + val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe) + val gnt_without_header = finisher.io.refill + + io.network.acquire.bits := acq_with_header.bits + io.network.acquire.valid := acq_with_header.valid && finisher.io.ready + acq_with_header.ready := io.network.acquire.ready && finisher.io.ready + io.network.release <> rel_with_header + io.client.probe <> prb_without_header + io.client.grant <> gnt_without_header +} + +object ClientTileLinkHeaderCreator { + def apply[T <: ClientToManagerChannel with HasCacheBlockAddress]( + in: DecoupledIO[T], + clientId: Int, + addrConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = { + val out = new DecoupledIO(new LogicalNetworkIO(in.bits.clone)).asDirectionless + out.bits.payload := in.bits + out.bits.header.src := UInt(clientId) + out.bits.header.dst := addrConvert(in.bits.addr_block) + out.valid := in.valid + in.ready := out.ready + out + } +} + +/** A port to convert [[uncore.ManagerTileLinkIO]].flip into [[uncore.TileLinkIO]].flip + * + * Creates network headers for [[uncore.Probe]] and [[uncore.Grant]] messages, + * calculating header.dst and filling in header.src.
+ * Strips headers from [[uncore.Acquire]], [[uncore.Release]] and [[uncore.Finish]], + * but supplies client_id instead. + * + * @param managerId the network port id of this agent + * @param idConvert how a sharer id maps to a destination client port id + */ +class ManagerTileLinkNetworkPort(managerId: Int, idConvert: UInt => UInt) extends TLModule { + val io = new Bundle { + val manager = new ManagerTileLinkIO().flip + val network = new TileLinkIO().flip + } + io.network.grant <> ManagerTileLinkHeaderCreator(io.manager.grant, managerId, (u: UInt) => u) + io.network.probe <> ManagerTileLinkHeaderCreator(io.manager.probe, managerId, idConvert) + io.manager.acquire.bits.client_id := io.network.acquire.bits.header.src + io.manager.acquire <> DecoupledLogicalNetworkIOUnwrapper(io.network.acquire) + io.manager.release.bits.client_id := io.network.release.bits.header.src + io.manager.release <> DecoupledLogicalNetworkIOUnwrapper(io.network.release) + io.manager.finish <> DecoupledLogicalNetworkIOUnwrapper(io.network.finish) +} + +object ManagerTileLinkHeaderCreator { + def apply[T <: ManagerToClientChannel with HasClientId]( + in: DecoupledIO[T], + managerId: Int, + idConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = { + val out = new DecoupledIO(new LogicalNetworkIO(in.bits.clone)).asDirectionless + out.bits.payload := in.bits + out.bits.header.src := UInt(managerId) + out.bits.header.dst := idConvert(in.bits.client_id) + out.valid := in.valid + in.ready := out.ready + out + } +} + +/** Struct for describing per-channel queue depths */ +case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int) + +/** Optionally enqueues each [[uncore.TileLinkChannel]] individually */ +class TileLinkEnqueuer(depths: TileLinkDepths) extends Module { + val io = new Bundle { + val client = new TileLinkIO().flip + val manager = new TileLinkIO + } + io.manager.acquire <> (if(depths.acq > 0) Queue(io.client.acquire, depths.acq) else io.client.acquire) + io.client.probe <> (if(depths.prb > 0) Queue(io.manager.probe, depths.prb) else io.manager.probe) + io.manager.release <> (if(depths.rel > 0) Queue(io.client.release, depths.rel) else io.client.release) + io.client.grant <> (if(depths.gnt > 0) Queue(io.manager.grant, depths.gnt) else io.manager.grant) + io.manager.finish <> (if(depths.fin > 0) Queue(io.client.finish, depths.fin) else io.client.finish) +} + +object TileLinkEnqueuer { + def apply(in: TileLinkIO, depths: TileLinkDepths)(p: Parameters): TileLinkIO = { + val t = Module(new TileLinkEnqueuer(depths))(p) + t.io.client <> in + t.io.manager + } + def apply(in: TileLinkIO, depth: Int)(p: Parameters): TileLinkIO = { + apply(in, TileLinkDepths(depth, depth, depth, depth, depth))(p) + } +} + +/** Utility functions for constructing TileLinkIO arbiters */ +trait TileLinkArbiterLike extends TileLinkParameters { + // Some shorthand type variables + type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId + type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId + type ClientSourcedWithIdAndData = ClientToManagerChannel with HasClientTransactionId with HasTileLinkData + + val arbN: Int // The number of ports on the client side + + // These abstract funcs are filled in depending on whether the arbiter mucks with the + // outgoing client ids to track sourcing and then needs to revert them on the way back + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits + def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits + def 
arbIdx(in: ManagerSourcedWithId): UInt + + // The following functions are all wiring helpers for each of the different types of TileLink channels + + def hookupClientSource[M <: ClientSourcedWithIdAndData]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData() + val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, tlDataBeats, Some(hasData _))) + clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.payload.client_xact_id := clientSourcedClientXactId(req.bits.payload, id) + req.ready := arb.ready + }} + arb.io.out <> mngr + } + + def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData]( + clts: Seq[DecoupledIO[M]], + mngr: DecoupledIO[M]) { + def hasData(m: M) = m.hasMultibeatData() + val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, tlDataBeats, Some(hasData _))) + clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => { + arb.valid := req.valid + arb.bits := req.bits + arb.bits.client_xact_id := clientSourcedClientXactId(req.bits, id) + req.ready := arb.ready + }} + arb.io.out <> mngr + } + + def hookupManagerSourceWithHeader[M <: ManagerToClientChannel]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (mngr.bits.header.dst === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + } + } + + def hookupManagerSourceWithId[M <: ManagerSourcedWithId]( + clts: Seq[DecoupledIO[LogicalNetworkIO[M]]], + mngr: DecoupledIO[LogicalNetworkIO[M]]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (arbIdx(mngr.bits.payload) === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload) + } + } + + def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId]( + clts: Seq[DecoupledIO[M]], + mngr: DecoupledIO[M]) { + mngr.ready := Bool(false) + for (i <- 0 until arbN) { + clts(i).valid := Bool(false) + when (arbIdx(mngr.bits) === UInt(i)) { + clts(i).valid := mngr.valid + mngr.ready := clts(i).ready + } + clts(i).bits := mngr.bits + clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits) + } + } + + def hookupManagerSourceBroadcast[M <: Data](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { + clts.map{ _.valid := mngr.valid } + clts.map{ _.bits := mngr.bits } + mngr.ready := clts.map(_.ready).reduce(_&&_) + } + + def hookupFinish[M <: LogicalNetworkIO[Finish]](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) { + val arb = Module(new RRArbiter(mngr.bits.clone, arbN)) + arb.io.in <> clts + arb.io.out <> mngr + } +} + +/** Abstract base class for any Arbiters that have UncachedTileLinkIOs */ +abstract class UncachedTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike { + val io = new Bundle { + val in = Vec.fill(arbN){new UncachedTileLinkIO}.flip + val out = new UncachedTileLinkIO + } + hookupClientSource(io.in.map(_.acquire), io.out.acquire) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + +/** Abstract base class for any Arbiters that have cached TileLinkIOs */ +abstract class TileLinkIOArbiter(val arbN: Int) extends Module with 
TileLinkArbiterLike { + val io = new Bundle { + val in = Vec.fill(arbN){new TileLinkIO}.flip + val out = new TileLinkIO + } + hookupClientSource(io.in.map(_.acquire), io.out.acquire) + hookupClientSource(io.in.map(_.release), io.out.release) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + +/** Appends the port index of the arbiter to the client_xact_id */ +trait AppendsArbiterId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = + Cat(in.client_xact_id, UInt(id, log2Up(arbN))) + def managerSourcedClientXactId(in: ManagerSourcedWithId) = + in.client_xact_id >> UInt(log2Up(arbN)) + def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id(log2Up(arbN)-1,0).toUInt +} + +/** Uses the client_xact_id as is (assumes it has been set to port index) */ +trait PassesId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id + def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id + def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id +} + +/** Overwrites some default client_xact_id with the port idx */ +trait UsesNewId extends TileLinkArbiterLike { + def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN)) + def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0) + def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id +} + +// Now we can mix in the various id-generation traits to make concrete arbiter classes +class UncachedTileLinkIOArbiterThatAppendsArbiterId(val n: Int) extends UncachedTileLinkIOArbiter(n) with AppendsArbiterId +class UncachedTileLinkIOArbiterThatPassesId(val n: Int) extends UncachedTileLinkIOArbiter(n) with PassesId +class UncachedTileLinkIOArbiterThatUsesNewId(val n: Int) extends UncachedTileLinkIOArbiter(n) with UsesNewId +class TileLinkIOArbiterThatAppendsArbiterId(val n: Int) extends TileLinkIOArbiter(n) with AppendsArbiterId +class TileLinkIOArbiterThatPassesId(val n: Int) extends TileLinkIOArbiter(n) with PassesId +class TileLinkIOArbiterThatUsesNewId(val n: Int) extends TileLinkIOArbiter(n) with UsesNewId + +/** Concrete uncached client-side arbiter that appends the arbiter's port id to client_xact_id */ +class ClientUncachedTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike with AppendsArbiterId { + val io = new Bundle { + val in = Vec.fill(arbN){new ClientUncachedTileLinkIO}.flip + val out = new ClientUncachedTileLinkIO + } + hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) + hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant) +} + +/** Concrete client-side arbiter that appends the arbiter's port id to client_xact_id */ +class ClientTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike with AppendsArbiterId { + val io = new Bundle { + val in = Vec.fill(arbN){new ClientTileLinkIO}.flip + val out = new ClientTileLinkIO + } + hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) + hookupClientSourceHeaderless(io.in.map(_.release), io.out.release) + hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) + hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant) +} + +/** Utility trait containing wiring functions to keep track of how many data beats have + * been sent or received over a particular [[uncore.TileLinkChannel]] or pair of channels.
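+ * + * A sketch of waiting for a multibeat refill to complete; the port and state names (io.grant, state, s_meta_write) are hypothetical: + * {{{ + * val refill_done = connectIncomingDataBeatCounter(io.grant) + * when (refill_done) { state := s_meta_write } + * }}}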
+ * + * Won't count message types that don't have data. + * Used in [[uncore.XactTracker]] and [[uncore.FinishUnit]]. + */ +trait HasDataBeatCounters { + type HasBeat = TileLinkChannel with HasTileLinkBeatId + + /** Returns the current count on this channel and when a message is done + * @param inc increment the counter (usually .valid or .fire()) + * @param data the actual channel data + * @param beat count to return for single-beat messages + */ + def connectDataBeatCounter[S <: TileLinkChannel](inc: Bool, data: S, beat: UInt) = { + val multi = data.hasMultibeatData() + val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats) + val cnt = Mux(multi, multi_cnt, beat) + val done = Mux(multi, multi_done, inc) + (cnt, done) + } + + /** Counter for beats on outgoing [[chisel.DecoupledIO]] */ + def connectOutgoingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T], beat: UInt = UInt(0)): (UInt, Bool) = + connectDataBeatCounter(in.fire(), in.bits, beat) + + /** Returns done but not cnt. Use the addr_beat subbundle instead of cnt for beats on + * incoming channels in case of network reordering. + */ + def connectIncomingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T]): Bool = + connectDataBeatCounter(in.fire(), in.bits, UInt(0))._2 + + /** Counter for beats on incoming DecoupledIO[LogicalNetworkIO[]]s; returns done */ + def connectIncomingDataBeatCounterWithHeader[T <: TileLinkChannel](in: DecoupledIO[LogicalNetworkIO[T]]): Bool = + connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2 + + /** If the network might interleave beats from different messages, we need a Vec of counters, + * one for every outstanding message id that might be interleaved. + * + * @param getId mapping from Message to counter id + */ + def connectIncomingDataBeatCountersWithHeader[T <: TileLinkChannel with HasClientTransactionId]( + in: DecoupledIO[LogicalNetworkIO[T]], + entries: Int, + getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = { + Vec((0 until entries).map { i => + connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2 + }) + } + + /** Provides counters on two channels, as well as a meta-counter that tracks how many + * messages have been sent over the up channel but not yet responded to over the down channel + * + * @param max max number of outstanding ups with no down + * @param up outgoing channel + * @param down incoming channel + * @param beat overrides cnts on single-beat messages + * @param track whether up's message should be tracked + * @return a tuple containing whether there are outstanding messages, up's count, + * up's done, down's count, down's done + */ + def connectTwoWayBeatCounter[T <: TileLinkChannel, S <: TileLinkChannel]( + max: Int, + up: DecoupledIO[T], + down: DecoupledIO[S], + beat: UInt = UInt(0), + track: T => Bool = (t: T) => Bool(true)): (Bool, UInt, Bool, UInt, Bool) = { + val cnt = Reg(init = UInt(0, width = log2Up(max+1))) + val (up_idx, up_done) = connectDataBeatCounter(up.fire(), up.bits, beat) + val (down_idx, down_done) = connectDataBeatCounter(down.fire(), down.bits, beat) + val do_inc = up_done && track(up.bits) + val do_dec = down_done + cnt := Mux(do_dec, + Mux(do_inc, cnt, cnt - UInt(1)), + Mux(do_inc, cnt + UInt(1), cnt)) + (cnt > UInt(0), up_idx, up_done, down_idx, down_done) + } +} diff --git a/uncore/src/main/scala/uncore.scala b/uncore/src/main/scala/uncore.scala new file mode 100644 index 00000000..d7573aec --- /dev/null +++ b/uncore/src/main/scala/uncore.scala @@ -0,0 +1,129 @@ +// See LICENSE for 
license details. + +package uncore +import Chisel._ + +case object NReleaseTransactors extends Field[Int] +case object NProbeTransactors extends Field[Int] +case object NAcquireTransactors extends Field[Int] + +trait CoherenceAgentParameters extends UsesParameters { + val nReleaseTransactors = 1 + val nAcquireTransactors = params(NAcquireTransactors) + val nTransactors = nReleaseTransactors + nAcquireTransactors + def outerTLParams = params.alterPartial({ case TLId => params(OuterTLId)}) + val outerDataBeats = outerTLParams(TLDataBeats) + val outerDataBits = outerTLParams(TLDataBits) + val outerBeatAddrBits = log2Up(outerDataBeats) + val outerByteAddrBits = log2Up(outerDataBits/8) + def innerTLParams = params.alterPartial({case TLId => params(InnerTLId)}) + val innerDataBeats = innerTLParams(TLDataBeats) + val innerDataBits = innerTLParams(TLDataBits) + val innerBeatAddrBits = log2Up(innerDataBeats) + val innerByteAddrBits = log2Up(innerDataBits/8) + require(outerDataBeats == innerDataBeats) //TODO: must fix all xact_data Vecs to remove this requirement +} + +abstract class CoherenceAgentBundle extends Bundle with CoherenceAgentParameters +abstract class CoherenceAgentModule extends Module with CoherenceAgentParameters + +trait HasCoherenceAgentWiringHelpers { + def doOutputArbitration[T <: TileLinkChannel]( + out: DecoupledIO[T], + ins: Seq[DecoupledIO[T]]) { + def lock(o: T) = o.hasMultibeatData() + val arb = Module(new LockingRRArbiter(out.bits.clone, ins.size, out.bits.tlDataBeats, lock _)) + out <> arb.io.out + arb.io.in <> ins + } + + def doInputRouting[T <: HasManagerTransactionId]( + in: DecoupledIO[T], + outs: Seq[DecoupledIO[T]]) { + val idx = in.bits.manager_xact_id + outs.map(_.bits := in.bits) + outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) } + in.ready := Vec(outs.map(_.ready)).read(idx) + } +} + +trait HasInnerTLIO extends CoherenceAgentBundle { + val inner = Bundle(new ManagerTileLinkIO)(innerTLParams) + val incoherent = Vec.fill(inner.tlNCachingClients){Bool()}.asInput + def iacq(dummy: Int = 0) = inner.acquire.bits + def iprb(dummy: Int = 0) = inner.probe.bits + def irel(dummy: Int = 0) = inner.release.bits + def ignt(dummy: Int = 0) = inner.grant.bits + def ifin(dummy: Int = 0) = inner.finish.bits +} + +trait HasUncachedOuterTLIO extends CoherenceAgentBundle { + val outer = Bundle(new ClientUncachedTileLinkIO)(outerTLParams) + def oacq(dummy: Int = 0) = outer.acquire.bits + def ognt(dummy: Int = 0) = outer.grant.bits +} + +trait HasCachedOuterTLIO extends CoherenceAgentBundle { + val outer = Bundle(new ClientTileLinkIO)(outerTLParams) + def oacq(dummy: Int = 0) = outer.acquire.bits + def oprb(dummy: Int = 0) = outer.probe.bits + def orel(dummy: Int = 0) = outer.release.bits + def ognt(dummy: Int = 0) = outer.grant.bits +} + +class ManagerTLIO extends HasInnerTLIO with HasUncachedOuterTLIO + +abstract class CoherenceAgent extends CoherenceAgentModule { + def innerTL: ManagerTileLinkIO + def outerTL: ClientTileLinkIO + def incoherent: Vec[Bool] +} + +abstract class ManagerCoherenceAgent extends CoherenceAgent + with HasCoherenceAgentWiringHelpers { + val io = new ManagerTLIO + def innerTL = io.inner + def outerTL = TileLinkIOWrapper(io.outer, outerTLParams) + def incoherent = io.incoherent +} + +class HierarchicalTLIO extends HasInnerTLIO with HasCachedOuterTLIO + +abstract class HierarchicalCoherenceAgent extends CoherenceAgent { + val io = new HierarchicalTLIO + def innerTL = io.inner + def outerTL = io.outer + def incoherent = 
io.incoherent +} + +trait HasTrackerConflictIO extends Bundle { + val has_acquire_conflict = Bool(OUTPUT) + val has_acquire_match = Bool(OUTPUT) + val has_release_match = Bool(OUTPUT) +} + +class ManagerXactTrackerIO extends ManagerTLIO with HasTrackerConflictIO +class HierarchicalXactTrackerIO extends HierarchicalTLIO with HasTrackerConflictIO + +abstract class XactTracker extends CoherenceAgentModule with HasDataBeatCounters { + def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt = + Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat) + def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt = + ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat) + + def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt = + addPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits) + + def addPendingBitWhenBeatIsGetOrAtomic(in: DecoupledIO[AcquireFromSrc]): UInt = { + val a = in.bits + val isGetOrAtomic = a.isBuiltInType() && + (Vec(Acquire.getType, Acquire.getBlockType, Acquire.putAtomicType).contains(a.a_type)) + addPendingBitWhenBeat(in.fire() && isGetOrAtomic, a) + } + + def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt = + dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits) + + def dropPendingBitAtDest(in: DecoupledIO[ProbeToDst]): UInt = + ~Fill(in.bits.tlNCachingClients, in.fire()) | ~UIntToOH(in.bits.client_id) +} diff --git a/uncore/src/main/scala/util.scala b/uncore/src/main/scala/util.scala new file mode 100644 index 00000000..65c5d6cd --- /dev/null +++ b/uncore/src/main/scala/util.scala @@ -0,0 +1,106 @@ +// See LICENSE for license details. + +package uncore + +import Chisel._ +import scala.math._ + +class Unsigned(x: Int) { + require(x >= 0) + def clog2: Int = { require(x > 0); ceil(log(x)/log(2)).toInt } + def log2: Int = { require(x > 0); floor(log(x)/log(2)).toInt } + def isPow2: Boolean = x > 0 && (x & (x-1)) == 0 + def nextPow2: Int = if (x == 0) 1 else 1 << clog2 +} + +object MuxBundle { + def apply[T <: Data] (default: T, mapping: Seq[(Bool, T)]): T = { + mapping.reverse.foldLeft(default)((b, a) => Mux(a._1, a._2, b)) + } +} + +// Produces 0-width value when counting to 1 +class ZCounter(val n: Int) { + val value = Reg(init=UInt(0, log2Ceil(n))) + def inc(): Bool = { + if (n == 1) Bool(true) + else { + val wrap = value === UInt(n-1) + value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1)) + wrap + } + } +} + +object ZCounter { + def apply(n: Int) = new ZCounter(n) + def apply(cond: Bool, n: Int): (UInt, Bool) = { + val c = new ZCounter(n) + var wrap: Bool = null + when (cond) { wrap = c.inc() } + (c.value, cond && wrap) + } +} + +class FlowThroughSerializer[T <: HasTileLinkData](gen: T, n: Int) extends Module { + val io = new Bundle { + val in = Decoupled(gen.clone).flip + val out = Decoupled(gen.clone) + val cnt = UInt(OUTPUT, log2Up(n)) + val done = Bool(OUTPUT) + } + val narrowWidth = io.in.bits.data.getWidth / n + require(io.in.bits.data.getWidth % narrowWidth == 0) + + if(n == 1) { + io.in <> io.out + io.cnt := UInt(width = 0) + io.done := Bool(true) + } else { + val cnt = Reg(init=UInt(0, width = log2Up(n))) + val wrap = cnt === UInt(n-1) + val rbits = Reg(io.in.bits.clone) + val active = Reg(init=Bool(false)) + + val shifter = Vec.fill(n){Bits(width = narrowWidth)} + (0 until n).foreach { + i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth) + } + + io.done := Bool(false) + io.cnt := cnt + io.in.ready := !active + io.out.valid := active || io.in.valid + io.out.bits := 
io.in.bits + when(!active && io.in.valid) { + when(io.in.bits.hasData()) { + cnt := Mux(io.out.ready, UInt(1), UInt(0)) + rbits := io.in.bits + active := Bool(true) + } + io.done := !io.in.bits.hasData() + } + when(active) { + io.out.bits := rbits + io.out.bits.data := shifter(cnt) + when(io.out.ready) { + cnt := cnt + UInt(1) + when(wrap) { + cnt := UInt(0) + io.done := Bool(true) + active := Bool(false) + } + } + } + } +} + +object FlowThroughSerializer { + def apply[T <: HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = { + val fs = Module(new FlowThroughSerializer(in.bits, n)) + fs.io.in.valid := in.valid + fs.io.in.bits := in.bits + in.ready := fs.io.in.ready + fs.io.out + } +}
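+ +// A usage sketch for FlowThroughSerializer; the port name (mem_acq) and the +// width factor are hypothetical. Each wide data beat of mem_acq is re-presented +// as four sequential quarter-width beats on the returned channel: +// val narrow_acq = FlowThroughSerializer(mem_acq, 4)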