1
0

Heterogeneous Tiles (#550)

Fundamental new features:

* Added tile package: This package is intended to hold components re-usable across different types of tile. Will be the future location of TL2-RoCC accelerators and new diplomatic versions of intra-tile interfaces.
* Adopted [ModuleName]Params convention: The code base was very inconsistent about what to name case classes that provide parameters to modules. Settled on calling them [ModuleName]Params to distinguish them from config.Parameters and config.Config. So far this has been applied mostly to case classes defined within rocket and tile.
* Defined RocketTileParams: A nested case class containing case classes for all the components of a tile (L1 caches and core). Allows all such parameters to vary per-tile.
* Defined RocketCoreParams: All the parameters that can be varied per-core.
* Defined L1CacheParams: A trait defining the parameters common to L1 caches, made concrete in different derived case classes.
* Defined RocketTilesKey: A sequence of RocketTileParams, one for every tile to be created.
* Provided HeterogeneousDualCoreConfig: An example of making a heterogeneous chip with two cores, one big and one little.
* Changes to legacy code: ReplacementPolicy moved to package util. L1Metadata moved to package tile. Legacy L2 cache agent removed because it can no longer share the metadata array implementation with the L1. Legacy GroundTests on life support.

Additional changes that got rolled in along the way:

* rocket: Fix critical path through BTB for I$ index bits > pgIdxBits
* coreplex: tiles connected via :=*
* groundtest: updated to use TileParams
* tilelink: cache cork requirements are relaxed to allow more cacheless masters
This commit is contained in:
Henry Cook
2017-02-09 13:59:09 -08:00
committed by GitHub
parent f9acd4988c
commit e8c8d2af71
57 changed files with 1084 additions and 1933 deletions

View File

@@ -1,980 +0,0 @@
// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package uncore.agents
import Chisel._
import scala.reflect.ClassTag
import rocket.PAddrBits
import uncore.coherence._
import uncore.tilelink._
import uncore.constants._
import uncore.util._
import util._
import config.{Parameters, Field}
// Configuration key holding an Int id for this cache; read below via p(CacheId).
case object CacheId extends Field[Int]
// Configuration key holding a DirectoryRepresentation.
// It is not read anywhere in the visible part of this file.
case object L2DirectoryRepresentation extends Field[DirectoryRepresentation]
/** Address-field layout and derived sizing constants shared by the outer-cache (L2)
  * bundles and modules.
  *
  * A block address is sliced, from the LSB upward, into `cacheIdBits` low bits,
  * then `idxBits` of set index, then `tagBits` of tag.
  */
trait HasOuterCacheParameters extends HasCacheParameters with HasCoherenceAgentParameters {
  val cacheId = p(CacheId)

  // Inclusive bit positions of the set index and of the tag inside a block address.
  val idxLSB = cacheIdBits
  val idxMSB = idxLSB + idxBits - 1
  val tagLSB = idxLSB + idxBits
  val tagMSB = tagLSB + tagBits - 1

  /** True when the set-index field of `block` equals `idx`. */
  def inSet(block: HasCacheBlockAddress, idx: UInt): Bool =
    block.addr_block(idxMSB, idxLSB) === idx

  /** True when `block` and the block address `addr` select the same set. */
  def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool =
    inSet(block, addr(idxMSB, idxLSB))

  /** True when both blocks select the same set. */
  def inSameSet(block_a: HasCacheBlockAddress, block_b: HasCacheBlockAddress): Bool =
    inSet(block_a, block_b.addr_block(idxMSB, idxLSB))

  /** True when the tag field of `block` equals `tag`. */
  def hasTag(block: HasCacheBlockAddress, tag: UInt): Bool =
    block.addr_block(tagMSB, tagLSB) === tag

  /** True when `block` and the block address `addr` carry the same tag. */
  def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool =
    hasTag(block, addr(tagMSB, tagLSB))

  /** True when `block` matches both the given tag and the given set index. */
  def isSameBlock(block: HasCacheBlockAddress, tag: UInt, idx: UInt): Bool =
    inSet(block, idx) && hasTag(block, tag)

  //val blockAddrBits = p(TLBlockAddrBits)

  // Data-array rows per outer beat; the require below pins this to exactly one.
  val refillCyclesPerBeat = outerDataBits/rowBits
  val refillCycles = refillCyclesPerBeat*outerDataBeats
  val internalDataBeats = p(CacheBlockBytes)*8/rowBits
  require(refillCyclesPerBeat == 1)

  val amoAluOperandBits = p(AmoAluOperandBits)
  require(amoAluOperandBits <= innerDataBits)
  require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states

  val nSecondaryMisses = p(NSecondaryMisses)
  val isLastLevelCache = true
  // When an ECC code is configured, beats are always written in full.
  val alwaysWriteFullBeat = !p(ECCCode).isEmpty
}
/** Base class for the L2 modules in this file. Mixes in the outer-cache parameters and
  * offers helpers for wiring the internal (metadata/data/writeback) ports.
  */
abstract class L2HellaCacheModule(implicit val p: Parameters) extends Module
    with HasOuterCacheParameters {

  /** Round-robin arbitrates `ins` onto `out`.
    *
    * @param out            the single arbitrated output port
    * @param ins            the competing requesters
    * @param block_transfer predicate on the winning payload; while it is true the winner
    *                       is neither presented on `out` nor acknowledged
    */
  def doInternalOutputArbitration[T <: Data : ClassTag](
      out: DecoupledIO[T],
      ins: Seq[DecoupledIO[T]],
      block_transfer: T => Bool = (t: T) => Bool(false)): Unit = {
    val arbiter = Module(new RRArbiter(out.bits, ins.size))
    val winner = arbiter.io.out
    val allowed = !block_transfer(winner.bits)
    out.bits := winner.bits
    out.valid := winner.valid && allowed
    winner.ready := out.ready && allowed
    arbiter.io.in <> ins
  }

  /** Broadcasts the payload of `in` to every port in `outs`, asserting valid only on the
    * port whose position in `outs` equals the payload's `id`.
    */
  def doInternalInputRouting[T <: Bundle with HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]): Unit = {
    for ((sink, position) <- outs.zipWithIndex) {
      sink.bits := in.bits
      sink.valid := in.valid && in.bits.id === UInt(position)
    }
  }
}
// Base class for the L2 bundles in this file: a ParameterizedBundle that also
// mixes in the outer-cache parameters.
abstract class L2HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasOuterCacheParameters
// Adds an `id` field wide enough to encode nTransactors + 1 distinct values.
// Elsewhere in this file it is driven with UInt(trackerId) and used by
// doInternalInputRouting to steer a response back to its requester.
trait HasL2Id extends HasCoherenceAgentParameters {
val id = UInt(width = log2Up(nTransactors + 1))
}
// Fields describing the outcome of a metadata lookup.
trait HasL2InternalRequestState extends HasOuterCacheParameters {
// Whether the lookup matched a stored tag
val tag_match = Bool()
// Metadata (tag plus coherence state) returned by the lookup
val meta = new L2Metadata
// Way mask, one bit per way
val way_en = Bits(width = nWays)
}
// Adds a beat address that selects one of the refillCycles rows of a block.
trait HasL2BeatAddr extends HasOuterCacheParameters {
val addr_beat = UInt(width = log2Up(refillCycles))
}
// Adds a rowBits-wide data payload (together with its beat address) to a bundle.
trait HasL2Data extends HasOuterCacheParameters
with HasL2BeatAddr {
val data = UInt(width = rowBits)
// Constant true: a bundle with this trait always carries data. `dummy` is unused.
def hasData(dummy: Int = 0) = Bool(true)
// Elaboration-time constant: true when a block spans more than one refill cycle. `dummy` is unused.
def hasMultibeatData(dummy: Int = 0) = Bool(refillCycles > 1)
}
// Per-block L2 metadata: extends Metadata with a HierarchicalMetadata coherence
// state (which has `inner` and `outer` parts, see the companion object).
class L2Metadata(implicit p: Parameters) extends Metadata()(p) with HasOuterCacheParameters {
val coh = new HierarchicalMetadata
}
/** Factories that build an [[L2Metadata]] wire from a tag and coherence state. */
object L2Metadata {
  /** Builds a wire carrying `tag` and the combined coherence state `coh`. */
  def apply(tag: Bits, coh: HierarchicalMetadata)(implicit p: Parameters): L2Metadata = {
    val result = Wire(new L2Metadata)
    result.coh := coh
    result.tag := tag
    result
  }

  /** Builds a wire carrying `tag` and a coherence state assembled from its
    * `inner` (manager-side) and `outer` (client-side) halves.
    */
  def apply(tag: Bits, inner: ManagerMetadata, outer: ClientMetadata)
           (implicit p: Parameters): L2Metadata = {
    val combined = Wire(new HierarchicalMetadata)
    combined.outer := outer
    combined.inner := inner
    apply(tag, combined)
  }
}
// Metadata read request: a MetaReadReq extended with the requester id and the
// tag that L2MetadataArray compares against the stored tags.
class L2MetaReadReq(implicit p: Parameters) extends MetaReadReq()(p) with HasL2Id {
val tag = Bits(width = tagBits)
}
// Metadata write request carrying an L2Metadata payload plus the requester id.
class L2MetaWriteReq(implicit p: Parameters) extends MetaWriteReq[L2Metadata](new L2Metadata)(p)
with HasL2Id {
// Explicit cloneType that constructs a fresh L2MetaWriteReq
override def cloneType = new L2MetaWriteReq().asInstanceOf[this.type]
}
// Metadata lookup response: requester id plus tag_match / meta / way_en.
class L2MetaResp(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2Id
with HasL2InternalRequestState
// Requester-side metadata read port: a decoupled request going out and a
// valid-only response coming back (the response direction is flipped).
trait HasL2MetaReadIO extends HasOuterCacheParameters {
val read = Decoupled(new L2MetaReadReq)
val resp = Valid(new L2MetaResp).flip
}
// Requester-side metadata write port: a decoupled write request, no response.
trait HasL2MetaWriteIO extends HasOuterCacheParameters {
val write = Decoupled(new L2MetaWriteReq)
}
// Bundle combining the metadata read port and the metadata write port.
class L2MetaRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2MetaReadIO
with HasL2MetaWriteIO
// Bundle with only the metadata read port (used by L2WritebackUnitIO).
class L2MetaReadOnlyIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2MetaReadIO
// Mixin giving an IO bundle a `meta` read/write port.
trait HasL2MetaRWIO extends HasOuterCacheParameters {
val meta = new L2MetaRWIO
}
// Tag/coherence metadata array for the L2, wrapped in a lookup pipeline.
// Naming convention: s1_* signals belong to the stage after a read is accepted,
// s2_* signals are registered once more; io.resp is driven from the s2 stage.
class L2MetadataArray(implicit p: Parameters) extends L2HellaCacheModule()(p) {
val io = new L2MetaRWIO().flip
// Value each entry takes on reset: tag 0 with HierarchicalMetadata.onReset
def onReset = L2Metadata(UInt(0), HierarchicalMetadata.onReset)
val meta = Module(new MetadataArray(onReset _))
meta.io.read <> io.read
meta.io.write <> io.write
// NOTE: despite the "_1h" suffix this is an all-ones mask (every way enabled), not a one-hot value
val way_en_1h = UInt((BigInt(1) << nWays) - 1)
val s1_way_en_1h = RegEnable(way_en_1h, io.read.valid)
// Connected after the bulk connection above, so the array is always read with every way enabled
// NOTE(review): this appears to make the requester's way_en irrelevant for reads - confirm intent
meta.io.read.bits.way_en := way_en_1h
// Request fields captured whenever a read is presented
val s1_tag = RegEnable(io.read.bits.tag, io.read.valid)
val s1_id = RegEnable(io.read.bits.id, io.read.valid)
// Builds a Vec with one element per way
def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
// High in the cycle after a read was actually accepted
val s1_clk_en = Reg(next = io.read.fire())
val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === s1_tag)
// A way matches when its tag is equal, its outer coherence state is valid, and the way is enabled
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.outer.isValid() && s1_way_en_1h(w).toBool).asUInt
val s1_idx = RegEnable(io.read.bits.idx, io.read.valid) // deal with stalls?
val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
val s2_tag_match = s2_tag_match_way.orR
// Coherence state of the matching way, selected from per-way registers
val s2_hit_coh = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
// Replacement policy instance obtained from the configuration
val replacer = p(L2Replacer)()
// Index of the matching way; defaults to 0, and the highest-numbered match wins
// because later `when` blocks override earlier ones
val s1_hit_way = Wire(Bits())
s1_hit_way := Bits(0)
(0 until nWays).foreach(i => when (s1_tag_match_way(i)) { s1_hit_way := Bits(i) })
replacer.access(io.read.bits.idx)
replacer.update(s1_clk_en, s1_tag_match_way.orR, s1_idx, s1_hit_way)
// One-hot encoding of the way chosen by the replacer, in s1 and registered for s2
val s1_replaced_way_en = UIntToOH(replacer.way)
val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
// Metadata of the replacement candidate: each per-way register only captures
// when its way is the one selected for replacement
val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) =>
RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
// The response is valid two cycles after the read fired
io.resp.valid := Reg(next = s1_clk_en)
io.resp.bits.id := RegEnable(s1_id, s1_clk_en)
io.resp.bits.tag_match := s2_tag_match
// On a match: coherence state of the hit way. On a miss: the replacement candidate's metadata.
// NOTE(review): on a match the tag field is still taken from s2_repl_meta, not from the hit way
io.resp.bits.meta := Mux(s2_tag_match,
L2Metadata(s2_repl_meta.tag, s2_hit_coh),
s2_repl_meta)
// Way mask of the hit on a match, otherwise of the replacement candidate
io.resp.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
}
// Data-array read request: requester id, way mask, set index and beat address.
class L2DataReadReq(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2BeatAddr
with HasL2Id {
val addr_idx = UInt(width = idxBits)
val way_en = Bits(width = nWays)
}
/** Factory for [[L2DataReadReq]] wires. */
object L2DataReadReq {
  /** Builds a read-request wire with every field driven from the arguments.
    *
    * @param id        requester id
    * @param way_en    way mask
    * @param addr_idx  set index
    * @param addr_beat beat within the block
    */
  def apply(id: UInt, way_en: UInt, addr_idx: UInt, addr_beat: UInt)
           (implicit p: Parameters): L2DataReadReq = {
    val request = Wire(new L2DataReadReq)
    request.addr_beat := addr_beat
    request.addr_idx := addr_idx
    request.way_en := way_en
    request.id := id
    request
  }
}
// Data-array write request: a read request extended with the data row and a
// write mask of rowBits/8 bits (one bit per byte of the row).
class L2DataWriteReq(implicit p: Parameters) extends L2DataReadReq()(p)
with HasL2Data {
val wmask = Bits(width = rowBits/8)
}
/** Factory for [[L2DataWriteReq]] wires. */
object L2DataWriteReq {
  /** Builds a write-request wire with every field driven from the arguments.
    *
    * @param id        requester id
    * @param way_en    way mask
    * @param addr_idx  set index
    * @param addr_beat beat within the block
    * @param wmask     byte write mask for the row
    * @param data      row data to write
    */
  def apply(id: UInt, way_en: UInt, addr_idx: UInt, addr_beat: UInt, wmask: UInt, data: UInt)
           (implicit p: Parameters): L2DataWriteReq = {
    val request = Wire(new L2DataWriteReq)
    request.data := data
    request.wmask := wmask
    request.addr_beat := addr_beat
    request.addr_idx := addr_idx
    request.way_en := way_en
    request.id := id
    request
  }
}
// Data-array read response: requester id, beat address and the data row.
class L2DataResp(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2Id
with HasL2Data
// Requester-side data read port: a decoupled request going out and a
// valid-only response coming back (the response direction is flipped).
trait HasL2DataReadIO extends HasOuterCacheParameters {
val read = Decoupled(new L2DataReadReq)
val resp = Valid(new L2DataResp).flip
}
// Requester-side data write port: a decoupled write request, no response.
trait HasL2DataWriteIO extends HasOuterCacheParameters {
val write = Decoupled(new L2DataWriteReq)
}
// Bundle combining the data read port and the data write port.
class L2DataRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2DataReadIO
with HasL2DataWriteIO
// Mixin giving an IO bundle a `data` read/write port.
trait HasL2DataRWIO extends HasOuterCacheParameters {
val data = new L2DataRWIO
}
// Data storage for the L2: a single SeqMem shared by reads and writes, with
// writes taking priority over reads.
// `delay` is the number of extra Pipe stages added to the response path.
class L2DataArray(delay: Int)(implicit p: Parameters) extends L2HellaCacheModule()(p) {
val io = new L2DataRWIO().flip
// One row per (way, set, beat); each row is stored as rowBits/8 individual bytes
val array = SeqMem(nWays*nSets*refillCycles, Vec(rowBits/8, Bits(width=8)))
// A read is only performed when no write is being presented in the same cycle
val ren = !io.write.valid && io.read.valid
// Row address = { way index (decoded from the way_en mask), set index, beat }
val raddr = Cat(OHToUInt(io.read.bits.way_en), io.read.bits.addr_idx, io.read.bits.addr_beat)
val waddr = Cat(OHToUInt(io.write.bits.way_en), io.write.bits.addr_idx, io.write.bits.addr_beat)
// Split the write data into bytes and the write mask into per-byte enables
val wdata = Vec.tabulate(rowBits/8)(i => io.write.bits.data(8*(i+1)-1,8*i))
val wmask = io.write.bits.wmask.toBools
when (io.write.valid) { array.write(waddr, wdata, wmask) }
// Carry the accepted request alongside the memory read so the response can echo its fields
val r_req = Pipe(io.read.fire(), io.read.bits)
io.resp := Pipe(r_req.valid, r_req.bits, delay)
// Connected after the line above, so it supplies the response's data field from the memory read
io.resp.bits.data := Pipe(r_req.valid, array.read(raddr, ren).asUInt, delay).bits
// Reads stall while a write is presented; writes are always accepted
io.read.ready := !io.write.valid
io.write.ready := Bool(true)
}
// One bank of the L2 cache: instantiates the metadata array, the data array and
// the transaction-tracker file, and wires them to the bank's inner/outer ports.
// Both nSets and nWays must be powers of two.
class L2HellaCacheBank(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p)
with HasOuterCacheParameters {
require(isPow2(nSets))
require(isPow2(nWays))
val meta = Module(new L2MetadataArray) // TODO: add delay knob
// Data array built with one extra response pipeline stage
val data = Module(new L2DataArray(1))
val tshrfile = Module(new TSHRFile)
io.inner <> tshrfile.io.inner
io.outer <> tshrfile.io.outer
tshrfile.io.incoherent <> io.incoherent
meta.io <> tshrfile.io.meta
data.io <> tshrfile.io.data
// Helper defined outside this file; called after io.outer has been connected
disconnectOuterProbeAndFinish()
}
// IO of the TSHRFile: the hierarchical TileLink ports plus the metadata and
// data array read/write ports.
class TSHRFileIO(implicit p: Parameters) extends HierarchicalTLIO()(p)
with HasL2MetaRWIO
with HasL2DataRWIO
// Holds the transaction trackers and the writeback unit of one L2 bank and
// arbitrates/routes all of their traffic: inner TileLink channels, the single
// outer memory port, and the local metadata/data arrays.
class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p)
with HasCoherenceAgentWiringHelpers {
val io = new TSHRFileIO
// Create TSHRs for outstanding transactions
// Tracker ids [0, nReleaseTransactors) handle voluntary releases,
// ids [nReleaseTransactors, nTransactors) handle acquires.
val irelTrackerList =
(0 until nReleaseTransactors).map(id =>
Module(new CacheVoluntaryReleaseTracker(id)))
val iacqTrackerList =
(nReleaseTransactors until nTransactors).map(id =>
Module(new CacheAcquireTracker(id)))
val trackerList = irelTrackerList ++ iacqTrackerList
// Don't allow a writeback request to go through if we are taking
// a voluntary release for the same block.
// The writeback can go forward once the voluntary release is handled
// (checks both busy release trackers and a release arriving this cycle)
def writebackConflictsWithVolRelease(wb: L2WritebackReq): Bool =
irelTrackerList
.map(tracker =>
!tracker.io.alloc.idle &&
isSameBlock(tracker.io.alloc, wb.tag, wb.idx))
.reduce(_ || _) ||
(io.inner.release.valid &&
isSameBlock(io.inner.release.bits, wb.tag, wb.idx))
// WritebackUnit evicts data from L2, including invalidating L1s
// It is given id nTransactors, one past the last tracker id
val wb = Module(new L2WritebackUnit(nTransactors))
val trackerAndWbIOs = trackerList.map(_.io) :+ wb.io
// Tracker writeback requests are arbitrated into the writeback unit and held
// back while they conflict with a voluntary release; responses are routed by id
doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req),
block_transfer = writebackConflictsWithVolRelease _)
doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp))
// Propagate incoherence flags
(trackerList.map(_.io.incoherent) :+ wb.io.incoherent) foreach { _ := io.incoherent }
// Handle acquire transaction initiation
// An acquire and a release presented in the same cycle for the same set conflict;
// the negated flag is passed as the allocation override below
val irel_vs_iacq_conflict =
io.inner.acquire.valid &&
io.inner.release.valid &&
inSameSet(io.inner.acquire.bits, io.inner.release.bits)
doInputRoutingWithAllocation(
in = io.inner.acquire,
outs = trackerList.map(_.io.inner.acquire),
allocs = trackerList.map(_.io.alloc.iacq),
allocOverride = Some(!irel_vs_iacq_conflict))
assert(PopCount(trackerList.map(_.io.alloc.iacq.should)) <= UInt(1),
"At most a single tracker should now be allocated for any given Acquire")
// Wire releases from clients
doInputRoutingWithAllocation(
in = io.inner.release,
outs = trackerAndWbIOs.map(_.inner.release),
allocs = trackerAndWbIOs.map(_.alloc.irel))
assert(PopCount(trackerAndWbIOs.map(_.alloc.irel.should)) <= UInt(1),
"At most a single tracker should now be allocated for any given Release")
// Wire probe requests and grant reply to clients, finish acks from clients
doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ wb.io.inner.probe)
doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant) :+ wb.io.inner.grant)
doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
// Create an arbiter for the one memory port
val outerList = trackerList.map(_.io.outer) :+ wb.io.outer
val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
(p.alterPartial({ case TLId => p(OuterTLId)})))
outer_arb.io.in <> outerList
io.outer <> outer_arb.io.out
// Wire local memory arrays
// The writeback unit only reads metadata and data, so it takes part in the
// read arbiters and response routing but not in the write arbiters
doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read) :+ wb.io.meta.read)
doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write))
doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read)
doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write))
doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp) :+ wb.io.meta.resp)
doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp)
}
// IO of an L2 transaction tracker: the hierarchical tracker ports plus the data
// array, metadata array and writeback-unit ports.
class L2XactTrackerIO(implicit p: Parameters) extends HierarchicalXactTrackerIO()(p)
with HasL2DataRWIO
with HasL2MetaRWIO
with HasL2WritebackIO
// Helpers for counting beats on, and tracking per-beat pending bits for, the
// internal data-array ports (the counterparts of the TileLink-channel helpers
// that live outside this file).
trait HasRowBeatCounters extends HasOuterCacheParameters with HasPendingBitHelpers {
// Implemented by the concrete tracker: folds `incoming` data into the buffer entry for `beat`
def mergeData(dataBits: Int)(beat: UInt, incoming: UInt): Unit
// Returns (current beat index, done flag).
// With multi-cycle blocks a Counter advances on each `inc` of a full-block transfer and
// `done` is its wrap signal; for a non-full-block transfer the supplied `beat` and `inc`
// are passed straight through. With single-cycle blocks the result is (0, inc).
def connectDataBeatCounter[S <: L2HellaCacheBundle](inc: Bool, data: S, beat: UInt, full_block: Bool) = {
if(data.refillCycles > 1) {
val (multi_cnt, multi_done) = Counter(full_block && inc, data.refillCycles)
(Mux(!full_block, beat, multi_cnt), Mux(!full_block, inc, multi_done))
} else { (UInt(0), inc) }
}
// Beat counter for a decoupled internal port; counts on fire()
def connectInternalDataBeatCounter[T <: L2HellaCacheBundle with HasL2BeatAddr](
in: DecoupledIO[T],
beat: UInt = UInt(0),
full_block: Bool = Bool(true)): (UInt, Bool) = {
connectDataBeatCounter(in.fire(), in.bits, beat, full_block)
}
// Beat counter for a valid-only internal port; returns only the done flag
def connectInternalDataBeatCounter[T <: L2HellaCacheBundle with HasL2Data](
in: ValidIO[T],
full_block: Bool): Bool = {
connectDataBeatCounter(in.valid, in.bits, UInt(0), full_block)._2
}
// Mask with a 1 at addr_beat's position when the port fires (or is valid), otherwise all zeros
def addPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr](in: DecoupledIO[T]) =
Fill(in.bits.refillCycles, in.fire()) & UIntToOH(in.bits.addr_beat)
def addPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr](in: ValidIO[T]) =
Fill(in.bits.refillCycles, in.valid) & UIntToOH(in.bits.addr_beat)
// Mask meant to be ANDed into a pending vector: it has a 0 at addr_beat's position when
// the port fires (or is valid), and ones in every beat position otherwise
def dropPendingBit[T <: L2HellaCacheBundle with HasL2BeatAddr] (in: DecoupledIO[T]) =
~Fill(in.bits.refillCycles, in.fire()) | ~UIntToOH(in.bits.addr_beat)
def dropPendingBitInternal[T <: L2HellaCacheBundle with HasL2BeatAddr] (in: ValidIO[T]) =
~Fill(in.bits.refillCycles, in.valid) | ~UIntToOH(in.bits.addr_beat)
// TODO: Deal with the possibility that rowBits != tlDataBits
// Merges the data of a valid internal response into the buffer via mergeData
def mergeDataInternal[T <: L2HellaCacheBundle with HasL2Data with HasL2BeatAddr](in: ValidIO[T]) {
when(in.valid) { mergeData(rowBits)(in.bits.addr_beat, in.bits.data) }
}
}
// Mixin for trackers that read block data out of the L2 data array into their
// data buffer, one beat at a time.
trait ReadsFromOuterCacheDataArray extends HasCoherenceMetadataBuffer
with HasRowBeatCounters
with HasDataBuffer {
def io: HasL2DataRWIO
// One bit per beat: beats that still have to be requested from the data array
val pending_reads = Reg(init=Bits(0, width = innerDataBeats))
// One bit per beat: beats that were requested and whose response has not yet arrived
val pending_resps = Reg(init=Bits(0, width = innerDataBeats))
// Lowest-numbered beat that still needs to be read
val curr_read_beat = PriorityEncoder(pending_reads)
// Drives io.data.read and maintains pending_reads / pending_resps.
//   drop_pending_bit   - mask ANDed into pending_reads (0 bits cancel a pending read)
//   add_pending_bit    - mask ORed into pending_reads (1 bits request a read)
//   block_pending_read - while true, no read request is issued
//   can_update_pending - gates the update of pending_reads
def readDataArray(drop_pending_bit: UInt,
add_pending_bit: UInt = UInt(0),
block_pending_read: Bool = Bool(false),
can_update_pending: Bool = Bool(true)) {
val port = io.data
when (can_update_pending) {
pending_reads := (pending_reads | add_pending_bit) &
dropPendingBit(port.read) & drop_pending_bit
}
// Requests are only issued in s_busy
port.read.valid := state === s_busy && pending_reads.orR && !block_pending_read
port.read.bits := L2DataReadReq(
id = UInt(trackerId),
way_en = xact_way_en,
addr_idx = xact_addr_idx,
addr_beat = curr_read_beat)
// A beat becomes pending when its read fires and is cleared when its response arrives
pending_resps := (pending_resps & dropPendingBitInternal(port.resp)) |
addPendingBitInternal(port.read)
// Report outstanding reads/responses to the tracker's scoreboard (defined outside this file)
scoreboard += (pending_reads.orR, pending_resps.orR)
mergeDataInternal(port.resp)
}
}
// Mixin for trackers that write beats from their data buffer into the L2 data array.
trait WritesToOuterCacheDataArray extends HasCoherenceMetadataBuffer
with HasRowBeatCounters
with HasDataBuffer {
def io: HasL2DataRWIO
// One bit per beat: beats that still have to be written to the data array
val pending_writes = Reg(init=Bits(0, width = innerDataBeats))
// Lowest-numbered beat that still needs to be written
val curr_write_beat = PriorityEncoder(pending_writes)
// Drives io.data.write and maintains pending_writes.
//   add_pending_bit     - mask ORed into pending_writes (1 bits request a write)
//   block_pending_write - while true, no write request is issued
//   can_update_pending  - gates the update of pending_writes
def writeDataArray(add_pending_bit: UInt = UInt(0),
block_pending_write: Bool = Bool(false),
can_update_pending: Bool = Bool(true)) {
val port = io.data
when (can_update_pending) {
pending_writes := (pending_writes & dropPendingBit(port.write)) |
add_pending_bit
}
// Requests are only issued in s_busy
port.write.valid := state === s_busy && pending_writes.orR && !block_pending_write
// The write mask is always all ones: whole rows are written from the buffer
port.write.bits := L2DataWriteReq(
id = UInt(trackerId),
way_en = xact_way_en,
addr_idx = xact_addr_idx,
addr_beat = curr_write_beat,
wmask = ~UInt(0, port.write.bits.wmask.getWidth),
data = data_buffer(curr_write_beat))
// Report outstanding writes to the tracker's scoreboard (defined outside this file)
scoreboard += pending_writes.orR
}
}
// Mixin that gives a tracker an AMO ALU and the mergeData implementation used to
// combine buffered Put/AMO data with data coming from the cache or from outside.
trait HasAMOALU extends HasAcquireMetadataBuffer
with HasByteWriteMaskBuffer
with HasRowBeatCounters {
val io: L2XactTrackerIO
// Provide a single ALU per tracker to merge Puts and AMOs with data being
// refilled, written back, or extant in the cache
val amoalu = Module(new AMOALU(amoAluOperandBits, rhsIsAligned = true))
// Holds the pre-AMO memory value (see mergeData)
val amo_result = Reg(init = UInt(0, innerDataBits))
// Sets the default connections of the ALU inputs; call before any mergeData
// so that mergeData's connections take precedence
def initializeAMOALUIOs() {
amoalu.io.addr := Cat(xact_addr_block, xact_addr_beat, xact_addr_byte)
amoalu.io.cmd := xact_op_code
amoalu.io.typ := xact_op_size
amoalu.io.lhs := io.data.resp.bits.data // default, overwritten by calls to mergeData
amoalu.io.rhs := data_buffer.head // default, overwritten by calls to mergeData
}
// Utility function for applying any buffered stored data to the cache line
// before storing it back into the data array
override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
val old_data = incoming // Refilled, written back, or de-cached data
val new_data = data_buffer(beat) // Newly Put data is already in the buffer
// Byte offset of the AMO operand converted to a bit offset
val amo_shift_bits = xact_amo_shift_bytes << 3
amoalu.io.lhs := old_data >> amo_shift_bits
amoalu.io.rhs := new_data >> amo_shift_bits
// Expand the per-byte write mask to a per-bit mask
val wmask = FillInterleaved(8, wmask_buffer(beat))
// Keep old data where the mask is clear; elsewhere take the AMO result
// (for atomics) or the buffered new data
data_buffer(beat) := ~wmask & old_data |
wmask & Mux(xact_iacq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data)
// After merging, the whole beat in the buffer is marked as written
wmask_buffer(beat) := ~UInt(0, innerWriteMaskBits)
// Capture the old memory value of the beat targeted by an atomic
when(xact_iacq.isAtomic() && xact_addr_beat === beat) { amo_result := old_data }
}
}
// Mixin holding a tracker's copy of the block's metadata (way, old metadata,
// pending coherence state) and the helpers that read and write the metadata array.
trait HasCoherenceMetadataBuffer extends HasOuterCacheParameters
with HasBlockAddressBuffer
with HasXactTrackerStates {
def trackerId: Int
// Way mask for this transaction
val xact_way_en = Reg{ Bits(width = nWays) }
// Metadata as returned by the metadata read
val xact_old_meta = Reg{ new L2Metadata }
// Coherence state that will be written back at the end of the transaction
val pending_coh = Reg{ xact_old_meta.coh }
val pending_meta_write = Reg{ Bool() } // pending_meta_write has own state (s_meta_write)
val inner_coh = pending_coh.inner
val outer_coh = pending_coh.outer
// Set index and tag extracted from the buffered block address
val xact_addr_idx = xact_addr_block(idxMSB,idxLSB)
val xact_addr_tag = xact_addr_block >> UInt(tagLSB)
// Utility function for updating the metadata that will be kept in this cache
// Only acts (and marks a metadata write as pending) when `next` differs from pending_coh
def updatePendingCohWhen(flag: Bool, next: HierarchicalMetadata) {
when(flag && pending_coh =/= next) {
pending_meta_write := Bool(true)
pending_coh := next
}
}
// Issues a metadata read while in s_meta_read, waits in s_meta_resp, then
// captures the response and moves to next_state.
//   way_en_known - when true the request carries xact_way_en and the stored
//                  way is not overwritten by the response; otherwise the
//                  request carries an all-ones mask
def metaRead(port: HasL2MetaReadIO, next_state: UInt, way_en_known: Bool = Bool(false)) {
port.read.valid := state === s_meta_read
port.read.bits.id := UInt(trackerId)
port.read.bits.idx := xact_addr_idx
port.read.bits.tag := xact_addr_tag
port.read.bits.way_en := Mux(way_en_known, xact_way_en, ~UInt(0, nWays))
when(state === s_meta_read && port.read.ready) { state := s_meta_resp }
when(state === s_meta_resp && port.resp.valid) {
xact_old_meta := port.resp.bits.meta
when (!way_en_known) { xact_way_en := port.resp.bits.way_en }
state := next_state
}
}
// Issues a metadata write of `to_write` while in s_meta_write and moves to
// next_state once the write is accepted
def metaWrite(port: HasL2MetaWriteIO, to_write: L2Metadata, next_state: UInt) {
port.write.valid := state === s_meta_write
port.write.bits.id := UInt(trackerId)
port.write.bits.idx := xact_addr_idx
port.write.bits.way_en := xact_way_en
port.write.bits.data := to_write
when(state === s_meta_write && port.write.ready) { state := next_state }
}
}
/** Mixin that lets a tracker hand the eviction of its victim block to the
  * writeback unit and wait for the unit to report completion.
  */
trait TriggersWritebacks extends HasCoherenceMetadataBuffer {
  /** Drives a writeback request while the tracker is in `s_wb_req`, waits for the
    * response in `s_wb_resp`, and then advances to `next_state`.
    *
    * Previously `next_state` was ignored and the final transition was hard-coded to
    * `s_outer_acquire`. The caller in this file passes `s_outer_acquire`, so its
    * behavior is unchanged.
    *
    * @param wb         request/response port to the writeback unit
    * @param next_state state entered once the writeback response arrives
    */
  def triggerWriteback(wb: L2WritebackIO, next_state: UInt): Unit = {
    // The victim is identified by the tag read from the metadata array, not by
    // the tag of the block being acquired.
    wb.req.valid := state === s_wb_req
    wb.req.bits.id := UInt(trackerId)
    wb.req.bits.idx := xact_addr_idx
    wb.req.bits.tag := xact_old_meta.tag
    wb.req.bits.way_en := xact_way_en
    when(state === s_wb_req && wb.req.ready) { state := s_wb_resp }
    when(state === s_wb_resp && wb.resp.valid) { state := next_state }
  }
}
// Tracker that handles a voluntary Release from an inner client: it reads the
// block's metadata, writes any released data into the data array, and finishes
// by writing updated metadata.
class CacheVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
extends VoluntaryReleaseTracker(trackerId)(p)
with HasDataBuffer
with WritesToOuterCacheDataArray {
val io = new L2XactTrackerIO
// Default all ready/valid signals low; the connections below override them
pinAllReadyValidLow(io)
// Avoid metadata races with writebacks
routeInParent(
iacqMatches = inSameSet(_, xact_addr_block),
irelCanAlloc = Bool(true))
// Initialize and accept pending Release beats
innerRelease(
block_vol_ignt = pending_writes.orR,
next = s_meta_read)
io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
// Begin a transaction by getting the current block metadata
metaRead(io.meta, s_busy)
// Write the voluntarily written back data to this cache
writeDataArray(add_pending_bit = addPendingBitWhenBeatHasData(io.inner.release),
can_update_pending = state =/= s_idle || io.alloc.irel.should)
// End a transaction by updating the block metadata
// (the outer state is updated as for a write hit only if the release carried data)
metaWrite(
io.meta,
L2Metadata(
tag = xact_addr_tag,
inner = xact_old_meta.coh.inner.onRelease(xact_vol_irel),
outer = Mux(xact_vol_irel.hasData(),
xact_old_meta.coh.outer.onHit(M_XWR),
xact_old_meta.coh.outer)),
s_idle)
mergeDataInner(io.inner.release)
// On allocation, pending_writes is loaded from the first beat alone
when(irel_is_allocating) {
pending_writes := addPendingBitWhenBeatHasData(io.inner.release)
}
// Once all outstanding work is done, move on to the metadata write
quiesce(s_meta_write) {}
// Checks for illegal behavior
assert(!(state === s_meta_resp && io.meta.resp.valid && !io.meta.resp.bits.tag_match),
"VoluntaryReleaseTracker accepted Release for a block not resident in this cache!")
}
// Tracker that handles an Acquire from an inner client. Depending on the result
// of the metadata lookup it may trigger a writeback of a victim block, probe
// inner clients, acquire the block from outer memory, read/write the local data
// array, grant to the client, and finally write back updated metadata.
class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters)
extends AcquireTracker(trackerId)(p)
with HasByteWriteMaskBuffer
with HasAMOALU
with TriggersWritebacks
with ReadsFromOuterCacheDataArray
with WritesToOuterCacheDataArray {
val io = new L2XactTrackerIO
// Default all ready/valid signals low; the connections below override them
pinAllReadyValidLow(io)
initializeAMOALUIOs()
// Candidate next coherence states for each event that can change the block's state
val pending_coh_on_ognt = HierarchicalMetadata(
ManagerMetadata.onReset,
pending_coh.outer.onGrant(io.outer.grant.bits, xact_op_code))
val pending_coh_on_ignt = HierarchicalMetadata(
pending_coh.inner.onGrant(io.ignt()),
Mux(ognt_counter.down.done,
pending_coh_on_ognt.outer,
pending_coh.outer))
val pending_coh_on_irel = HierarchicalMetadata(
pending_coh.inner.onRelease(io.irel()), // Drop sharer
Mux(io.irel().hasData(), // Dirty writeback
pending_coh.outer.onHit(M_XWR),
pending_coh.outer))
val pending_coh_on_hit = HierarchicalMetadata(
io.meta.resp.bits.meta.coh.inner,
io.meta.resp.bits.meta.coh.outer.onHit(xact_op_code))
val pending_coh_on_miss = HierarchicalMetadata.onReset
// Until the metadata response is in, releases are matched by set rather than by exact address
val before_wb_req = state.isOneOf(s_meta_read, s_meta_resp)
routeInParent(
iacqMatches = inSameSet(_, xact_addr_block),
irelMatches = (irel: HasCacheBlockAddress) =>
Mux(before_wb_req, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)),
iacqCanAlloc = Bool(true))
// TileLink allows for Gets-under-Get
// and Puts-under-Put, and either may also merge with a preceding prefetch
// that requested the correct permissions (via op_code)
// Each pair below is (type of the primary Acquire, type of the secondary Acquire)
def acquiresAreMergeable(sec: AcquireMetadata): Bool = {
val allowedTypes = List((Acquire.getType, Acquire.getType),
(Acquire.putType, Acquire.putType),
(Acquire.putBlockType, Acquire.putBlockType),
(Acquire.getPrefetchType, Acquire.getPrefetchType),
(Acquire.putPrefetchType, Acquire.putPrefetchType),
(Acquire.getPrefetchType, Acquire.getType),
(Acquire.putPrefetchType, Acquire.putType),
(Acquire.putPrefetchType, Acquire.putBlockType))
allowedTypes.map { case(a, b) => xact_iacq.isBuiltInType(a) && sec.isBuiltInType(b) }.reduce(_||_) &&
xact_op_code === sec.op_code() &&
sec.conflicts(xact_addr_block) &&
xact_allocate
}
// First, take care of accepting new acquires or secondary misses
// Handling of primary and secondary misses' data and write mask merging
def iacq_can_merge = acquiresAreMergeable(io.iacq()) &&
state =/= s_idle &&
state =/= s_meta_resp &&
state =/= s_meta_write &&
!all_pending_done &&
!io.inner.release.fire() &&
!io.outer.grant.fire() &&
!io.data.resp.valid &&
ignt_q.io.enq.ready && ignt_q.io.deq.valid
innerAcquire(
can_alloc = Bool(true),
next = s_meta_read)
io.inner.acquire.ready := state === s_idle || iacq_can_merge ||
iacq_same_xact_multibeat
// Begin a transaction by getting the current block metadata
// Defined here because of Chisel default wire demands, used in s_meta_resp
val coh = io.meta.resp.bits.meta.coh
val tag_match = io.meta.resp.bits.tag_match
// As a last-level cache (isLastLevelCache is true in this file) any valid copy counts as a hit
val is_hit = (if(!isLastLevelCache) tag_match && coh.outer.isHit(xact_op_code)
else tag_match && coh.outer.isValid())
// A miss that allocates must first evict the victim if it is dirty or held by inner clients
val needs_writeback = !tag_match &&
xact_allocate &&
(coh.outer.requiresVoluntaryWriteback() ||
coh.inner.requiresProbesOnVoluntaryWriteback())
val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact_iacq)
val should_update_meta = !tag_match && xact_allocate ||
is_hit && pending_coh_on_hit =/= coh
def full_representation = coh.inner.full()
// Next state after the metadata response, in priority order:
// writeback, inner probes, outer acquire (on a miss), otherwise s_busy
metaRead(
io.meta,
Mux(needs_writeback, s_wb_req,
Mux(needs_inner_probes, s_inner_probe,
Mux(!is_hit, s_outer_acquire, s_busy))))
updatePendingCohWhen(
io.meta.resp.valid,
Mux(is_hit, pending_coh_on_hit,
Mux(tag_match, coh, pending_coh_on_miss)))
// Issue a request to the writeback unit
triggerWriteback(io.wb, s_outer_acquire)
// Track which clients yet need to be probed and make Probe message
// If we're probing, we know the tag matches, so if this is the
// last level cache, we can use the data without upgrading permissions
val skip_outer_acquire =
(if(!isLastLevelCache) xact_old_meta.coh.outer.isHit(xact_op_code)
else xact_old_meta.coh.outer.isValid())
innerProbe(
inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
// Handle incoming releases from clients, which may reduce sharer counts
// and/or write back dirty data, and may be unexpected voluntary releases
innerRelease() // Don't block on pending_writes because they won't happen until s_busy
def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
io.irel().isVoluntary() &&
!state.isOneOf(s_idle, s_meta_read, s_meta_resp, s_meta_write) &&
!all_pending_done &&
!io.outer.grant.fire() &&
!io.inner.grant.fire() &&
!vol_ignt_counter.pending
io.inner.release.ready := irel_can_merge || irel_same_xact
updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
mergeDataInner(io.inner.release)
// Send outer request
outerAcquire(
caching = xact_allocate,
coh = xact_old_meta.coh.outer, // TODO outer_coh?
data = data_buffer(ognt_counter.up.idx),
wmask = wmask_buffer(ognt_counter.up.idx),
next = s_busy)
// Handle the response from outer memory
updatePendingCohWhen(ognt_counter.down.done, pending_coh_on_ognt)
mergeDataOuter(io.outer.grant)
// Send read request and get resp
// Going back to the original inner transaction:
// We read from the cache at this level if data wasn't written back or refilled.
// We may still merge further Gets, requiring further beats to be read.
// If ECC requires a full writemask, we'll read out data on partial writes as well.
readDataArray(
drop_pending_bit = (dropPendingBitWhenBeatHasData(io.inner.release) &
dropPendingBitWhenBeatHasData(io.outer.grant)),
add_pending_bit = addPendingBitWhenBeatNeedsRead(
io.inner.acquire,
always = Bool(alwaysWriteFullBeat),
unless = data_valid(io.iacq().addr_beat)),
block_pending_read = ognt_counter.pending,
can_update_pending = state =/= s_idle || io.alloc.irel.should)
// No override for first accepted acquire
val alloc_override = xact_allocate && (state =/= s_idle)
// Do write
// We write data to the cache at this level if it was Put here with allocate flag,
// written back dirty, or refilled from outer memory.
// A write is held back while the outer grant is outstanding, Put data is still
// arriving, or the same beat still has a read or read response pending.
writeDataArray(
add_pending_bit = (addPendingBitWhenBeatHasDataAndAllocs(io.inner.acquire, alloc_override) |
addPendingBitWhenBeatHasData(io.inner.release) |
addPendingBitWhenBeatHasData(io.outer.grant, xact_allocate)),
block_pending_write = (ognt_counter.pending ||
pending_put_data.orR ||
pending_reads(curr_write_beat) ||
pending_resps(curr_write_beat)),
can_update_pending = state =/= s_idle || io.alloc.iacq.should || io.alloc.irel.should)
// Acknowledge or respond with data
// (atomics return the pre-operation value captured in amo_result)
innerGrant(
data = Mux(xact_iacq.isAtomic(), amo_result, data_buffer(ignt_data_idx)),
external_pending = pending_writes.orR || ognt_counter.pending,
add_pending_bits = addPendingBitInternal(io.data.resp))
updatePendingCohWhen(io.inner.grant.fire() && io.ignt().last(), pending_coh_on_ignt)
// End a transaction by updating the block metadata
metaWrite(io.meta, L2Metadata(xact_addr_tag, pending_coh), s_idle)
// Initialization of some scoreboard logic based on the original
// Acquire message and on the results of the metadata read:
when(state === s_meta_resp && io.meta.resp.valid) {
// If some kind of Put is marked no-allocate but is already in the cache,
// we need to write its data to the data array
when(is_hit && !xact_allocate && xact_iacq.hasData()) {
pending_writes := addPendingBitsFromAcquire(xact_iacq)
xact_allocate := Bool(true)
}
when (needs_inner_probes) { initializeProbes() }
pending_meta_write := should_update_meta //TODO what edge case was this covering?
}
// Initialize more transaction metadata when a new Acquire is being allocated.
when(iacq_is_allocating) {
amo_result := UInt(0)
pending_meta_write := Bool(false)
pending_reads := Mux( // Pick out the specific beats of data that need to be read
io.iacq().isBuiltInType(Acquire.getBlockType) || !io.iacq().isBuiltInType(),
~UInt(0, width = innerDataBeats),
addPendingBitWhenBeatNeedsRead(io.inner.acquire, Bool(alwaysWriteFullBeat)))
pending_writes := addPendingBitWhenBeatHasDataAndAllocs(io.inner.acquire)
pending_resps := UInt(0)
}
initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging)
// Wait for everything to quiesce
// then write back the metadata if a write is pending, otherwise return to idle
quiesce(Mux(pending_meta_write, s_meta_write, s_idle)) { clearWmaskBuffer() }
}
// Request asking the writeback unit to evict a block, identified by tag, set
// index and way mask; `id` identifies the requesting tracker.
class L2WritebackReq(implicit p: Parameters)
extends L2HellaCacheBundle()(p) with HasL2Id {
val tag = Bits(width = tagBits)
val idx = Bits(width = idxBits)
val way_en = Bits(width = nWays)
}
// Writeback completion notice; carries only the id of the requesting tracker.
class L2WritebackResp(implicit p: Parameters) extends L2HellaCacheBundle()(p) with HasL2Id
// Requester-side port to the writeback unit: a decoupled request going out and
// a valid-only completion response coming back (the response direction is flipped).
class L2WritebackIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) {
val req = Decoupled(new L2WritebackReq)
val resp = Valid(new L2WritebackResp).flip
}
// Mixin giving an IO bundle a `wb` port to the writeback unit.
trait HasL2WritebackIO extends HasOuterCacheParameters {
val wb = new L2WritebackIO()
}
// IO of the writeback unit: the hierarchical tracker ports, the data array port,
// the flipped (serving) side of the writeback port, and a read-only metadata port.
class L2WritebackUnitIO(implicit p: Parameters)
extends HierarchicalXactTrackerIO()(p) with HasL2DataRWIO {
val wb = new L2WritebackIO().flip()
val meta = new L2MetaReadOnlyIO
}
// Evicts a block from the L2 on behalf of an acquire tracker: it re-reads the
// block's metadata, probes inner clients if required, collects the data (from
// releases or from the data array), releases it to outer memory if required,
// and then signals completion to the requesting tracker.
class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
with AcceptsVoluntaryReleases
with EmitsVoluntaryReleases
with EmitsInnerProbes
with ReadsFromOuterCacheDataArray
with RoutesInParent {
val io = new L2WritebackUnitIO
// Default all ready/valid signals low; the connections below override them
pinAllReadyValidLow(io)
// Id of the tracker that requested this writeback, echoed in the response
val xact_id = Reg{ io.wb.req.bits.id }
val pending_coh_on_irel = HierarchicalMetadata(
inner_coh.onRelease(io.irel()), // Drop sharer
Mux(io.irel().hasData(), // Dirty writeback
outer_coh.onHit(M_XWR),
outer_coh))
routeInParent()
// Start the writeback sub-transaction
io.wb.req.ready := state === s_idle
val coh = io.meta.resp.bits.meta.coh
val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
val needs_outer_release = coh.outer.requiresVoluntaryWriteback()
def full_representation = coh.inner.full()
// Even though we already read the metadata in the acquire tracker that
// sent the writeback request, we have to read it again in the writeback
// unit, since it may have been updated in the meantime.
metaRead(io.meta,
next_state = Mux(needs_inner_probes, s_inner_probe, s_busy),
way_en_known = Bool(true))
// Track which clients yet need to be probed and make Probe message
innerProbe(
inner_coh.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block),
s_busy)
// Handle incoming releases from clients, which may reduce sharer counts
// and/or write back dirty data
innerRelease()
def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
io.irel().isVoluntary() &&
!state.isOneOf(s_idle, s_meta_read, s_meta_resp) &&
!(state === s_busy && all_pending_done) &&
!vol_ignt_counter.pending &&
!blockInnerRelease()
io.inner.release.ready := irel_can_merge || irel_same_xact
updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
mergeDataInner(io.inner.release)
// If a release didn't write back data, have to read it from data array
readDataArray(
drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release))
// Once the data is buffered we can write it back to outer memory
outerRelease(
coh = outer_coh,
data = data_buffer(vol_ognt_counter.up.idx),
add_pending_data_bits = addPendingBitInternal(io.data.resp),
add_pending_send_bit = io.meta.resp.valid && needs_outer_release)
// Respond to the initiating transaction handler signalling completion of the writeback
io.wb.resp.valid := state === s_busy && all_pending_done
io.wb.resp.bits.id := xact_id
quiesce() {}
// State machine updates and transaction handler metadata initialization
// On accepting a request, rebuild the block address from { tag, idx, cacheId }
// (the cacheId field is omitted when cacheIdBits is 0)
when(state === s_idle && io.wb.req.valid) {
xact_id := io.wb.req.bits.id
xact_way_en := io.wb.req.bits.way_en
xact_addr_block := (if (cacheIdBits == 0) Cat(io.wb.req.bits.tag, io.wb.req.bits.idx)
else Cat(io.wb.req.bits.tag, io.wb.req.bits.idx, UInt(cacheId, cacheIdBits)))
state := s_meta_read
}
// Every beat is scheduled for reading only if the block has to be released to outer memory
when (state === s_meta_resp && io.meta.resp.valid) {
pending_reads := Fill(innerDataBeats, needs_outer_release)
pending_coh := coh
when(needs_inner_probes) { initializeProbes() }
}
assert(!io.meta.resp.valid || io.meta.resp.bits.tag_match,
"L2 requested Writeback for block not present in cache")
}