dcfcac9530
The result of a store-conditional could be lost if it did not depend on the result of the corresponding load-reserved. This was because the MSHR did not update the client coherence state based on secondary requests: the LR would acquire the line in clientExclusiveClean, and we would then fail to update the metadata array to change the state to clientExclusiveDirty. The solution is to track whether a secondary acquire would cause the line to become dirty. If so, use M_XWR instead of the primary command to generate the updated coherence state.
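The core of the change, condensed from the MSHR logic in the file below (in the source, the two dirties_coh assignments live inside the larger primary- and secondary-miss when blocks):

    // Track whether any request merged into this miss will dirty the line.
    val dirties_coh = Reg(Bool())

    when (io.req_pri_val && io.req_pri_rdy) { // primary miss allocates the MSHR
      dirties_coh := isWrite(io.req_bits.cmd)
    }
    when (io.req_sec_val && io.req_sec_rdy) { // secondary misses merge into it
      dirties_coh := dirties_coh || isWrite(io.req_bits.cmd)
    }

    // If any merged request is a write, compute the post-refill coherence
    // state as if for a write (M_XWR), so the metadata write marks the line
    // clientExclusiveDirty even when the primary command was the LR.
    val coh_on_grant = req.old_meta.coh.onGrant(
      incoming = io.mem_grant.bits,
      pending = Mux(dirties_coh, M_XWR, req.cmd))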
// See LICENSE for license details.

package rocket

import Chisel._
import junctions._
import uncore.tilelink._
import uncore.coherence._
import uncore.agents._
import uncore.util._
import uncore.constants._
import cde.{Parameters, Field}
import Util._

case object WordBits extends Field[Int]
case object StoreDataQueueDepth extends Field[Int]
case object ReplayQueueDepth extends Field[Int]
case object NMSHRs extends Field[Int]
case object LRSCCycles extends Field[Int]

trait HasL1HellaCacheParameters extends HasL1CacheParameters {
  val wordBits = p(WordBits)
  val wordBytes = wordBits/8
  val wordOffBits = log2Up(wordBytes)
  val beatBytes = p(CacheBlockBytes) / outerDataBeats
  val beatWords = beatBytes / wordBytes
  val beatOffBits = log2Up(beatBytes)
  val idxMSB = untagBits-1
  val idxLSB = blockOffBits
  val offsetmsb = idxLSB-1
  val offsetlsb = wordOffBits
  val rowWords = rowBits/wordBits
  val doNarrowRead = coreDataBits * nWays % rowBits == 0
  val encDataBits = code.width(coreDataBits)
  val encRowBits = encDataBits*rowWords
  val sdqDepth = p(StoreDataQueueDepth)
  val nMSHRs = p(NMSHRs)
  val nIOMSHRs = 1
  val lrscCycles = p(LRSCCycles)

  require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
  require(isPow2(nSets))
  require(rowBits <= outerDataBits)
  require(!usingVM || untagBits <= pgIdxBits)
}

abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
  with HasL1HellaCacheParameters
abstract class L1HellaCacheBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
  with HasL1HellaCacheParameters

trait HasCoreMemOp extends HasCoreParameters {
  val addr = UInt(width = coreMaxAddrBits)
  val tag = Bits(width = coreDCacheReqTagBits)
  val cmd = Bits(width = M_SZ)
  val typ = Bits(width = MT_SZ)
}

trait HasCoreData extends HasCoreParameters {
  val data = Bits(width = coreDataBits)
}

trait HasSDQId extends HasL1HellaCacheParameters {
  val sdq_id = UInt(width = log2Up(sdqDepth))
}

trait HasMissInfo extends HasL1HellaCacheParameters {
  val tag_match = Bool()
  val old_meta = new L1Metadata
  val way_en = Bits(width = nWays)
}

class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p)
    with HasCoreMemOp {
  val phys = Bool()
}

class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData

class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p)
    with HasCoreMemOp
    with HasCoreData {
  val replay = Bool()
  val has_data = Bool()
  val data_word_bypass = Bits(width = coreDataBits)
  val store_data = Bits(width = coreDataBits)
}

class AlignmentExceptions extends Bundle {
  val ld = Bool()
  val st = Bool()
}

class HellaCacheExceptions extends Bundle {
  val ma = new AlignmentExceptions
  val pf = new AlignmentExceptions
}

// interface between D$ and processor/DTLB
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
  val req = Decoupled(new HellaCacheReq)
  val s1_kill = Bool(OUTPUT) // kill previous cycle's req
  val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
  val s2_nack = Bool(INPUT) // req from two cycles ago is rejected

  val resp = Valid(new HellaCacheResp).flip
  val replay_next = Bool(INPUT)
  val xcpt = (new HellaCacheExceptions).asInput
  val invalidate_lr = Bool(OUTPUT)
  val ordered = Bool(INPUT)
}

class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
  val way_en = Bits(width = nWays)
  val addr = Bits(width = untagBits)
}

class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) {
  val wmask = Bits(width = rowWords)
  val data = Bits(width = encRowBits)
}

class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p)

class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq {
  val tag = Bits(width = tagBits)
  override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove
}

class L1MetaWriteReq(implicit p: Parameters) extends
  MetaWriteReq[L1Metadata](new L1Metadata)

object L1Metadata {
  def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
    val meta = Wire(new L1Metadata)
    meta.tag := tag
    meta.coh := coh
    meta
  }
}
class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters {
  val coh = new ClientMetadata
}

class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
class ReplayInternal(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasSDQId

class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo
class MSHRReqInternal(implicit p: Parameters) extends ReplayInternal()(p) with HasMissInfo

class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId

class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters {
  val way_en = Bits(width = nWays)
}

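// Handles a single outstanding uncached (MMIO) access at a time: issues a
// Get, Put, or PutAtomic Acquire, waits for the Grant, returns the response
// to the core, and sends the concluding Finish.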
class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val req = Decoupled(new HellaCacheReq).flip
    val acquire = Decoupled(new Acquire)
    val grant = Valid(new GrantFromSrc).flip
    val finish = Decoupled(new FinishToDst)
    val resp = Decoupled(new HellaCacheResp)
    val replay_next = Bool(OUTPUT)
  }

  def beatOffset(addr: UInt) = addr.extract(beatOffBits - 1, wordOffBits)

  def wordFromBeat(addr: UInt, dat: UInt) = {
    val shift = Cat(beatOffset(addr), UInt(0, wordOffBits + log2Up(wordBytes)))
    (dat >> shift)(wordBits - 1, 0)
  }

  val req = Reg(new HellaCacheReq)
  val req_cmd_sc = req.cmd === M_XSC
  val grant_word = Reg(UInt(width = wordBits))
  val fq = Module(new FinishQueue(1))

  val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5)
  val state = Reg(init = s_idle)
  io.req.ready := (state === s_idle)

  fq.io.enq.valid := io.grant.valid && io.grant.bits.requiresAck()
  fq.io.enq.bits := io.grant.bits.makeFinish()
  io.finish.valid := fq.io.deq.valid && (state === s_finish)
  io.finish.bits := fq.io.deq.bits
  fq.io.deq.ready := io.finish.ready && (state === s_finish)

  val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes)
  val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBytes)

  val beat_mask = (storegen.mask << Cat(beatOffset(req.addr), UInt(0, wordOffBits)))
  val beat_data = Fill(beatWords, storegen.data)

  val addr_block = req.addr(paddrBits - 1, blockOffBits)
  val addr_beat = req.addr(blockOffBits - 1, beatOffBits)
  val addr_byte = req.addr(beatOffBits - 1, 0)

  val get_acquire = Get(
    client_xact_id = UInt(id),
    addr_block = addr_block,
    addr_beat = addr_beat,
    addr_byte = addr_byte,
    operand_size = req.typ,
    alloc = Bool(false))

  val put_acquire = Put(
    client_xact_id = UInt(id),
    addr_block = addr_block,
    addr_beat = addr_beat,
    data = beat_data,
    wmask = Some(beat_mask),
    alloc = Bool(false))

  val putAtomic_acquire = PutAtomic(
    client_xact_id = UInt(id),
    addr_block = addr_block,
    addr_beat = addr_beat,
    addr_byte = addr_byte,
    atomic_opcode = req.cmd,
    operand_size = req.typ,
    data = beat_data)

  io.acquire.valid := (state === s_acquire)
  io.acquire.bits := Mux(isAMO(req.cmd), putAtomic_acquire, Mux(isRead(req.cmd), get_acquire, put_acquire))

  io.replay_next := (state === s_grant) || io.resp.valid && !io.resp.ready
  io.resp.valid := (state === s_resp)
  io.resp.bits := req
  io.resp.bits.has_data := isRead(req.cmd)
  io.resp.bits.data := loadgen.data | req_cmd_sc
  io.resp.bits.store_data := req.data
  io.resp.bits.replay := Bool(true)

  when (io.req.fire()) {
    req := io.req.bits
    state := s_acquire
  }

  when (io.acquire.fire()) {
    state := s_grant
  }

  when (state === s_grant && io.grant.valid) {
    state := s_resp
    when (isRead(req.cmd)) {
      grant_word := wordFromBeat(req.addr, io.grant.bits.data)
    }
  }

  when (io.resp.fire()) {
    state := s_finish
  }

  when (io.finish.fire()) {
    state := s_idle
  }
}

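// Miss Status Holding Register: tracks one outstanding miss to a cache line,
// writing back the victim if needed, acquiring the line from outer memory,
// updating the metadata array, and replaying the requests queued against it.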
class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val req_pri_val = Bool(INPUT)
    val req_pri_rdy = Bool(OUTPUT)
    val req_sec_val = Bool(INPUT)
    val req_sec_rdy = Bool(OUTPUT)
    val req_bits = new MSHRReqInternal().asInput

    val idx_match = Bool(OUTPUT)
    val tag = Bits(OUTPUT, tagBits)

    val mem_req = Decoupled(new Acquire)
    val refill = new L1RefillReq().asOutput // Data is bypassed
    val meta_read = Decoupled(new L1MetaReadReq)
    val meta_write = Decoupled(new L1MetaWriteReq)
    val replay = Decoupled(new ReplayInternal)
    val mem_grant = Valid(new GrantFromSrc).flip
    val mem_finish = Decoupled(new FinishToDst)
    val wb_req = Decoupled(new WritebackReq)
    val probe_rdy = Bool(OUTPUT)
  }

  val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9)
  val state = Reg(init=s_invalid)

  def stateIsOneOf(check_states: Seq[UInt]): Bool =
    check_states.map(state === _).reduce(_ || _)

  def stateIsOneOf(st1: UInt, st2: UInt*): Bool =
    stateIsOneOf(st1 +: st2)

  val new_coh_state = Reg(init=ClientMetadata.onReset)
  val req = Reg(new MSHRReqInternal())
  val req_idx = req.addr(untagBits-1,blockOffBits)
  val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits)
  // We only accept secondary misses if we haven't yet sent an Acquire to outer memory
  // or if the Acquire that was sent will obtain a Grant with sufficient permissions
  // to let us replay this new request. I.e. we don't handle multiple outstanding
  // Acquires on the same block for now.
  val cmd_requires_second_acquire =
    req.old_meta.coh.requiresAcquireOnSecondaryMiss(req.cmd, io.req_bits.cmd)
  // Track whether or not a secondary acquire will cause the coherence state
  // to go from clean to dirty.
  val dirties_coh = Reg(Bool())
  val states_before_refill = Seq(s_wb_req, s_wb_resp, s_meta_clear)
  val gnt_multi_data = io.mem_grant.bits.hasMultibeatData()
  val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles)
  val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done)
  val sec_rdy = idx_match &&
                  (stateIsOneOf(states_before_refill) ||
                    (stateIsOneOf(s_refill_req, s_refill_resp) &&
                      !cmd_requires_second_acquire && !refill_done))

  val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth)))
  rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd)
  rpq.io.enq.bits := io.req_bits
  rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid

  val coh_on_grant = req.old_meta.coh.onGrant(
    incoming = io.mem_grant.bits,
    pending = Mux(dirties_coh, M_XWR, req.cmd))
  val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd)

  when (state === s_drain_rpq && !rpq.io.deq.valid) {
    state := s_invalid
  }
  when (state === s_meta_write_resp) {
    // this wait state allows us to catch RAW hazards on the tags via nack_victim
    state := s_drain_rpq
  }
  when (state === s_meta_write_req && io.meta_write.ready) {
    state := s_meta_write_resp
  }
  when (state === s_refill_resp && refill_done) {
    state := s_meta_write_req
    new_coh_state := coh_on_grant
  }
  when (io.mem_req.fire()) { // s_refill_req
    state := s_refill_resp
  }
  when (state === s_meta_clear && io.meta_write.ready) {
    state := s_refill_req
  }
  when (state === s_wb_resp && io.mem_grant.valid) {
    state := s_meta_clear
  }
  when (io.wb_req.fire()) { // s_wb_req
    state := Mux(io.wb_req.bits.requiresAck(), s_wb_resp, s_meta_clear)
  }
  when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req
    // If we get a secondary miss that needs more permissions before we've sent
    // out the primary miss's Acquire, we can upgrade the permissions we're
    // going to ask for in s_refill_req
    when (cmd_requires_second_acquire) {
      req.cmd := io.req_bits.cmd
    }
    dirties_coh := dirties_coh || isWrite(io.req_bits.cmd)
  }
  when (io.req_pri_val && io.req_pri_rdy) {
    val coh = io.req_bits.old_meta.coh
    req := io.req_bits
    dirties_coh := isWrite(io.req_bits.cmd)
    when (io.req_bits.tag_match) {
      when (coh.isHit(io.req_bits.cmd)) { // set dirty bit
        state := s_meta_write_req
        new_coh_state := coh_on_hit
      }.otherwise { // upgrade permissions
        state := s_refill_req
      }
    }.otherwise { // writeback if necessary and refill
      state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear)
    }
  }

  val fq = Module(new FinishQueue(1))
  val g = io.mem_grant.bits
  val can_finish = state === s_invalid || state === s_refill_req
  fq.io.enq.valid := io.mem_grant.valid && g.requiresAck() && refill_done
  fq.io.enq.bits := g.makeFinish()
  io.mem_finish.valid := fq.io.deq.valid && can_finish
  fq.io.deq.ready := io.mem_finish.ready && can_finish
  io.mem_finish.bits := fq.io.deq.bits

  io.idx_match := (state =/= s_invalid) && idx_match
  io.refill.way_en := req.way_en
  io.refill.addr := ((req_idx << log2Ceil(refillCycles)) | refill_cnt) << rowOffBits
  io.tag := req.addr >> untagBits
  io.req_pri_rdy := state === s_invalid
  io.req_sec_rdy := sec_rdy && rpq.io.enq.ready

  val meta_hazard = Reg(init=UInt(0,2))
  when (meta_hazard =/= UInt(0)) { meta_hazard := meta_hazard + 1 }
  when (io.meta_write.fire()) { meta_hazard := 1 }
  io.probe_rdy := !idx_match || (!stateIsOneOf(states_before_refill) && meta_hazard === 0)

  io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear
  io.meta_write.bits.idx := req_idx
  io.meta_write.bits.data.coh := Mux(state === s_meta_clear,
                                     req.old_meta.coh.onCacheControl(M_FLUSH),
                                     new_coh_state)
  io.meta_write.bits.data.tag := io.tag
  io.meta_write.bits.way_en := req.way_en

  io.wb_req.valid := state === s_wb_req
  io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback(
    client_xact_id = UInt(id),
    addr_block = Cat(req.old_meta.tag, req_idx))
  io.wb_req.bits.way_en := req.way_en

  io.mem_req.valid := state === s_refill_req && fq.io.enq.ready
  io.mem_req.bits := req.old_meta.coh.makeAcquire(
    addr_block = Cat(io.tag, req_idx).toUInt,
    client_xact_id = Bits(id),
    op_code = req.cmd)

  io.meta_read.valid := state === s_drain_rpq
  io.meta_read.bits.idx := req_idx
  io.meta_read.bits.tag := io.tag

  io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid
  io.replay.bits := rpq.io.deq.bits
  io.replay.bits.phys := Bool(true)
  io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(blockOffBits-1,0)).toUInt

  when (!io.meta_read.ready) {
    rpq.io.deq.ready := Bool(false)
    io.replay.bits.cmd := M_FLUSH_ALL /* nop */
  }
}

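// Bank of MSHRs for cacheable misses plus IOMSHRs for uncached accesses:
// allocates a free MSHR on a primary miss (or merges a secondary miss into a
// matching one), arbitrates the memory-side channels, and holds store data
// in the SDQ until the store is replayed.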
class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val req = Decoupled(new MSHRReq).flip
    val resp = Decoupled(new HellaCacheResp)
    val secondary_miss = Bool(OUTPUT)

    val mem_req = Decoupled(new Acquire)
    val refill = new L1RefillReq().asOutput
    val meta_read = Decoupled(new L1MetaReadReq)
    val meta_write = Decoupled(new L1MetaWriteReq)
    val replay = Decoupled(new Replay)
    val mem_grant = Valid(new GrantFromSrc).flip
    val mem_finish = Decoupled(new FinishToDst)
    val wb_req = Decoupled(new WritebackReq)

    val probe_rdy = Bool(OUTPUT)
    val fence_rdy = Bool(OUTPUT)
    val replay_next = Bool(OUTPUT)
  }

  // determine if the request is cacheable or not
  val cacheable = addrMap.isCacheable(io.req.bits.addr)

  val sdq_val = Reg(init=Bits(0, sdqDepth))
  val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0))
  val sdq_rdy = !sdq_val.andR
  val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd)
  val sdq = Mem(sdqDepth, io.req.bits.data)
  when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }

  val idxMatch = Wire(Vec(nMSHRs, Bool()))
  val tagList = Wire(Vec(nMSHRs, Bits(width = tagBits)))
  val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits

  val wbTagList = Wire(Vec(nMSHRs, Bits()))
  val refillMux = Wire(Vec(nMSHRs, new L1RefillReq))
  val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs))
  val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
  val mem_req_arb = Module(new LockingArbiter(
    new Acquire,
    nMSHRs + nIOMSHRs,
    outerDataBeats,
    Some((a: Acquire) => a.hasMultibeatData())))
  val mem_finish_arb = Module(new Arbiter(new FinishToDst, nMSHRs + nIOMSHRs))
  val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs))
  val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs))
  val alloc_arb = Module(new Arbiter(Bool(), nMSHRs))

  var idx_match = Bool(false)
  var pri_rdy = Bool(false)
  var sec_rdy = Bool(false)

  io.fence_rdy := true
  io.probe_rdy := true

  for (i <- 0 until nMSHRs) {
    val mshr = Module(new MSHR(i))

    idxMatch(i) := mshr.io.idx_match
    tagList(i) := mshr.io.tag
    wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits

    alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy
    mshr.io.req_pri_val := alloc_arb.io.in(i).ready

    mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match
    mshr.io.req_bits := io.req.bits
    mshr.io.req_bits.sdq_id := sdq_alloc_id

    meta_read_arb.io.in(i) <> mshr.io.meta_read
    meta_write_arb.io.in(i) <> mshr.io.meta_write
    mem_req_arb.io.in(i) <> mshr.io.mem_req
    mem_finish_arb.io.in(i) <> mshr.io.mem_finish
    wb_req_arb.io.in(i) <> mshr.io.wb_req
    replay_arb.io.in(i) <> mshr.io.replay

    mshr.io.mem_grant.valid := io.mem_grant.valid &&
                                 io.mem_grant.bits.client_xact_id === UInt(i)
    mshr.io.mem_grant.bits := io.mem_grant.bits
    refillMux(i) := mshr.io.refill

    pri_rdy = pri_rdy || mshr.io.req_pri_rdy
    sec_rdy = sec_rdy || mshr.io.req_sec_rdy
    idx_match = idx_match || mshr.io.idx_match

    when (!mshr.io.req_pri_rdy) { io.fence_rdy := false }
    when (!mshr.io.probe_rdy) { io.probe_rdy := false }
  }

  alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match

  io.meta_read <> meta_read_arb.io.out
  io.meta_write <> meta_write_arb.io.out
  io.mem_req <> mem_req_arb.io.out
  io.mem_finish <> mem_finish_arb.io.out
  io.wb_req <> wb_req_arb.io.out

  val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs))
  val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs))

  var mmio_rdy = Bool(false)
  io.replay_next := Bool(false)

  for (i <- 0 until nIOMSHRs) {
    val id = nMSHRs + i
    val mshr = Module(new IOMSHR(id))

    mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready
    mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready
    mshr.io.req.bits := io.req.bits

    mmio_rdy = mmio_rdy || mshr.io.req.ready

    mem_req_arb.io.in(id) <> mshr.io.acquire
    mem_finish_arb.io.in(id) <> mshr.io.finish

    mshr.io.grant.bits := io.mem_grant.bits
    mshr.io.grant.valid := io.mem_grant.valid &&
                             io.mem_grant.bits.client_xact_id === UInt(id)

    resp_arb.io.in(i) <> mshr.io.resp

    when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) }
    when (mshr.io.replay_next) { io.replay_next := Bool(true) }
  }

  mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable

  io.resp <> resp_arb.io.out
  io.req.ready := Mux(!cacheable, mmio_rdy,
                    Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy)
  io.secondary_miss := idx_match
  io.refill := refillMux(io.mem_grant.bits.client_xact_id)

  val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd)
  io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq))
  io.replay <> replay_arb.io.out

  when (io.replay.valid || sdq_enq) {
    sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(sdqDepth, free_sdq)) |
                 PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
  }
}

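// Streams a victim block out of the data array and sends it to outer memory
// as a Release, buffering rows into a full beat when the data array rows are
// narrower than a TileLink beat.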
class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val req = Decoupled(new WritebackReq).flip
    val meta_read = Decoupled(new L1MetaReadReq)
    val data_req = Decoupled(new L1DataReadReq)
    val data_resp = Bits(INPUT, encRowBits)
    val release = Decoupled(new Release)
  }

  val active = Reg(init=Bool(false))
  val r1_data_req_fired = Reg(init=Bool(false))
  val r2_data_req_fired = Reg(init=Bool(false))
  val data_req_cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width
  val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1))
  val beat_done = buf_v.andR
  val (beat_cnt, all_beats_done) = Counter(io.release.fire(), outerDataBeats)
  val req = Reg(new WritebackReq)

  io.release.valid := false
  when (active) {
    r1_data_req_fired := false
    r2_data_req_fired := r1_data_req_fired
    when (io.data_req.fire() && io.meta_read.fire()) {
      r1_data_req_fired := true
      data_req_cnt := data_req_cnt + 1
    }
    when (r2_data_req_fired) {
      io.release.valid := beat_done
      when (beat_done) {
        when (!io.release.ready) {
          r1_data_req_fired := false
          r2_data_req_fired := false
          data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1)
        } .otherwise { if(refillCyclesPerBeat > 1) buf_v := 0 }
      }
      when (!r1_data_req_fired) {
        // We're done if this is the final data request and the Release can be sent
        active := data_req_cnt < UInt(refillCycles) || !io.release.ready
      }
    }
  }
  when (io.req.fire()) {
    active := true
    data_req_cnt := 0
    if(refillCyclesPerBeat > 1) buf_v := 0
    req := io.req.bits
  }

  io.req.ready := !active

  val req_idx = req.addr_block(idxBits-1, 0)
  val fire = active && data_req_cnt < UInt(refillCycles)

  // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed
  io.meta_read.valid := fire
  io.meta_read.bits.idx := req_idx
  io.meta_read.bits.tag := req.addr_block >> idxBits

  io.data_req.valid := fire
  io.data_req.bits.way_en := req.way_en
  io.data_req.bits.addr := (if(refillCycles > 1)
                              Cat(req_idx, data_req_cnt(log2Up(refillCycles)-1,0))
                            else req_idx) << rowOffBits

  io.release.bits := req
  io.release.bits.addr_beat := beat_cnt
  io.release.bits.data := (if(refillCyclesPerBeat > 1) {
    // If the cache rows are narrower than a TLDataBeat,
    // then buffer enough data_resps to make a whole beat
    val data_buf = Reg(Bits())
    when(active && r2_data_req_fired && !beat_done) {
      data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat)*encRowBits-1, encRowBits))
      buf_v := (if(refillCyclesPerBeat > 2)
                  Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1))
                else UInt(1))
    }
    Cat(io.data_resp, data_buf)
  } else { io.data_resp })
}

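// Services incoming coherence probes: reads the tags, writes back dirty data
// when the line requires it, sends a Release in reply, and updates the
// line's coherence metadata.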
class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val req = Decoupled(new ProbeInternal).flip
    val rep = Decoupled(new Release)
    val meta_read = Decoupled(new L1MetaReadReq)
    val meta_write = Decoupled(new L1MetaWriteReq)
    val wb_req = Decoupled(new WritebackReq)
    val way_en = Bits(INPUT, nWays)
    val mshr_rdy = Bool(INPUT)
    val block_state = new ClientMetadata().asInput
  }

  val (s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req ::
       s_mshr_resp :: s_release :: s_writeback_req :: s_writeback_resp ::
       s_meta_write :: Nil) = Enum(UInt(), 9)
  val state = Reg(init=s_invalid)
  val old_coh = Reg(new ClientMetadata)
  val way_en = Reg(Bits())
  val req = Reg(new ProbeInternal)
  val tag_matches = way_en.orR

  val miss_coh = ClientMetadata.onReset
  val reply_coh = Mux(tag_matches, old_coh, miss_coh)
  val reply = reply_coh.makeRelease(req)
  io.req.ready := state === s_invalid
  io.rep.valid := state === s_release
  io.rep.bits := reply

  assert(!io.rep.valid || !io.rep.bits.hasData(),
    "ProbeUnit should not send releases with data")

  io.meta_read.valid := state === s_meta_read
  io.meta_read.bits.idx := req.addr_block
  io.meta_read.bits.tag := req.addr_block >> idxBits

  io.meta_write.valid := state === s_meta_write
  io.meta_write.bits.way_en := way_en
  io.meta_write.bits.idx := req.addr_block
  io.meta_write.bits.data.tag := req.addr_block >> idxBits
  io.meta_write.bits.data.coh := old_coh.onProbe(req)

  io.wb_req.valid := state === s_writeback_req
  io.wb_req.bits := reply
  io.wb_req.bits.way_en := way_en

  // state === s_invalid
  when (io.req.fire()) {
    state := s_meta_read
    req := io.req.bits
  }

  // state === s_meta_read
  when (io.meta_read.fire()) {
    state := s_meta_resp
  }

  // we need to wait one cycle for the metadata to be read from the array
  when (state === s_meta_resp) {
    state := s_mshr_req
  }

  when (state === s_mshr_req) {
    state := s_mshr_resp
    old_coh := io.block_state
    way_en := io.way_en
    // if the read didn't go through, we need to retry
    when (!io.mshr_rdy) { state := s_meta_read }
  }

  when (state === s_mshr_resp) {
    val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback()
    state := Mux(needs_writeback, s_writeback_req, s_release)
  }

  when (state === s_release && io.rep.ready) {
    state := Mux(tag_matches, s_meta_write, s_invalid)
  }

  // state === s_writeback_req
  when (io.wb_req.fire()) {
    state := s_writeback_resp
  }

  // wait for the writeback request to finish before updating the metadata
  when (state === s_writeback_resp && io.wb_req.ready) {
    state := s_meta_write
  }

  when (io.meta_write.fire()) {
    state := s_invalid
  }
}

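// Data storage for the cache, built from SeqMems. When doNarrowRead is set,
// the arrays are organized so a read fetches only one word per way rather
// than a full row per way; otherwise each way reads out a full row.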
class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val read = Decoupled(new L1DataReadReq).flip
    val write = Decoupled(new L1DataWriteReq).flip
    val resp = Vec(nWays, Bits(OUTPUT, encRowBits))
  }

  val waddr = io.write.bits.addr >> rowOffBits
  val raddr = io.read.bits.addr >> rowOffBits

  if (doNarrowRead) {
    for (w <- 0 until nWays by rowWords) {
      val wway_en = io.write.bits.way_en(w+rowWords-1,w)
      val rway_en = io.read.bits.way_en(w+rowWords-1,w)
      val resp = Wire(Vec(rowWords, Bits(width = encRowBits)))
      val r_raddr = RegEnable(io.read.bits.addr, io.read.valid)
      for (p <- 0 until resp.size) {
        val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits)))
        when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) {
          val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p))
          array.write(waddr, data, wway_en.toBools)
        }
        resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits
      }
      for (dw <- 0 until rowWords) {
        val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw)))
        val resp_mux =
          if (r.size == 1) r
          else Vec(r(r_raddr(rowOffBits-1,wordOffBits)), r.tail:_*)
        io.resp(w+dw) := resp_mux.toBits
      }
    }
  } else {
    for (w <- 0 until nWays) {
      val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits)))
      when (io.write.bits.way_en(w) && io.write.valid) {
        val data = Vec.tabulate(rowWords)(i => io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i))
        array.write(waddr, data, io.write.bits.wmask.toBools)
      }
      io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid).toBits
    }
  }

  io.read.ready := Bool(true)
  io.write.ready := Bool(true)
}

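// Top-level non-blocking L1 data cache: a multi-stage access pipeline with a
// DTLB, tag and data arrays, MSHRs for outstanding misses, a writeback unit
// for victim blocks, and a probe unit for incoming coherence requests.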
class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val cpu = (new HellaCacheIO).flip
    val ptw = new TLBPTWIO()
    val mem = new ClientTileLinkIO
  }

  require(isPow2(nWays)) // TODO: relax this

  val wb = Module(new WritebackUnit)
  val prober = Module(new ProbeUnit)
  val mshrs = Module(new MSHRFile)

  io.cpu.req.ready := Bool(true)
  val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
  val s1_req = Reg(io.cpu.req.bits)
  val s1_valid_masked = s1_valid && !io.cpu.s1_kill
  val s1_replay = Reg(init=Bool(false))
  val s1_clk_en = Reg(Bool())

  val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
  val s2_req = Reg(io.cpu.req.bits)
  val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
  val s2_recycle = Wire(Bool())
  val s2_valid_masked = Wire(Bool())

  val s3_valid = Reg(init=Bool(false))
  val s3_req = Reg(io.cpu.req.bits)
  val s3_way = Reg(Bits())

  val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en)
  val s1_read = isRead(s1_req.cmd)
  val s1_write = isWrite(s1_req.cmd)
  val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)

  val dtlb = Module(new TLB)
  io.ptw <> dtlb.io.ptw
  dtlb.io.req.valid := s1_valid_masked && s1_readwrite
  dtlb.io.req.bits.passthrough := s1_req.phys
  dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
  dtlb.io.req.bits.instruction := Bool(false)
  dtlb.io.req.bits.store := s1_write
  when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) }

  when (io.cpu.req.valid) {
    s1_req := io.cpu.req.bits
  }
  when (wb.io.meta_read.valid) {
    s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits
    s1_req.phys := Bool(true)
  }
  when (prober.io.meta_read.valid) {
    s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits
    s1_req.phys := Bool(true)
  }
  when (mshrs.io.replay.valid) {
    s1_req := mshrs.io.replay.bits
  }
  when (s2_recycle) {
    s1_req := s2_req
  }
  val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))

  when (s1_clk_en) {
    s2_req.typ := s1_req.typ
    s2_req.phys := s1_req.phys
    s2_req.addr := s1_addr
    when (s1_write) {
      s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data)
    }
    when (s1_recycled) { s2_req.data := s1_req.data }
    s2_req.tag := s1_req.tag
    s2_req.cmd := s1_req.cmd
  }

  val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
  io.cpu.xcpt.ma.ld := s1_read && misaligned
  io.cpu.xcpt.ma.st := s1_write && misaligned
  io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld
  io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st

  assert (!(Reg(next=
      (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) &&
    s2_valid_masked),
    "DCache exception occurred - cache response not killed.")

  // tags
  def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
  val meta = Module(new MetadataArray(onReset _))
  val metaReadArb = Module(new Arbiter(new MetaReadReq, 5))
  val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2))
  meta.io.read <> metaReadArb.io.out
  meta.io.write <> metaWriteArb.io.out

  // data
  val data = Module(new DataArray)
  val readArb = Module(new Arbiter(new L1DataReadReq, 4))
  val writeArb = Module(new Arbiter(new L1DataWriteReq, 2))
  data.io.write.valid := writeArb.io.out.valid
  writeArb.io.out.ready := data.io.write.ready
  data.io.write.bits := writeArb.io.out.bits
  val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i)))
  data.io.write.bits.data := wdata_encoded.toBits

  // tag read for new requests
  metaReadArb.io.in(4).valid := io.cpu.req.valid
  metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits
  when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) }

  // data read for new requests
  readArb.io.in(3).valid := io.cpu.req.valid
  readArb.io.in(3).bits.addr := io.cpu.req.bits.addr
  readArb.io.in(3).bits.way_en := ~UInt(0, nWays)
  when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) }

  // recycled requests
  metaReadArb.io.in(0).valid := s2_recycle
  metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits
  readArb.io.in(0).valid := s2_recycle
  readArb.io.in(0).bits.addr := s2_req.addr
  readArb.io.in(0).bits.way_en := ~UInt(0, nWays)

  // tag check and way muxing
  def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
  val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).toBits
  val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.isValid()).toBits
  s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug
  val s1_writeback = s1_clk_en && !s1_valid && !s1_replay
  val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
  val s2_tag_match = s2_tag_match_way.orR
  val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
  val s2_hit = s2_tag_match &&
    s2_hit_state.isHit(s2_req.cmd) &&
    s2_hit_state === s2_hit_state.onHit(s2_req.cmd)

  // load-reserved/store-conditional
  val lrsc_count = Reg(init=UInt(0))
  val lrsc_valid = lrsc_count.orR
  val lrsc_addr = Reg(UInt())
  val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC)
  val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits)
  val s2_sc_fail = s2_sc && !s2_lrsc_addr_match
  when (lrsc_valid) { lrsc_count := lrsc_count - 1 }
  when (s2_valid_masked && s2_hit || s2_replay) {
    when (s2_lr) {
      when (!lrsc_valid) { lrsc_count := lrscCycles-1 }
      lrsc_addr := s2_req.addr >> blockOffBits
    }
    when (s2_sc) {
      lrsc_count := 0
    }
  }
  when (io.cpu.invalidate_lr) { lrsc_count := 0 }

  val s2_data = Wire(Vec(nWays, Bits(width=encRowBits)))
  for (w <- 0 until nWays) {
    val regs = Reg(Vec(rowWords, Bits(width = encDataBits)))
    val en1 = s1_clk_en && s1_tag_eq_way(w)
    for (i <- 0 until regs.size) {
      val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback)
      when (en) { regs(i) := data.io.resp(w) >> encDataBits*i }
    }
    s2_data(w) := regs.toBits
  }
  val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
  val s2_data_decoded = (0 until rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i)))
  val s2_data_corrected = s2_data_decoded.map(_.corrected).toBits
  val s2_data_uncorrected = s2_data_decoded.map(_.uncorrected).toBits
  val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes))
  val s2_data_correctable = s2_data_decoded.map(_.correctable).toBits()(s2_word_idx)

  // store/amo hits
  s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd)
  val amoalu = Module(new AMOALU)
  when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) {
    s3_req := s2_req
    s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out)
    s3_way := s2_tag_match_way
  }

  writeArb.io.in(0).bits.addr := s3_req.addr
  writeArb.io.in(0).bits.wmask := UIntToOH(s3_req.addr.extract(rowOffBits-1,offsetlsb))
  writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data)
  writeArb.io.in(0).valid := s3_valid
  writeArb.io.in(0).bits.way_en := s3_way

  // replacement policy
  val replacer = p(Replacer)()
  val s1_replaced_way_en = UIntToOH(replacer.way)
  val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
  val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)

  // miss handling
  mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd))
  mshrs.io.req.bits := s2_req
  mshrs.io.req.bits.tag_match := s2_tag_match
  mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta)
  mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
  mshrs.io.req.bits.data := s2_req.data
  when (mshrs.io.req.fire()) { replacer.miss }
  io.mem.acquire <> mshrs.io.mem_req

  // replays
  readArb.io.in(1).valid := mshrs.io.replay.valid
  readArb.io.in(1).bits := mshrs.io.replay.bits
  readArb.io.in(1).bits.way_en := ~UInt(0, nWays)
  mshrs.io.replay.ready := readArb.io.in(1).ready
  s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready
  metaReadArb.io.in(1) <> mshrs.io.meta_read
  metaWriteArb.io.in(0) <> mshrs.io.meta_write

  // probes and releases
  val releaseArb = Module(new LockingArbiter(
    new Release, 2, outerDataBeats,
    Some((r: Release) => r.hasMultibeatData())))
  io.mem.release <> releaseArb.io.out

  prober.io.req.valid := io.mem.probe.valid && !lrsc_valid
  io.mem.probe.ready := prober.io.req.ready && !lrsc_valid
  prober.io.req.bits := io.mem.probe.bits
  releaseArb.io.in(1) <> prober.io.rep
  prober.io.way_en := s2_tag_match_way
  prober.io.block_state := s2_hit_state
  metaReadArb.io.in(2) <> prober.io.meta_read
  metaWriteArb.io.in(1) <> prober.io.meta_write
  prober.io.mshr_rdy := mshrs.io.probe_rdy

  // refills
  val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
  mshrs.io.mem_grant.valid := narrow_grant.fire()
  mshrs.io.mem_grant.bits := narrow_grant.bits
  narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData()
  /* The last clause here is necessary in order to prevent the responses for
   * the IOMSHRs from being written into the data array. It works because the
   * IOMSHR ids start right after the ones for the regular MSHRs. */
  writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() &&
    narrow_grant.bits.client_xact_id < UInt(nMSHRs)
  writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
  writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
  writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords)
  writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0)
  data.io.read <> readArb.io.out
  readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked
  io.mem.finish <> mshrs.io.mem_finish

  // writebacks
  val wbArb = Module(new Arbiter(new WritebackReq, 2))
  wbArb.io.in(0) <> prober.io.wb_req
  wbArb.io.in(1) <> mshrs.io.wb_req
  wb.io.req <> wbArb.io.out
  metaReadArb.io.in(3) <> wb.io.meta_read
  readArb.io.in(2) <> wb.io.data_req
  wb.io.data_resp := s2_data_corrected
  releaseArb.io.in(0) <> wb.io.release

  // store->load bypassing
  val s4_valid = Reg(next=s3_valid, init=Bool(false))
  val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid)
  val bypasses = List(
    ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out),
    (s3_valid, s3_req, s3_req.data),
    (s4_valid, s4_req, s4_req.data)
  ).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3))
  val s2_store_bypass_data = Reg(Bits(width = coreDataBits))
  val s2_store_bypass = Reg(Bool())
  when (s1_clk_en) {
    s2_store_bypass := false
    when (bypasses.map(_._1).reduce(_||_)) {
      s2_store_bypass_data := PriorityMux(bypasses)
      s2_store_bypass := true
    }
  }

  // load data subword mux/sign extension
  val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
  val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
  val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes)

  amoalu.io.addr := s2_req.addr
  amoalu.io.cmd := s2_req.cmd
  amoalu.io.typ := s2_req.typ
  amoalu.io.lhs := s2_data_word
  amoalu.io.rhs := s2_req.data

  // nack it like it's hot
  val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss ||
    s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready
  val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay)
  when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) }
  val s2_nack_victim = s2_hit && mshrs.io.secondary_miss
  val s2_nack_miss = !s2_hit && !mshrs.io.req.ready
  val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss
  s2_valid_masked := s2_valid && !s2_nack

  val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable
  val s2_recycle_next = Reg(init=Bool(false))
  when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc }
  s2_recycle := s2_recycle_ecc || s2_recycle_next

  // after a nack, block until nack condition resolves to save energy
  val block_miss = Reg(init=Bool(false))
  block_miss := (s2_valid || block_miss) && s2_nack_miss
  when (block_miss) {
    io.cpu.req.ready := Bool(false)
  }

  val cache_resp = Wire(Valid(new HellaCacheResp))
  cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
  cache_resp.bits := s2_req
  cache_resp.bits.has_data := isRead(s2_req.cmd)
  cache_resp.bits.data := loadgen.data | s2_sc_fail
  cache_resp.bits.store_data := s2_req.data
  cache_resp.bits.replay := s2_replay

  val uncache_resp = Wire(Valid(new HellaCacheResp))
  uncache_resp.bits := mshrs.io.resp.bits
  uncache_resp.valid := mshrs.io.resp.valid
  mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay))

  io.cpu.s2_nack := s2_valid && s2_nack
  io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp)
  io.cpu.resp.bits.data_word_bypass := loadgen.wordData
  io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
  io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next
}

/**
 * This module buffers requests made by the SimpleHellaCacheIF in case they
 * are nacked. Nacked requests must be replayed in order, and no other requests
 * must be allowed to go through until the replayed requests are successfully
 * completed.
 */
class SimpleHellaCacheIFReplayQueue(depth: Int)
    (implicit val p: Parameters) extends Module
    with HasL1HellaCacheParameters {
  val io = new Bundle {
    val req = Decoupled(new HellaCacheReq).flip
    val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip
    val resp = Valid(new HellaCacheResp).flip
    val replay = Decoupled(new HellaCacheReq)
  }

  // Registers to store the sent request
  // When a request is sent the first time,
  // it is stored in one of the reqs registers
  // and the corresponding inflight bit is set.
  // The reqs register will be deallocated once the request is
  // successfully completed.
  val inflight = Reg(init = UInt(0, depth))
  val reqs = Reg(Vec(depth, new HellaCacheReq))

  // The nack queue stores the index of nacked requests (in the reqs vector)
  // in the order that they were nacked. A request is enqueued onto nackq
  // when it is newly nacked (i.e. not a nack for a previous replay).
  // The head of the nack queue will be replayed until it is
  // successfully completed, at which time the request is dequeued.
  // No new requests will be made or other replays attempted until the head
  // of the nackq is successfully completed.
  val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth))
  val replaying = Reg(init = Bool(false))

  val next_inflight_onehot = PriorityEncoderOH(~inflight)
  val next_inflight = OHToUInt(next_inflight_onehot)

  val next_replay = nackq.io.deq.bits
  val next_replay_onehot = UIntToOH(next_replay)
  val next_replay_req = reqs(next_replay)

  // Keep sending the head of the nack queue until it succeeds
  io.replay.valid := nackq.io.deq.valid && !replaying
  io.replay.bits := next_replay_req
  // Don't allow new requests if there are replays waiting
  // or something being nacked.
  io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid

  // Match on the tags to determine the index of nacks or responses
  val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight
  val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight

  val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag
  val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag

  // Enqueue to the nack queue if there is a nack that is not in response to
  // the previous replay
  nackq.io.enq.valid := io.nack.valid && !nack_head
  nackq.io.enq.bits := OHToUInt(nack_onehot)
  assert(!nackq.io.enq.valid || nackq.io.enq.ready,
    "SimpleHellaCacheIF: ReplayQueue nack queue overflow")

  // Dequeue from the nack queue if the last replay was successfully completed
  nackq.io.deq.ready := replay_complete
  assert(!nackq.io.deq.ready || nackq.io.deq.valid,
    "SimpleHellaCacheIF: ReplayQueue nack queue underflow")

  // Set inflight bit when a request is made
  // Clear it when it is successfully completed
  inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) &
    ~Mux(io.resp.valid, resp_onehot, UInt(0))

  when (io.req.fire()) {
    reqs(next_inflight) := io.req.bits
  }

  // Only one replay outstanding at a time
  when (io.replay.fire()) { replaying := Bool(true) }
  when (nack_head || replay_complete) { replaying := Bool(false) }
}

// exposes a sane decoupled request interface
class SimpleHellaCacheIF(implicit p: Parameters) extends Module
{
  val io = new Bundle {
    val requestor = new HellaCacheIO().flip
    val cache = new HellaCacheIO
  }

  val replayq = Module(new SimpleHellaCacheIFReplayQueue(2))
  val req_arb = Module(new Arbiter(new HellaCacheReq, 2))

  val req_helper = DecoupledHelper(
    req_arb.io.in(1).ready,
    replayq.io.req.ready,
    io.requestor.req.valid)

  req_arb.io.in(0) <> replayq.io.replay
  req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready)
  req_arb.io.in(1).bits := io.requestor.req.bits
  io.requestor.req.ready := req_helper.fire(io.requestor.req.valid)
  replayq.io.req.valid := req_helper.fire(replayq.io.req.ready)
  replayq.io.req.bits := io.requestor.req.bits

  val s0_req_fire = io.cache.req.fire()
  val s1_req_fire = Reg(next = s0_req_fire)
  val s2_req_fire = Reg(next = s1_req_fire)
  val s1_req_tag = Reg(next = io.cache.req.bits.tag)
  val s2_req_tag = Reg(next = s1_req_tag)
  val s2_kill = Reg(next = io.cache.s1_kill)

  io.cache.invalidate_lr := io.requestor.invalidate_lr
  io.cache.req <> req_arb.io.out
  io.cache.req.bits.phys := Bool(true)
  io.cache.s1_kill := io.cache.s2_nack
  io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)

  replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire
  replayq.io.nack.bits := s2_req_tag
  replayq.io.resp := io.cache.resp
  io.requestor.resp := io.cache.resp

  assert(!Reg(next = io.cache.req.fire()) ||
    !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st ||
      io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st),
    "SimpleHellaCacheIF exception")
}