// See LICENSE for license details. package rocket import Chisel._ import junctions._ import uncore.tilelink._ import uncore.coherence._ import uncore.agents._ import uncore.util._ import uncore.constants._ import cde.{Parameters, Field} import Util._ case class DCacheConfig( nMSHRs: Int = 1, nSDQ: Int = 17, nRPQ: Int = 16) case object DCacheKey extends Field[DCacheConfig] trait HasL1HellaCacheParameters extends HasL1CacheParameters { val wordBits = xLen // really, xLen max fLen val wordBytes = wordBits/8 val wordOffBits = log2Up(wordBytes) val beatBytes = p(CacheBlockBytes) / outerDataBeats val beatWords = beatBytes / wordBytes val beatOffBits = log2Up(beatBytes) val idxMSB = untagBits-1 val idxLSB = blockOffBits val offsetmsb = idxLSB-1 val offsetlsb = wordOffBits val rowWords = rowBits/wordBits val doNarrowRead = coreDataBits * nWays % rowBits == 0 val encDataBits = code.width(coreDataBits) val encRowBits = encDataBits*rowWords val nIOMSHRs = 1 val lrscCycles = 32 // ISA requires 16-insn LRSC sequences to succeed require(isPow2(nSets)) require(rowBits <= outerDataBits) require(!usingVM || untagBits <= pgIdxBits) } abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module with HasL1HellaCacheParameters abstract class L1HellaCacheBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p) with HasL1HellaCacheParameters trait HasCoreMemOp extends HasCoreParameters { val addr = UInt(width = coreMaxAddrBits) val tag = Bits(width = dcacheReqTagBits) val cmd = Bits(width = M_SZ) val typ = Bits(width = MT_SZ) } trait HasCoreData extends HasCoreParameters { val data = Bits(width = coreDataBits) } trait HasMissInfo extends HasL1HellaCacheParameters { val tag_match = Bool() val old_meta = new L1Metadata val way_en = Bits(width = nWays) } class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p) with HasCoreMemOp { val phys = Bool() } class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p) with HasCoreMemOp with HasCoreData { val replay = Bool() val has_data = Bool() val data_word_bypass = Bits(width = coreDataBits) val store_data = Bits(width = coreDataBits) } class AlignmentExceptions extends Bundle { val ld = Bool() val st = Bool() } class HellaCacheExceptions extends Bundle { val ma = new AlignmentExceptions val pf = new AlignmentExceptions } // interface between D$ and processor/DTLB class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new HellaCacheReq) val s1_kill = Bool(OUTPUT) // kill previous cycle's req val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req val s2_nack = Bool(INPUT) // req from two cycles ago is rejected val resp = Valid(new HellaCacheResp).flip val replay_next = Bool(INPUT) val xcpt = (new HellaCacheExceptions).asInput val invalidate_lr = Bool(OUTPUT) val ordered = Bool(INPUT) } class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { val way_en = Bits(width = nWays) val addr = Bits(width = untagBits) } class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) { val wmask = Bits(width = rowWords) val data = Bits(width = encRowBits) } class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p) class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq { val tag = Bits(width = tagBits) override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove } class L1MetaWriteReq(implicit p: Parameters) extends MetaWriteReq[L1Metadata](new L1Metadata) object L1Metadata { def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { val meta = Wire(new L1Metadata) meta.tag := tag meta.coh := coh meta } } class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters { val coh = new ClientMetadata } class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData class ReplayInternal(cfg: DCacheConfig)(implicit p: Parameters) extends HellaCacheReqInternal()(p) { val sdq_id = UInt(width = log2Up(cfg.nSDQ)) override def cloneType = new ReplayInternal(cfg)(p).asInstanceOf[this.type] } class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo class MSHRReqInternal(cfg: DCacheConfig)(implicit p: Parameters) extends ReplayInternal(cfg)(p) with HasMissInfo { override def cloneType = new MSHRReqInternal(cfg)(p).asInstanceOf[this.type] } class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters { val way_en = Bits(width = nWays) } class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new HellaCacheReq).flip val acquire = Decoupled(new Acquire) val grant = Valid(new GrantFromSrc).flip val finish = Decoupled(new FinishToDst) val resp = Decoupled(new HellaCacheResp) val replay_next = Bool(OUTPUT) } def beatOffset(addr: UInt) = addr.extract(beatOffBits - 1, wordOffBits) def wordFromBeat(addr: UInt, dat: UInt) = { val shift = Cat(beatOffset(addr), UInt(0, wordOffBits + log2Up(wordBytes))) (dat >> shift)(wordBits - 1, 0) } val req = Reg(new HellaCacheReq) val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) val fq = Module(new FinishQueue(1)) val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5) val state = Reg(init = s_idle) io.req.ready := (state === s_idle) fq.io.enq.valid := io.grant.valid && io.grant.bits.requiresAck() fq.io.enq.bits := io.grant.bits.makeFinish() io.finish.valid := fq.io.deq.valid && (state === s_finish) io.finish.bits := fq.io.deq.bits fq.io.deq.ready := io.finish.ready && (state === s_finish) val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) val loadgen = new LoadGen(req.typ, mtSigned(req.typ), req.addr, grant_word, req_cmd_sc, wordBytes) val beat_mask = (storegen.mask << Cat(beatOffset(req.addr), UInt(0, wordOffBits))) val beat_data = Fill(beatWords, storegen.data) val addr_block = req.addr(paddrBits - 1, blockOffBits) val addr_beat = req.addr(blockOffBits - 1, beatOffBits) val addr_byte = req.addr(beatOffBits - 1, 0) val get_acquire = Get( client_xact_id = UInt(id), addr_block = addr_block, addr_beat = addr_beat, addr_byte = addr_byte, operand_size = req.typ, alloc = Bool(false)) val put_acquire = Put( client_xact_id = UInt(id), addr_block = addr_block, addr_beat = addr_beat, data = beat_data, wmask = Some(beat_mask), alloc = Bool(false)) val putAtomic_acquire = PutAtomic( client_xact_id = UInt(id), addr_block = addr_block, addr_beat = addr_beat, addr_byte = addr_byte, atomic_opcode = req.cmd, operand_size = req.typ, data = beat_data) io.acquire.valid := (state === s_acquire) io.acquire.bits := Mux(isAMO(req.cmd), putAtomic_acquire, Mux(isRead(req.cmd), get_acquire, put_acquire)) io.replay_next := (state === s_grant) || io.resp.valid && !io.resp.ready io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) io.resp.bits.data := loadgen.data | req_cmd_sc io.resp.bits.store_data := req.data io.resp.bits.replay := Bool(true) when (io.req.fire()) { req := io.req.bits state := s_acquire } when (io.acquire.fire()) { state := s_grant } when (state === s_grant && io.grant.valid) { state := s_resp when (isRead(req.cmd)) { grant_word := wordFromBeat(req.addr, io.grant.bits.data) } } when (io.resp.fire()) { state := s_finish } when (io.finish.fire()) { state := s_idle } } class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReqInternal(cfg).asInput val idx_match = Bool(OUTPUT) val tag = Bits(OUTPUT, tagBits) val mem_req = Decoupled(new Acquire) val refill = new L1RefillReq().asOutput // Data is bypassed val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new ReplayInternal(cfg)) val mem_grant = Valid(new GrantFromSrc).flip val mem_finish = Decoupled(new FinishToDst) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) val new_coh_state = Reg(init=ClientMetadata.onReset) val req = Reg(new MSHRReqInternal(cfg)) val req_idx = req.addr(untagBits-1,blockOffBits) val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits) // We only accept secondary misses if we haven't yet sent an Acquire to outer memory // or if the Acquire that was sent will obtain a Grant with sufficient permissions // to let us replay this new request. I.e. we don't handle multiple outstanding // Acquires on the same block for now. val cmd_requires_second_acquire = req.old_meta.coh.requiresAcquireOnSecondaryMiss(req.cmd, io.req_bits.cmd) // Track whether or not a secondary acquire will cause the coherence state // to go from clean to dirty. val dirties_coh = Reg(Bool()) val states_before_refill = Seq(s_wb_req, s_wb_resp, s_meta_clear) val gnt_multi_data = io.mem_grant.bits.hasMultibeatData() val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) val sec_rdy = idx_match && (state.isOneOf(states_before_refill) || (state.isOneOf(s_refill_req, s_refill_resp) && !cmd_requires_second_acquire && !refill_done)) val rpq = Module(new Queue(new ReplayInternal(cfg), cfg.nRPQ)) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) rpq.io.enq.bits := io.req_bits rpq.io.deq.ready := (io.replay.ready && state === s_drain_rpq) || state === s_invalid val coh_on_grant = req.old_meta.coh.onGrant( incoming = io.mem_grant.bits, pending = Mux(dirties_coh, M_XWR, req.cmd)) val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd) when (state === s_drain_rpq && !rpq.io.deq.valid) { state := s_invalid } when (state === s_meta_write_resp) { // this wait state allows us to catch RAW hazards on the tags via nack_victim state := s_drain_rpq } when (state === s_meta_write_req && io.meta_write.ready) { state := s_meta_write_resp } when (state === s_refill_resp && refill_done) { state := s_meta_write_req new_coh_state := coh_on_grant } when (io.mem_req.fire()) { // s_refill_req state := s_refill_resp } when (state === s_meta_clear && io.meta_write.ready) { state := s_refill_req } when (state === s_wb_resp && io.mem_grant.valid) { state := s_meta_clear } when (io.wb_req.fire()) { // s_wb_req state := Mux(io.wb_req.bits.requiresAck(), s_wb_resp, s_meta_clear) } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req //If we get a secondary miss that needs more permissions before we've sent // out the primary miss's Acquire, we can upgrade the permissions we're // going to ask for in s_refill_req when(cmd_requires_second_acquire) { req.cmd := io.req_bits.cmd } dirties_coh := dirties_coh || isWrite(io.req_bits.cmd) } when (io.req_pri_val && io.req_pri_rdy) { val coh = io.req_bits.old_meta.coh req := io.req_bits dirties_coh := isWrite(io.req_bits.cmd) when (io.req_bits.tag_match) { when(coh.isHit(io.req_bits.cmd)) { // set dirty bit state := s_meta_write_req new_coh_state := coh_on_hit }.otherwise { // upgrade permissions state := s_refill_req } }.otherwise { // writback if necessary and refill state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear) } } val fq = Module(new FinishQueue(1)) val g = io.mem_grant.bits val can_finish = state.isOneOf(s_invalid, s_refill_req) fq.io.enq.valid := io.mem_grant.valid && g.requiresAck() && refill_done fq.io.enq.bits := g.makeFinish() io.mem_finish.valid := fq.io.deq.valid && can_finish fq.io.deq.ready := io.mem_finish.ready && can_finish io.mem_finish.bits := fq.io.deq.bits io.idx_match := (state =/= s_invalid) && idx_match io.refill.way_en := req.way_en io.refill.addr := ((req_idx << log2Ceil(refillCycles)) | refill_cnt) << rowOffBits io.tag := req.addr >> untagBits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready val meta_hazard = Reg(init=UInt(0,2)) when (meta_hazard =/= UInt(0)) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } io.probe_rdy := !idx_match || (!state.isOneOf(states_before_refill) && meta_hazard === 0) io.meta_write.valid := state.isOneOf(s_meta_write_req, s_meta_clear) io.meta_write.bits.idx := req_idx io.meta_write.bits.data.coh := Mux(state === s_meta_clear, req.old_meta.coh.onCacheControl(M_FLUSH), new_coh_state) io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en io.wb_req.valid := state === s_wb_req io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback( client_xact_id = UInt(id), addr_block = Cat(req.old_meta.tag, req_idx)) io.wb_req.bits.way_en := req.way_en io.mem_req.valid := state === s_refill_req && fq.io.enq.ready io.mem_req.bits := req.old_meta.coh.makeAcquire( addr_block = Cat(io.tag, req_idx), client_xact_id = Bits(id), op_code = req.cmd) io.meta_read.valid := state === s_drain_rpq io.meta_read.bits.idx := req_idx io.meta_read.bits.tag := io.tag io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid io.replay.bits := rpq.io.deq.bits io.replay.bits.phys := Bool(true) io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(blockOffBits-1,0)) when (!io.meta_read.ready) { rpq.io.deq.ready := Bool(false) io.replay.bits.cmd := M_FLUSH_ALL /* nop */ } } class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new MSHRReq).flip val resp = Decoupled(new HellaCacheResp) val secondary_miss = Bool(OUTPUT) val mem_req = Decoupled(new Acquire) val refill = new L1RefillReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new GrantFromSrc).flip val mem_finish = Decoupled(new FinishToDst) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT) val replay_next = Bool(OUTPUT) } // determine if the request is cacheable or not val cacheable = addrMap.isCacheable(io.req.bits.addr) val sdq_val = Reg(init=Bits(0, cfg.nSDQ)) val sdq_alloc_id = PriorityEncoder(~sdq_val(cfg.nSDQ-1,0)) val sdq_rdy = !sdq_val.andR val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd) val sdq = Mem(cfg.nSDQ, io.req.bits.data) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } val idxMatch = Wire(Vec(cfg.nMSHRs, Bool())) val tagList = Wire(Vec(cfg.nMSHRs, Bits(width = tagBits))) val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits val wbTagList = Wire(Vec(cfg.nMSHRs, Bits())) val refillMux = Wire(Vec(cfg.nMSHRs, new L1RefillReq)) val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, cfg.nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, cfg.nMSHRs)) val mem_req_arb = Module(new LockingArbiter( new Acquire, cfg.nMSHRs + nIOMSHRs, outerDataBeats, Some((a: Acquire) => a.hasMultibeatData()))) val mem_finish_arb = Module(new Arbiter(new FinishToDst, cfg.nMSHRs + nIOMSHRs)) val wb_req_arb = Module(new Arbiter(new WritebackReq, cfg.nMSHRs)) val replay_arb = Module(new Arbiter(new ReplayInternal(cfg), cfg.nMSHRs)) val alloc_arb = Module(new Arbiter(Bool(), cfg.nMSHRs)) var idx_match = Bool(false) var pri_rdy = Bool(false) var sec_rdy = Bool(false) io.fence_rdy := true io.probe_rdy := true for (i <- 0 until cfg.nMSHRs) { val mshr = Module(new MSHR(i)(cfg)) idxMatch(i) := mshr.io.idx_match tagList(i) := mshr.io.tag wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match mshr.io.req_bits := io.req.bits mshr.io.req_bits.sdq_id := sdq_alloc_id meta_read_arb.io.in(i) <> mshr.io.meta_read meta_write_arb.io.in(i) <> mshr.io.meta_write mem_req_arb.io.in(i) <> mshr.io.mem_req mem_finish_arb.io.in(i) <> mshr.io.mem_finish wb_req_arb.io.in(i) <> mshr.io.wb_req replay_arb.io.in(i) <> mshr.io.replay mshr.io.mem_grant.valid := io.mem_grant.valid && io.mem_grant.bits.client_xact_id === UInt(i) mshr.io.mem_grant.bits := io.mem_grant.bits refillMux(i) := mshr.io.refill pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy idx_match = idx_match || mshr.io.idx_match when (!mshr.io.req_pri_rdy) { io.fence_rdy := false } when (!mshr.io.probe_rdy) { io.probe_rdy := false } } alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match io.meta_read <> meta_read_arb.io.out io.meta_write <> meta_write_arb.io.out io.mem_req <> mem_req_arb.io.out io.mem_finish <> mem_finish_arb.io.out io.wb_req <> wb_req_arb.io.out val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs)) val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs)) var mmio_rdy = Bool(false) io.replay_next := Bool(false) for (i <- 0 until nIOMSHRs) { val id = cfg.nMSHRs + i val mshr = Module(new IOMSHR(id)) mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready mshr.io.req.bits := io.req.bits mmio_rdy = mmio_rdy || mshr.io.req.ready mem_req_arb.io.in(id) <> mshr.io.acquire mem_finish_arb.io.in(id) <> mshr.io.finish mshr.io.grant.bits := io.mem_grant.bits mshr.io.grant.valid := io.mem_grant.valid && io.mem_grant.bits.client_xact_id === UInt(id) resp_arb.io.in(i) <> mshr.io.resp when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) } when (mshr.io.replay_next) { io.replay_next := Bool(true) } } mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable io.resp <> resp_arb.io.out io.req.ready := Mux(!cacheable, mmio_rdy, Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy) io.secondary_miss := idx_match io.refill := refillMux(io.mem_grant.bits.client_xact_id) val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq)) io.replay <> replay_arb.io.out when (io.replay.valid || sdq_enq) { sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(cfg.nSDQ, free_sdq)) | PriorityEncoderOH(~sdq_val(cfg.nSDQ-1,0)) & Fill(cfg.nSDQ, sdq_enq) } } class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new WritebackReq).flip val meta_read = Decoupled(new L1MetaReadReq) val data_req = Decoupled(new L1DataReadReq) val data_resp = Bits(INPUT, encRowBits) val release = Decoupled(new Release) } val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) val data_req_cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1)) val beat_done = buf_v.andR val (beat_cnt, all_beats_done) = Counter(io.release.fire(), outerDataBeats) val req = Reg(new WritebackReq) io.release.valid := false when (active) { r1_data_req_fired := false r2_data_req_fired := r1_data_req_fired when (io.data_req.fire() && io.meta_read.fire()) { r1_data_req_fired := true data_req_cnt := data_req_cnt + 1 } when (r2_data_req_fired) { io.release.valid := beat_done when(beat_done) { when(!io.release.ready) { r1_data_req_fired := false r2_data_req_fired := false data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) } .otherwise { if(refillCyclesPerBeat > 1) buf_v := 0 } } when(!r1_data_req_fired) { // We're done if this is the final data request and the Release can be sent active := data_req_cnt < UInt(refillCycles) || !io.release.ready } } } when (io.req.fire()) { active := true data_req_cnt := 0 if(refillCyclesPerBeat > 1) buf_v := 0 req := io.req.bits } io.req.ready := !active val req_idx = req.addr_block(idxBits-1, 0) val fire = active && data_req_cnt < UInt(refillCycles) // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed io.meta_read.valid := fire io.meta_read.bits.idx := req_idx io.meta_read.bits.tag := req.addr_block >> idxBits io.data_req.valid := fire io.data_req.bits.way_en := req.way_en io.data_req.bits.addr := (if(refillCycles > 1) Cat(req_idx, data_req_cnt(log2Up(refillCycles)-1,0)) else req_idx) << rowOffBits io.release.bits := req io.release.bits.addr_beat := beat_cnt io.release.bits.data := (if(refillCyclesPerBeat > 1) { // If the cache rows are narrower than a TLDataBeat, // then buffer enough data_resps to make a whole beat val data_buf = Reg(Bits()) when(active && r2_data_req_fired && !beat_done) { data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat)*encRowBits-1, encRowBits)) buf_v := (if(refillCyclesPerBeat > 2) Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1)) else UInt(1)) } Cat(io.data_resp, data_buf) } else { io.data_resp }) } class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new ProbeInternal).flip val rep = Decoupled(new Release) val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, nWays) val mshr_rdy = Bool(INPUT) val block_state = new ClientMetadata().asInput } val (s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_mshr_resp :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil) = Enum(UInt(), 9) val state = Reg(init=s_invalid) val old_coh = Reg(new ClientMetadata) val way_en = Reg(Bits()) val req = Reg(new ProbeInternal) val tag_matches = way_en.orR val miss_coh = ClientMetadata.onReset val reply_coh = Mux(tag_matches, old_coh, miss_coh) val reply = reply_coh.makeRelease(req) io.req.ready := state === s_invalid io.rep.valid := state === s_release io.rep.bits := reply assert(!io.rep.valid || !io.rep.bits.hasData(), "ProbeUnit should not send releases with data") io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr_block io.meta_read.bits.tag := req.addr_block >> idxBits io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr_block io.meta_write.bits.data.tag := req.addr_block >> idxBits io.meta_write.bits.data.coh := old_coh.onProbe(req) io.wb_req.valid := state === s_writeback_req io.wb_req.bits := reply io.wb_req.bits.way_en := way_en // state === s_invalid when (io.req.fire()) { state := s_meta_read req := io.req.bits } // state === s_meta_read when (io.meta_read.fire()) { state := s_meta_resp } // we need to wait one cycle for the metadata to be read from the array when (state === s_meta_resp) { state := s_mshr_req } when (state === s_mshr_req) { state := s_mshr_resp old_coh := io.block_state way_en := io.way_en // if the read didn't go through, we need to retry when (!io.mshr_rdy) { state := s_meta_read } } when (state === s_mshr_resp) { val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback() state := Mux(needs_writeback, s_writeback_req, s_release) } when (state === s_release && io.rep.ready) { state := Mux(tag_matches, s_meta_write, s_invalid) } // state === s_writeback_req when (io.wb_req.fire()) { state := s_writeback_resp } // wait for the writeback request to finish before updating the metadata when (state === s_writeback_resp && io.wb_req.ready) { state := s_meta_write } when (io.meta_write.fire()) { state := s_invalid } } class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val read = Decoupled(new L1DataReadReq).flip val write = Decoupled(new L1DataWriteReq).flip val resp = Vec(nWays, Bits(OUTPUT, encRowBits)) } val waddr = io.write.bits.addr >> rowOffBits val raddr = io.read.bits.addr >> rowOffBits if (doNarrowRead) { for (w <- 0 until nWays by rowWords) { val wway_en = io.write.bits.way_en(w+rowWords-1,w) val rway_en = io.read.bits.way_en(w+rowWords-1,w) val resp = Wire(Vec(rowWords, Bits(width = encRowBits))) val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits))) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) array.write(waddr, data, wway_en.toBools) } resp(p) := array.read(raddr, rway_en.orR && io.read.valid).asUInt } for (dw <- 0 until rowWords) { val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) val resp_mux = if (r.size == 1) r else Vec(r(r_raddr(rowOffBits-1,wordOffBits)), r.tail:_*) io.resp(w+dw) := resp_mux.asUInt } } } else { for (w <- 0 until nWays) { val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits))) when (io.write.bits.way_en(w) && io.write.valid) { val data = Vec.tabulate(rowWords)(i => io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i)) array.write(waddr, data, io.write.bits.wmask.toBools) } io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid).asUInt } } io.read.ready := Bool(true) io.write.ready := Bool(true) } class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val cpu = (new HellaCacheIO).flip val ptw = new TLBPTWIO() val mem = new ClientTileLinkIO } require(isPow2(nWays)) // TODO: relax this val wb = Module(new WritebackUnit) val prober = Module(new ProbeUnit) val mshrs = Module(new MSHRFile(cfg)) io.cpu.req.ready := Bool(true) val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) val s1_req = Reg(io.cpu.req.bits) val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR val s1_replay = Reg(init=Bool(false)) val s1_clk_en = Reg(Bool()) val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_req = Reg(io.cpu.req.bits) val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL val s2_recycle = Wire(Bool()) val s2_valid_masked = Wire(Bool()) val s3_valid = Reg(init=Bool(false)) val s3_req = Reg(io.cpu.req.bits) val s3_way = Reg(Bits()) val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val dtlb = Module(new TLB) io.ptw <> dtlb.io.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits dtlb.io.req.bits.instruction := Bool(false) dtlb.io.req.bits.store := s1_write when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } when (io.cpu.req.valid) { s1_req := io.cpu.req.bits } when (wb.io.meta_read.valid) { s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits s1_req.phys := Bool(true) } when (prober.io.meta_read.valid) { s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits s1_req.phys := Bool(true) } when (mshrs.io.replay.valid) { s1_req := mshrs.io.replay.bits } when (s2_recycle) { s1_req := s2_req } val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) when (s1_clk_en) { s2_req.typ := s1_req.typ s2_req.phys := s1_req.phys s2_req.addr := s1_addr when (s1_write) { s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data) } when (s1_recycled) { s2_req.data := s1_req.data } s2_req.tag := s1_req.tag s2_req.cmd := s1_req.cmd } val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) val meta = Module(new MetadataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) meta.io.read <> metaReadArb.io.out meta.io.write <> metaWriteArb.io.out // data val data = Module(new DataArray) val readArb = Module(new Arbiter(new L1DataReadReq, 4)) val writeArb = Module(new Arbiter(new L1DataWriteReq, 2)) data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i))) data.io.write.bits.data := wdata_encoded.asUInt // tag read for new requests metaReadArb.io.in(4).valid := io.cpu.req.valid metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests readArb.io.in(3).valid := io.cpu.req.valid readArb.io.in(3).bits.addr := io.cpu.req.bits.addr readArb.io.in(3).bits.way_en := ~UInt(0, nWays) when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } // recycled requests metaReadArb.io.in(0).valid := s2_recycle metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits readArb.io.in(0).valid := s2_recycle readArb.io.in(0).bits.addr := s2_req.addr readArb.io.in(0).bits.way_en := ~UInt(0, nWays) // tag check and way muxing def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f)) val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).asUInt val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.isValid()).asUInt s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) val s2_hit = s2_tag_match && s2_hit_state.isHit(s2_req.cmd) && s2_hit_state === s2_hit_state.onHit(s2_req.cmd) // load-reserved/store-conditional val lrsc_count = Reg(init=UInt(0)) val lrsc_valid = lrsc_count.orR val lrsc_addr = Reg(UInt()) val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits) val s2_sc_fail = s2_sc && !s2_lrsc_addr_match when (lrsc_valid) { lrsc_count := lrsc_count - 1 } when (s2_valid_masked && s2_hit || s2_replay) { when (s2_lr) { when (!lrsc_valid) { lrsc_count := lrscCycles-1 } lrsc_addr := s2_req.addr >> blockOffBits } when (s2_sc) { lrsc_count := 0 } } when (io.cpu.invalidate_lr) { lrsc_count := 0 } val s2_data = Wire(Vec(nWays, Bits(width=encRowBits))) for (w <- 0 until nWays) { val regs = Reg(Vec(rowWords, Bits(width = encDataBits))) val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) when (en) { regs(i) := data.io.resp(w) >> encDataBits*i } } s2_data(w) := regs.asUInt } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) val s2_data_decoded = (0 until rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i))) val s2_data_corrected = s2_data_decoded.map(_.corrected).asUInt val s2_data_uncorrected = s2_data_decoded.map(_.uncorrected).asUInt val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes)) val s2_data_correctable = s2_data_decoded.map(_.correctable).asUInt()(s2_word_idx) // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd) val amoalu = Module(new AMOALU(xLen)) when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) { s3_req := s2_req s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out) s3_way := s2_tag_match_way } writeArb.io.in(0).bits.addr := s3_req.addr writeArb.io.in(0).bits.wmask := UIntToOH(s3_req.addr.extract(rowOffBits-1,offsetlsb)) writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way // replacement policy val replacer = p(Replacer)() val s1_replaced_way_en = UIntToOH(replacer.way) val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) // miss handling mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) mshrs.io.req.bits := s2_req mshrs.io.req.bits.tag_match := s2_tag_match mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } io.mem.acquire <> mshrs.io.mem_req // replays readArb.io.in(1).valid := mshrs.io.replay.valid readArb.io.in(1).bits := mshrs.io.replay.bits readArb.io.in(1).bits.way_en := ~UInt(0, nWays) mshrs.io.replay.ready := readArb.io.in(1).ready s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready metaReadArb.io.in(1) <> mshrs.io.meta_read metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes and releases val releaseArb = Module(new LockingArbiter( new Release, 2, outerDataBeats, Some((r: Release) => r.hasMultibeatData()))) io.mem.release <> releaseArb.io.out prober.io.req.valid := io.mem.probe.valid && !lrsc_valid io.mem.probe.ready := prober.io.req.ready && !lrsc_valid prober.io.req.bits := io.mem.probe.bits releaseArb.io.in(1) <> prober.io.rep prober.io.way_en := s2_tag_match_way prober.io.block_state := s2_hit_state metaReadArb.io.in(2) <> prober.io.meta_read metaWriteArb.io.in(1) <> prober.io.meta_write prober.io.mshr_rdy := mshrs.io.probe_rdy // refills val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat) mshrs.io.mem_grant.valid := narrow_grant.fire() mshrs.io.mem_grant.bits := narrow_grant.bits narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() /* The last clause here is necessary in order to prevent the responses for * the IOMSHRs from being written into the data array. It works because the * IOMSHR ids start right the ones for the regular MSHRs. */ writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() && narrow_grant.bits.client_xact_id < UInt(cfg.nMSHRs) writeArb.io.in(1).bits.addr := mshrs.io.refill.addr writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords) writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) data.io.read <> readArb.io.out readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked io.mem.finish <> mshrs.io.mem_finish // writebacks val wbArb = Module(new Arbiter(new WritebackReq, 2)) wbArb.io.in(0) <> prober.io.wb_req wbArb.io.in(1) <> mshrs.io.wb_req wb.io.req <> wbArb.io.out metaReadArb.io.in(3) <> wb.io.meta_read readArb.io.in(2) <> wb.io.data_req wb.io.data_resp := s2_data_corrected releaseArb.io.in(0) <> wb.io.release // store->load bypassing val s4_valid = Reg(next=s3_valid, init=Bool(false)) val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) ).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3)) val s2_store_bypass_data = Reg(Bits(width = coreDataBits)) val s2_store_bypass = Reg(Bool()) when (s1_clk_en) { s2_store_bypass := false when (bypasses.map(_._1).reduce(_||_)) { s2_store_bypass_data := PriorityMux(bypasses) s2_store_bypass := true } } // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes) amoalu.io.addr := s2_req.addr amoalu.io.cmd := s2_req.cmd amoalu.io.typ := s2_req.typ amoalu.io.lhs := s2_data_word amoalu.io.rhs := s2_req.data // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshrs.io.secondary_miss val s2_nack_miss = !s2_hit && !mshrs.io.req.ready val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss s2_valid_masked := s2_valid && !s2_nack val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable val s2_recycle_next = Reg(init=Bool(false)) when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc } s2_recycle := s2_recycle_ecc || s2_recycle_next // after a nack, block until nack condition resolves to save energy val block_miss = Reg(init=Bool(false)) block_miss := (s2_valid || block_miss) && s2_nack_miss when (block_miss) { io.cpu.req.ready := Bool(false) } val cache_resp = Wire(Valid(new HellaCacheResp)) cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable cache_resp.bits := s2_req cache_resp.bits.has_data := isRead(s2_req.cmd) cache_resp.bits.data := loadgen.data | s2_sc_fail cache_resp.bits.store_data := s2_req.data cache_resp.bits.replay := s2_replay val uncache_resp = Wire(Valid(new HellaCacheResp)) uncache_resp.bits := mshrs.io.resp.bits uncache_resp.valid := mshrs.io.resp.valid mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay)) io.cpu.s2_nack := s2_valid && s2_nack io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp) io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next } /** * This module buffers requests made by the SimpleHellaCacheIF in case they * are nacked. Nacked requests must be replayed in order, and no other requests * must be allowed to go through until the replayed requests are successfully * completed. */ class SimpleHellaCacheIFReplayQueue(depth: Int) (implicit val p: Parameters) extends Module with HasL1HellaCacheParameters { val io = new Bundle { val req = Decoupled(new HellaCacheReq).flip val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip val resp = Valid(new HellaCacheResp).flip val replay = Decoupled(new HellaCacheReq) } // Registers to store the sent request // When a request is sent the first time, // it is stored in one of the reqs registers // and the corresponding inflight bit is set. // The reqs register will be deallocated once the request is // successfully completed. val inflight = Reg(init = UInt(0, depth)) val reqs = Reg(Vec(depth, new HellaCacheReq)) // The nack queue stores the index of nacked requests (in the reqs vector) // in the order that they were nacked. A request is enqueued onto nackq // when it is newly nacked (i.e. not a nack for a previous replay). // The head of the nack queue will be replayed until it is // successfully completed, at which time the request is dequeued. // No new requests will be made or other replays attempted until the head // of the nackq is successfully completed. val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth)) val replaying = Reg(init = Bool(false)) val next_inflight_onehot = PriorityEncoderOH(~inflight) val next_inflight = OHToUInt(next_inflight_onehot) val next_replay = nackq.io.deq.bits val next_replay_onehot = UIntToOH(next_replay) val next_replay_req = reqs(next_replay) // Keep sending the head of the nack queue until it succeeds io.replay.valid := nackq.io.deq.valid && !replaying io.replay.bits := next_replay_req // Don't allow new requests if there is are replays waiting // or something being nacked. io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid // Match on the tags to determine the index of nacks or responses val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag // Enqueue to the nack queue if there is a nack that is not in response to // the previous replay nackq.io.enq.valid := io.nack.valid && !nack_head nackq.io.enq.bits := OHToUInt(nack_onehot) assert(!nackq.io.enq.valid || nackq.io.enq.ready, "SimpleHellaCacheIF: ReplayQueue nack queue overflow") // Dequeue from the nack queue if the last replay was successfully completed nackq.io.deq.ready := replay_complete assert(!nackq.io.deq.ready || nackq.io.deq.valid, "SimpleHellaCacheIF: ReplayQueue nack queue underflow") // Set inflight bit when a request is made // Clear it when it is successfully completed inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) & ~Mux(io.resp.valid, resp_onehot, UInt(0)) when (io.req.fire()) { reqs(next_inflight) := io.req.bits } // Only one replay outstanding at a time when (io.replay.fire()) { replaying := Bool(true) } when (nack_head || replay_complete) { replaying := Bool(false) } } // exposes a sane decoupled request interface class SimpleHellaCacheIF(implicit p: Parameters) extends Module { val io = new Bundle { val requestor = new HellaCacheIO().flip val cache = new HellaCacheIO } val replayq = Module(new SimpleHellaCacheIFReplayQueue(2)) val req_arb = Module(new Arbiter(new HellaCacheReq, 2)) val req_helper = DecoupledHelper( req_arb.io.in(1).ready, replayq.io.req.ready, io.requestor.req.valid) req_arb.io.in(0) <> replayq.io.replay req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready) req_arb.io.in(1).bits := io.requestor.req.bits io.requestor.req.ready := req_helper.fire(io.requestor.req.valid) replayq.io.req.valid := req_helper.fire(replayq.io.req.ready) replayq.io.req.bits := io.requestor.req.bits val s0_req_fire = io.cache.req.fire() val s1_req_fire = Reg(next = s0_req_fire) val s2_req_fire = Reg(next = s1_req_fire) val s1_req_tag = Reg(next = io.cache.req.bits.tag) val s2_req_tag = Reg(next = s1_req_tag) val s2_kill = Reg(next = io.cache.s1_kill) io.cache.invalidate_lr := io.requestor.invalidate_lr io.cache.req <> req_arb.io.out io.cache.req.bits.phys := Bool(true) io.cache.s1_kill := io.cache.s2_nack io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire replayq.io.nack.bits := s2_req_tag replayq.io.resp := io.cache.resp io.requestor.resp := io.cache.resp assert(!Reg(next = io.cache.req.fire()) || !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st || io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st), "SimpleHellaCacheIF exception") } object HellaCache { def apply(cfg: DCacheConfig)(implicit p: Parameters) = if (cfg.nMSHRs == 0) Module(new DCache()).io else Module(new HellaCache(cfg)).io }