From afa1a6d549c5d05f8753d30594b9ff86986aaf94 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 10 Nov 2016 15:56:42 -0800 Subject: [PATCH] WIP uncore and rocket changes compile --- src/main/scala/groundtest/Tile.scala | 32 +- src/main/scala/rocket/dcache.scala | 884 +++++++++--------- src/main/scala/rocket/nbdcache.scala | 66 +- src/main/scala/rocket/tile.scala | 79 +- src/main/scala/uncore/agents/Cache.scala | 1 - src/main/scala/uncore/tilelink2/Bundles.scala | 8 + src/main/scala/uncore/tilelink2/Edges.scala | 13 +- .../scala/uncore/tilelink2/Metadata.scala | 149 +++ src/main/scala/util/Misc.scala | 17 + 9 files changed, 713 insertions(+), 536 deletions(-) create mode 100644 src/main/scala/uncore/tilelink2/Metadata.scala diff --git a/src/main/scala/groundtest/Tile.scala b/src/main/scala/groundtest/Tile.scala index f0973dc4..2d778948 100644 --- a/src/main/scala/groundtest/Tile.scala +++ b/src/main/scala/groundtest/Tile.scala @@ -3,6 +3,9 @@ package groundtest import Chisel._ import rocket._ import uncore.tilelink._ +import uncore.agents.CacheName +import uncore.tilelink2._ +import diplomacy._ import scala.util.Random import scala.collection.mutable.ListBuffer import junctions.HasAddrMapParameters @@ -96,20 +99,25 @@ abstract class GroundTest(implicit val p: Parameters) extends Module val io = new GroundTestIO } -class GroundTestTile(implicit val p: Parameters) extends LazyTile { +class GroundTestTile(implicit val p: Parameters) extends LazyModule with HasGroundTestParameters { + val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) val slave = None - lazy val module = new TileImp(this) with HasGroundTestParameters { - val io = new TileIO(bc) { + val dcache = HellaCache(p(DCacheKey))(dcacheParams) + val ucLegacy = LazyModule(new TLLegacy()(p)) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val cached = dcache.node.bundleOut + val uncached = ucLegacy.node.bundleOut val success = Bool(OUTPUT) } val test = p(BuildGroundTest)(dcacheParams) val ptwPorts = ListBuffer.empty ++= test.io.ptw - val memPorts = ListBuffer.empty ++= test.io.mem + val uncachedArbPorts = ListBuffer.empty ++= test.io.mem if (nCached > 0) { - val dcache_io = HellaCache(p(DCacheKey))(dcacheParams) val dcacheArb = Module(new HellaCacheArbiter(nCached)(dcacheParams)) dcacheArb.io.requestor.zip(test.io.cache).foreach { @@ -118,13 +126,12 @@ class GroundTestTile(implicit val p: Parameters) extends LazyTile { dcacheIF.io.requestor <> cache requestor <> dcacheIF.io.cache } - dcache_io.cpu <> dcacheArb.io.mem - io.cached.head <> dcache_io.mem + dcache.module.io.cpu <> dcacheArb.io.mem // SimpleHellaCacheIF leaves invalidate_lr dangling, so we wire it to false - dcache_io.cpu.invalidate_lr := Bool(false) + dcache.module.io.cpu.invalidate_lr := Bool(false) - ptwPorts += dcache_io.ptw + ptwPorts += dcache.module.io.ptw } if (ptwPorts.size > 0) { @@ -132,10 +139,9 @@ class GroundTestTile(implicit val p: Parameters) extends LazyTile { ptw.io.requestors <> ptwPorts } - require(memPorts.size == io.uncached.size) - if (memPorts.size > 0) { - io.uncached <> memPorts - } + val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size)) + uncachedArb.io.in <> uncachedArbPorts + ucLegacy.module.io.legacy <> uncachedArb.io.out io.success := test.io.status.finished } diff --git a/src/main/scala/rocket/dcache.scala b/src/main/scala/rocket/dcache.scala index 8e908dcd..d60dc1ad 100644 --- a/src/main/scala/rocket/dcache.scala +++ b/src/main/scala/rocket/dcache.scala @@ -5,13 +5,12 @@ 
package rocket import Chisel._ import junctions._ import diplomacy._ -import uncore.tilelink._ import uncore.tilelink2._ -import uncore.agents._ -import uncore.coherence._ import uncore.constants._ +import uncore.agents._ import uncore.util._ import util._ +import TLMessages._ import Chisel.ImplicitConversions._ import cde.{Parameters, Field} @@ -41,458 +40,473 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } -class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { - val io = new Bundle { - val cpu = (new HellaCacheIO).flip - val ptw = new TLBPTWIO() - val mem = new ClientTileLinkIO - } +class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends LazyModule with HasL1HellaCacheParameters { - val fq = Module(new FinishQueue(1)) + val node = TLClientNode(TLClientParameters(supportsProbe = TransferSizes(cacheBlockBytes))) - require(rowBits == encRowBits) // no ECC - require(refillCyclesPerBeat == 1) - require(rowBits >= coreDataBits) + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val cpu = (new HellaCacheIO).flip + val ptw = new TLBPTWIO() + val mem = node.bundleOut + } - // tags - val replacer = p(Replacer)() - def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) - val metaReadArb = Module(new Arbiter(new MetaReadReq, 3)) - val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3)) + val edge = node.edgesOut(0) + val tl_out = io.mem(0) - // data - val data = Module(new DCacheDataArray) - val dataArb = Module(new Arbiter(new DCacheDataReq, 4)) - data.io.req <> dataArb.io.out - dataArb.io.out.ready := true + val grantackq = Module(new Queue(tl_out.e.bits,1)) - val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) - val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false)) - val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire()) - val s1_nack = Wire(init=Bool(false)) - val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR - val s1_valid_not_nacked = s1_valid_masked && !s1_nack - val s1_req = Reg(io.cpu.req.bits) - when (metaReadArb.io.out.valid) { - s1_req := io.cpu.req.bits - s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0)) - } - val s1_read = isRead(s1_req.cmd) - val s1_write = isWrite(s1_req.cmd) - val s1_readwrite = s1_read || s1_write - val s1_flush_valid = Reg(Bool()) + require(rowBits == encRowBits) // no ECC + require(refillCyclesPerBeat == 1) + require(rowBits >= coreDataBits) - val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7) - val cached_grant_wait = Reg(init=Bool(false)) - val release_ack_wait = Reg(init=Bool(false)) - val release_state = Reg(init=s_ready) - val pstore1_valid = Wire(Bool()) - val pstore2_valid = Reg(Bool()) - val inWriteback = release_state.isOneOf(s_voluntary_writeback, s_probe_rep_dirty) - val releaseWay = Wire(UInt()) - io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack + // tags + val replacer = p(Replacer)() + def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) + val metaReadArb = Module(new Arbiter(new MetaReadReq, 3)) + val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3)) - // I/O MSHRs - val maxUncachedInFlight = (1 << io.mem.acquire.bits.client_xact_id.getWidth) - 1 - val uncachedInFlight = Reg(init=Vec.fill(maxUncachedInFlight)(Bool(false))) - val uncachedReqs = 
Reg(Vec(maxUncachedInFlight, new HellaCacheReq)) + // data + val data = Module(new DCacheDataArray) + val dataArb = Module(new Arbiter(new DCacheDataReq, 4)) + data.io.req <> dataArb.io.out + dataArb.io.out.ready := true - // hit initiation path - dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd) - dataArb.io.in(3).bits.write := false - dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr - dataArb.io.in(3).bits.way_en := ~UInt(0, nWays) - when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false } - metaReadArb.io.in(2).valid := io.cpu.req.valid - metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB) - metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays) - when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false } + val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) + val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false)) + val probe_bits = RegEnable(tl_out.b.bits, tl_out.b.fire()) // TODO has data now :( + val s1_nack = Wire(init=Bool(false)) + val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR + val s1_valid_not_nacked = s1_valid_masked && !s1_nack + val s1_req = Reg(io.cpu.req.bits) + when (metaReadArb.io.out.valid) { + s1_req := io.cpu.req.bits + s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0)) + } + val s1_read = isRead(s1_req.cmd) + val s1_write = isWrite(s1_req.cmd) + val s1_readwrite = s1_read || s1_write + val s1_flush_valid = Reg(Bool()) - // address translation - val tlb = Module(new TLB) - io.ptw <> tlb.io.ptw - tlb.io.req.valid := s1_valid_masked && s1_readwrite - tlb.io.req.bits.passthrough := s1_req.phys - tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits - tlb.io.req.bits.instruction := false - tlb.io.req.bits.store := s1_write - when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } - when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true } + val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7) + val cached_grant_wait = Reg(init=Bool(false)) + val release_ack_wait = Reg(init=Bool(false)) + val release_state = Reg(init=s_ready) + val pstore1_valid = Wire(Bool()) + val pstore2_valid = Reg(Bool()) + val inWriteback = release_state.isOneOf(s_voluntary_writeback, s_probe_rep_dirty) + val releaseWay = Wire(UInt()) + io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack - val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) - val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) - val s1_victim_way = Wire(init = replacer.way) - val (s1_hit_way, s1_hit_state, s1_victim_meta) = - if (usingDataScratchpad) { - require(nWays == 1) - metaWriteArb.io.out.ready := true - metaReadArb.io.out.ready := !metaWriteArb.io.out.valid - val inScratchpad = addrMap(s"TL2:dmem${tileId}").containsAddress(s1_paddr) - val hitState = Mux(inScratchpad, ClientMetadata.onReset.onHit(M_XWR), ClientMetadata.onReset) - (inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset)) + // I/O MSHRs + val uncachedInFlight = Reg(init=Vec.fill(maxUncachedInFlight)(Bool(false))) + val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq)) + + // hit initiation path + dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd) + dataArb.io.in(3).bits.write := false + 
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr + dataArb.io.in(3).bits.way_en := ~UInt(0, nWays) + when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false } + metaReadArb.io.in(2).valid := io.cpu.req.valid + metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB) + metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays) + when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false } + + // address translation + val tlb = Module(new TLB) + io.ptw <> tlb.io.ptw + tlb.io.req.valid := s1_valid_masked && s1_readwrite + tlb.io.req.bits.passthrough := s1_req.phys + tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits + tlb.io.req.bits.instruction := false + tlb.io.req.bits.store := s1_write + when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } + when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true } + + val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) + val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr)(paddrBits-1, untagBits) + val s1_victim_way = Wire(init = replacer.way) + val (s1_hit_way, s1_hit_state, s1_victim_meta) = + if (usingDataScratchpad) { + require(nWays == 1) + metaWriteArb.io.out.ready := true + metaReadArb.io.out.ready := !metaWriteArb.io.out.valid + val inScratchpad = addrMap(s"TL2:dmem${tileId}").containsAddress(s1_paddr) + val hitState = Mux(inScratchpad, ClientMetadata.maximum, ClientMetadata.onReset) + (inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset)) + } else { + val meta = Module(new MetadataArray(onReset _)) + meta.io.read <> metaReadArb.io.out + meta.io.write <> metaWriteArb.io.out + val s1_meta = meta.io.resp + val s1_meta_hit_way = s1_meta.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt + val s1_meta_hit_state = ClientMetadata.onReset.fromBits( + s1_meta.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0))) + .reduce (_|_)) + (s1_meta_hit_way, s1_meta_hit_state, s1_meta(s1_victim_way)) + } + val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way) + val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical + + val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) + val s2_probe = Reg(next=s1_probe, init=Bool(false)) + val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready + val s2_valid_masked = s2_valid && Reg(next = !s1_nack) + val s2_req = Reg(io.cpu.req.bits) + val s2_uncached = Reg(Bool()) + when (s1_valid_not_nacked || s1_flush_valid) { + s2_req := s1_req + s2_req.addr := s1_paddr + s2_uncached := !tlb.io.resp.cacheable || Bool(usingDataScratchpad) + } + val s2_read = isRead(s2_req.cmd) + val s2_write = isWrite(s2_req.cmd) + val s2_readwrite = s2_read || s2_write + val s2_flush_valid = RegNext(s1_flush_valid) + val s2_data = RegEnable(s1_data, s1_valid || inWriteback) + val s2_probe_way = RegEnable(s1_hit_way, s1_probe) + val s2_probe_state = RegEnable(s1_hit_state, s1_probe) + val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) + val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked) + val s2_hit_valid = s2_hit_state.isValid() + val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) + val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit + val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait + val s2_valid_cached_miss = s2_valid_miss && !s2_uncached + val s2_victimize = s2_valid_cached_miss || s2_flush_valid + val s2_valid_uncached = s2_valid_miss && s2_uncached + val 
s2_victim_way = Mux(s2_hit_valid && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid))) + val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid) + val s2_victim_state = Mux(s2_hit_valid && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid)) + val s2_victim_valid = s2_victim_state.isValid() + val (prb_ack_data, s2_report_param, probeNewCoh)= s2_probe_state.onProbe(probe_bits.param) + val (needs_vol_wb, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH) + val s2_victim_dirty = needs_vol_wb + val s2_update_meta = s2_hit_state =/= s2_new_hit_state + io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR) + when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true } + + // exceptions + val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes) + io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned + io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned + io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st + + // load reservations + val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR + val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC + val lrscCount = Reg(init=UInt(0)) + val lrscValid = lrscCount > 0 + val lrscAddr = Reg(UInt()) + val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits)) + when (s2_valid_hit && s2_lr) { + lrscCount := lrscCycles - 1 + lrscAddr := s2_req.addr >> blockOffBits + } + when (lrscValid) { lrscCount := lrscCount - 1 } + when ((s2_valid_masked && lrscValid) || io.cpu.invalidate_lr) { lrscCount := 0 } + + // pending store buffer + val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) + val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write) + val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write) + val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write) + val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write) + val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes) + val pstore1_storegen_data = Wire(init = pstore1_storegen.data) + val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd) + val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo) + val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) + val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack + val pstore_drain = + Bool(usingAtomics) && pstore_drain_structural || + (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)) + pstore1_valid := { + val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail + val pstore1_held = Reg(Bool()) + assert(!s2_store_valid || !pstore1_held) + pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain + s2_store_valid || pstore1_held + } + val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain) + pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 + val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1) + val pstore2_way = RegEnable(pstore1_way, advance_pstore1) + val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1) + val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, 
advance_pstore1) + dataArb.io.in(0).valid := pstore_drain + dataArb.io.in(0).bits.write := true + dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) + dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) + dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data)) + val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits + dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift + + // store->load RAW hazard detection + val s1_idx = s1_req.addr(idxMSB, wordOffBits) + val s1_raw_hazard = s1_read && + ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) || + (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR)) + when (s1_valid && s1_raw_hazard) { s1_nack := true } + + metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty) + metaWriteArb.io.in(0).bits.way_en := s2_victim_way + metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) + metaWriteArb.io.in(0).bits.data.coh := Mux(s2_valid_hit, s2_new_hit_state, ClientMetadata.onReset) + metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) + + // Prepare a TileLink request message that initiates a transaction + val a_source = PriorityEncoder(~uncachedInFlight.asUInt) + val a_address = s2_req.addr + val a_size = s2_req.typ + val a_data = Fill(beatWords, pstore1_storegen.data) + val acquire = edge.Acquire(a_source, a_address, lgCacheBlockBytes, s2_grow_param)._2 // TODO check cacheability + val get = edge.Get(a_source, a_address, a_size)._2 + val put = edge.Put(a_source, a_address, a_size, a_data)._2 + val atomics = if (edge.manager.anySupportLogical) { + MuxLookup(s2_req.cmd, Wire(new TLBundleA(edge.bundle)), Array( + M_XA_SWAP -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.SWAP)._2, + M_XA_XOR -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.XOR) ._2, + M_XA_OR -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.OR) ._2, + M_XA_AND -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.AND) ._2, + M_XA_ADD -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.ADD)._2, + M_XA_MIN -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MIN)._2, + M_XA_MAX -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAX)._2, + M_XA_MINU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MINU)._2, + M_XA_MAXU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAXU)._2)) } else { - val meta = Module(new MetadataArray(onReset _)) - meta.io.read <> metaReadArb.io.out - meta.io.write <> metaWriteArb.io.out - val s1_meta = meta.io.resp - val s1_meta_hit_way = s1_meta.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt - val s1_meta_hit_state = ClientMetadata.onReset.fromBits( - s1_meta.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0))) - .reduce (_|_)) - (s1_meta_hit_way, s1_meta_hit_state, s1_meta(s1_victim_way)) + // If no managers support atomics, assert fail if processor asks for them + assert (!(tl_out.a.valid && pstore1_amo && s2_write && s2_uncached)) + Wire(new TLBundleA(edge.bundle)) } - val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way) - val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical 
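(Editorial aside, not part of the patch: the MuxLookup over edge.Logical/edge.Arithmetic above is the new TLEdgeOut helper style for constructing A-channel requests. A minimal standalone client built with the same helpers might look like the sketch below; the module name, source-id range, target address, and transfer size are illustrative assumptions, not code from this commit.)

    // assumes: import Chisel._ ; import uncore.tilelink2._ ; import diplomacy._ ; import cde.Parameters
    class ExampleGetter(implicit p: Parameters) extends LazyModule {
      // one outstanding request; a simple master needs no probe support
      val node = TLClientNode(TLClientParameters(sourceId = IdRange(0, 1)))
      lazy val module = new LazyModuleImp(this) {
        val io = new Bundle { val mem = node.bundleOut }
        val edge = node.edgesOut(0)
        val tl = io.mem(0)
        // edge.Get returns (legal, bits); like the DCache above, keep only the bits with ._2
        tl.a.valid := Bool(true)
        tl.a.bits := edge.Get(UInt(0), UInt(0x100), UInt(3))._2 // one 8-byte read
        tl.d.ready := Bool(true) // sink the AccessAckData response
      }
    }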
- val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) - val s2_probe = Reg(next=s1_probe, init=Bool(false)) - val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready - val s2_valid_masked = s2_valid && Reg(next = !s1_nack) - val s2_req = Reg(io.cpu.req.bits) - val s2_uncached = Reg(Bool()) - when (s1_valid_not_nacked || s1_flush_valid) { - s2_req := s1_req - s2_req.addr := s1_paddr - s2_uncached := !tlb.io.resp.cacheable || Bool(usingDataScratchpad) - } - val s2_read = isRead(s2_req.cmd) - val s2_write = isWrite(s2_req.cmd) - val s2_readwrite = s2_read || s2_write - val s2_flush_valid = RegNext(s1_flush_valid) - val s2_data = RegEnable(s1_data, s1_valid || inWriteback) - val s2_probe_way = RegEnable(s1_hit_way, s1_probe) - val s2_probe_state = RegEnable(s1_hit_state, s1_probe) - val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) - val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked) - val s2_hit = s2_hit_state.isHit(s2_req.cmd) - val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit - val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait - val s2_valid_cached_miss = s2_valid_miss && !s2_uncached - val s2_victimize = s2_valid_cached_miss || s2_flush_valid - val s2_valid_uncached = s2_valid_miss && s2_uncached - val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid))) - val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid) - val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid)) - val s2_victim_valid = s2_victim_state.isValid() - val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback() - val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) - val s2_update_meta = s2_hit_state =/= s2_new_hit_state - io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready && !uncachedInFlight.asUInt.andR) - when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true } + tl_out.a.valid := grantackq.io.enq.ready && ((s2_valid_cached_miss && !s2_victim_dirty) || + (s2_valid_uncached && !uncachedInFlight.asUInt.andR)) + tl_out.a.bits := Mux(pstore1_amo && s2_write && s2_uncached, atomics, + Mux(s2_write && s2_uncached, put, + Mux(s2_uncached, get, acquire))) - // exceptions - val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes) - io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned - io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned - io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld - io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st - - // load reservations - val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR - val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC - val lrscCount = Reg(init=UInt(0)) - val lrscValid = lrscCount > 0 - val lrscAddr = Reg(UInt()) - val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits)) - when (s2_valid_hit && s2_lr) { - lrscCount := lrscCycles - 1 - lrscAddr := s2_req.addr >> blockOffBits - } - when (lrscValid) { lrscCount := lrscCount - 1 } - when ((s2_valid_masked && lrscValid) || io.cpu.invalidate_lr) { lrscCount := 0 } - - // pending store buffer - val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) - val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write) - val pstore1_addr = 
RegEnable(s1_paddr, s1_valid_not_nacked && s1_write) - val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write) - val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write) - val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes) - val pstore1_storegen_data = Wire(init = pstore1_storegen.data) - val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd) - val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo) - val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) - val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack - val pstore_drain = - Bool(usingAtomics) && pstore_drain_structural || - (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)) - pstore1_valid := { - val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail - val pstore1_held = Reg(Bool()) - assert(!s2_store_valid || !pstore1_held) - pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain - s2_store_valid || pstore1_held - } - val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain) - pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 - val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1) - val pstore2_way = RegEnable(pstore1_way, advance_pstore1) - val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1) - val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1) - dataArb.io.in(0).valid := pstore_drain - dataArb.io.in(0).bits.write := true - dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) - dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) - dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data)) - val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits - dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift - - // store->load RAW hazard detection - val s1_idx = s1_req.addr(idxMSB, wordOffBits) - val s1_raw_hazard = s1_read && - ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) || - (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR)) - when (s1_valid && s1_raw_hazard) { s1_nack := true } - - metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty) - metaWriteArb.io.in(0).bits.way_en := s2_victim_way - metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) - metaWriteArb.io.in(0).bits.data.coh := Mux(s2_valid_hit, s2_new_hit_state, ClientMetadata.onReset) - metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) - - // acquire - val xact_id = PriorityEncoder(~uncachedInFlight.asUInt) - val cachedGetMessage = s2_hit_state.makeAcquire( - client_xact_id = UInt(maxUncachedInFlight - 1), - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - op_code = s2_req.cmd) - val uncachedGetMessage = Get( - client_xact_id = xact_id, - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), - addr_byte = s2_req.addr(beatOffBits-1, 0), - operand_size = s2_req.typ, - alloc = Bool(false)) - val uncachedPutOffset = 
s2_req.addr.extract(beatOffBits-1, wordOffBits) - val uncachedPutMessage = Put( - client_xact_id = xact_id, - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), - data = Fill(beatWords, pstore1_storegen.data), - wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)), - alloc = Bool(false)) - val uncachedPutAtomicMessage = PutAtomic( - client_xact_id = xact_id, - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), - addr_byte = s2_req.addr(beatOffBits-1, 0), - atomic_opcode = s2_req.cmd, - operand_size = s2_req.typ, - data = Fill(beatWords, pstore1_storegen.data)) - io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || (s2_valid_uncached && !uncachedInFlight.asUInt.andR)) && fq.io.enq.ready - io.mem.acquire.bits := cachedGetMessage - when (s2_uncached) { - if (!usingDataScratchpad) - assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access") - io.mem.acquire.bits := uncachedGetMessage - when (s2_write) { - io.mem.acquire.bits := uncachedPutMessage - when (pstore1_amo) { - io.mem.acquire.bits := uncachedPutAtomicMessage + // Set pending bits for outstanding TileLink transaction + when (tl_out.a.fire()) { + when (s2_uncached) { + uncachedInFlight(a_source) := true + uncachedReqs(a_source) := s2_req + }.otherwise { + cached_grant_wait := true } } - } - when (io.mem.acquire.fire()) { - when (s2_uncached) { - uncachedInFlight(xact_id) := true - uncachedReqs(xact_id) := s2_req - }.otherwise { - cached_grant_wait := true - } - } - // grant - val grantIsRefill = io.mem.grant.bits.hasMultibeatData() - val grantIsVoluntary = io.mem.grant.bits.isVoluntary() - val grantIsUncached = !grantIsRefill && !grantIsVoluntary - io.mem.grant.ready := true - when (io.mem.grant.fire()) { - when (grantIsRefill) { assert(cached_grant_wait) } + // grant + val (d_first, d_last, d_address_inc) = edge.firstlast(tl_out.d) + val grantIsCached = tl_out.d.bits.opcode.isOneOf(Grant, GrantData) + val grantIsUncached = tl_out.d.bits.opcode.isOneOf(AccessAck, AccessAckData, HintAck) + val grantIsVoluntary = tl_out.d.bits.opcode === ReleaseAck // Clears a different pending bit + val grantIsRefill = tl_out.d.bits.opcode === GrantData // Writes the data array + tl_out.d.ready := true + when (tl_out.d.fire() && d_last) { + when (grantIsCached) { + assert(cached_grant_wait, "A GrantData was unexpected by the dcache.") + cached_grant_wait := false + } .elsewhen (grantIsUncached) { + // TODO this requires that uncached accesses only take a single beat + val id = tl_out.d.bits.source + val req = uncachedReqs(id) + assert(uncachedInFlight(id), "An AccessAck was unexpected by the dcache.") + uncachedInFlight(id) := false + s2_data := tl_out.d.bits.data + s2_req.cmd := req.cmd + s2_req.typ := req.typ + s2_req.tag := req.tag + s2_req.addr := Cat(s1_paddr >> wordOffBits /* don't-care */, req.addr(wordOffBits-1, 0)) + } .elsewhen (grantIsVoluntary) { + assert(release_ack_wait, "A ReleaseAck was unexpected by the dcache.") + release_ack_wait := false + } + } + + // data refill + val doRefillBeat = grantIsRefill && tl_out.d.valid + dataArb.io.in(1).valid := doRefillBeat + assert(dataArb.io.in(1).ready || !doRefillBeat) + dataArb.io.in(1).bits.write := true + dataArb.io.in(1).bits.addr := s2_req.addr | d_address_inc + dataArb.io.in(1).bits.way_en := s2_victim_way + dataArb.io.in(1).bits.wdata := tl_out.d.bits.data + dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes) + // 
tag updates on refill + metaWriteArb.io.in(1).valid := grantIsCached && tl_out.d.fire() && d_last + assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready) + metaWriteArb.io.in(1).bits.way_en := s2_victim_way + metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB) + metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(s2_req.cmd, tl_out.d.bits.param) + metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) + // don't accept uncached grants if there's a structural hazard on s2_data... + val blockUncachedGrant = Reg(Bool()) + blockUncachedGrant := dataArb.io.out.valid when (grantIsUncached) { - assert(uncachedInFlight(io.mem.grant.bits.client_xact_id)) - uncachedInFlight(io.mem.grant.bits.client_xact_id) := false - s2_data := io.mem.grant.bits.data - val req = uncachedReqs(io.mem.grant.bits.client_xact_id) - s2_req.cmd := req.cmd - s2_req.typ := req.typ - s2_req.tag := req.tag - s2_req.addr := Cat(s1_paddr >> wordOffBits /* don't-care */, req.addr(wordOffBits-1, 0)) - } - when (grantIsVoluntary) { - assert(release_ack_wait) - release_ack_wait := false - } - } - val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles) - when (io.mem.grant.fire() && refillDone) { cached_grant_wait := false } - - // data refill - val doRefillBeat = grantIsRefill && io.mem.grant.valid - dataArb.io.in(1).valid := doRefillBeat - assert(dataArb.io.in(1).ready || !doRefillBeat) - dataArb.io.in(1).bits.write := true - dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits - dataArb.io.in(1).bits.way_en := s2_victim_way - dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data - dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes) - // tag updates on refill - metaWriteArb.io.in(1).valid := refillDone - assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready) - metaWriteArb.io.in(1).bits.way_en := s2_victim_way - metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB) - metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd) - metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) - // don't accept uncached grants if there's a structural hazard on s2_data... 
- val blockUncachedGrant = Reg(Bool()) - blockUncachedGrant := dataArb.io.out.valid - when (grantIsUncached) { - io.mem.grant.ready := !(blockUncachedGrant || s1_valid) - // ...but insert bubble to guarantee grant's eventual forward progress - when (io.mem.grant.valid && !io.mem.grant.ready) { - io.cpu.req.ready := false - dataArb.io.in(1).valid := true - dataArb.io.in(1).bits.write := false - blockUncachedGrant := !dataArb.io.in(1).ready - } - } - - // finish - fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone) - fq.io.enq.bits := io.mem.grant.bits.makeFinish() - io.mem.finish <> fq.io.deq - when (fq.io.enq.valid) { assert(fq.io.enq.ready) } - when (refillDone) { replacer.miss } - - // probe - val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr) - metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe - io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit) - metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block - metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays) - - // release - val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) - val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback) - val releaseRejected = io.mem.release.valid && !io.mem.release.ready - val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) - val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) - val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) - io.mem.release.valid := s2_release_data_valid - io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits) - val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(maxUncachedInFlight - 1), UInt(0)) - val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH) - val probeResponseMessage = s2_probe_state.makeRelease(probe_bits) - val probeNewCoh = s2_probe_state.onProbe(probe_bits) - val newCoh = Wire(init = probeNewCoh) - releaseWay := s2_probe_way - when (s2_victimize && s2_victim_dirty) { - assert(!(s2_valid && s2_hit_state.isValid())) - release_state := s_voluntary_writeback - probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) - } - when (s2_probe) { - when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty } - .elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean } - .otherwise { - io.mem.release.valid := true - release_state := s_probe_rep_miss - } - } - when (releaseDone) { release_state := s_ready } - when (release_state.isOneOf(s_probe_rep_miss, s_probe_rep_clean)) { - io.mem.release.valid := true - } - when (release_state.isOneOf(s_probe_rep_clean, s_probe_rep_dirty)) { - io.mem.release.bits := probeResponseMessage - when (releaseDone) { release_state := s_probe_write_meta } - } - when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) { - io.mem.release.bits := voluntaryReleaseMessage - newCoh := voluntaryNewCoh - releaseWay := s2_victim_way - when (releaseDone) { - release_state := s_voluntary_write_meta - release_ack_wait := true - } - } - when (s2_probe && !io.mem.release.fire()) { s1_nack := true } - io.mem.release.bits.addr_block := probe_bits.addr_block - io.mem.release.bits.addr_beat := writebackCount - io.mem.release.bits.data := s2_data - - dataArb.io.in(2).valid := inWriteback && 
releaseDataBeat < refillCycles - dataArb.io.in(2).bits.write := false - dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits - dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) - - metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta) - metaWriteArb.io.in(2).bits.way_en := releaseWay - metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB) - metaWriteArb.io.in(2).bits.data.coh := newCoh - metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits) - when (metaWriteArb.io.in(2).fire()) { release_state := s_ready } - - // cached response - io.cpu.resp.valid := s2_valid_hit - io.cpu.resp.bits <> s2_req - io.cpu.resp.bits.has_data := s2_read - io.cpu.resp.bits.replay := false - io.cpu.ordered := !(s1_valid || s2_valid || cached_grant_wait || uncachedInFlight.asUInt.orR) - - // uncached response - io.cpu.replay_next := io.mem.grant.fire() && grantIsUncached - val doUncachedResp = Reg(next = io.cpu.replay_next) - when (doUncachedResp) { - assert(!s2_valid_hit) - io.cpu.resp.valid := true - io.cpu.resp.bits.replay := true - } - - // load data subword mux/sign extension - val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes)) - val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits))) - val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes) - io.cpu.resp.bits.data := loadgen.data | s2_sc_fail - io.cpu.resp.bits.data_word_bypass := loadgen.wordData - io.cpu.resp.bits.store_data := pstore1_data - - // AMOs - if (usingAtomics) { - val amoalu = Module(new AMOALU(xLen)) - amoalu.io.addr := pstore1_addr - amoalu.io.cmd := pstore1_cmd - amoalu.io.typ := pstore1_typ - amoalu.io.lhs := s2_data_word - amoalu.io.rhs := pstore1_data - pstore1_storegen_data := amoalu.io.out - } else { - assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation") - } - - // flushes - val flushed = Reg(init=Bool(true)) - val flushing = Reg(init=Bool(false)) - val flushCounter = Counter(nSets * nWays) - when (io.mem.acquire.fire() && !s2_uncached) { flushed := false } - when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) { - io.cpu.s2_nack := !flushed - when (!flushed) { - flushing := !release_ack_wait && !uncachedInFlight.asUInt.orR - } - } - s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait - metaReadArb.io.in(0).valid := flushing - metaReadArb.io.in(0).bits.idx := flushCounter.value - metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays) - when (flushing) { - s1_victim_way := flushCounter.value >> log2Up(nSets) - when (s2_flush_valid) { - when (flushCounter.inc()) { - flushed := true + tl_out.d.ready := !(blockUncachedGrant || s1_valid) + // ...but insert bubble to guarantee grant's eventual forward progress + when (tl_out.d.valid && !tl_out.d.ready) { + io.cpu.req.ready := false + dataArb.io.in(1).valid := true + dataArb.io.in(1).bits.write := false + blockUncachedGrant := !dataArb.io.in(1).ready } } - when (flushed && release_state === s_ready && !release_ack_wait) { - flushing := false + + // Finish TileLink transaction by issuing a GrantAck + grantackq.io.enq.valid := tl_out.d.fire() && d_last && edge.hasFollowUp(tl_out.d.bits) + grantackq.io.enq.bits := edge.GrantAck(tl_out.d.bits) + tl_out.e <> grantackq.io.deq + assert(!grantackq.io.enq.valid || 
grantackq.io.enq.ready, "Too many Grants received by dcache.")
+    when (tl_out.d.fire() && d_last) { replacer.miss }
+
+    // Handle an incoming TileLink Probe message
+    val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
+    metaReadArb.io.in(1).valid := tl_out.b.valid && !block_probe
+    tl_out.b.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
+    metaReadArb.io.in(1).bits.idx := tl_out.b.bits.address(idxMSB, idxLSB)
+    metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays)
+
+    // release
+    val (writebackCount, writebackDone) = Counter(tl_out.c.fire() && inWriteback, refillCycles) //TODO firstlast?
+    val releaseDone = writebackDone || (tl_out.c.fire() && !inWriteback)
+    val releaseRejected = tl_out.c.valid && !tl_out.c.ready
+    val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
+    val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
+    val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
+
+    val voluntaryReleaseMessage = edge.Release(
+                                    fromSource = UInt(maxUncachedInFlight - 1),
+                                    toAddress = probe_bits.address,
+                                    lgSize = lgCacheBlockBytes,
+                                    shrinkPermissions = s2_shrink_param,
+                                    data = s2_data)._2
+
+    val probeResponseMessage = Mux(prb_ack_data,
+                                 edge.ProbeAck(
+                                   b = probe_bits,
+                                   reportPermissions = s2_report_param,
+                                   data = s2_data),
+                                 edge.ProbeAck(
+                                   b = probe_bits,
+                                   reportPermissions = s2_report_param))
+
+    tl_out.c.valid := s2_release_data_valid
+    tl_out.c.bits := voluntaryReleaseMessage // TODO was ClientMetadata.onReset.makeRelease(probe_bits) ... s2_victim_state ok?
+    val newCoh = Wire(init = probeNewCoh)
+    releaseWay := s2_probe_way
+
+    when (s2_victimize && s2_victim_dirty) {
+      assert(!(s2_valid && s2_hit_valid))
+      release_state := s_voluntary_writeback
+      probe_bits.address := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) << blockOffBits
+    }
+    when (s2_probe) {
+      when (prb_ack_data) { release_state := s_probe_rep_dirty }
+      .elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean }
+      .otherwise {
+        tl_out.c.valid := true
+        release_state := s_probe_rep_miss
+      }
+    }
+    when (releaseDone) { release_state := s_ready }
+    when (release_state.isOneOf(s_probe_rep_miss, s_probe_rep_clean)) {
+      tl_out.c.valid := true
+    }
+    when (release_state.isOneOf(s_probe_rep_clean, s_probe_rep_dirty)) {
+      tl_out.c.bits := probeResponseMessage
+      when (releaseDone) { release_state := s_probe_write_meta }
+    }
+    when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) {
+      tl_out.c.bits := voluntaryReleaseMessage
+      newCoh := voluntaryNewCoh
+      releaseWay := s2_victim_way
+      when (releaseDone) {
+        release_state := s_voluntary_write_meta
+        release_ack_wait := true
+      }
+    }
+    when (s2_probe && !tl_out.c.fire()) { s1_nack := true }
+    tl_out.c.bits.address := probe_bits.address
+    tl_out.c.bits.data := s2_data
+
+    dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
+    dataArb.io.in(2).bits.write := false
+    dataArb.io.in(2).bits.addr := tl_out.c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits)
+    dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
+
+    metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
+    metaWriteArb.io.in(2).bits.way_en := releaseWay
+    metaWriteArb.io.in(2).bits.idx := tl_out.c.bits.address(idxMSB, idxLSB)
+    metaWriteArb.io.in(2).bits.data.coh := newCoh
+    metaWriteArb.io.in(2).bits.data.tag := tl_out.c.bits.address(paddrBits-1, untagBits)
+    when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
+
+    // cached response
+    io.cpu.resp.valid := s2_valid_hit
+    io.cpu.resp.bits <> s2_req
+    io.cpu.resp.bits.has_data := s2_read
+    io.cpu.resp.bits.replay := false
+    io.cpu.ordered := !(s1_valid || s2_valid || cached_grant_wait || uncachedInFlight.asUInt.orR)
+
+    // uncached response
+    io.cpu.replay_next := tl_out.d.fire() && tl_out.d.bits.opcode <= AccessAckData
+    val doUncachedResp = Reg(next = io.cpu.replay_next)
+    when (doUncachedResp) {
+      assert(!s2_valid_hit)
+      io.cpu.resp.valid := true
+      io.cpu.resp.bits.replay := true
+    }
+
+    // load data subword mux/sign extension
+    val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
+    val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
+    val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)
+    io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
+    io.cpu.resp.bits.data_word_bypass := loadgen.wordData
+    io.cpu.resp.bits.store_data := pstore1_data
+
+    // AMOs
+    if (usingAtomics) {
+      val amoalu = Module(new AMOALU(xLen))
+      amoalu.io.addr := pstore1_addr
+      amoalu.io.cmd := pstore1_cmd
+      amoalu.io.typ := pstore1_typ
+      amoalu.io.lhs := s2_data_word
+      amoalu.io.rhs := pstore1_data
+      pstore1_storegen_data := amoalu.io.out
+    } else {
+      assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation")
+    }
+
+    // flushes
+    val flushed = Reg(init=Bool(true))
+    val flushing = Reg(init=Bool(false))
+    val flushCounter = Counter(nSets * nWays)
+    when (tl_out.a.fire() && !s2_uncached) { flushed := false }
+    when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
+      io.cpu.s2_nack := !flushed
+      when (!flushed) {
+        flushing := !release_ack_wait && !uncachedInFlight.asUInt.orR
+      }
+    }
+    s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait
+    metaReadArb.io.in(0).valid := flushing
+    metaReadArb.io.in(0).bits.idx := flushCounter.value
+    metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
+    when (flushing) {
+      s1_victim_way := flushCounter.value >> log2Up(nSets)
+      when (s2_flush_valid) {
+        when (flushCounter.inc()) {
+          flushed := true
+        }
+      }
+      when (flushed && release_state === s_ready && !release_ack_wait) {
+        flushing := false
+      }
+    }
   }
 }
diff --git a/src/main/scala/rocket/nbdcache.scala b/src/main/scala/rocket/nbdcache.scala
index f0be7c07..16a9fd95 100644
--- a/src/main/scala/rocket/nbdcache.scala
+++ b/src/main/scala/rocket/nbdcache.scala
@@ -4,10 +4,11 @@ package rocket
 import Chisel._
 import uncore.tilelink._
-import uncore.coherence._
+import uncore.tilelink2._
 import uncore.agents._
 import uncore.constants._
 import uncore.util._
+import diplomacy._
 import util._
 import Chisel.ImplicitConversions._
 import cde.{Parameters, Field}
@@ -19,11 +20,19 @@ case class DCacheConfig(
 
 case object DCacheKey extends Field[DCacheConfig]
 
-trait HasL1HellaCacheParameters extends HasL1CacheParameters {
-  val wordBits = xLen // really, xLen max fLen
+trait HasL1HellaCacheParameters extends HasCacheParameters with HasCoreParameters {
+  val outerDataBeats = p(TLKey(p(TLId))).dataBeats
+  val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
+  val refillCyclesPerBeat = outerDataBits/rowBits
+  val refillCycles = refillCyclesPerBeat*outerDataBeats
+
+  val cacheBlockBytes = p(CacheBlockBytes)
+  val lgCacheBlockBytes = log2Up(cacheBlockBytes)
+
+  val wordBits = xLen // really, xLen max fLen
   val wordBytes = wordBits/8
   val wordOffBits = log2Up(wordBytes)
-  val beatBytes = p(CacheBlockBytes) / outerDataBeats
+  val beatBytes = cacheBlockBytes / outerDataBeats
   val beatWords = beatBytes / wordBytes
   val beatOffBits = log2Up(beatBytes)
   val idxMSB = untagBits-1
@@ -310,10 +319,10 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa
   rpq.io.enq.bits := io.req_bits
   rpq.io.deq.ready := (io.replay.ready && state === s_drain_rpq) || state === s_invalid
 
-  val coh_on_grant = req.old_meta.coh.onGrant(
-                          incoming = io.mem_grant.bits,
-                          pending = Mux(dirties_coh, M_XWR, req.cmd))
-  val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd)
+  val coh_on_grant = req.old_meta.coh.onGrant(UInt(0), UInt(0))
+                          //incoming = io.mem_grant.bits,
+                          //pending = Mux(dirties_coh, M_XWR, req.cmd))
+  val coh_on_hit = coh_on_grant //io.req_bits.old_meta.coh.onHit(io.req_bits.cmd)
 
   when (state === s_drain_rpq && !rpq.io.deq.valid) {
     state := s_invalid
@@ -355,14 +364,14 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa
     req := io.req_bits
     dirties_coh := isWrite(io.req_bits.cmd)
     when (io.req_bits.tag_match) {
-      when(coh.isHit(io.req_bits.cmd)) { // set dirty bit
+      when(Bool(false)) { // TODO coh.isHit(io.req_bits.cmd)) { // set dirty bit
        state := s_meta_write_req
        new_coh_state := coh_on_hit
      }.otherwise { // upgrade permissions
        state := s_refill_req
      }
    }.otherwise { // writeback if necessary and refill
-      state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear)
+      //TODO state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear)
    }
  }
@@ -390,22 +399,22 @@
   io.meta_write.valid := state.isOneOf(s_meta_write_req, s_meta_clear)
   io.meta_write.bits.idx := req_idx
   io.meta_write.bits.data.coh := Mux(state === s_meta_clear,
-                                      req.old_meta.coh.onCacheControl(M_FLUSH),
+                                      req.old_meta.coh.onCacheControl(M_FLUSH)._2,
                                      new_coh_state)
   io.meta_write.bits.data.tag := io.tag
   io.meta_write.bits.way_en := req.way_en
 
   io.wb_req.valid := state === s_wb_req
-  io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback(
-                      client_xact_id = UInt(id),
-                      addr_block = Cat(req.old_meta.tag, req_idx))
+  //TODO io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback(
+  //                    client_xact_id = UInt(id),
+  //                    addr_block = Cat(req.old_meta.tag, req_idx))
   io.wb_req.bits.way_en := req.way_en
 
   io.mem_req.valid := state === s_refill_req && fq.io.enq.ready
-  io.mem_req.bits := req.old_meta.coh.makeAcquire(
-                       addr_block = Cat(io.tag, req_idx),
-                       client_xact_id = Bits(id),
-                       op_code = req.cmd)
+  //TODO io.mem_req.bits := req.old_meta.coh.makeAcquire(
+  //                     addr_block = Cat(io.tag, req_idx),
+  //                     client_xact_id = Bits(id),
+  //                     op_code = req.cmd)
 
   io.meta_read.valid := state === s_drain_rpq
   io.meta_read.bits.idx := req_idx
@@ -669,10 +678,10 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   val miss_coh = ClientMetadata.onReset
   val reply_coh = Mux(tag_matches, old_coh, miss_coh)
-  val reply = reply_coh.makeRelease(req)
+  //TODO val reply = reply_coh.makeRelease(req)
 
   io.req.ready := state === s_invalid
   io.rep.valid := state === s_release
-  io.rep.bits := reply
+  //TODO io.rep.bits := reply
 
   assert(!io.rep.valid || !io.rep.bits.hasData(),
     "ProbeUnit should not send releases with data")
@@ -685,10 +694,10 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   io.meta_write.bits.way_en := way_en
   io.meta_write.bits.idx := req.addr_block
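(Editorial aside, not part of the patch: the TODO-stubbed MSHR and ProbeUnit logic above still calls the old uncore.coherence API; the replacement is the tuple-returning ClientMetadata API from tilelink2, already used by the new DCache earlier in this patch. A sketch of the corresponding calls, with `cmd` and `probe_param` standing in for the MSHR's request command and the incoming Probe's param field:)

    val coh = ClientMetadata.onReset
    // replaces isHit/onHit: hit flag, Acquire grow param, and the metadata after a hit
    val (is_hit, grow_param, coh_on_hit) = coh.onAccess(cmd)
    // replaces requiresVoluntaryWriteback/onCacheControl: dirty flag, Release shrink param, clean metadata
    val (has_dirty_data, shrink_param, coh_on_flush) = coh.onCacheControl(M_FLUSH)
    // replaces makeRelease/onProbe: whether the ProbeAck needs data, its report param, and the new metadata
    val (ack_needs_data, report_param, coh_on_probe) = coh.onProbe(probe_param)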
io.meta_write.bits.data.tag := req.addr_block >> idxBits - io.meta_write.bits.data.coh := old_coh.onProbe(req) + //TODO io.meta_write.bits.data.coh := old_coh.onProbe(req) io.wb_req.valid := state === s_writeback_req - io.wb_req.bits := reply + //TODO io.wb_req.bits := reply io.wb_req.bits.way_en := way_en // state === s_invalid @@ -716,7 +725,7 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { } when (state === s_mshr_resp) { - val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback() + val needs_writeback = tag_matches // TODO && old_coh.requiresVoluntaryWriteback() state := Mux(needs_writeback, s_writeback_req, s_release) } @@ -912,9 +921,8 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) - val s2_hit = s2_tag_match && - s2_hit_state.isHit(s2_req.cmd) && - s2_hit_state === s2_hit_state.onHit(s2_req.cmd) + val (s2_has_permission, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) + val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state // load-reserved/store-conditional val lrsc_count = Reg(init=UInt(0)) @@ -1236,7 +1244,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module } object HellaCache { - def apply(cfg: DCacheConfig)(implicit p: Parameters) = - if (cfg.nMSHRs == 0) Module(new DCache()).io - else Module(new HellaCache(cfg)).io + def apply(cfg: DCacheConfig)(implicit p: Parameters) = LazyModule(new DCache) + // if (cfg.nMSHRs == 0) Module(new DCache()).io + // else Module(new HellaCache(cfg)).io } diff --git a/src/main/scala/rocket/tile.scala b/src/main/scala/rocket/tile.scala index c497a98e..eb278659 100644 --- a/src/main/scala/rocket/tile.scala +++ b/src/main/scala/rocket/tile.scala @@ -11,10 +11,9 @@ import uncore.converters._ import uncore.devices._ import util._ import cde.{Parameters, Field} +import scala.collection.mutable.ListBuffer case object BuildRoCC extends Field[Seq[RoccParameters]] -case object NCachedTileLinkPorts extends Field[Int] -case object NUncachedTileLinkPorts extends Field[Int] case object TileId extends Field[Int] case class RoccParameters( @@ -24,61 +23,38 @@ case class RoccParameters( nPTWPorts : Int = 0, useFPU: Boolean = false) -case class TileBundleConfig( - nCachedTileLinkPorts: Int, - nUncachedTileLinkPorts: Int, - xLen: Int) - -class TileIO(c: TileBundleConfig, node: Option[TLInwardNode] = None)(implicit p: Parameters) extends Bundle { - val cached = Vec(c.nCachedTileLinkPorts, new ClientTileLinkIO) - val uncached = Vec(c.nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) - val hartid = UInt(INPUT, c.xLen) - val interrupts = new TileInterrupts().asInput - val slave = node.map(_.inward.bundleIn) - val resetVector = UInt(INPUT, c.xLen) - - override def cloneType = new TileIO(c).asInstanceOf[this.type] -} - -abstract class TileImp(l: LazyTile)(implicit val p: Parameters) extends LazyModuleImp(l) { - val io: TileIO -} - -abstract class LazyTile(implicit p: Parameters) extends LazyModule { - val nCachedTileLinkPorts = p(NCachedTileLinkPorts) - val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts) +class RocketTile(implicit p: Parameters) extends LazyModule { val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) - val bc = TileBundleConfig( - nCachedTileLinkPorts = nCachedTileLinkPorts, - 
nUncachedTileLinkPorts = nUncachedTileLinkPorts, - xLen = p(XLen)) + val icacheParams = p.alterPartial({ case CacheName => "L1I" }) - val module: TileImp - val slave: Option[TLInputNode] -} - -class RocketTile(implicit p: Parameters) extends LazyTile { - val slave = if (p(DataScratchpadSize) == 0) None else Some(TLInputNode()) + val slaveNode = if (p(DataScratchpadSize) == 0) None else Some(TLInputNode()) val scratch = if (p(DataScratchpadSize) == 0) None else Some(LazyModule(new ScratchpadSlavePort()(dcacheParams))) + val dcache = HellaCache(p(DCacheKey))(dcacheParams) + val ucLegacy = LazyModule(new TLLegacy()(p)) - (slave zip scratch) foreach { case (node, lm) => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(node) } + (slaveNode zip scratch) foreach { case (node, lm) => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(node) } + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val cached = dcache.node.bundleOut + val uncached = ucLegacy.node.bundleOut + val slave = slaveNode.map(_.bundleIn) + val hartid = UInt(INPUT, p(XLen)) + val interrupts = new TileInterrupts().asInput + val resetVector = UInt(INPUT, p(XLen)) + } - lazy val module = new TileImp(this) { - val io = new TileIO(bc, slave) val buildRocc = p(BuildRoCC) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size val core = Module(new Rocket) - val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" }))) - val dcache = HellaCache(p(DCacheKey))(dcacheParams) + val icache = Module(new Frontend()(icacheParams)) - val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw) - val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem) - val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem) - val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]() - val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem) + val ptwPorts = ListBuffer(icache.io.ptw, dcache.module.io.ptw) + val dcPorts = ListBuffer(core.io.dmem) + val uncachedArbPorts = ListBuffer(icache.io.mem) core.io.interrupts := io.interrupts core.io.hartid := io.hartid icache.io.cpu <> core.io.imem @@ -129,19 +105,12 @@ class RocketTile(implicit p: Parameters) extends LazyTile { respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) ptwPorts ++= roccs.flatMap(_.io.ptw) - uncachedPorts ++= roccs.flatMap(_.io.utl) + uncachedArbPorts ++= roccs.flatMap(_.io.utl) // TODO no difference between io.autl and io.utl for now } val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size)) uncachedArb.io.in <> uncachedArbPorts - uncachedArb.io.out +=: uncachedPorts - - // Connect the caches and RoCC to the outer memory system - io.uncached <> uncachedPorts - io.cached <> cachedPorts - // TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions - require(uncachedPorts.size == nUncachedTileLinkPorts) - require(cachedPorts.size == nCachedTileLinkPorts) + ucLegacy.module.io.legacy <> uncachedArb.io.out if (p(UseVM)) { val ptw = Module(new PTW(ptwPorts.size)(dcacheParams)) @@ -155,7 +124,7 @@ class RocketTile(implicit p: Parameters) extends LazyTile { require(dcPorts.size == core.dcacheArbPorts) val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams)) dcArb.io.requestor <> dcPorts - dcache.cpu <> dcArb.io.mem + dcache.module.io.cpu <> dcArb.io.mem if (nFPUPorts == 0) { fpuOpt.foreach { fpu => diff --git a/src/main/scala/uncore/agents/Cache.scala 
index 47b3628e..d0ef95cd 100644
--- a/src/main/scala/uncore/agents/Cache.scala
+++ b/src/main/scala/uncore/agents/Cache.scala
@@ -130,7 +130,6 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
 
 abstract class Metadata(implicit p: Parameters) extends CacheBundle()(p) {
   val tag = Bits(width = tagBits)
-  val coh: CoherenceMetadata
 }
 
 class MetaReadReq(implicit p: Parameters) extends CacheBundle()(p) {
diff --git a/src/main/scala/uncore/tilelink2/Bundles.scala b/src/main/scala/uncore/tilelink2/Bundles.scala
index cee73942..46a45e07 100644
--- a/src/main/scala/uncore/tilelink2/Bundles.scala
+++ b/src/main/scala/uncore/tilelink2/Bundles.scala
@@ -43,6 +43,14 @@ object TLMessages
   def isD(x: UInt) = x <= ReleaseAck
 }
 
+/**
+  * The three primary TileLink permissions are:
+  *   (T)runk: the agent is (or is on the path to) the global point of serialization.
+  *   (B)ranch: the agent is on an outward path from the global point of serialization and holds read-only permissions.
+  *   (N)one: the agent holds no permissions on the data.
+  * These permissions are permuted by transfer operations in various ways.
+  * Messages for growing or shrinking permissions name the transition they request or report (e.g. NtoB, BtoT, TtoN).
+  */
 object TLPermissions
 {
   // Cap types (Grant = new permissions, Probe = permissions <= target)
diff --git a/src/main/scala/uncore/tilelink2/Edges.scala b/src/main/scala/uncore/tilelink2/Edges.scala
index a7cb2df6..2948d7e7 100644
--- a/src/main/scala/uncore/tilelink2/Edges.scala
+++ b/src/main/scala/uncore/tilelink2/Edges.scala
@@ -238,7 +238,10 @@ class TLEdgeOut(
     (legal, c)
   }
 
-  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt) = {
+  def ProbeAck(b: TLBundleB, reportPermissions: UInt): TLBundleC =
+    ProbeAck(b.source, b.address, b.size, reportPermissions)
+
+  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt): TLBundleC = {
     val c = Wire(new TLBundleC(bundle))
     c.opcode := TLMessages.ProbeAck
     c.param := reportPermissions
@@ -250,7 +253,10 @@ class TLEdgeOut(
     c
   }
 
-  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt, data: UInt) = {
+  def ProbeAck(b: TLBundleB, reportPermissions: UInt, data: UInt): TLBundleC =
+    ProbeAck(b.source, b.address, b.size, reportPermissions, data)
+
+  def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt, data: UInt): TLBundleC = {
     val c = Wire(new TLBundleC(bundle))
     c.opcode := TLMessages.ProbeAckData
     c.param := reportPermissions
@@ -262,7 +268,8 @@ class TLEdgeOut(
     c
   }
 
-  def GrantAck(toSink: UInt) = {
+  def GrantAck(d: TLBundleD): TLBundleE = GrantAck(d.sink)
+  def GrantAck(toSink: UInt): TLBundleE = {
     val e = Wire(new TLBundleE(bundle))
     e.sink := toSink
     e
diff --git a/src/main/scala/uncore/tilelink2/Metadata.scala b/src/main/scala/uncore/tilelink2/Metadata.scala
new file mode 100644
index 00000000..5e4598f9
--- /dev/null
+++ b/src/main/scala/uncore/tilelink2/Metadata.scala
@@ -0,0 +1,149 @@
+// See LICENSE for license details.
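+//
+// The client states defined below form an ordered lattice of permissions,
+//
+//   Nothing < Branch < Trunk < Dirty
+//
+// where Branch confers read permission (state > Nothing), Trunk confers
+// write permission (state > Branch), and Dirty additionally marks the local
+// copy as modified, i.e. it must eventually be written back.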
+
+package uncore.tilelink2
+
+import Chisel._
+import chisel3.internal.sourceinfo.SourceInfo
+import util._
+import uncore.constants.MemoryOpConstants
+
+object ClientStates {
+  val width = 2
+
+  val Nothing = UInt(0)
+  val Branch = UInt(1)
+  val Trunk = UInt(2)
+  val Dirty = UInt(3)
+
+  def hasReadPermission(state: UInt): Bool = state > Nothing
+  def hasWritePermission(state: UInt): Bool = state > Branch
+}
+
+object MemoryOpCategories extends MemoryOpConstants {
+  val wr = Cat(Bool(true), Bool(true))   // Op actually writes
+  val wi = Cat(Bool(false), Bool(true))  // Future op will write
+  val rd = Cat(Bool(false), Bool(false)) // Op only reads
+
+  def categorize(cmd: UInt): UInt = Cat(isWrite(cmd), isWriteIntent(cmd))
+}
+
+/** Stores the client-side coherence information,
+  * such as permissions on the data and whether the data is dirty.
+  * Its API can be used to make TileLink messages in response to
+  * memory operations, cache control operations, or Probe messages.
+  */
+class ClientMetadata extends Bundle {
+  /** Actual state information stored in this bundle */
+  val state = UInt(width = ClientStates.width)
+
+  /** Metadata equality */
+  def ===(rhs: UInt): Bool = state === rhs
+  def ===(rhs: ClientMetadata): Bool = state === rhs.state
+  def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)
+
+  /** Is the block's data present in this cache */
+  def isValid(dummy: Int = 0): Bool = state > ClientStates.Nothing
+
+  /** Determine whether this cmd misses, and the new state (on hit) or param to be sent (on miss) */
+  private def growStarter(cmd: UInt): (Bool, UInt) = {
+    import MemoryOpCategories._
+    import TLPermissions._
+    import ClientStates._
+    MuxTLookup(Cat(categorize(cmd), state), (Bool(false), UInt(0)), Seq(
+      //(effect, am now)  -> (was a hit,   next)
+      Cat(rd, Dirty)    -> (Bool(true),  Dirty),
+      Cat(rd, Trunk)    -> (Bool(true),  Trunk),
+      Cat(rd, Branch)   -> (Bool(true),  Branch),
+      Cat(wi, Dirty)    -> (Bool(true),  Dirty),
+      Cat(wi, Trunk)    -> (Bool(true),  Trunk),
+      Cat(wr, Dirty)    -> (Bool(true),  Dirty),
+      Cat(wr, Trunk)    -> (Bool(true),  Dirty),
+      //(effect, am now)  -> (was a miss,  param)
+      Cat(rd, Nothing)  -> (Bool(false), NtoB),
+      Cat(wi, Branch)   -> (Bool(false), BtoT),
+      Cat(wi, Nothing)  -> (Bool(false), NtoT),
+      Cat(wr, Branch)   -> (Bool(false), BtoT),
+      Cat(wr, Nothing)  -> (Bool(false), NtoT)))
+  }
+
+  /** Determine what state to go to after miss based on Grant param */
+  private def growFinisher(cmd: UInt, param: UInt): UInt = {
+    import MemoryOpCategories._
+    import TLPermissions._
+    import ClientStates._
+    MuxLookup(Cat(categorize(cmd), param), UInt(0), Seq(
+      //(effect, param) -> (next)
+      Cat(rd, toB) -> Branch,
+      Cat(rd, toT) -> Trunk,
+      Cat(wi, toT) -> Trunk,
+      Cat(wr, toT) -> Dirty))
+  }
+
+  /** Does a secondary miss on the block require another Acquire message */
+  def requiresAcquireOnSecondaryMiss(first_cmd: UInt, second_cmd: UInt): Bool = {
+    import MemoryOpCategories._
+    isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
+  }
+
+  /** Does this cache have permissions on this block sufficient to perform op,
+    * and what to do next (Acquire message param or updated metadata).
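+    *
+    * A sketch of the intended use in a cache's hit logic (identifiers are
+    * illustrative; compare the s2_hit computation in dcache.scala above):
+    * {{{
+    *   val (has_permission, grow_param, new_hit_state) = meta.onAccess(req.cmd)
+    *   val hit = tag_match && has_permission && meta === new_hit_state
+    *   // on a miss, grow_param is the param for the outgoing Acquire
+    * }}}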
+    */
+  def onAccess(cmd: UInt): (Bool, UInt, ClientMetadata) = {
+    val r = growStarter(cmd)
+    (r._1, r._2, ClientMetadata(r._2))
+  }
+
+  /** Metadata change on a returned Grant */
+  def onGrant(cmd: UInt, param: UInt): ClientMetadata = ClientMetadata(growFinisher(cmd, param))
+
+  /** Determine what response to report and what state to go to based on Probe param */
+  private def shrinkHelper(param: UInt): (Bool, UInt, UInt) = {
+    import ClientStates._
+    import TLPermissions._
+    MuxTLookup(Cat(param, state), (Bool(false), UInt(0), UInt(0)), Seq(
+      //(wanted, am now)  -> (hasDirtyData, resp param, next state)
+      Cat(toT, Dirty)   -> (Bool(true),  TtoT, Trunk),
+      Cat(toT, Trunk)   -> (Bool(false), TtoT, Trunk),
+      Cat(toT, Branch)  -> (Bool(false), BtoB, Branch),
+      Cat(toT, Nothing) -> (Bool(false), NtoN, Nothing),
+      Cat(toB, Dirty)   -> (Bool(true),  TtoB, Branch),
+      Cat(toB, Trunk)   -> (Bool(false), TtoB, Branch),  // Policy: Don't notify on clean downgrade
+      Cat(toB, Branch)  -> (Bool(false), BtoB, Branch),
+      Cat(toB, Nothing) -> (Bool(false), BtoN, Nothing),
+      Cat(toN, Dirty)   -> (Bool(true),  TtoN, Nothing),
+      Cat(toN, Trunk)   -> (Bool(false), TtoN, Nothing),  // Policy: Don't notify on clean downgrade
+      Cat(toN, Branch)  -> (Bool(false), BtoN, Nothing),  // Policy: Don't notify on clean downgrade
+      Cat(toN, Nothing) -> (Bool(false), NtoN, Nothing)))
+  }
+
+  /** Translate cache control cmds into Probe param */
+  private def cmdToPermCap(cmd: UInt): UInt = {
+    import MemoryOpCategories._
+    import TLPermissions._
+    MuxLookup(cmd, toN, Seq(
+      M_FLUSH   -> toN,
+      M_PRODUCE -> toB,
+      M_CLEAN   -> toT))
+  }
+
+  def onCacheControl(cmd: UInt): (Bool, UInt, ClientMetadata) = {
+    val r = shrinkHelper(cmdToPermCap(cmd))
+    (r._1, r._2, ClientMetadata(r._3))
+  }
+
+  def onProbe(param: UInt): (Bool, UInt, ClientMetadata) = {
+    val r = shrinkHelper(param)
+    (Bool(true), r._2, ClientMetadata(r._3))
+  }
+}
+
+/** Factories for ClientMetadata, including on reset */
+object ClientMetadata {
+  def apply(perm: UInt) = {
+    val meta = Wire(new ClientMetadata)
+    meta.state := perm
+    meta
+  }
+  def onReset = ClientMetadata(ClientStates.Nothing)
+  def maximum = ClientMetadata(ClientStates.Dirty)
+}
diff --git a/src/main/scala/util/Misc.scala b/src/main/scala/util/Misc.scala
index 6ce8583b..c1eec648 100644
--- a/src/main/scala/util/Misc.scala
+++ b/src/main/scala/util/Misc.scala
@@ -36,6 +36,23 @@ object MuxT {
     (Mux(cond, con._1, alt._1), Mux(cond, con._2, alt._2), Mux(cond, con._3, alt._3))
 }
 
+/** Creates a cascade of n MuxTs to search for a key value. */
+object MuxTLookup {
+  def apply[S <: UInt, T <: Data, U <: Data](key: S, default: (T, U), mapping: Seq[(S, (T, U))]): (T, U) = {
+    var res = default
+    for ((k, v) <- mapping.reverse)
+      res = MuxT(k === key, v, res)
+    res
+  }
+
+  def apply[S <: UInt, T <: Data, U <: Data, W <: Data](key: S, default: (T, U, W), mapping: Seq[(S, (T, U, W))]): (T, U, W) = {
+    var res = default
+    for ((k, v) <- mapping.reverse)
+      res = MuxT(k === key, v, res)
+    res
+  }
+}
+
 object Str {
   def apply(s: String): UInt = {
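+    // Note on MuxTLookup above (an illustrative expansion, not additional API):
+    //
+    //   MuxTLookup(key, default, Seq(k0 -> v0, k1 -> v1))
+    //
+    // elaborates to MuxT(k0 === key, v0, MuxT(k1 === key, v1, default)), so
+    // the first matching key in the Seq wins, like MuxLookup but for tuples.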