From d203c4c654bbfa3dcb7e367c83def35f703cce8a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 12 Apr 2017 21:49:37 -0700 Subject: [PATCH] Check AMO operation legality in TLB --- src/main/scala/rocket/DCache.scala | 15 +++++++-------- src/main/scala/rocket/NBDcache.scala | 10 ++-------- src/main/scala/rocket/TLB.scala | 26 +++++++++++++++++++++----- src/main/scala/uncore/Consts.scala | 5 ++++- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 288faa33..52759168 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -115,6 +115,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { tlb.io.req.bits.instruction := false tlb.io.req.bits.store := s1_write tlb.io.req.bits.size := s1_req.typ + tlb.io.req.bits.cmd := s1_req.cmd when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true } @@ -187,14 +188,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } // exceptions - val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes) - val no_xcpt = Bool(usingDataScratchpad) && s1_req.phys /* slave port */ && s1_hit_state.isValid() - io.cpu.xcpt.ma.ld := !no_xcpt && s1_read && s1_storegen.misaligned - io.cpu.xcpt.ma.st := !no_xcpt && s1_write && s1_storegen.misaligned - io.cpu.xcpt.pf.ld := !no_xcpt && s1_read && tlb.io.resp.pf.ld - io.cpu.xcpt.pf.st := !no_xcpt && s1_write && tlb.io.resp.pf.st - io.cpu.xcpt.ae.ld := !no_xcpt && s1_read && tlb.io.resp.ae.ld - io.cpu.xcpt.ae.st := !no_xcpt && s1_write && tlb.io.resp.ae.st + io.cpu.xcpt := tlb.io.resp + if (usingDataScratchpad) { + val no_xcpt = s1_req.phys /* slave port */ && s1_hit_state.isValid() + when (no_xcpt) { io.cpu.xcpt := 0.U.asTypeOf(io.cpu.xcpt) } + } // load reservations val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR @@ -247,6 +245,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift // store->load RAW hazard detection + val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes) val s1_idx = s1_req.addr(idxMSB, wordOffBits) val s1_raw_hazard = s1_read && ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) || diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index b840f527..da35ba97 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -699,6 +699,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule val dtlb = Module(new TLB(log2Ceil(coreDataBytes), nTLBEntries)) io.ptw <> dtlb.io.ptw + io.cpu.xcpt := dtlb.io.resp dtlb.io.req.valid := s1_valid && !io.cpu.s1_kill && (s1_readwrite || s1_sfence) dtlb.io.req.bits.sfence.valid := s1_sfence dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) @@ -709,6 +710,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule dtlb.io.req.bits.instruction := Bool(false) dtlb.io.req.bits.store := s1_write dtlb.io.req.bits.size := s1_req.typ + dtlb.io.req.bits.cmd := s1_req.cmd when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } when (io.cpu.req.valid) { @@ -742,14 +744,6 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule s2_req.cmd := s1_req.cmd } - val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned - io.cpu.xcpt.ma.ld := s1_read && misaligned - io.cpu.xcpt.ma.st := s1_write && misaligned - io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.pf.ld - io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.pf.st - io.cpu.xcpt.ae.ld := s1_read && dtlb.io.resp.ae.ld - io.cpu.xcpt.ae.st := s1_write && dtlb.io.resp.ae.st - // tags def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) val meta = Module(new L1MetadataArray(onReset _)) diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index 1ad05b67..2daae155 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -10,6 +10,7 @@ import diplomacy._ import coreplex.CacheBlockBytes import tile.{XLen, CoreModule, CoreBundle} import uncore.tilelink2._ +import uncore.constants._ import util._ case object PAddrBits extends Field[Int] @@ -29,6 +30,7 @@ class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) { val store = Bool() val sfence = Valid(new SFenceReq) val size = UInt(width = log2Ceil(lgMaxSize + 1)) + val cmd = Bits(width = M_SZ) override def cloneType = new TLBReq(lgMaxSize).asInstanceOf[this.type] } @@ -45,6 +47,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { val paddr = UInt(width = paddrBits) val pf = new TLBExceptions val ae = new TLBExceptions + val ma = new TLBExceptions val cacheable = Bool() } @@ -68,6 +71,8 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters val pw = Bool() val px = Bool() val pr = Bool() + val pal = Bool() // AMO logical + val paa = Bool() // AMO arithmetic val c = Bool() } @@ -105,10 +110,12 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_) def fastCheck(member: TLManagerParameters => Boolean) = legal_address && Mux1H(edge.manager.findFast(mpu_physaddr), edge.manager.managers.map(m => Bool(member(m)))) + val cacheable = fastCheck(_.supportsAcquireB) val prot_r = fastCheck(_.supportsGet) && pmp.io.r val prot_w = fastCheck(_.supportsPutFull) && pmp.io.w + val prot_al = fastCheck(_.supportsLogical) || cacheable + val prot_aa = fastCheck(_.supportsArithmetic) || cacheable val prot_x = fastCheck(_.executable) && pmp.io.x - val cacheable = fastCheck(_.supportsAcquireB) val isSpecial = !(io.ptw.resp.bits.homogeneous || io.ptw.resp.bits.ae) val lookup_tag = Cat(io.ptw.ptbr.asid, vpn(vpnBits-1,0)) @@ -151,6 +158,8 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters newEntry.pr := prot_r && !io.ptw.resp.bits.ae newEntry.pw := prot_w && !io.ptw.resp.bits.ae newEntry.px := prot_x && !io.ptw.resp.bits.ae + newEntry.pal := prot_al + newEntry.paa := prot_aa valid := valid | UIntToOH(waddr) reg_entries(waddr) := newEntry.asUInt @@ -166,8 +175,12 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters val pr_array = Cat(Fill(2, prot_r), entries.init.map(_.pr).asUInt) val pw_array = Cat(Fill(2, prot_w), entries.init.map(_.pw).asUInt) val px_array = Cat(Fill(2, prot_x), entries.init.map(_.px).asUInt) + val paa_array = Cat(Fill(2, prot_aa), entries.init.map(_.paa).asUInt) + val pal_array = Cat(Fill(2, prot_al), entries.init.map(_.pal).asUInt) val c_array = Cat(Fill(2, cacheable), entries.init.map(_.c).asUInt) + val ae_st_array = ~pw_array | Mux(isAMOLogical(io.req.bits.cmd), ~pal_array, 0.U) | Mux(isAMOArithmetic(io.req.bits.cmd), ~paa_array, 0.U) + val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1)).orR val bad_va = if (vpnBits == vpnBitsExtended) Bool(false) else vpn(vpnBits) =/= vpn(vpnBits-1) @@ -186,12 +199,15 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters val multipleHits = PopCountAtLeast(hits(totalEntries-1, 0), 2) io.req.ready := state === s_ready - io.resp.pf.ld := bad_va || (~r_array & hits).orR - io.resp.pf.st := bad_va || (~w_array & hits).orR + io.resp.pf.ld := (bad_va || (~r_array & hits).orR) && isRead(io.req.bits.cmd) + io.resp.pf.st := (bad_va || (~w_array & hits).orR) && isWrite(io.req.bits.cmd) io.resp.pf.inst := bad_va || (~x_array & hits).orR - io.resp.ae.ld := (~pr_array & hits).orR - io.resp.ae.st := (~pw_array & hits).orR + io.resp.ae.ld := (~pr_array & hits).orR && isRead(io.req.bits.cmd) + io.resp.ae.st := (ae_st_array & hits).orR && isWrite(io.req.bits.cmd) io.resp.ae.inst := (~px_array & hits).orR + io.resp.ma.ld := misaligned && isRead(io.req.bits.cmd) + io.resp.ma.st := misaligned && isWrite(io.req.bits.cmd) + io.resp.ma.inst := false // this is up to the pipeline to figure out io.resp.cacheable := (c_array & hits).orR io.resp.miss := do_refill || tlb_miss || multipleHits io.resp.paddr := Cat(ppn, pgOffset) diff --git a/src/main/scala/uncore/Consts.scala b/src/main/scala/uncore/Consts.scala index bf7c1121..4f0c7297 100644 --- a/src/main/scala/uncore/Consts.scala +++ b/src/main/scala/uncore/Consts.scala @@ -4,6 +4,7 @@ package uncore package constants import Chisel._ +import _root_.util._ object MemoryOpConstants extends MemoryOpConstants trait MemoryOpConstants { @@ -31,7 +32,9 @@ trait MemoryOpConstants { def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions def M_SFENCE = UInt("b10100") // flush TLB - def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP + def isAMOLogical(cmd: UInt) = cmd.isOneOf(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND) + def isAMOArithmetic(cmd: UInt) = cmd.isOneOf(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU) + def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd) def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd) def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd)