From 1fea0460ba014847232f00c1ef8a3a0c602c2fe6 Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Sun, 12 Mar 2017 20:42:51 -0700
Subject: [PATCH] Support superpage entries in TLB

---
 src/main/scala/rocket/DCache.scala   |   4 +-
 src/main/scala/rocket/Frontend.scala |   4 +-
 src/main/scala/rocket/NBDcache.scala |   4 +-
 src/main/scala/rocket/PTW.scala      |   5 +-
 src/main/scala/rocket/TLB.scala      | 118 +++++++++++++++------------
 5 files changed, 74 insertions(+), 61 deletions(-)

diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala
index b3bdb447..8ed9ba0f 100644
--- a/src/main/scala/rocket/DCache.scala
+++ b/src/main/scala/rocket/DCache.scala
@@ -105,13 +105,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   io.ptw <> tlb.io.ptw
   tlb.io.req.valid := s1_valid_masked && s1_readwrite
   tlb.io.req.bits.passthrough := s1_req.phys
-  tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
+  tlb.io.req.bits.vaddr := s1_req.addr
   tlb.io.req.bits.instruction := false
   tlb.io.req.bits.store := s1_write
   when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
   when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
 
-  val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
+  val s1_paddr = tlb.io.resp.paddr
   val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr)(paddrBits-1, untagBits)
   val s1_victim_way = Wire(init = replacer.way)
   val (s1_hit_way, s1_hit_state, s1_victim_meta) =
diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala
index 483ba89a..b52339fb 100644
--- a/src/main/scala/rocket/Frontend.scala
+++ b/src/main/scala/rocket/Frontend.scala
@@ -129,7 +129,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   io.ptw <> tlb.io.ptw
   tlb.io.req.valid := !stall && !icmiss
-  tlb.io.req.bits.vpn := s1_pc >> pgIdxBits
+  tlb.io.req.bits.vaddr := s1_pc
   tlb.io.req.bits.passthrough := Bool(false)
   tlb.io.req.bits.instruction := Bool(true)
   tlb.io.req.bits.store := Bool(false)
 
@@ -137,7 +137,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   icache.io.req.valid := !stall && !s0_same_block
   icache.io.req.bits.addr := io.cpu.npc
   icache.io.invalidate := io.cpu.flush_icache
-  icache.io.s1_paddr := Cat(tlb.io.resp.ppn, s1_pc(pgIdxBits-1, 0))
+  icache.io.s1_paddr := tlb.io.resp.paddr
   icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
   icache.io.s2_kill := s2_speculative && !s2_cacheable
   icache.io.resp.ready := !stall && !s1_same_block
diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala
index 07de60a7..215d6f20 100644
--- a/src/main/scala/rocket/NBDcache.scala
+++ b/src/main/scala/rocket/NBDcache.scala
@@ -700,7 +700,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
   io.ptw <> dtlb.io.ptw
   dtlb.io.req.valid := s1_valid_masked && s1_readwrite
   dtlb.io.req.bits.passthrough := s1_req.phys
-  dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
+  dtlb.io.req.bits.vaddr := s1_req.addr
   dtlb.io.req.bits.instruction := Bool(false)
   dtlb.io.req.bits.store := s1_write
   when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) }
@@ -722,7 +722,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
   when (s2_recycle) {
     s1_req := s2_req
   }
-  val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
+  val s1_addr = dtlb.io.resp.paddr
 
   when (s1_clk_en) {
     s2_req.typ := s1_req.typ
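Note: the three TLB clients above change mechanically. Each now passes the full
virtual address (vaddr) instead of a pre-shifted VPN, and consumes a complete
physical address (paddr) instead of concatenating the returned PPN with the
page offset itself. Centralizing that concatenation inside the TLB is what lets
a superpage hit splice VPN bits into the translation. A minimal plain-Scala
sketch of the address arithmetic (a standalone illustration with an assumed
4 KiB base-page size, not the Chisel module itself):

    object PaddrDemo extends App {
      val pgIdxBits = 12                               // assumed: 4 KiB base pages
      def paddr(ppn: BigInt, vaddr: BigInt): BigInt =  // page offset always passes through untranslated
        (ppn << pgIdxBits) | (vaddr & ((BigInt(1) << pgIdxBits) - 1))
      println(paddr(BigInt(0x80123), BigInt(0xdeadabc)).toString(16))  // prints 80123abc
    }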
diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala
index e9f86d0a..bcfb1386 100644
--- a/src/main/scala/rocket/PTW.scala
+++ b/src/main/scala/rocket/PTW.scala
@@ -23,6 +23,7 @@ class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
 
 class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
   val pte = new PTE
+  val level = UInt(width = log2Ceil(pgLevels))
 }
 
 class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
@@ -131,11 +132,11 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
   io.mem.s1_kill := s1_kill
   io.mem.invalidate_lr := Bool(false)
 
-  val resp_ppns = (0 until pgLevels-1).map(i => Cat(pte_addr >> (pgIdxBits + pgLevelBits*(pgLevels-i-1)), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ (pte_addr >> pgIdxBits)
   for (i <- 0 until io.requestor.size) {
     io.requestor(i).resp.valid := resp_valid && (r_req_dest === i)
     io.requestor(i).resp.bits.pte := r_pte
-    io.requestor(i).resp.bits.pte.ppn := resp_ppns(count)
+    io.requestor(i).resp.bits.level := count
+    io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits
     io.requestor(i).ptbr := io.dpath.ptbr
     io.requestor(i).invalidate := io.dpath.invalidate
     io.requestor(i).status := io.dpath.status
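Note: instead of splicing VPN bits into the returned PPN inside the walker (the
deleted resp_ppns table), the PTW now reports the raw PPN together with the
level at which the walk terminated, and the TLB does the splicing at lookup
time. The page size implied by a level follows directly from the paging
parameters; a plain-Scala check, assuming Sv39-style numbers (pgLevels = 3,
pgLevelBits = 9, pgIdxBits = 12), mirroring the size expression used in the
homogeneity check further below:

    object LevelSizes extends App {
      val (pgLevels, pgLevelBits, pgIdxBits) = (3, 9, 12)  // assumed Sv39 parameters
      for (i <- 0 until pgLevels)  // level 0 -> 1 GiB, level 1 -> 2 MiB, level 2 -> 4 KiB
        println(s"level $i: ${BigInt(1) << (pgIdxBits + (pgLevels - 1 - i) * pgLevelBits)} bytes")
    }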
diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala
index 22ed4c57..df8e320b 100644
--- a/src/main/scala/rocket/TLB.scala
+++ b/src/main/scala/rocket/TLB.scala
@@ -17,7 +17,7 @@ case object PgLevels extends Field[Int]
 case object ASIdBits extends Field[Int]
 
 class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
-  val vpn = UInt(width = vpnBitsExtended)
+  val vaddr = UInt(width = vaddrBitsExtended)
   val passthrough = Bool()
   val instruction = Bool()
   val store = Bool()
@@ -26,7 +26,7 @@ class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
 class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
   // lookup responses
   val miss = Bool(OUTPUT)
-  val ppn = UInt(OUTPUT, ppnBits)
+  val paddr = UInt(OUTPUT, paddrBits)
   val xcpt_ld = Bool(OUTPUT)
   val xcpt_st = Bool(OUTPUT)
   val xcpt_if = Bool(OUTPUT)
@@ -39,31 +39,33 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
     val resp = new TLBResp
     val ptw = new TLBPTWIO
   }
-  val cacheBlockBytes = p(CacheBlockBytes)
-  val camAddrBits = log2Ceil(entries)
-  val camTagBits = asIdBits + vpnBits
-
-  val valid = Reg(init = UInt(0, entries))
-  val ppns = Reg(Vec(entries, UInt(width = ppnBits)))
-  val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits)))
+  val totalEntries = entries + 1
+  val normalEntries = entries
+  val specialEntry = entries
+  val valid = Reg(init = UInt(0, totalEntries))
+  val ppns = Reg(Vec(totalEntries, UInt(width = ppnBits)))
+  val tags = Reg(Vec(totalEntries, UInt(width = asIdBits + vpnBits)))
+  val levels = Reg(Vec(totalEntries, UInt(width = log2Ceil(pgLevels))))
 
   val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
   val state = Reg(init=s_ready)
   val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits))
-  val r_refill_waddr = Reg(UInt(width = log2Ceil(entries)))
+  val r_refill_waddr = Reg(UInt(width = log2Ceil(normalEntries)))
   val r_req = Reg(new TLBReq)
 
   val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
   val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
   val priv_s = priv === PRV.S
   val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug
+  val vm_enabled = Bool(usingVM) && io.ptw.ptbr.mode(io.ptw.ptbr.mode.getWidth-1) && priv_uses_vm && !io.req.bits.passthrough
 
   // share a single physical memory attribute checker (unshare if critical path)
-  val passthrough_ppn = io.req.bits.vpn(ppnBits-1, 0)
+  val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits)
   val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
   val do_refill = Bool(usingVM) && io.ptw.resp.valid
-  val mpu_ppn = Mux(do_refill, refill_ppn, passthrough_ppn)
-  val mpu_physaddr = mpu_ppn << pgIdxBits
+  val mpu_ppn = Mux(do_refill, refill_ppn,
+                Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0)))
+  val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
   val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_)
   def fastCheck(member: TLManagerParameters => Boolean) =
     legal_address && Mux1H(edge.manager.findFast(mpu_physaddr), edge.manager.managers.map(m => Bool(member(m))))
@@ -71,65 +73,75 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
   val prot_w = fastCheck(_.supportsPutFull)
   val prot_x = fastCheck(_.executable)
   val cacheable = fastCheck(_.supportsAcquireB)
-  val xferSizes = TransferSizes(cacheBlockBytes, cacheBlockBytes)
-  val allSizes = TransferSizes(1, cacheBlockBytes)
-  val amoSizes = TransferSizes(1, xLen/8)
-  edge.manager.managers.foreach { m =>
-    require (m.minAlignment >= 4096, s"MemoryMap region ${m.name} must be page-aligned (is ${m.minAlignment})")
-    require (!m.supportsGet        || m.supportsGet       .contains(allSizes),  s"MemoryMap region ${m.name} only supports ${m.supportsGet} Get, but must support ${allSizes}")
-    require (!m.supportsPutFull    || m.supportsPutFull   .contains(allSizes),  s"MemoryMap region ${m.name} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}")
-    require (!m.supportsAcquireB   || m.supportsAcquireB  .contains(xferSizes), s"MemoryMap region ${m.name} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}")
-    require (!m.supportsAcquireT   || m.supportsAcquireT  .contains(xferSizes), s"MemoryMap region ${m.name} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}")
-    require (!m.supportsLogical    || m.supportsLogical   .contains(amoSizes),  s"MemoryMap region ${m.name} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}")
-    require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes),  s"MemoryMap region ${m.name} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}")
-    require (m.supportsAcquireT || !m.supportsPutFull || !m.supportsAcquireB, s"MemoryMap region ${m.name} supports PutFull and AcquireB but not AcquireT")
+  val isSpecial = {
+    val homogeneous = Wire(init = false.B)
+    for (i <- 0 until pgLevels) {
+      when (io.ptw.resp.bits.level === i) {
+        homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous
+      }
+    }
+    !homogeneous
   }
-  val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0))
-  val vm_enabled = Bool(usingVM) && io.ptw.ptbr.mode(io.ptw.ptbr.mode.getWidth-1) && priv_uses_vm && !io.req.bits.passthrough
-  val hitsVec = (0 until entries).map(i => valid(i) && vm_enabled && tags(i) === lookup_tag) :+ !vm_enabled
+  val lookup_tag = Cat(io.ptw.ptbr.asid, vpn(vpnBits-1,0))
+  val hitsVec = (0 until totalEntries).map { i => vm_enabled && {
+    var tagMatch = valid(i)
+    for (j <- 0 until pgLevels) {
+      val base = vpnBits - (j + 1) * pgLevelBits
+      tagMatch = tagMatch && (levels(i) < j || tags(i)(base + pgLevelBits - 1, base) === vpn(base + pgLevelBits - 1, base))
+    }
+    tagMatch
+  }} :+ !vm_enabled
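Note: the tag match above is level-aware. For an entry refilled at walk level
L, the VPN slices below L are don't-cares, so a single entry covers every base
page inside its superpage. A plain-Scala model of the predicate, again assuming
Sv39-style numbers (a standalone illustration, not the Chisel code):

    object TagMatchDemo extends App {
      val (pgLevels, pgLevelBits) = (3, 9)     // assumed Sv39 parameters
      val vpnBits = pgLevels * pgLevelBits
      def slice(x: BigInt, j: Int): BigInt =   // j = 0 is the uppermost 9-bit slice
        (x >> (vpnBits - (j + 1) * pgLevelBits)) & ((1 << pgLevelBits) - 1)
      def hit(tag: BigInt, level: Int, vpn: BigInt): Boolean =
        (0 until pgLevels).forall(j => level < j || slice(tag, j) == slice(vpn, j))
      val tag = (BigInt(1) << 18) | (BigInt(2) << 9) | 5
      val vpn = (BigInt(1) << 18) | (BigInt(2) << 9) | 7  // same 2 MiB region, different 4 KiB page
      println(hit(tag, 1, vpn))  // true:  a level-1 (2 MiB) entry ignores the low slice
      println(hit(tag, 2, vpn))  // false: a level-2 (4 KiB) entry compares all slices
    }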
 
   val hits = hitsVec.asUInt
+  val level = Mux1H(hitsVec.init, levels)
+  val partialPPN = Mux1H(hitsVec.init, ppns)
+  val ppn = {
+    var ppn = Mux(vm_enabled, partialPPN, vpn)(pgLevelBits*pgLevels - 1, pgLevelBits*(pgLevels - 1))
+    for (i <- 1 until pgLevels)
+      ppn = Cat(ppn, (Mux(level < i, vpn, 0.U) | partialPPN)(vpnBits - i*pgLevelBits - 1, vpnBits - (i + 1)*pgLevelBits))
+    ppn
+  }
 
   // permission bit arrays
-  val pte_array = Reg(new PTE)
-  val u_array = Reg(UInt(width = entries)) // user permission
-  val sw_array = Reg(UInt(width = entries)) // write permission
-  val sx_array = Reg(UInt(width = entries)) // execute permission
-  val sr_array = Reg(UInt(width = entries)) // read permission
-  val xr_array = Reg(UInt(width = entries)) // read permission to executable page
-  val cash_array = Reg(UInt(width = entries)) // cacheable
+  val u_array = Reg(UInt(width = totalEntries)) // user permission
+  val sw_array = Reg(UInt(width = totalEntries)) // write permission
+  val sx_array = Reg(UInt(width = totalEntries)) // execute permission
+  val sr_array = Reg(UInt(width = totalEntries)) // read permission
+  val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page
+  val cash_array = Reg(UInt(width = normalEntries)) // cacheable
 
   when (do_refill) {
+    val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr)
     val pte = io.ptw.resp.bits.pte
-    ppns(r_refill_waddr) := pte.ppn
-    tags(r_refill_waddr) := r_refill_tag
+    ppns(waddr) := pte.ppn
+    tags(waddr) := r_refill_tag
+    levels(waddr) := io.ptw.resp.bits.level
 
-    val mask = UIntToOH(r_refill_waddr)
+    val mask = UIntToOH(waddr)
     valid := valid | mask
     u_array := Mux(pte.u, u_array | mask, u_array & ~mask)
-    sw_array := Mux(pte.sw() && prot_w, sw_array | mask, sw_array & ~mask)
-    sx_array := Mux(pte.sx() && prot_x, sx_array | mask, sx_array & ~mask)
-    sr_array := Mux(pte.sr() && prot_r, sr_array | mask, sr_array & ~mask)
-    xr_array := Mux(pte.sx() && prot_r, xr_array | mask, xr_array & ~mask)
+    sw_array := Mux(pte.sw() && (isSpecial || prot_w), sw_array | mask, sw_array & ~mask)
+    sx_array := Mux(pte.sx() && (isSpecial || prot_x), sx_array | mask, sx_array & ~mask)
+    sr_array := Mux(pte.sr() && (isSpecial || prot_r), sr_array | mask, sr_array & ~mask)
+    xr_array := Mux(pte.sx() && (isSpecial || prot_r), xr_array | mask, xr_array & ~mask)
     cash_array := Mux(cacheable, cash_array | mask, cash_array & ~mask)
   }
 
-  val plru = new PseudoLRU(entries)
+  val plru = new PseudoLRU(normalEntries)
   val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace)
 
   val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array)
-  val w_array = Cat(prot_w, priv_ok & sw_array)
-  val x_array = Cat(prot_x, priv_ok & sx_array)
-  val r_array = Cat(prot_r | (prot_x & io.ptw.status.mxr), priv_ok & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0))))
-  val c_array = Cat(cacheable, cash_array)
+  val w_array = Cat(prot_w, priv_ok & ~(~prot_w << specialEntry) & sw_array)
+  val x_array = Cat(prot_x, priv_ok & ~(~prot_x << specialEntry) & sx_array)
+  val r_array = Cat(prot_r, priv_ok & ~(~prot_r << specialEntry) & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0))))
+  val c_array = Cat(cacheable, cacheable, cash_array)
 
   val bad_va = if (vpnBits == vpnBitsExtended) Bool(false)
-    else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
-  val tlb_hit = hits(entries-1, 0).orR
+    else vpn(vpnBits) =/= vpn(vpnBits-1)
+  val tlb_hit = hits(totalEntries-1, 0).orR
   val tlb_miss = vm_enabled && !bad_va && !tlb_hit
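Note: on a hit, the ppn splice above rebuilds the physical page number slice by
slice: slices at or above the matching entry's level come from the stored PPN,
and lower slices pass through from the VPN. (The hardware also ORs the stored
PPN into those low slices, which is equivalent as long as the superpage PTE is
aligned, i.e. its low PPN bits are zero.) A plain-Scala model, Sv39-style
numbers assumed (standalone illustration):

    object PpnSpliceDemo extends App {
      val (pgLevels, pgLevelBits) = (3, 9)  // assumed Sv39 parameters
      val vpnBits = pgLevels * pgLevelBits
      def slice(x: BigInt, i: Int): BigInt =
        (x >> (vpnBits - (i + 1) * pgLevelBits)) & ((1 << pgLevelBits) - 1)
      def splice(ppn: BigInt, vpn: BigInt, level: Int): BigInt =
        (0 until pgLevels).foldLeft(BigInt(0)) { (acc, i) =>
          (acc << pgLevelBits) | slice(if (i > level) vpn else ppn, i)
        }
      // Level-1 (2 MiB) hit: the low 9-bit slice of the translation tracks the VPN.
      println(splice(BigInt(0x246800), (BigInt(7) << 9) | 0x1ab, 1).toString(16))  // prints 2469ab
    }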
-  when (io.req.valid && !tlb_miss) {
-    plru.access(OHToUInt(hits(entries-1, 0)))
+  when (io.req.valid && !tlb_miss && !hits(specialEntry)) {
+    plru.access(OHToUInt(hits(normalEntries-1, 0)))
   }
 
   // Superpages create the possibility that two entries in the TLB may match.
@@ -137,7 +149,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
   // we must return either the old translation or the new translation.  This
   // isn't compatible with the Mux1H approach.  So, flush the TLB and report
   // a miss on duplicate entries.
-  val multipleHits = PopCountAtLeast(hits(entries-1, 0), 2)
+  val multipleHits = PopCountAtLeast(hits(totalEntries-1, 0), 2)
 
   io.req.ready := state === s_ready
   io.resp.xcpt_ld := bad_va || (~r_array & hits).orR
@@ -145,7 +157,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
   io.resp.xcpt_if := bad_va || (~x_array & hits).orR
   io.resp.cacheable := (c_array & hits).orR
   io.resp.miss := do_refill || tlb_miss || multipleHits
-  io.resp.ppn := Mux1H(hitsVec, ppns :+ passthrough_ppn)
+  io.resp.paddr := Cat(ppn, pgOffset)
 
   io.ptw.req.valid := state === s_request
   io.ptw.req.bits <> io.ptw.status
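Note: the duplicate-match flush is load-bearing because Mux1H assumes a one-hot
select and, given two hits, effectively ORs the selected inputs, so a 4 KiB
refill landing inside a still-resident superpage would otherwise yield a
corrupted PPN. A plain-Scala model of that failure mode (standalone
illustration, not the Chisel primitive itself):

    object Mux1HDemo extends App {
      def mux1h(sel: Seq[Boolean], in: Seq[BigInt]): BigInt =
        sel.zip(in).collect { case (true, v) => v }.foldLeft(BigInt(0))(_ | _)
      println(mux1h(Seq(true, false), Seq(BigInt(0x12), BigInt(0x34))).toString(16))  // 12: one-hot, correct
      println(mux1h(Seq(true, true),  Seq(BigInt(0x12), BigInt(0x34))).toString(16))  // 36: two hits, garbage
    }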