// Patch summary. This is descriptive preamble placed before the first
// `diff --git` header; it is not part of any hunk.
//
// Purpose: carry the SFENCE address through the TLB/PTW interfaces and add an
// optional, parity-checked L2 TLB inside the PTW, sized by `nL2TLBEntries`.
//
// Changes visible in the hunks below, per file:
//  * rocket/DCache.scala, rocket/NBDcache.scala
//      - Drive the new `sfence.bits.addr` field of the TLB request from
//        `s1_req.addr` (the same signal already used for `vaddr`).
//  * rocket/PTW.scala
//      - Import `uncore.util.ParityCode`.
//      - `DatapathPTWIO.invalidate` (Bool input) is replaced by
//        `sfence = Valid(new SFenceReq).flip`.
//      - The existing `valid := 0` clear is now taken only when
//        `sfence.valid && !sfence.bits.rs1`.
//      - New L2 TLB, generated only when `coreParams.nL2TLBEntries != 0`
//        (otherwise `l2_hit` is `false.B` and `l2_pte` is a bare `Wire(new PTE)`):
//          . `require`s a power-of-two entry count; the index is the low
//            `idxBits` of the request address, the tag is the remaining bits.
//          . Storage is a `SeqMem` holding `code.encode(Cat(entry, tag))`, where
//            `entry` has ppn/d/a/u/x/w/r; `g` and `valid` are kept in separate
//            per-entry bit-vector registers.
//          . Refill happens when `l2_refill` is set; the state machine sets it
//            when `pte.v && !invalid_paddr && count === pgLevels-1`.
//          . On sfence: with rs1, clear the one `valid` bit indexed by
//            `addr(idxBits+pgIdxBits-1, pgIdxBits)`; else with rs2, keep only
//            entries whose `g` bit is set; else clear all.
//          . Lookup reads the RAM when `!l2_refill && arb.io.out.fire()`; a hit
//            needs `s2_valid`, no decode error, and `r_tag === s2_tag`. A decode
//            error while `s2_valid` clears every `valid` bit.
//      - On `l2_hit`: `io.mem.req.valid` is suppressed, `io.mem.s1_kill` is
//        asserted, and the state machine returns to `s_ready` with
//        `resp_valid(r_req_dest)` set, `resp_ae` cleared and `r_pte := l2_pte`.
//      - The `s_ready` -> `s_req` transition now uses `arb.io.out.fire()`
//        instead of `arb.io.out.valid`.
//  * rocket/Rocket.scala
//      - `RocketCoreParams` gains `nL2TLBEntries: Int = 0`.
//      - `io.imem.sfence.bits.addr := wb_reg_wdata`; `io.ptw.sfence` is wired to
//        `io.imem.sfence`, replacing the old `io.ptw.invalidate` expression.
//  * rocket/TLB.scala
//      - `SFenceReq` gains `addr = UInt(width = vaddrBits)`.
//      - Inside `when (sfence)`, assert that `sfence.bits.addr >> pgIdxBits`
//        equals `vpn(vpnBits-1,0)`.
//  * tile/Core.scala
//      - `CoreParams` gains the abstract member `nL2TLBEntries: Int`.
//
// NOTE(review): the line breaks of this patch appear to have been collapsed
// (hunk headers and statements run together on a few long lines). It likely
// cannot be applied as-is; confirm against the original commit before use.
// NOTE(review): the sfence and parity-error `when` blocks both assign `valid`;
// which one wins when both fire depends on statement order. Presumably
// intentional; verify against the full PTW source.
diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 993014e2..98e89a9f 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -146,6 +146,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1) tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data.data + tlb.io.req.bits.sfence.bits.addr := s1_req.addr tlb.io.req.bits.passthrough := s1_req.phys tlb.io.req.bits.vaddr := s1_req.addr tlb.io.req.bits.instruction := false diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index aad94ef4..8d5612d3 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -708,6 +708,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule dtlb.io.req.bits.sfence.valid := s1_sfence dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1) + dtlb.io.req.bits.sfence.bits.addr := s1_req.addr dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data.data dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.vaddr := s1_req.addr diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala index 37ec12f7..60484eca 100644 --- a/src/main/scala/rocket/PTW.scala +++ b/src/main/scala/rocket/PTW.scala @@ -11,6 +11,7 @@ import coreplex.CacheBlockBytes import uncore.constants._ import uncore.tilelink2._ import util._ +import uncore.util.ParityCode import scala.collection.mutable.ListBuffer @@ -37,7 +38,7 @@ class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) with HasRocketCoreParameters { val ptbr = new PTBR().asInput - val invalidate = Bool(INPUT) + val sfence = Valid(new SFenceReq).flip val status = new MStatus().asInput val pmp = Vec(nPMPs, new PMP).asInput } @@ -125,17 
+126,69 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( data(r) := pte.ppn } when (hit && state === s_req) { plru.access(OHToUInt(hits)) } - when (io.dpath.invalidate) { valid := 0 } + when (io.dpath.sfence.valid && !io.dpath.sfence.bits.rs1) { valid := 0 } (hit && count < pgLevels-1, Mux1H(hits, data)) } + + val l2_refill = RegNext(false.B) + val (l2_hit, l2_pte) = if (coreParams.nL2TLBEntries == 0) (false.B, Wire(new PTE)) else { + class Entry extends Bundle { + val ppn = UInt(width = ppnBits) + val d = Bool() + val a = Bool() + val u = Bool() + val x = Bool() + val w = Bool() + val r = Bool() + } + + val code = new ParityCode + require(isPow2(coreParams.nL2TLBEntries)) + val idxBits = log2Ceil(coreParams.nL2TLBEntries) + val tagBits = vpnBits - idxBits + val ram = SeqMem(coreParams.nL2TLBEntries, UInt(width = code.width(new Entry().getWidth + tagBits))) + val g = Reg(UInt(width = coreParams.nL2TLBEntries)) + val valid = RegInit(UInt(0, coreParams.nL2TLBEntries)) + val (r_tag, r_idx) = Split(r_req.addr, idxBits) + when (l2_refill) { + val entry = Wire(new Entry) + entry := r_pte + ram.write(r_idx, code.encode(Cat(entry.asUInt, r_tag))) + + val mask = UIntToOH(r_idx) + valid := valid | mask + g := Mux(r_pte.g, g | mask, g & ~mask) + } + when (io.dpath.sfence.valid) { + valid := + Mux(io.dpath.sfence.bits.rs1, valid & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits+pgIdxBits-1, pgIdxBits)), + Mux(io.dpath.sfence.bits.rs2, valid & g, 0.U)) + } + + val s0_valid = !l2_refill && arb.io.out.fire() + val s1_valid = RegNext(s0_valid) + val s2_valid = RegNext(s1_valid && valid(r_idx)) + val s1_rdata = ram.read(arb.io.out.bits.addr(idxBits-1, 0), s0_valid) + val s2_rdata = code.decode(RegEnable(s1_rdata, s1_valid)) + when (s2_valid && s2_rdata.error) { valid := 0.U } + + val (s2_entry, s2_tag) = Split(s2_rdata.uncorrected, tagBits) + val s2_hit = s2_valid && !s2_rdata.error && r_tag === s2_tag + val s2_pte = Wire(new PTE) + s2_pte := 
s2_entry.asTypeOf(new Entry) + s2_pte.g := g(r_idx) + s2_pte.v := true + + (s2_hit, s2_pte) + } - io.mem.req.valid := state === s_req + io.mem.req.valid := state === s_req && !l2_hit io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := log2Ceil(xLen/8) io.mem.req.bits.addr := pte_addr - io.mem.s1_kill := s1_kill + io.mem.s1_kill := s1_kill || l2_hit io.mem.invalidate_lr := Bool(false) val pmaPgLevelHomogeneous = (0 until pgLevels) map { i => @@ -159,7 +212,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( // control state machine switch (state) { is (s_ready) { - when (arb.io.out.valid) { + when (arb.io.out.fire()) { state := s_req } count := UInt(0) @@ -186,6 +239,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( state := s_req count := count + 1 }.otherwise { + l2_refill := pte.v && !invalid_paddr && count === pgLevels-1 resp_ae := pte.v && invalid_paddr state := s_ready resp_valid(r_req_dest) := true @@ -198,6 +252,12 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } } } + when (l2_hit) { + state := s_ready + resp_valid(r_req_dest) := true + resp_ae := false + r_pte := l2_pte + } } /** Mix-ins for constructing tiles that might have a PTW */ diff --git a/src/main/scala/rocket/Rocket.scala b/src/main/scala/rocket/Rocket.scala index 498c4a2f..07362570 100644 --- a/src/main/scala/rocket/Rocket.scala +++ b/src/main/scala/rocket/Rocket.scala @@ -25,6 +25,7 @@ case class RocketCoreParams( nPMPs: Int = 8, nPerfCounters: Int = 0, nCustomMRWCSRs: Int = 0, + nL2TLBEntries: Int = 0, mtvecInit: Option[BigInt] = Some(BigInt(0)), mtvecWritable: Boolean = true, fastLoadWord: Boolean = true, @@ -588,8 +589,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0) io.imem.sfence.bits.rs2 := wb_ctrl.mem_type(1) + 
io.imem.sfence.bits.addr := wb_reg_wdata io.imem.sfence.bits.asid := wb_reg_rs2 - io.ptw.invalidate := io.imem.sfence.valid && !io.imem.sfence.bits.rs1 + io.ptw.sfence := io.imem.sfence ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index f127488c..197b5f06 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -20,6 +20,7 @@ case object ASIdBits extends Field[Int] class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) { val rs1 = Bool() val rs2 = Bool() + val addr = UInt(width = vaddrBits) val asid = UInt(width = asIdBits max 1) // TODO zero-width } @@ -252,6 +253,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters } when (sfence) { + assert((io.req.bits.sfence.bits.addr >> pgIdxBits) === vpn(vpnBits-1,0)) valid := Mux(io.req.bits.sfence.bits.rs1, valid & ~hits(totalEntries-1, 0), Mux(io.req.bits.sfence.bits.rs2, valid & entries.map(_.g).asUInt, 0)) } diff --git a/src/main/scala/tile/Core.scala b/src/main/scala/tile/Core.scala index 2befafad..31279ae7 100644 --- a/src/main/scala/tile/Core.scala +++ b/src/main/scala/tile/Core.scala @@ -24,6 +24,7 @@ trait CoreParams { val retireWidth: Int val instBits: Int val nLocalInterrupts: Int + val nL2TLBEntries: Int } trait HasCoreParameters extends HasTileParameters {