From cf168e419bfde4f9306961f316b789b27ab5cccf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Mar 2017 13:54:49 -0700 Subject: [PATCH] Support SFENCE.VMA rs1 argument This one's a little invasive. To flush a specific entry from the TLB, you need to reuse its CAM port. Since the TLB lookup can be on the critical path, we wish to avoid muxing in another address. This is simple on the data side, where the datapath already carries rs1 to the TLB (it's the same path as the AMO address calculation). It's trickier for the I$, where the TLB lookup address comes from the fetch stage PC. The trick is to temporarily redirect the PC to rs1, then redirect the PC again to the instruction after SFENCE.VMA. --- src/main/scala/groundtest/DummyPTW.scala | 1 - src/main/scala/rocket/CSR.scala | 6 +---- src/main/scala/rocket/DCache.scala | 9 +++++-- src/main/scala/rocket/Frontend.scala | 5 ++-- src/main/scala/rocket/IDecode.scala | 2 +- src/main/scala/rocket/NBDcache.scala | 9 +++++-- src/main/scala/rocket/PTW.scala | 2 -- src/main/scala/rocket/Rocket.scala | 31 ++++++++++++++++++------ src/main/scala/rocket/TLB.scala | 30 ++++++++++++++--------- src/main/scala/uncore/Consts.scala | 1 + 10 files changed, 61 insertions(+), 35 deletions(-) diff --git a/src/main/scala/groundtest/DummyPTW.scala b/src/main/scala/groundtest/DummyPTW.scala index e2d25397..680d7ff7 100644 --- a/src/main/scala/groundtest/DummyPTW.scala +++ b/src/main/scala/groundtest/DummyPTW.scala @@ -52,6 +52,5 @@ class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U requestor.ptbr.asid := UInt(0) requestor.ptbr.ppn := UInt(0) - requestor.invalidate := Bool(false) } } diff --git a/src/main/scala/rocket/CSR.scala b/src/main/scala/rocket/CSR.scala index 3a7737cf..a12a965f 100644 --- a/src/main/scala/rocket/CSR.scala +++ b/src/main/scala/rocket/CSR.scala @@ -178,7 +178,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle 
val cause = UInt(INPUT, xLen) val pc = UInt(INPUT, vaddrBitsExtended) val badaddr = UInt(INPUT, vaddrBitsExtended) - val fatc = Bool(OUTPUT) val time = UInt(OUTPUT, xLen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip @@ -410,12 +409,10 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param val system_insn = io.rw.cmd === CSR.I val opcode = UInt(1) << io.rw.addr(2,0) - val insn_rs2 = io.rw.addr(5) - val insn_call = system_insn && !insn_rs2 && opcode(0) + val insn_call = system_insn && opcode(0) val insn_break = system_insn && opcode(1) val insn_ret = system_insn && opcode(2) val insn_wfi = system_insn && opcode(5) - val insn_sfence_vma = system_insn && insn_rs2 private def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => io.decode.csr === k }.reduce(_||_) val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw @@ -447,7 +444,6 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) - io.fatc := insn_sfence_vma io.evec := tvec io.ptbr := reg_sptbr io.eret := insn_call || insn_break || insn_ret diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 6f4ec90d..17f26c10 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -73,6 +73,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write + val s1_sfence = s1_req.cmd === M_SFENCE val s1_flush_valid = Reg(Bool()) val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: 
s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7) @@ -104,7 +105,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // address translation val tlb = Module(new TLB(nTLBEntries)) io.ptw <> tlb.io.ptw - tlb.io.req.valid := s1_valid_masked && s1_readwrite + tlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence) + tlb.io.req.bits.sfence.valid := s1_sfence + tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) + tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1) + tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data tlb.io.req.bits.passthrough := s1_req.phys tlb.io.req.bits.vaddr := s1_req.addr tlb.io.req.bits.instruction := false @@ -136,7 +141,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way) val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical - val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) + val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) val s2_probe = Reg(next=s1_probe, init=Bool(false)) val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready val s2_valid_masked = s2_valid && Reg(next = !s1_nack) diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index b52339fb..3deef791 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -28,12 +28,12 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Valid(new FrontendReq) + val sfence = Valid(new SFenceReq) val resp = Decoupled(new FrontendResp).flip val btb_update = Valid(new BTBUpdate) val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) val flush_icache = Bool(OUTPUT) - val flush_tlb = Bool(OUTPUT) val npc = UInt(INPUT, width = vaddrBitsExtended) // performance events @@ -133,12 
+133,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) tlb.io.req.bits.store := Bool(false) + tlb.io.req.bits.sfence := io.cpu.sfence icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.addr := io.cpu.npc icache.io.invalidate := io.cpu.flush_icache icache.io.s1_paddr := tlb.io.resp.paddr - icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb + icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss icache.io.s2_kill := s2_speculative && !s2_cacheable icache.io.resp.ready := !stall && !s1_same_block diff --git a/src/main/scala/rocket/IDecode.scala b/src/main/scala/rocket/IDecode.scala index 89ee819a..86c0a03a 100644 --- a/src/main/scala/rocket/IDecode.scala +++ b/src/main/scala/rocket/IDecode.scala @@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants class SDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N), + SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N), SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N)) } diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index 6a89896d..0915a13d 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -680,8 +680,9 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR val s1_replay = Reg(init=Bool(false)) val s1_clk_en = Reg(Bool()) + val s1_sfence = s1_req.cmd === M_SFENCE - val s2_valid = Reg(next=s1_valid_masked, 
init=Bool(false)) + val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) val s2_req = Reg(io.cpu.req.bits) val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL val s2_recycle = Wire(Bool()) @@ -698,7 +699,11 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule val dtlb = Module(new TLB(nTLBEntries)) io.ptw <> dtlb.io.ptw - dtlb.io.req.valid := s1_valid_masked && s1_readwrite + dtlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence) + dtlb.io.req.bits.sfence.valid := s1_sfence + dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) + dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1) + dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.vaddr := s1_req.addr dtlb.io.req.bits.instruction := Bool(false) diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala index bcfb1386..0fb65186 100644 --- a/src/main/scala/rocket/PTW.scala +++ b/src/main/scala/rocket/PTW.scala @@ -30,7 +30,6 @@ class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new PTWReq) val resp = Valid(new PTWResp).flip val ptbr = new PTBR().asInput - val invalidate = Bool(INPUT) val status = new MStatus().asInput } @@ -138,7 +137,6 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.requestor(i).resp.bits.level := count io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits io.requestor(i).ptbr := io.dpath.ptbr - io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).status := io.dpath.status } diff --git a/src/main/scala/rocket/Rocket.scala b/src/main/scala/rocket/Rocket.scala index c8c93c19..e8fa4f2b 100644 --- a/src/main/scala/rocket/Rocket.scala +++ b/src/main/scala/rocket/Rocket.scala @@ -141,6 +141,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val mem_reg_slow_bypass = Reg(Bool()) val mem_reg_load = Reg(Bool()) val mem_reg_store = Reg(Bool()) + 
val mem_reg_sfence = Reg(Bool()) val mem_reg_pc = Reg(UInt()) val mem_reg_inst = Reg(Bits()) val mem_reg_wdata = Reg(Bits()) @@ -151,6 +152,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val wb_reg_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) + val wb_reg_sfence = Reg(Bool()) + val wb_reg_sfence_done = Reg(Bool()) val wb_reg_pc = Reg(UInt()) val wb_reg_inst = Reg(Bits()) val wb_reg_wdata = Reg(Bits()) @@ -189,7 +192,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val id_system_insn = id_ctrl.csr >= CSR.I val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0) val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) - val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush) + val id_sfence = id_ctrl.mem && id_ctrl.mem_cmd === M_SFENCE + val id_csr_flush = id_sfence || id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush) val id_illegal_insn = !id_ctrl.legal || id_ctrl.div && !csr.io.status.isa('m'-'a') || @@ -199,7 +203,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') || id_ctrl.rocc && csr.io.decode.rocc_illegal || id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) || - id_system_insn && csr.io.decode.system_illegal + (id_sfence || id_system_insn) && csr.io.decode.system_illegal // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) val id_amo_aq = id_inst(0)(26) val id_amo_rl = id_inst(0)(25) @@ -297,6 +301,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) } ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep ex_reg_load_use := id_load_use + when (id_sfence) { + ex_ctrl.mem_type := Cat(id_raddr2 =/= UInt(0), id_raddr1 =/= UInt(0)) + when (wb_reg_sfence_done) { ex_ctrl.mem := false } + } when (id_ctrl.jalr && csr.io.status.debug) { ex_reg_flush_pipe := true @@ 
-340,9 +348,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), Mux(mem_reg_rvc, SInt(2), SInt(4)))) - val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt + val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true))) - val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) + val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal) @@ -361,6 +369,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) mem_reg_rvc := ex_reg_rvc mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd) mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd) + mem_reg_sfence := Bool(usingVM) && ex_ctrl.mem && ex_ctrl.mem_cmd === M_SFENCE mem_reg_btb_hit := ex_reg_btb_hit when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } mem_reg_flush_pipe := ex_reg_flush_pipe @@ -391,7 +400,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem - val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem + val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem || mem_reg_valid && mem_reg_sfence val killm_common = 
dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid div.io.kill := killm_common && Reg(next = div.io.req.fire()) val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem @@ -403,8 +412,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) when (mem_xcpt) { wb_reg_cause := mem_cause } when (mem_pc_valid) { wb_ctrl := mem_ctrl + wb_reg_sfence := mem_reg_sfence wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) - when (mem_ctrl.rocc) { + when (mem_ctrl.rocc || mem_reg_sfence) { wb_reg_rs2 := mem_reg_rs2 } wb_reg_inst := mem_reg_inst @@ -448,6 +458,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) } val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt + when (wb_valid || wb_xcpt) { wb_reg_sfence_done := false } + when (io.imem.sfence.valid) { wb_reg_sfence_done := true } val wb_wen = wb_valid && wb_ctrl.wxd val rf_wen = wb_wen || ll_wen val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) @@ -470,7 +482,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) csr.io.pc := wb_reg_pc csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata) io.ptw.ptbr := csr.io.ptbr - io.ptw.invalidate := csr.io.fatc io.ptw.status := csr.io.status csr.io.rw.addr := wb_reg_inst(31,20) csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) @@ -542,7 +553,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction id_npc))) // JAL io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack - io.imem.flush_tlb := csr.io.fatc + io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence + io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0) + io.imem.sfence.bits.rs2 := wb_ctrl.mem_type(1) + io.imem.sfence.bits.asid := wb_reg_rs2 + io.ptw.invalidate := io.imem.sfence.valid && !io.imem.sfence.bits.rs1 ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt diff --git a/src/main/scala/rocket/TLB.scala 
b/src/main/scala/rocket/TLB.scala index df8e320b..2137b012 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -16,11 +16,18 @@ case object PAddrBits extends Field[Int] case object PgLevels extends Field[Int] case object ASIdBits extends Field[Int] +class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) { + val rs1 = Bool() + val rs2 = Bool() + val asid = UInt(width = asIdBits max 1) // TODO zero-width +} + class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { val vaddr = UInt(width = vaddrBitsExtended) val passthrough = Bool() val instruction = Bool() val store = Bool() + val sfence = Valid(new SFenceReq) } class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { @@ -63,6 +70,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits) val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0) val do_refill = Bool(usingVM) && io.ptw.resp.valid + val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) val mpu_ppn = Mux(do_refill, refill_ppn, Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0))) val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0)) @@ -76,7 +84,6 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val isSpecial = { val homogeneous = Wire(init = false.B) for (i <- 0 until pgLevels) { - println(BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits))) when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous } } !homogeneous @@ -108,7 +115,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val sr_array = Reg(UInt(width = totalEntries)) // read permission val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page val cash_array = 
Reg(UInt(width = normalEntries)) // cacheable - when (do_refill) { + when (do_refill && !invalidate_refill) { val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr) val pte = io.ptw.resp.bits.pte ppns(waddr) := pte.ppn @@ -138,7 +145,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod if (vpnBits == vpnBitsExtended) Bool(false) else vpn(vpnBits) =/= vpn(vpnBits-1) val tlb_hit = hits(totalEntries-1, 0).orR - val tlb_miss = vm_enabled && !bad_va && !tlb_hit + val tlb_miss = vm_enabled && !bad_va && !tlb_hit && !io.req.bits.sfence.valid when (io.req.valid && !tlb_miss && !hits(specialEntry)) { plru.access(OHToUInt(hits(normalEntries-1, 0))) @@ -166,6 +173,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod io.ptw.req.bits.fetch := r_req.instruction if (usingVM) { + val sfence = io.req.valid && io.req.bits.sfence.valid when (io.req.fire() && tlb_miss) { state := s_request r_refill_tag := lookup_tag @@ -173,22 +181,20 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod r_req := io.req.bits } when (state === s_request) { - when (io.ptw.invalidate) { - state := s_ready - } - when (io.ptw.req.ready) { - state := s_wait - when (io.ptw.invalidate) { state := s_wait_invalidate } - } + when (sfence) { state := s_ready } + when (io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) } } - when (state === s_wait && io.ptw.invalidate) { + when (state === s_wait && sfence) { state := s_wait_invalidate } when (io.ptw.resp.valid) { state := s_ready } - when (io.ptw.invalidate || multipleHits) { + when (sfence && io.req.bits.sfence.bits.rs1) { + valid := valid & ~hits(totalEntries-1, 0) + } + when (sfence && !io.req.bits.sfence.bits.rs1 || multipleHits) { valid := 0 } } diff --git a/src/main/scala/uncore/Consts.scala b/src/main/scala/uncore/Consts.scala index aad178f9..bf7c1121 100644 --- a/src/main/scala/uncore/Consts.scala +++ b/src/main/scala/uncore/Consts.scala @@ 
-29,6 +29,7 @@ trait MemoryOpConstants { def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions + def M_SFENCE = UInt("b10100") // SFENCE.VMA: flush whole TLB, or only entries hit by the rs1 address def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW