
Support SFENCE.VMA rs1 argument

This one's a little invasive.  To flush a specific entry from the TLB, you
need to reuse its CAM port.  Since the TLB lookup can be on the critical
path, we wish to avoid muxing in another address.

This is simple on the data side, where the datapath already carries rs1 to
the TLB (it's the same path as the AMO address calculation).  It's trickier
for the I$, where the TLB lookup address comes from the fetch stage PC.
The trick is to temporarily redirect the PC to rs1, then redirect the PC
again to the instruction after SFENCE.VMA.
Andrew Waterman 2017-03-14 13:54:49 -07:00
parent 797c18b8db
commit cf168e419b
10 changed files with 61 additions and 35 deletions
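The I$-side trick described above amounts to two back-to-back PC redirections. The sketch below is a minimal, hypothetical Chisel model of that idea; the names (SFenceRedirect, sfence_valid, resume_pc, and so on) are invented for illustration and are not the signals this commit touches, which instead thread the behavior through Rocket's mem/writeback stages and the frontend's new SFenceReq port, as the diffs below show.

// Hypothetical sketch only: reuse the fetch PC as the ITLB's CAM port.
// None of these signal names appear in the commit; they are illustrative.
import Chisel._

class SFenceRedirect(vaddrBits: Int) extends Module {
  val io = new Bundle {
    val sfence_valid = Bool(INPUT)             // SFENCE.VMA has reached the mem stage
    val sfence_addr  = UInt(INPUT, vaddrBits)  // rs1: virtual address to flush
    val resume_pc    = UInt(INPUT, vaddrBits)  // instruction after the SFENCE.VMA
    val normal_npc   = UInt(INPUT, vaddrBits)  // ordinary next-PC
    val fetch_pc     = UInt(OUTPUT, vaddrBits)
  }

  // Remember that rs1 has already been presented to the fetch-stage TLB.
  val sfence_done = Reg(init = Bool(false))
  when (io.sfence_valid && !sfence_done) { sfence_done := Bool(true) }
  when (sfence_done) { sfence_done := Bool(false) }

  // First redirect: steer fetch (and therefore the ITLB lookup) to rs1.
  // Second redirect: resume at the instruction following SFENCE.VMA.
  io.fetch_pc := Mux(io.sfence_valid && !sfence_done, io.sfence_addr,
                 Mux(sfence_done, io.resume_pc, io.normal_npc))
}

In the actual change this bookkeeping lives in the pipeline itself: mem_npc selects the rs1 value when mem_reg_sfence is set, the SFENCE.VMA is replayed via replay_mem, and wb_reg_sfence_done records that the frontend has already seen the sfence so the replay does not issue it again.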


@@ -52,6 +52,5 @@ class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U
requestor.ptbr.asid := UInt(0)
requestor.ptbr.ppn := UInt(0)
requestor.invalidate := Bool(false)
}
}


@@ -178,7 +178,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle
val cause = UInt(INPUT, xLen)
val pc = UInt(INPUT, vaddrBitsExtended)
val badaddr = UInt(INPUT, vaddrBitsExtended)
val fatc = Bool(OUTPUT)
val time = UInt(OUTPUT, xLen)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
@@ -410,12 +409,10 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
val system_insn = io.rw.cmd === CSR.I
val opcode = UInt(1) << io.rw.addr(2,0)
val insn_rs2 = io.rw.addr(5)
val insn_call = system_insn && !insn_rs2 && opcode(0)
val insn_call = system_insn && opcode(0)
val insn_break = system_insn && opcode(1)
val insn_ret = system_insn && opcode(2)
val insn_wfi = system_insn && opcode(5)
val insn_sfence_vma = system_insn && insn_rs2
private def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => io.decode.csr === k }.reduce(_||_)
val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw
@@ -447,7 +444,6 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
io.fatc := insn_sfence_vma
io.evec := tvec
io.ptbr := reg_sptbr
io.eret := insn_call || insn_break || insn_ret


@@ -73,6 +73,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd)
val s1_readwrite = s1_read || s1_write
val s1_sfence = s1_req.cmd === M_SFENCE
val s1_flush_valid = Reg(Bool())
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7)
@@ -104,7 +105,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// address translation
val tlb = Module(new TLB(nTLBEntries))
io.ptw <> tlb.io.ptw
tlb.io.req.valid := s1_valid_masked && s1_readwrite
tlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence)
tlb.io.req.bits.sfence.valid := s1_sfence
tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
tlb.io.req.bits.passthrough := s1_req.phys
tlb.io.req.bits.vaddr := s1_req.addr
tlb.io.req.bits.instruction := false
@@ -136,7 +141,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
val s2_probe = Reg(next=s1_probe, init=Bool(false))
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)


@@ -28,12 +28,12 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Valid(new FrontendReq)
val sfence = Valid(new SFenceReq)
val resp = Decoupled(new FrontendResp).flip
val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate)
val flush_icache = Bool(OUTPUT)
val flush_tlb = Bool(OUTPUT)
val npc = UInt(INPUT, width = vaddrBitsExtended)
// performance events
@@ -133,12 +133,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false)
tlb.io.req.bits.sfence := io.cpu.sfence
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.addr := io.cpu.npc
icache.io.invalidate := io.cpu.flush_icache
icache.io.s1_paddr := tlb.io.resp.paddr
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss
icache.io.s2_kill := s2_speculative && !s2_cacheable
icache.io.resp.ready := !stall && !s1_same_block


@@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants
class SDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N),
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N),
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N))
}


@@ -680,8 +680,9 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
val s1_replay = Reg(init=Bool(false))
val s1_clk_en = Reg(Bool())
val s1_sfence = s1_req.cmd === M_SFENCE
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
val s2_req = Reg(io.cpu.req.bits)
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
val s2_recycle = Wire(Bool())
@@ -698,7 +699,11 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
val dtlb = Module(new TLB(nTLBEntries))
io.ptw <> dtlb.io.ptw
dtlb.io.req.valid := s1_valid_masked && s1_readwrite
dtlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence)
dtlb.io.req.bits.sfence.valid := s1_sfence
dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
dtlb.io.req.bits.passthrough := s1_req.phys
dtlb.io.req.bits.vaddr := s1_req.addr
dtlb.io.req.bits.instruction := Bool(false)


@@ -30,7 +30,6 @@ class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new PTWReq)
val resp = Valid(new PTWResp).flip
val ptbr = new PTBR().asInput
val invalidate = Bool(INPUT)
val status = new MStatus().asInput
}
@@ -138,7 +137,6 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
io.requestor(i).resp.bits.level := count
io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits
io.requestor(i).ptbr := io.dpath.ptbr
io.requestor(i).invalidate := io.dpath.invalidate
io.requestor(i).status := io.dpath.status
}


@@ -141,6 +141,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_load = Reg(Bool())
val mem_reg_store = Reg(Bool())
val mem_reg_sfence = Reg(Bool())
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
@@ -151,6 +152,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_sfence = Reg(Bool())
val wb_reg_sfence_done = Reg(Bool())
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
@@ -189,7 +192,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val id_system_insn = id_ctrl.csr >= CSR.I
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush)
val id_sfence = id_ctrl.mem && id_ctrl.mem_cmd === M_SFENCE
val id_csr_flush = id_sfence || id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush)
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.div && !csr.io.status.isa('m'-'a') ||
@@ -199,7 +203,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') ||
id_ctrl.rocc && csr.io.decode.rocc_illegal ||
id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) ||
id_system_insn && csr.io.decode.system_illegal
(id_sfence || id_system_insn) && csr.io.decode.system_illegal
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(0)(26)
val id_amo_rl = id_inst(0)(25)
@@ -297,6 +301,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
}
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
ex_reg_load_use := id_load_use
when (id_sfence) {
ex_ctrl.mem_type := Cat(id_raddr2 =/= UInt(0), id_raddr1 =/= UInt(0))
when (wb_reg_sfence_done) { ex_ctrl.mem := false }
}
when (id_ctrl.jalr && csr.io.status.debug) {
ex_reg_flush_pipe := true
@@ -340,9 +348,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
Mux(mem_reg_rvc, SInt(2), SInt(4))))
val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1)
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
@@ -361,6 +369,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
mem_reg_rvc := ex_reg_rvc
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
mem_reg_sfence := Bool(usingVM) && ex_ctrl.mem && ex_ctrl.mem_cmd === M_SFENCE
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
@@ -391,7 +400,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem || mem_reg_valid && mem_reg_sfence
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
@@ -403,8 +412,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_pc_valid) {
wb_ctrl := mem_ctrl
wb_reg_sfence := mem_reg_sfence
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) {
when (mem_ctrl.rocc || mem_reg_sfence) {
wb_reg_rs2 := mem_reg_rs2
}
wb_reg_inst := mem_reg_inst
@@ -448,6 +458,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
}
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
when (wb_valid || wb_xcpt) { wb_reg_sfence_done := false }
when (io.imem.sfence.valid) { wb_reg_sfence_done := true }
val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
@@ -470,7 +482,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
csr.io.pc := wb_reg_pc
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
@@ -542,7 +553,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction
id_npc))) // JAL
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
io.imem.flush_tlb := csr.io.fatc
io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
io.imem.sfence.bits.rs2 := wb_ctrl.mem_type(1)
io.imem.sfence.bits.asid := wb_reg_rs2
io.ptw.invalidate := io.imem.sfence.valid && !io.imem.sfence.bits.rs1
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt


@@ -16,11 +16,18 @@ case object PAddrBits extends Field[Int]
case object PgLevels extends Field[Int]
case object ASIdBits extends Field[Int]
class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
val rs1 = Bool()
val rs2 = Bool()
val asid = UInt(width = asIdBits max 1) // TODO zero-width
}
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
val vaddr = UInt(width = vaddrBitsExtended)
val passthrough = Bool()
val instruction = Bool()
val store = Bool()
val sfence = Valid(new SFenceReq)
}
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
@@ -63,6 +70,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits)
val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
val do_refill = Bool(usingVM) && io.ptw.resp.valid
val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate)
val mpu_ppn = Mux(do_refill, refill_ppn,
Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0)))
val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
@@ -76,7 +84,6 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
val isSpecial = {
val homogeneous = Wire(init = false.B)
for (i <- 0 until pgLevels) {
println(BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))
when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous }
}
!homogeneous
@@ -108,7 +115,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
val sr_array = Reg(UInt(width = totalEntries)) // read permission
val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page
val cash_array = Reg(UInt(width = normalEntries)) // cacheable
when (do_refill) {
when (do_refill && !invalidate_refill) {
val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr)
val pte = io.ptw.resp.bits.pte
ppns(waddr) := pte.ppn
@@ -138,7 +145,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
if (vpnBits == vpnBitsExtended) Bool(false)
else vpn(vpnBits) =/= vpn(vpnBits-1)
val tlb_hit = hits(totalEntries-1, 0).orR
val tlb_miss = vm_enabled && !bad_va && !tlb_hit
val tlb_miss = vm_enabled && !bad_va && !tlb_hit && !io.req.bits.sfence.valid
when (io.req.valid && !tlb_miss && !hits(specialEntry)) {
plru.access(OHToUInt(hits(normalEntries-1, 0)))
@@ -166,6 +173,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
io.ptw.req.bits.fetch := r_req.instruction
if (usingVM) {
val sfence = io.req.valid && io.req.bits.sfence.valid
when (io.req.fire() && tlb_miss) {
state := s_request
r_refill_tag := lookup_tag
@@ -173,22 +181,20 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
r_req := io.req.bits
}
when (state === s_request) {
when (io.ptw.invalidate) {
state := s_ready
when (sfence) { state := s_ready }
when (io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) }
}
when (io.ptw.req.ready) {
state := s_wait
when (io.ptw.invalidate) { state := s_wait_invalidate }
}
}
when (state === s_wait && io.ptw.invalidate) {
when (state === s_wait && sfence) {
state := s_wait_invalidate
}
when (io.ptw.resp.valid) {
state := s_ready
}
when (io.ptw.invalidate || multipleHits) {
when (sfence && io.req.bits.sfence.bits.rs1) {
valid := valid & ~hits(totalEntries-1, 0)
}
when (sfence && !io.req.bits.sfence.bits.rs1 || multipleHits) {
valid := 0
}
}


@@ -29,6 +29,7 @@ trait MemoryOpConstants {
def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
def M_SFENCE = UInt("b10100") // flush TLB
def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW